Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

294 строки
8.9KB

  1. #!/bin/python3
  2. import re
  3. import argparse
  4. import logging
  5. import asyncio
  6. import subprocess
  7. import copy
  8. import aiohttp
  9. from pathlib import Path
  10. from xml.etree import ElementTree as ET
  11. ET.register_namespace('', 'http://maven.apache.org/POM/4.0.0')
  12. baseurl = 'https://search.maven.org'
  13. base_pom_path = Path('poms')
  14. done: set[str] = set()
  15. done_lock = asyncio.Lock()
  16. num_workers = 50
  17. class PackagePOM:
  18. _dependencyManagement: list['Package'] = None
  19. def __init__(self, package: 'Package', pom: str):
  20. logger.debug(f'{package}: Parsing POM')
  21. self.raw_root = ET.fromstring(pom)
  22. packaging = self.raw_root.find('packaging')
  23. self.is_bom = True if packaging is not None and packaging.text == 'pom' else False
  24. if self.is_bom:
  25. root_copy = copy.deepcopy(self.raw_root)
  26. depman = root_copy.find('dependencyManagement')
  27. root_copy.extend(depman.findall('*'))
  28. root_copy.remove(depman)
  29. self.generated_root = root_copy
  30. else:
  31. self.generated_root = ET.fromstring(
  32. f"""
  33. <project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
  34. https://maven.apache.org/xsd/maven-4.0.0.xsd"
  35. xmlns="http://maven.apache.org/POM/4.0.0"
  36. xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  37. <modelVersion>4.0.0</modelVersion>
  38. <groupId>tmp.{package.groupId}</groupId>
  39. <artifactId>placeholder-{package.artifactId}</artifactId>
  40. <version>{package.version}</version>
  41. <name>Package {package.artifactId}</name>
  42. <dependencies>
  43. <dependency>
  44. <groupId>{package.groupId}</groupId>
  45. <artifactId>{package.artifactId}</artifactId>
  46. <version>{package.version}</version>
  47. </dependency>
  48. </dependencies>
  49. </project>
  50. """
  51. )
  52. def write(self, f):
  53. tree = ET.ElementTree(self.generated_root)
  54. ET.indent(tree)
  55. tree.write(f)
  56. def get_property(self, prop: str):
  57. elem = self.raw_root.find(f'.//properties/{prop}')
  58. import pdb; pdb.set_trace()
  59. if elem is not None:
  60. return elem.text
  61. else:
  62. return None
  63. @property
  64. def dependencyManagement(self) -> list['Package']:
  65. if self._dependencyManagement is not None:
  66. return self._dependencyManagement
  67. self._dependencyManagement = []
  68. def prop_replace(match):
  69. prop = match.group(1)
  70. value = self.get_property(match.group(1))
  71. logger.debug(f'Replacing property {prop} with {value}')
  72. return value
  73. for dep in self.raw_root.find('dependencyManagement/dependencies') or []:
  74. package = Package(
  75. *[
  76. re.sub(
  77. r'\$\{([^\}]*)\}',
  78. prop_replace,
  79. dep.find(tag).text,
  80. )
  81. for tag in [
  82. 'groupId',
  83. 'artifactId',
  84. 'version',
  85. ]
  86. ]
  87. )
  88. self._dependencyManagement.append(package)
  89. return self._dependencyManagement
  90. class Package:
  91. _pom: PackagePOM = None
  92. _verified: bool = False
  93. def __init__(self, groupId: str, artifactId: str, version: str = None):
  94. self.groupId = groupId
  95. self.artifactId = artifactId
  96. self.version = version
  97. def __str__(self) -> str:
  98. return f'{self.groupId}:{self.artifactId}:{self.version or "----"}'
  99. def __eq__(self, other) -> bool:
  100. return (
  101. self.groupId == other.groupId
  102. and self.artifactId == other.artifactId
  103. and self.version == other.version
  104. )
  105. def __hash__(self) -> str:
  106. return hash((self.groupId, self.artifactId, self.version))
  107. @property
  108. async def pom(self) -> ET:
  109. if self._pom is not None:
  110. return self._pom
  111. if self.version is None:
  112. self._query_maven()
  113. group_path = self.groupId.replace(".", "/")
  114. pom_path = f'{self.artifactId}-{self.version}.pom'
  115. filepath = f'{group_path}/{self.artifactId}/{self.version}/{pom_path}'
  116. pom_url = f'{baseurl}/remotecontent?filepath={filepath}'
  117. logger.debug(f'{self}: Downloading pom from {pom_url}')
  118. async with aiohttp.ClientSession() as session:
  119. async with session.get(pom_url) as response:
  120. if response.status == 200:
  121. logger.debug(f'{self}: POM downloaded')
  122. self._pom = PackagePOM(self, await response.text())
  123. else:
  124. logger.warning(f'{self}: HTTP error {response.status} downloading pom')
  125. return self._pom
  126. @property
  127. def _urlquery(self) -> str:
  128. q = f'g:{self.groupId}+AND+a:{self.artifactId}'
  129. if self.version is not None:
  130. q += f'+AND+v:{self.version}'
  131. return q
  132. async def _query_maven(self) -> None:
  133. url = f'{baseurl}/solrsearch/select?q={self._urlquery}&rows=1&wt=json'
  134. logger.debug(f'{self}: Querying maven at url {url}')
  135. async with aiohttp.ClientSession() as session:
  136. async with session.get(url) as response:
  137. if response.status == 200:
  138. message = await response.json()
  139. num = message['response']['numFound']
  140. if num:
  141. logger.debug(f'{self}: Query successful')
  142. self._verified = True
  143. if self.version is None:
  144. version = message['response']['docs'][0]['latestVersion']
  145. logger.debug(f'{self}: Using newest version {version}')
  146. self.version = version
  147. else:
  148. logger.warning(f'{self}: No matching packages found')
  149. self._verified = False
  150. else:
  151. self._verified = False
  152. logger.warning(f'{self}: HTTP error {response.status} downloading pom')
  153. async def verify(self) -> bool:
  154. if not self._verified:
  155. await self._query_maven()
  156. return self._verified
  157. def load_package_list(list_path: Path, queue: asyncio.Queue) -> None:
  158. logger.info(f'Parsing {list_path}')
  159. with list_path.open('r') as f:
  160. for line in f.readlines():
  161. sections = line.strip().split(':')
  162. if len(sections) < 2 or len(sections) > 3:
  163. logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"')
  164. continue
  165. package = Package(
  166. sections[0],
  167. sections[1],
  168. sections[2] if len(sections) == 3 else None,
  169. )
  170. queue.put_nowait(package)
  171. async def download(package: Package, queue: asyncio.Queue) -> None:
  172. async with done_lock:
  173. skip = str(package) in done
  174. if skip:
  175. logger.info(f'{package}: Already downloaded. Skipping.')
  176. elif await package.verify():
  177. async with done_lock:
  178. done.add(str(package))
  179. pom_dir = base_pom_path / str(package)
  180. pom_path = pom_dir / 'pom.xml'
  181. pom_dir.mkdir(exist_ok=True)
  182. pom = await package.pom
  183. if not pom:
  184. return
  185. pom.write(pom_path)
  186. logger.info(f'{package}: Downloaded')
  187. if not pom.is_bom:
  188. for dep in pom.dependencyManagement:
  189. logger.info(f'{package}: Handling transitive dependency {dep}')
  190. await queue.put(dep)
  191. else:
  192. logger.warning(f'{package}: Package not found. Check package name and internet connection')
  193. async def worker(queue: asyncio.Queue) -> None:
  194. while True:
  195. package = await queue.get()
  196. await download(package, queue)
  197. queue.task_done()
  198. async def main() -> None:
  199. queue = asyncio.Queue()
  200. tasks = []
  201. load_package_list(Path('package-list.txt'), queue)
  202. for i in range(num_workers):
  203. tasks.append(
  204. asyncio.create_task(
  205. worker(queue)
  206. )
  207. )
  208. await queue.join()
  209. for task in tasks:
  210. task.cancel()
  211. await asyncio.gather(*tasks, return_exceptions=True)
  212. subprocess.call(['sh', 'generate_master_pom.sh'])
  213. logger = logging.getLogger(__name__)
  214. if __name__ == '__main__':
  215. parser = argparse.ArgumentParser()
  216. parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
  217. args = parser.parse_args()
  218. if args.verbosity == 0:
  219. log_level = 'WARNING'
  220. elif args.verbosity == 1:
  221. log_level = 'INFO'
  222. else:
  223. log_level = 'DEBUG'
  224. logging.basicConfig(level=log_level)
  225. asyncio.run(main())