Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

535 wierszy
18KB

  1. #!/bin/python3
  2. import re
  3. import copy
  4. import random
  5. import argparse
  6. import logging
  7. import asyncio
  8. import subprocess
  9. import copy
  10. import aiohttp
  11. from pathlib import Path
  12. from xml.etree import ElementTree as ET
  # Default-namespace map handed to ElementTree lookups on POM documents.
  ns = {'': 'http://maven.apache.org/POM/4.0.0'}
  ET.register_namespace('', ns[''])

  baseurl = 'https://search.maven.org'
  base_pom_path = Path('poms')

  # Repository roots, tried in order when fetching metadata and files.
  mirrors = [
      "https://repo.maven.apache.org/maven2",
      "https://repo1.maven.org/maven2",
      "https://oss.sonatype.org/content/repositories/snapshots",
      "https://packages.confluent.io/maven",
      "https://registry.quarkus.io/maven",
      "https://plugins.gradle.org/m2",
  ]

  # Shared bookkeeping, keyed by "groupId:artifactId:version" strings.
  # Each set has a companion lock that the coroutines take around access.
  done: set[str] = set()
  done_lock = asyncio.Lock()

  in_progress: set[str] = set()
  in_progress_lock = asyncio.Lock()

  gradle_packages: set[str] = set()
  gradle_packages_lock = asyncio.Lock()

  # Default worker count; the command line can override it.
  num_workers = 50

  # Resolved POM properties per package, keyed by str(package).
  global_properties: dict[str, dict[str, str]] = {}
  class TooManyRequestsException(Exception):
      """Raised when a mirror answers with HTTP status 429."""
  class PackageError(Exception):
      """Raised when a package's POM cannot be processed."""
  class WaitForPackage(Exception):
      """Signals that another package has to be processed first.

      Attributes:
          package: the package that must be handled before the current one.
      """

      def __init__(self, package):
          # Give the exception a message so it is readable when logged or
          # left unhandled; the original left it empty.
          super().__init__(f'waiting for package {package}')
          self.package = package
  class PackagePOM:
      """Parsed POM of a package: parent link, properties, packaging, dependencies.

      Constructing an instance resolves the effective property map (parent
      properties overlaid with this POM's own) and stores it in
      ``global_properties`` under ``str(package)``.
      """

      # Upper bound on substitution passes in resolve_props(). A property that
      # refers to itself would otherwise keep changing (and growing) forever.
      _MAX_PROPERTY_PASSES = 20

      def __init__(self, package: 'Package', pom: str):
          """Parse *pom* (XML text) on behalf of *package*.

          Raises:
              WaitForPackage: the POM names a parent that is not in ``done`` yet.
              PackageError: the parent declaration lacks groupId, artifactId or
                  version, or the parent is marked done but has no entry in
                  ``global_properties``.
          """
          self._package = package
          logger.debug(f'{package}: Parsing POM')
          self.raw_root = ET.fromstring(pom)

          self.parent: 'Package | None' = None
          parent_tag = self.raw_root.find('parent', ns)
          if parent_tag is not None:

              def parent_field(name):
                  tag = parent_tag.find(name, ns)
                  return None if tag is None else tag.text

              parent_group = parent_field('groupId')
              parent_artifact = parent_field('artifactId')
              parent_version = parent_field('version')
              logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}')
              if parent_group is None or parent_artifact is None or parent_version is None:
                  raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}')
              parent = Package(parent_group, parent_artifact, parent_version)
              if str(parent) not in done:
                  raise WaitForPackage(parent)
              self.parent = parent

          logger.debug(f'{package}: Parsing properties')
          if self.parent is None:
              parent_props: dict[str, str] = {}
          else:
              try:
                  parent_props = global_properties[str(self.parent)]
              except KeyError:
                  # The parent is in `done` but never published properties.
                  raise PackageError(f'No properties recorded for parent {self.parent}') from None
          self.properties = self.resolve_props(parent_props)
          global_properties[str(package)] = self.properties

          logger.debug(f'{package}: Parsing packaging')
          packaging = self.raw_root.find('packaging', ns)
          self.packaging = packaging.text if packaging is not None else '??'
          self.is_bom = self.packaging == 'pom'

          self.gradle_packages = [str(package)]
          if self.is_bom:
              # The lookups must carry the namespace map: POM elements live in
              # the Maven namespace, so unqualified names never match them.
              # The element is compared against None explicitly because a
              # childless Element is falsy.
              dependencies = self.raw_root.find('dependencies', ns)
              dep_tags = [] if dependencies is None else dependencies.findall('dependency', ns)
              for dep in dep_tags:
                  dep_package = self._package_from_xml_dep(dep)
                  # Only fully specified coordinates can be written as
                  # "group:artifact:version".
                  if dep_package.groupId and dep_package.artifactId and dep_package.version:
                      self.gradle_packages.append(str(dep_package))
                  else:
                      logger.debug(f'{package}: Skipping incomplete dependency {dep_package}')
          logger.debug(f'{package}: POM parsed')

      def resolve_props(self, initial: dict[str, str]):
          """Return *initial* overlaid with this POM's properties, with ``${...}`` expanded.

          *initial* is copied, never modified: it is typically the parent's
          property map stored in ``global_properties``, which other packages
          read too.
          """
          props = dict(initial)
          namespace_prefix = f'{{{ns[""]}}}'
          for prop_tag in self.raw_root.findall('.//properties/*', ns):
              prop = prop_tag.tag.replace(namespace_prefix, '')
              value = prop_tag.text if prop_tag.text is not None else ''
              logger.debug(f'{self._package}: Setting prop {prop}={value}')
              props[prop] = value

          # Repeat substitution until nothing changes, so references to
          # properties that themselves contain references get expanded.
          for _ in range(self._MAX_PROPERTY_PASSES):
              changed = False
              for prop, value in props.items():
                  new_value = self.prop_replace(value, props)
                  if new_value != value:
                      changed = True
                      logger.debug(f'{self._package}: Setting prop {prop}={new_value}')
                      props[prop] = new_value
              if not changed:
                  break
          else:
              logger.error(
                  f'{self._package}: Properties still changing after '
                  f'{self._MAX_PROPERTY_PASSES} passes; giving up'
              )
          return props

      def prop_replace(self, text, props: dict[str, str] | None = None) -> str:
          """Expand every ``${name}`` placeholder in *text*.

          ``project.groupId``/``artifactId``/``version`` come from the package;
          build, environment and similar placeholders become empty strings.
          Everything else is looked up in *props*, or in ``self.properties``
          when *props* is None. Unknown names are logged and become ''.
          """

          def lookup_prop(match) -> str:
              prop = match.group(1)
              if prop == 'project.groupId':
                  value = str(self._package.groupId)
              elif prop == 'project.artifactId':
                  value = str(self._package.artifactId)
              elif prop == 'project.version':
                  value = str(self._package.version)
              elif prop.startswith(('project.build', 'env.', 'maven.')):
                  value = ''
              elif prop in ('project.basedir', 'basedir', 'user.home', 'debug.port'):
                  value = ''
              else:
                  source = props if props is not None else self.properties
                  try:
                      value = source[prop]
                  except KeyError:
                      logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""')
                      value = ''
              logger.debug(f'{self._package}: Replacing property {prop} with {value}')
              return value

          return re.sub(r'\$\{([^\}]*)\}', lookup_prop, text)

      def _package_from_xml_dep(self, dep: ET.Element) -> 'Package':
          """Build a Package from a ``<dependency>`` element, expanding properties."""

          def prop_replace_tag(tag) -> str:
              elem = dep.find(tag, ns)
              raw = '' if elem is None else (elem.text or '')
              return self.prop_replace(raw)

          return Package(
              groupId=prop_replace_tag('groupId'),
              artifactId=prop_replace_tag('artifactId'),
              version=prop_replace_tag('version'),
          )

      @property
      def dependency_management(self) -> list['Package']:
          """Packages listed under ``dependencyManagement/dependencies``."""
          managed = self.raw_root.find('dependencyManagement/dependencies', ns)
          if managed is None:
              return []
          return [self._package_from_xml_dep(dep) for dep in managed]
  147. class Package:
  148. _pom: PackagePOM | None = None
  149. _verified: bool = False
  150. def __init__(self, groupId: str, artifactId: str, version: str | None = None, implicit: bool = False):
  151. self.groupId = groupId
  152. self.artifactId = artifactId
  153. self.version = version if version and not version.isspace() else None
  154. self.implicit = implicit
  155. def __str__(self) -> str:
  156. return f'{self.groupId}:{self.artifactId}:{self.version or "----"}'
  157. def __eq__(self, other) -> bool:
  158. return (
  159. self.groupId == other.groupId
  160. and self.artifactId == other.artifactId
  161. and self.version == other.version
  162. )
  163. def __hash__(self) -> int:
  164. return hash((self.groupId, self.artifactId, self.version))
  165. @property
  166. def dir_path(self):
  167. group_path = self.groupId.replace(".", "/")
  168. return f'{group_path}/{self.artifactId}/{self.version}'
  169. @property
  170. def base_filename(self):
  171. return f'{self.artifactId}-{self.version}'
  172. async def download_file(self, extension):
  173. filepath = f'{self.dir_path}/{self.base_filename}.{extension}'
  174. async with aiohttp.ClientSession() as session:
  175. for mirror in mirrors:
  176. pom_url = f'{mirror}/{filepath}'
  177. logger.debug(f'{self}: Downloading {extension} from {pom_url}')
  178. async with session.get(pom_url) as response:
  179. if response.status == 200:
  180. logger.debug(f'{self}: {extension} downloaded')
  181. return await response.text()
  182. break
  183. elif response.status == 429:
  184. raise TooManyRequestsException()
  185. else:
  186. logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}')
  187. else:
  188. logger.warning(f'{self}: File download of {extension} failed for all mirrors')
  189. return None
  190. @property
  191. async def pom(self) -> PackagePOM:
  192. if self._pom is not None:
  193. return self._pom
  194. if self.version is None:
  195. await self._query_maven()
  196. self._pom = PackagePOM(self, await self.download_file('pom'))
  197. return self._pom
  198. @property
  199. def _urlquery(self) -> str:
  200. q = f'g:{self.groupId}+AND+a:{self.artifactId}'
  201. if self.version is not None:
  202. q += f'+AND+v:{self.version}'
  203. return q
  204. async def _query_maven(self) -> None:
  205. self._verified = False
  206. async with aiohttp.ClientSession() as session:
  207. for mirror in mirrors:
  208. url = f'{mirror}/{self.groupId.replace(".", "/")}/{self.artifactId}/maven-metadata.xml'
  209. logger.debug(f'{self}: Querying maven at url {url}')
  210. async with session.get(url) as response:
  211. if response.status == 200:
  212. response_text = await response.text()
  213. metadata = ET.fromstring(response_text)
  214. if metadata is not None:
  215. logger.debug(f'{self}: Metadata found')
  216. if self.version is None:
  217. release_tag = metadata.find('./versioning/release')
  218. latest_tag = metadata.find('./versioning/latest')
  219. version = release_tag.text if release_tag is not None else latest_tag.text if latest_tag is not None else None
  220. if version is not None:
  221. logger.debug(f'{self}: Using newest version {version}')
  222. self.version = version
  223. self._verified = True
  224. return
  225. else:
  226. logger.info(f'{self}: Could not find latest version in metadata from mirror {mirror}')
  227. else:
  228. if metadata.find(f'./versioning/versions/version[.="{self.version}"]') is not None:
  229. logger.debug(f'{self}: Version {self.version} is valid')
  230. self._verified = True
  231. return
  232. else:
  233. logger.info(f'{self}: Could not find version {self.version} in metadata from mirror {mirror}')
  234. else:
  235. logger.warning('{self}: Invalid XML for maven metadata: {response_text}')
  236. elif response.status == 429:
  237. raise TooManyRequestsException()
  238. else:
  239. logger.info(f'{self}: HTTP error {response.status} downloading maven metadata from {url}')
  240. else:
  241. if self.implicit:
  242. logger.info(f'{self}: Package not found in any mirror')
  243. else:
  244. logger.warning(f'{self}: Package not found in any mirror')
  245. async def verify(self) -> bool:
  246. if not self._verified:
  247. await self._query_maven()
  248. return self._verified
  def load_package_list(list_path: Path, queue: asyncio.Queue, *, include_jvm_variants: bool = False) -> None:
      """Read package coordinates from *list_path* and put them on *queue*.

      Each non-blank line must be ``groupID:artifactID`` or
      ``groupID:artifactID:version``; other lines are reported and skipped.

      Args:
          list_path: text file with one package per line.
          queue: queue that receives a ``Package`` per valid line.
          include_jvm_variants: also enqueue an implicit ``<artifactId>-jvm``
              package for every entry not already ending in ``-jvm``. Off by
              default, matching the previous behaviour where this code was
              unreachable behind an unconditional ``continue``.
      """
      logger.info(f'Parsing {list_path}')
      with list_path.open('r') as f:
          for raw_line in f:
              line = raw_line.strip()
              if not line:
                  # Blank lines are separators, not malformed entries.
                  continue
              sections = line.split(':')
              if len(sections) < 2 or len(sections) > 3:
                  logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"')
                  continue
              package = Package(
                  sections[0],
                  sections[1],
                  sections[2] if len(sections) == 3 else None,
              )
              queue.put_nowait(package)
              if include_jvm_variants and not package.artifactId.endswith('-jvm'):
                  queue.put_nowait(
                      Package(
                          package.groupId,
                          f'{package.artifactId}-jvm',
                          package.version,
                          True,
                      )
                  )
  async def create_gradle_script() -> str:
      """Render the Gradle Kotlin script listing every entry of ``gradle_packages``.

      The entries are emitted sorted, each as a quoted string inside
      ``listOf(...)``. ``gradle_packages_lock`` is held while the set is read.
      """
      head = (
          '// Generated, do not edit',
          'plugins {',
          'kotlin("jvm") version "1.7.20"',
          '}',
          'repositories {',
          'maven {',
          'url=uri("http://localhost:9001/releases")',
          'isAllowInsecureProtocol=true',
          '}',
          '}',
          'val deps = listOf(',
      )
      tail = (
          ').map {',
          "configurations.create(it.replace(':', '_')) to it",
          '}',
          'dependencies {',
          'deps.forEach { (conf, dep) ->',
          'conf(dep)',
          '}',
          '}',
          'tasks.register("downloadDependencies") {',
          'doLast {',
          'deps.forEach { (conf, dep) ->',
          'conf.files.forEach { file ->',
          'copy {',
          'from(file)',
          'into("data/")',
          '}',
          '}',
          '}',
          '}',
          '}',
      )
      async with gradle_packages_lock:
          quoted_deps = ',\n '.join(f'"{dep}"' for dep in sorted(gradle_packages))
      return '\n'.join(head) + '\n' + quoted_deps + '\n' + '\n'.join(tail) + '\n'
  _MAX_ATTEMPTS = 50


  async def _retry_rate_limited(package, attempt, what):
      """Await ``attempt()`` again, after a random pause, while it raises TooManyRequestsException.

      Returns the first result obtained. Exits the program with status 1 when
      ``_MAX_ATTEMPTS`` attempts were all rate limited. Other exceptions
      propagate unchanged.
      """
      for _ in range(_MAX_ATTEMPTS):
          try:
              return await attempt()
          except TooManyRequestsException:
              logger.info(f'{package}: Too many requests. Delaying next attempt')
              await asyncio.sleep(3 * random.random() + 0.2)
      logger.error(f'{package}: {what} failed after {_MAX_ATTEMPTS} tries')
      # SystemExit directly instead of exit(): the latter is only injected by
      # the site module.
      raise SystemExit(1)


  async def download(package: Package, queue: asyncio.Queue) -> None:
      """Process one package: verify it, parse its POM, record the results.

      Packages already in ``done`` or ``in_progress`` are skipped. Otherwise
      the package is claimed in ``in_progress``, verified against the mirrors
      and its POM parsed. On success its ``gradle_packages`` are added to the
      global set, the ``dependency_management`` entries of non-``pom``
      packages are queued, and the package is added to ``done`` both with and
      without its version. If the POM needs a parent that is not done yet,
      the parent (unless in progress) and this package are put back on
      *queue*.

      The claim is released on every exit path, including exceptions.
      """
      # str(package) changes once verify() fills in a missing version, so the
      # key under which the claim was made is remembered.
      claim_key = str(package)

      async with done_lock:
          is_done = claim_key in done
      if is_done:
          logger.info(f'{package}: Already downloaded. Skipping.')
          return

      # Check and claim inside one critical section so that two workers cannot
      # both claim the same package.
      async with in_progress_lock:
          is_in_progress = claim_key in in_progress
          if not is_in_progress:
              in_progress.add(claim_key)
      if is_in_progress:
          logger.info(f'{package}: Already in progress. Skipping.')
          return

      holds_claim = True
      try:
          verified = await _retry_rate_limited(package, package.verify, 'Verification')
          if not verified:
              return

          try:
              pom = await _retry_rate_limited(package, lambda: package.pom, 'POM parsing')
          except WaitForPackage as e:
              logger.info(f'{package}: Waiting for {e.package}')
              # NOTE(review): if the parent can never be completed, this
              # package appears to be re-queued indefinitely - confirm whether
              # a retry limit is wanted.
              async with in_progress_lock:
                  in_progress.discard(claim_key)
                  holds_claim = False
                  if str(e.package) not in in_progress:
                      await queue.put(e.package)
                  await queue.put(package)
              return

          if not pom:
              logger.warning(f'{package}: No pom')
              return

          async with gradle_packages_lock:
              gradle_packages.update(pom.gradle_packages)

          if not pom.is_bom:
              for dep in pom.dependency_management:
                  logger.info(f'{package}: Handling transitive dependency {dep}')
                  await queue.put(dep)

          async with done_lock:
              logger.debug(f'{package}: Marking done')
              # Also record the version-less form, so a later request without
              # a version is treated as done.
              versionless = copy.copy(package)
              versionless.version = None
              done.add(str(package))
              done.add(str(versionless))
      finally:
          if holds_claim:
              async with in_progress_lock:
                  if claim_key in in_progress:
                      in_progress.remove(claim_key)
                  else:
                      logger.warning(f'{package}: Package is done, but not marked as in progress')
  async def worker(queue: asyncio.Queue) -> None:
      """Take packages from *queue* forever and run ``download`` on each.

      Any ``Exception`` from ``download`` is logged with its traceback and the
      package is dropped; the queue item is marked done either way.
      """
      while True:
          package = await queue.get()
          try:
              await download(package, queue)
          except PackageError:
              logger.exception(f'{package}: Error while processing package')
          except Exception:
              logger.exception(f'{package}: Unknown error while processing package')
          queue.task_done()
  async def main() -> None:
      """Crawl every package from ``package-list.txt`` and write ``test/build.gradle.kts``.

      Starts ``num_workers`` worker tasks, waits until the queue has been
      fully processed, cancels the workers and writes the generated script.
      """
      queue: asyncio.Queue = asyncio.Queue()
      load_package_list(Path('package-list.txt'), queue)

      logger.debug(f'Starting {num_workers} workers')
      tasks = [asyncio.create_task(worker(queue)) for _ in range(num_workers)]

      await queue.join()
      logger.debug('Queue is empty. Cancelling workers')
      for task in tasks:
          task.cancel()
      # return_exceptions=True collects the workers' CancelledError results
      # instead of raising them here.
      await asyncio.gather(*tasks, return_exceptions=True)

      logger.info('Generating build.gradle.kts')
      gradle_script = await create_gradle_script()
      output_path = Path("test/build.gradle.kts")
      # Create the target directory if needed, so the crawl's result is not
      # lost to a missing directory.
      output_path.parent.mkdir(parents=True, exist_ok=True)
      output_path.write_text(gradle_script)
  logger = logging.getLogger(__name__)

  if __name__ == '__main__':
      parser = argparse.ArgumentParser()
      parser.add_argument('-w', '--workers', type=int, default=num_workers)
      parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
      args = parser.parse_args()

      # With no workers nothing consumes the queue and main() would wait on
      # queue.join() forever.
      if args.workers < 1:
          parser.error('--workers must be at least 1')

      # No -v: warnings only; -v: info; -vv or more: debug.
      if args.verbosity == 0:
          log_level = 'WARNING'
      elif args.verbosity == 1:
          log_level = 'INFO'
      else:
          log_level = 'DEBUG'
      logging.basicConfig(level=log_level)

      num_workers = args.workers
      asyncio.run(main())
  422. asyncio.run(main())