Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

578 lines
20KB

  1. #!/bin/python3
  2. import re
  3. import copy
  4. import random
  5. import argparse
  6. import logging
  7. import asyncio
  8. import subprocess
  9. import copy
  10. import aiohttp
  11. from pathlib import Path
  12. from xml.etree import ElementTree as ET
# Default XML namespace of Maven POM documents. It is passed to the
# ElementTree find()/findall() calls in this file so POM tags can be
# addressed without a prefix.
ns = {'': 'http://maven.apache.org/POM/4.0.0'}
ET.register_namespace('', ns[''])
# NOTE(review): baseurl and base_pom_path are not referenced anywhere in the
# visible part of this file - confirm whether they are still needed.
baseurl = 'https://search.maven.org'
base_pom_path = Path('poms')
# Repository base URLs; downloads and metadata queries try them in this order.
mirrors = [
    "https://repo.maven.apache.org/maven2",
    "https://repo1.maven.org/maven2",
    "https://oss.sonatype.org/content/repositories/snapshots",
    "https://packages.confluent.io/maven",
    "https://registry.quarkus.io/maven",
    "https://plugins.gradle.org/m2",
]
# str(package) keys of packages that download() has finished with.
done: set[str] = set()
done_lock = asyncio.Lock()
# str(package) keys of packages a worker is currently handling.
in_progress: set[str] = set()
in_progress_lock = asyncio.Lock()
# Coordinates collected from parsed POMs; main() writes them to build.gradle.kts.
gradle_packages: set[str] = set()
gradle_packages_lock = asyncio.Lock()
# Resolved POM properties per str(package); read when a child POM inherits
# properties from its parent.
global_properties: dict[str, dict[str, str]] = {}
  32. class TooManyRequestsException(Exception):
  33. pass
  34. class PackageError(Exception):
  35. pass
  36. class WaitForPackage(Exception):
  37. def __init__(self, package):
  38. self.package = package
  39. def find_tag_text(parent, tag) -> str | None:
  40. elem = parent.find(tag, ns)
  41. return elem.text if elem is not None else None
  42. class PackagePOM:
  43. def __init__(self, package: 'Package', pom: str):
  44. self._package = package
  45. logger.debug(f'{package}: Parsing POM')
  46. self.raw_root = ET.fromstring(pom)
  47. self.parent: Package | None = None
  48. if (parent_tag := self.raw_root.find('parent', ns)) is not None:
  49. parent_group = find_tag_text(parent_tag, 'groupId')
  50. parent_artifact = find_tag_text(parent_tag, 'artifactId')
  51. parent_version = find_tag_text(parent_tag, 'version')
  52. logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}')
  53. if parent_group is not None and parent_artifact is not None and parent_version is not None:
  54. parent = Package(
  55. parent_group,
  56. parent_artifact,
  57. parent_version,
  58. )
  59. if str(parent) in done:
  60. self.parent = parent
  61. else:
  62. raise WaitForPackage(parent)
  63. else:
  64. raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}')
  65. logger.debug(f'{package}: Parsing properties')
  66. parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)]
  67. self.properties = self.resolve_props(parent_props)
  68. global_properties[str(package)] = self.properties
  69. logger.debug(f'{package}: Parsing packaging')
  70. if (packaging := self.raw_root.find('packaging', ns)) is not None:
  71. self.packaging = packaging.text
  72. else:
  73. self.packaging = '??'
  74. self.is_bom = self.packaging == 'pom'
  75. self.gradle_packages = [str(package)]
  76. if self.is_bom:
  77. logger.debug(f'{package}: Parsing dependencyManagement')
  78. if (dependencyManagement := self.raw_root.find('dependencyManagement', ns)):
  79. if (dependencies := dependencyManagement.find('dependencies', ns)):
  80. packages = []
  81. for dep in dependencies.findall('dependency', ns):
  82. groupId = find_tag_text(dep, 'groupId')
  83. artifactId = find_tag_text(dep, 'artifactId')
  84. version = find_tag_text(dep, 'version')
  85. if groupId is not None and artifactId is not None and version is not None:
  86. groupId = self.prop_replace(groupId)
  87. artifactId = self.prop_replace(artifactId)
  88. version = self.prop_replace(version)
  89. packages.append(f'{groupId}:{artifactId}:{version}')
  90. logger.debug(f'{package}: Adding {len(packages)} package(s) from dependencyManagement')
  91. self.gradle_packages.extend(packages)
  92. else:
  93. logger.warn(f'{package}: dependencyManagement has no dependencies')
  94. else:
  95. logger.warn(f'{package}: BOM has no dependencyManagement')
  96. logger.debug(f'{package}: POM parsed')
  97. def resolve_props(self, initial: dict[str, str]):
  98. props = initial
  99. for prop_tag in self.raw_root.findall('.//properties/*', ns):
  100. prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '')
  101. value = prop_tag.text if prop_tag.text is not None else ''
  102. logger.debug(f'{self._package}: Setting prop {prop}={value}')
  103. props[prop] = value
  104. changed = True
  105. while changed:
  106. changed = False
  107. for prop, value in props.items():
  108. new_value = self.prop_replace(value, props)
  109. if new_value != value:
  110. changed = True
  111. logger.debug(f'{self._package}: Setting prop {prop}={new_value}')
  112. props[prop] = new_value
  113. return props
  114. def prop_replace(self, text, props: dict[str, str] | None = None) -> str:
  115. def lookup_prop(match) -> str:
  116. prop = match.group(1)
  117. if prop == 'project.groupId':
  118. value = str(self._package.groupId)
  119. elif prop == 'project.artifactId':
  120. value = str(self._package.artifactId)
  121. elif prop == 'project.version':
  122. value = str(self._package.version)
  123. elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'):
  124. value = ''
  125. elif prop in ['project.basedir', 'basedir', 'user.home', 'debug.port']:
  126. value = ''
  127. else:
  128. try:
  129. value = props[prop] if props is not None else self.properties[prop]
  130. except KeyError:
  131. logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""')
  132. value = ''
  133. logger.debug(f'{self._package}: Replacing property {prop} with {value}')
  134. return value
  135. return re.sub(
  136. r'\$\{([^\}]*)\}',
  137. lookup_prop,
  138. text,
  139. )
  140. def _package_from_xml_dep(self, dep: ET.Element) -> 'Package':
  141. def prop_replace_tag(tag) -> str:
  142. return self.prop_replace(
  143. elem.text or '' if (elem := dep.find(tag, ns)) is not None else '',
  144. )
  145. return Package(
  146. groupId=prop_replace_tag('groupId'),
  147. artifactId=prop_replace_tag('artifactId'),
  148. version=prop_replace_tag('version'),
  149. )
  150. @property
  151. def dependency_management(self) -> list['Package']:
  152. dependencies: list[Package] = []
  153. for dep in self.raw_root.find('dependencyManagement/dependencies', ns) or []:
  154. package = self._package_from_xml_dep(dep)
  155. dependencies.append(package)
  156. return dependencies
  157. class Package:
  158. _pom: PackagePOM | None = None
  159. _verified: bool = False
  160. def __init__(self, groupId: str, artifactId: str, version: str | None = None, implicit: bool = False):
  161. self.groupId = groupId
  162. self.artifactId = artifactId
  163. self.version = version if version and not version.isspace() else None
  164. self.implicit = implicit
  165. def __str__(self) -> str:
  166. return f'{self.groupId}:{self.artifactId}:{self.version or "----"}'
  167. def __eq__(self, other) -> bool:
  168. return (
  169. self.groupId == other.groupId
  170. and self.artifactId == other.artifactId
  171. and self.version == other.version
  172. )
  173. def __hash__(self) -> int:
  174. return hash((self.groupId, self.artifactId, self.version))
  175. @property
  176. def dir_path(self):
  177. group_path = self.groupId.replace(".", "/")
  178. return f'{group_path}/{self.artifactId}/{self.version}'
  179. @property
  180. def base_filename(self):
  181. return f'{self.artifactId}-{self.version}'
  182. async def download_file(self, extension):
  183. filepath = f'{self.dir_path}/{self.base_filename}.{extension}'
  184. async with aiohttp.ClientSession() as session:
  185. for mirror in mirrors:
  186. pom_url = f'{mirror}/{filepath}'
  187. logger.debug(f'{self}: Downloading {extension} from {pom_url}')
  188. async with session.get(pom_url) as response:
  189. if response.status == 200:
  190. logger.debug(f'{self}: {extension} downloaded')
  191. return await response.text()
  192. break
  193. elif response.status == 429:
  194. raise TooManyRequestsException()
  195. else:
  196. logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}')
  197. else:
  198. logger.warning(f'{self}: File download of {extension} failed for all mirrors')
  199. return None
  200. @property
  201. async def pom(self) -> PackagePOM:
  202. if self._pom is not None:
  203. return self._pom
  204. if self.version is None:
  205. await self._query_maven()
  206. self._pom = PackagePOM(self, await self.download_file('pom'))
  207. return self._pom
  208. @property
  209. def _urlquery(self) -> str:
  210. q = f'g:{self.groupId}+AND+a:{self.artifactId}'
  211. if self.version is not None:
  212. q += f'+AND+v:{self.version}'
  213. return q
  214. async def _query_maven(self) -> None:
  215. self._verified = False
  216. async with aiohttp.ClientSession() as session:
  217. for mirror in mirrors:
  218. url = f'{mirror}/{self.groupId.replace(".", "/")}/{self.artifactId}/maven-metadata.xml'
  219. logger.debug(f'{self}: Querying maven at url {url}')
  220. async with session.get(url) as response:
  221. if response.status == 200:
  222. response_text = await response.text()
  223. metadata = ET.fromstring(response_text)
  224. if metadata is not None:
  225. logger.debug(f'{self}: Metadata found')
  226. if self.version is None:
  227. release_tag = metadata.find('./versioning/release')
  228. latest_tag = metadata.find('./versioning/latest')
  229. version = release_tag.text if release_tag is not None else latest_tag.text if latest_tag is not None else None
  230. if version is not None:
  231. logger.debug(f'{self}: Using newest version {version}')
  232. self.version = version
  233. self._verified = True
  234. return
  235. else:
  236. logger.info(f'{self}: Could not find latest version in metadata from mirror {mirror}')
  237. else:
  238. if metadata.find(f'./versioning/versions/version[.="{self.version}"]') is not None:
  239. logger.debug(f'{self}: Version {self.version} is valid')
  240. self._verified = True
  241. return
  242. else:
  243. logger.info(f'{self}: Could not find version {self.version} in metadata from mirror {mirror}')
  244. else:
  245. logger.warning('{self}: Invalid XML for maven metadata: {response_text}')
  246. elif response.status == 429:
  247. raise TooManyRequestsException()
  248. else:
  249. logger.info(f'{self}: HTTP error {response.status} downloading maven metadata from {url}')
  250. else:
  251. if self.implicit:
  252. logger.info(f'{self}: Package not found in any mirror')
  253. else:
  254. logger.warning(f'{self}: Package not found in any mirror')
  255. async def verify(self) -> bool:
  256. if not self._verified:
  257. await self._query_maven()
  258. return self._verified
  259. def load_package_list(list_path: Path, queue: asyncio.Queue) -> None:
  260. logger.info(f'Parsing {list_path}')
  261. with list_path.open('r') as f:
  262. for line in f.readlines():
  263. sections = line.strip().split(':')
  264. if len(sections) < 2 or len(sections) > 3:
  265. logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"')
  266. continue
  267. package = Package(
  268. sections[0],
  269. sections[1],
  270. sections[2] if len(sections) == 3 else None,
  271. )
  272. queue.put_nowait(package)
  273. continue
  274. if not package.artifactId.endswith('-jvm'):
  275. queue.put_nowait(
  276. Package(
  277. package.groupId,
  278. f'{package.artifactId}-jvm',
  279. package.version,
  280. True,
  281. )
  282. )
  283. def create_gradle_build(packages, repo) -> str:
  284. return """// Generated, do not edit
  285. plugins {
  286. kotlin("jvm") version "1.7.20"
  287. }
  288. repositories {
  289. maven {
  290. url=uri("http://""" + repo + """/releases")
  291. isAllowInsecureProtocol=true
  292. }
  293. }
  294. val deps = listOf(
  295. """ + ',\n '.join(f'"{dep}"' for dep in sorted(packages)) + """
  296. ).flatMap {
  297. listOf(it, it + ":sources", it + ":javadoc")
  298. }.map {
  299. configurations.create(it.replace(':', '_')) to it
  300. }
  301. dependencies {
  302. deps.forEach { (conf, dep) ->
  303. conf(dep)
  304. }
  305. }
  306. tasks.register("downloadDependencies") {
  307. val logger = getLogger()
  308. doLast {
  309. deps.forEach { (conf, dep) ->
  310. try {
  311. conf.files
  312. } catch (e: Exception) {
  313. if (dep.endsWith(":sources")) {
  314. logger.warn("Package '$dep' has no sources")
  315. } else if (dep.endsWith(":javadoc")) {
  316. logger.warn("Package '$dep' has no javadoc")
  317. } else {
  318. logger.warn("Error while fetching '$dep': $e")
  319. }
  320. }
  321. }
  322. }
  323. }
  324. """
  325. def create_gradle_settings(repo: str) -> str:
  326. return """// Generated, do not edit
  327. rootProject.name = "gradle sync job"
  328. pluginManagement {
  329. repositories {
  330. maven {
  331. url=uri("http://""" + repo + """/releases")
  332. isAllowInsecureProtocol=true
  333. }
  334. }
  335. }
  336. """
  337. async def download(package: Package, queue: asyncio.Queue) -> None:
  338. async with done_lock:
  339. is_done = str(package) in done
  340. async with in_progress_lock:
  341. is_in_progress = str(package) in in_progress
  342. if is_done:
  343. logger.info(f'{package}: Already downloaded. Skipping.')
  344. elif is_in_progress:
  345. logger.info(f'{package}: Already in progress. Skipping.')
  346. else:
  347. async with in_progress_lock:
  348. in_progress.add(str(package))
  349. for _ in range(50):
  350. try:
  351. verified = await package.verify()
  352. break
  353. except TooManyRequestsException:
  354. logger.info(f'{package}: Too many requests. Delaying next attempt')
  355. await asyncio.sleep(3*random.random() + 0.2)
  356. else:
  357. logger.error(f'{package}: Verification failed after 50 tries')
  358. exit(1)
  359. if verified:
  360. for _ in range(50):
  361. try:
  362. pom = await package.pom
  363. break
  364. except TooManyRequestsException:
  365. logger.info(f'{package}: Too many requests. Delaying next attempt')
  366. await asyncio.sleep(3*random.random() + 0.2)
  367. except WaitForPackage as e:
  368. logger.info(f'{package}: Waiting for {e.package}')
  369. async with in_progress_lock:
  370. if str(package) in in_progress:
  371. in_progress.remove(str(package))
  372. if str(e.package) not in in_progress:
  373. await queue.put(e.package)
  374. await queue.put(package)
  375. return
  376. else:
  377. logger.error(f'{package}: POM parsing failed after 50 tries')
  378. exit(1)
  379. if not pom:
  380. logger.warn(f'{package}: No pom')
  381. return
  382. async with gradle_packages_lock:
  383. gradle_packages.update(pom.gradle_packages)
  384. if not pom.is_bom:
  385. for dep in pom.dependency_management:
  386. logger.info(f'{package}: Handling transitive dependency {dep}')
  387. await queue.put(dep)
  388. async with done_lock:
  389. logger.debug(f'{package}: Marking done')
  390. p = copy.copy(package)
  391. p.version = None
  392. done.add(str(package))
  393. done.add(str(p))
  394. async with in_progress_lock:
  395. if str(package) in in_progress:
  396. in_progress.remove(str(package))
  397. else:
  398. p = copy.copy(package)
  399. p.version = None
  400. if str(p) in in_progress:
  401. in_progress.remove(str(p))
  402. else:
  403. logger.warning(f'{package}: Package is done, but not marked as in progress')
  404. async def worker(queue: asyncio.Queue) -> None:
  405. while True:
  406. package = await queue.get()
  407. while True:
  408. try:
  409. await download(package, queue)
  410. break
  411. except PackageError:
  412. logger.exception(f'{package}: Error while processing package')
  413. break
  414. except Exception:
  415. logger.exception(f'{package}: Unknown error while processing package')
  416. break
  417. queue.task_done()
  418. async def main(package_list: Path, output_dir: Path, num_workers: int, gradle_repo: str) -> None:
  419. queue: asyncio.Queue = asyncio.Queue()
  420. tasks = []
  421. load_package_list(package_list, queue)
  422. logger.debug(f'Starting {num_workers} workers')
  423. for i in range(num_workers):
  424. tasks.append(
  425. asyncio.create_task(
  426. worker(queue)
  427. )
  428. )
  429. await queue.join()
  430. logger.debug('Queue is empty. Cancelling workers')
  431. for task in tasks:
  432. task.cancel()
  433. await asyncio.gather(*tasks, return_exceptions=True)
  434. async with gradle_packages_lock:
  435. logger.info('Generating build.gradle.kts')
  436. (output_dir / 'build.gradle.kts').write_text(create_gradle_build(gradle_packages, gradle_repo))
  437. logger.info('Generating settings.gradle.kts')
  438. (output_dir / 'settings.gradle.kts').write_text(create_gradle_settings(gradle_repo))
  439. logger = logging.getLogger(__name__)
  440. if __name__ == '__main__':
  441. parser = argparse.ArgumentParser()
  442. parser.add_argument('-w', '--workers', type=int, default=20)
  443. parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
  444. parser.add_argument('--repo', type=str, help="The repository gradle should use", required=True)
  445. parser.add_argument('--output_dir', type=Path, help="The directory to put the generated gradle files in", default=Path('.'), required=False)
  446. parser.add_argument('package_list', type=Path, help="The list of packages to download")
  447. args = parser.parse_args()
  448. if args.verbosity == 0:
  449. log_level = 'WARNING'
  450. elif args.verbosity == 1:
  451. log_level = 'INFO'
  452. else:
  453. log_level = 'DEBUG'
  454. logging.basicConfig(level=log_level)
  455. asyncio.run(
  456. main(args.package_list, args.output_dir, args.workers, args.repo)
  457. )