25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

550 satır
18KB

  1. #!/bin/python3
  2. import re
  3. import copy
  4. import random
  5. import argparse
  6. import logging
  7. import asyncio
  8. import subprocess
  9. import copy
  10. import aiohttp
  11. from pathlib import Path
  12. from xml.etree import ElementTree as ET
  13. ns = {'': 'http://maven.apache.org/POM/4.0.0'}
  14. ET.register_namespace('', ns[''])
  15. baseurl = 'https://search.maven.org'
  16. base_pom_path = Path('poms')
  17. mirrors = [
  18. "https://repo.maven.apache.org/maven2",
  19. "https://repo1.maven.org/maven2",
  20. "https://oss.sonatype.org/content/repositories/snapshots",
  21. "https://packages.confluent.io/maven",
  22. "https://registry.quarkus.io/maven",
  23. "https://plugins.gradle.org/m2",
  24. ]
  25. done: set[str] = set()
  26. done_lock = asyncio.Lock()
  27. in_progress: set[str] = set()
  28. in_progress_lock = asyncio.Lock()
  29. gradle_packages: set[str] = set()
  30. gradle_packages_lock = asyncio.Lock()
  31. global_properties: dict[str, dict[str, str]] = {}
  32. class TooManyRequestsException(Exception):
  33. pass
  34. class PackageError(Exception):
  35. pass
  36. class WaitForPackage(Exception):
  37. def __init__(self, package):
  38. self.package = package
  39. class PackagePOM:
  40. def __init__(self, package: 'Package', pom: str):
  41. self._package = package
  42. logger.debug(f'{package}: Parsing POM')
  43. self.raw_root = ET.fromstring(pom)
  44. self.parent: Package | None = None
  45. if (parent_tag := self.raw_root.find('parent', ns)) is not None:
  46. parent_group_tag = parent_tag.find('groupId', ns)
  47. parent_artifact_tag = parent_tag.find('artifactId', ns)
  48. parent_version_tag = parent_tag.find('version', ns)
  49. parent_group = parent_group_tag.text if parent_group_tag is not None else None
  50. parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None
  51. parent_version = parent_version_tag.text if parent_version_tag is not None else None
  52. logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}')
  53. if parent_group is not None and parent_artifact is not None and parent_version is not None:
  54. parent = Package(
  55. parent_group,
  56. parent_artifact,
  57. parent_version,
  58. )
  59. if str(parent) in done:
  60. self.parent = parent
  61. else:
  62. raise WaitForPackage(parent)
  63. else:
  64. raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}')
  65. logger.debug(f'{package}: Parsing properties')
  66. parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)]
  67. self.properties = self.resolve_props(parent_props)
  68. global_properties[str(package)] = self.properties
  69. logger.debug(f'{package}: Parsing packaging')
  70. if (packaging := self.raw_root.find('packaging', ns)) is not None:
  71. self.packaging = packaging.text
  72. else:
  73. self.packaging = '??'
  74. self.is_bom = self.packaging == 'pom'
  75. self.gradle_packages = [str(package)]
  76. if self.packaging == 'pom':
  77. root_copy = copy.deepcopy(self.raw_root)
  78. dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies')
  79. self.gradle_packages.extend(
  80. [
  81. f'{dep.find("groupId").text}:{dep.find("artifactId").text}:{dep.find("version").text}'
  82. for dep in dependencies.findall('dependency')
  83. ]
  84. )
  85. logger.debug(f'{package}: POM parsed')
  86. def resolve_props(self, initial: dict[str, str]):
  87. props = initial
  88. for prop_tag in self.raw_root.findall('.//properties/*', ns):
  89. prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '')
  90. value = prop_tag.text if prop_tag.text is not None else ''
  91. logger.debug(f'{self._package}: Setting prop {prop}={value}')
  92. props[prop] = value
  93. changed = True
  94. while changed:
  95. changed = False
  96. for prop, value in props.items():
  97. new_value = self.prop_replace(value, props)
  98. if new_value != value:
  99. changed = True
  100. logger.debug(f'{self._package}: Setting prop {prop}={new_value}')
  101. props[prop] = new_value
  102. return props
  103. def prop_replace(self, text, props: dict[str, str] | None = None) -> str:
  104. def lookup_prop(match) -> str:
  105. prop = match.group(1)
  106. if prop == 'project.groupId':
  107. value = str(self._package.groupId)
  108. elif prop == 'project.artifactId':
  109. value = str(self._package.artifactId)
  110. elif prop == 'project.version':
  111. value = str(self._package.version)
  112. elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'):
  113. value = ''
  114. elif prop in ['project.basedir', 'basedir', 'user.home', 'debug.port']:
  115. value = ''
  116. else:
  117. try:
  118. value = props[prop] if props is not None else self.properties[prop]
  119. except KeyError:
  120. logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""')
  121. value = ''
  122. logger.debug(f'{self._package}: Replacing property {prop} with {value}')
  123. return value
  124. return re.sub(
  125. r'\$\{([^\}]*)\}',
  126. lookup_prop,
  127. text,
  128. )
  129. def _package_from_xml_dep(self, dep: ET.Element) -> 'Package':
  130. def prop_replace_tag(tag) -> str:
  131. return self.prop_replace(
  132. elem.text or '' if (elem := dep.find(tag, ns)) is not None else '',
  133. )
  134. return Package(
  135. groupId=prop_replace_tag('groupId'),
  136. artifactId=prop_replace_tag('artifactId'),
  137. version=prop_replace_tag('version'),
  138. )
  139. @property
  140. def dependency_management(self) -> list['Package']:
  141. dependencies: list[Package] = []
  142. for dep in self.raw_root.find('dependencyManagement/dependencies', ns) or []:
  143. package = self._package_from_xml_dep(dep)
  144. dependencies.append(package)
  145. return dependencies
  146. class Package:
  147. _pom: PackagePOM | None = None
  148. _verified: bool = False
  149. def __init__(self, groupId: str, artifactId: str, version: str | None = None, implicit: bool = False):
  150. self.groupId = groupId
  151. self.artifactId = artifactId
  152. self.version = version if version and not version.isspace() else None
  153. self.implicit = implicit
  154. def __str__(self) -> str:
  155. return f'{self.groupId}:{self.artifactId}:{self.version or "----"}'
  156. def __eq__(self, other) -> bool:
  157. return (
  158. self.groupId == other.groupId
  159. and self.artifactId == other.artifactId
  160. and self.version == other.version
  161. )
  162. def __hash__(self) -> int:
  163. return hash((self.groupId, self.artifactId, self.version))
  164. @property
  165. def dir_path(self):
  166. group_path = self.groupId.replace(".", "/")
  167. return f'{group_path}/{self.artifactId}/{self.version}'
  168. @property
  169. def base_filename(self):
  170. return f'{self.artifactId}-{self.version}'
  171. async def download_file(self, extension):
  172. filepath = f'{self.dir_path}/{self.base_filename}.{extension}'
  173. async with aiohttp.ClientSession() as session:
  174. for mirror in mirrors:
  175. pom_url = f'{mirror}/{filepath}'
  176. logger.debug(f'{self}: Downloading {extension} from {pom_url}')
  177. async with session.get(pom_url) as response:
  178. if response.status == 200:
  179. logger.debug(f'{self}: {extension} downloaded')
  180. return await response.text()
  181. break
  182. elif response.status == 429:
  183. raise TooManyRequestsException()
  184. else:
  185. logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}')
  186. else:
  187. logger.warning(f'{self}: File download of {extension} failed for all mirrors')
  188. return None
  189. @property
  190. async def pom(self) -> PackagePOM:
  191. if self._pom is not None:
  192. return self._pom
  193. if self.version is None:
  194. await self._query_maven()
  195. self._pom = PackagePOM(self, await self.download_file('pom'))
  196. return self._pom
  197. @property
  198. def _urlquery(self) -> str:
  199. q = f'g:{self.groupId}+AND+a:{self.artifactId}'
  200. if self.version is not None:
  201. q += f'+AND+v:{self.version}'
  202. return q
  203. async def _query_maven(self) -> None:
  204. self._verified = False
  205. async with aiohttp.ClientSession() as session:
  206. for mirror in mirrors:
  207. url = f'{mirror}/{self.groupId.replace(".", "/")}/{self.artifactId}/maven-metadata.xml'
  208. logger.debug(f'{self}: Querying maven at url {url}')
  209. async with session.get(url) as response:
  210. if response.status == 200:
  211. response_text = await response.text()
  212. metadata = ET.fromstring(response_text)
  213. if metadata is not None:
  214. logger.debug(f'{self}: Metadata found')
  215. if self.version is None:
  216. release_tag = metadata.find('./versioning/release')
  217. latest_tag = metadata.find('./versioning/latest')
  218. version = release_tag.text if release_tag is not None else latest_tag.text if latest_tag is not None else None
  219. if version is not None:
  220. logger.debug(f'{self}: Using newest version {version}')
  221. self.version = version
  222. self._verified = True
  223. return
  224. else:
  225. logger.info(f'{self}: Could not find latest version in metadata from mirror {mirror}')
  226. else:
  227. if metadata.find(f'./versioning/versions/version[.="{self.version}"]') is not None:
  228. logger.debug(f'{self}: Version {self.version} is valid')
  229. self._verified = True
  230. return
  231. else:
  232. logger.info(f'{self}: Could not find version {self.version} in metadata from mirror {mirror}')
  233. else:
  234. logger.warning('{self}: Invalid XML for maven metadata: {response_text}')
  235. elif response.status == 429:
  236. raise TooManyRequestsException()
  237. else:
  238. logger.info(f'{self}: HTTP error {response.status} downloading maven metadata from {url}')
  239. else:
  240. if self.implicit:
  241. logger.info(f'{self}: Package not found in any mirror')
  242. else:
  243. logger.warning(f'{self}: Package not found in any mirror')
  244. async def verify(self) -> bool:
  245. if not self._verified:
  246. await self._query_maven()
  247. return self._verified
  248. def load_package_list(list_path: Path, queue: asyncio.Queue) -> None:
  249. logger.info(f'Parsing {list_path}')
  250. with list_path.open('r') as f:
  251. for line in f.readlines():
  252. sections = line.strip().split(':')
  253. if len(sections) < 2 or len(sections) > 3:
  254. logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"')
  255. continue
  256. package = Package(
  257. sections[0],
  258. sections[1],
  259. sections[2] if len(sections) == 3 else None,
  260. )
  261. queue.put_nowait(package)
  262. continue
  263. if not package.artifactId.endswith('-jvm'):
  264. queue.put_nowait(
  265. Package(
  266. package.groupId,
  267. f'{package.artifactId}-jvm',
  268. package.version,
  269. True,
  270. )
  271. )
  272. def create_gradle_build(packages, repo) -> str:
  273. return """// Generated, do not edit
  274. plugins {
  275. kotlin("jvm") version "1.7.20"
  276. }
  277. repositories {
  278. maven {
  279. url=uri("http://""" + repo + """/releases")
  280. isAllowInsecureProtocol=true
  281. }
  282. }
  283. val deps = listOf(
  284. """ + ',\n '.join(f'"{dep}"' for dep in sorted(packages)) + """
  285. ).map {
  286. configurations.create(it.replace(':', '_')) to it
  287. }
  288. dependencies {
  289. deps.forEach { (conf, dep) ->
  290. conf(dep)
  291. conf(dep + ":sources")
  292. conf(dep + ":javadoc")
  293. }
  294. }
  295. tasks.register("downloadDependencies") {
  296. doLast {
  297. deps.forEach { (conf, _) ->
  298. conf.files
  299. }
  300. }
  301. }
  302. """
  303. def create_gradle_settings(repo: str) -> str:
  304. return """// Generated, do not edit
  305. rootProject.name = "gradle sync job"
  306. pluginManagement {
  307. repositories {
  308. maven {
  309. url=uri("http://""" + repo + """/releases")
  310. isAllowInsecureProtocol=true
  311. }
  312. }
  313. }
  314. """
  315. async def download(package: Package, queue: asyncio.Queue) -> None:
  316. async with done_lock:
  317. is_done = str(package) in done
  318. async with in_progress_lock:
  319. is_in_progress = str(package) in in_progress
  320. if is_done:
  321. logger.info(f'{package}: Already downloaded. Skipping.')
  322. elif is_in_progress:
  323. logger.info(f'{package}: Already in progress. Skipping.')
  324. else:
  325. async with in_progress_lock:
  326. in_progress.add(str(package))
  327. for _ in range(50):
  328. try:
  329. verified = await package.verify()
  330. break
  331. except TooManyRequestsException:
  332. logger.info(f'{package}: Too many requests. Delaying next attempt')
  333. await asyncio.sleep(3*random.random() + 0.2)
  334. else:
  335. logger.error(f'{package}: Verification failed after 50 tries')
  336. exit(1)
  337. if verified:
  338. for _ in range(50):
  339. try:
  340. pom = await package.pom
  341. break
  342. except TooManyRequestsException:
  343. logger.info(f'{package}: Too many requests. Delaying next attempt')
  344. await asyncio.sleep(3*random.random() + 0.2)
  345. except WaitForPackage as e:
  346. logger.info(f'{package}: Waiting for {e.package}')
  347. async with in_progress_lock:
  348. if str(package) in in_progress:
  349. in_progress.remove(str(package))
  350. if str(e.package) not in in_progress:
  351. await queue.put(e.package)
  352. await queue.put(package)
  353. return
  354. else:
  355. logger.error(f'{package}: POM parsing failed after 50 tries')
  356. exit(1)
  357. if not pom:
  358. logger.warn(f'{package}: No pom')
  359. return
  360. async with gradle_packages_lock:
  361. gradle_packages.update(pom.gradle_packages)
  362. if not pom.is_bom:
  363. for dep in pom.dependency_management:
  364. logger.info(f'{package}: Handling transitive dependency {dep}')
  365. await queue.put(dep)
  366. async with done_lock:
  367. logger.debug(f'{package}: Marking done')
  368. p = copy.copy(package)
  369. p.version = None
  370. done.add(str(package))
  371. done.add(str(p))
  372. async with in_progress_lock:
  373. if str(package) in in_progress:
  374. in_progress.remove(str(package))
  375. else:
  376. p = copy.copy(package)
  377. p.version = None
  378. if str(p) in in_progress:
  379. in_progress.remove(str(p))
  380. else:
  381. logger.warning(f'{package}: Package is done, but not marked as in progress')
  382. async def worker(queue: asyncio.Queue) -> None:
  383. while True:
  384. package = await queue.get()
  385. while True:
  386. try:
  387. await download(package, queue)
  388. break
  389. except PackageError:
  390. logger.exception(f'{package}: Error while processing package')
  391. break
  392. except Exception:
  393. logger.exception(f'{package}: Unknown error while processing package')
  394. break
  395. queue.task_done()
  396. async def main(package_list: Path, output_dir: Path, num_workers: int, gradle_repo: str) -> None:
  397. queue: asyncio.Queue = asyncio.Queue()
  398. tasks = []
  399. load_package_list(package_list, queue)
  400. logger.debug(f'Starting {num_workers} workers')
  401. for i in range(num_workers):
  402. tasks.append(
  403. asyncio.create_task(
  404. worker(queue)
  405. )
  406. )
  407. await queue.join()
  408. logger.debug('Queue is empty. Cancelling workers')
  409. for task in tasks:
  410. task.cancel()
  411. await asyncio.gather(*tasks, return_exceptions=True)
  412. async with gradle_packages_lock:
  413. logger.info('Generating build.gradle.kts')
  414. (output_dir / 'build.gradle.kts').write_text(create_gradle_build(gradle_packages, gradle_repo))
  415. logger.info('Generating settings.gradle.kts')
  416. (output_dir / 'settings.gradle.kts').write_text(create_gradle_settings(gradle_repo))
  417. logger = logging.getLogger(__name__)
  418. if __name__ == '__main__':
  419. parser = argparse.ArgumentParser()
  420. parser.add_argument('-w', '--workers', type=int, default=20)
  421. parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
  422. parser.add_argument('--repo', type=str, help="The repository gradle should use", required=True)
  423. parser.add_argument('--output_dir', type=Path, help="The directory to put the generated gradle files in", default=Path('.'), required=False)
  424. parser.add_argument('package_list', type=Path, help="The list of packages to download")
  425. args = parser.parse_args()
  426. if args.verbosity == 0:
  427. log_level = 'WARNING'
  428. elif args.verbosity == 1:
  429. log_level = 'INFO'
  430. else:
  431. log_level = 'DEBUG'
  432. logging.basicConfig(level=log_level)
  433. asyncio.run(
  434. main(args.package_list, args.output_dir, args.workers, args.repo)
  435. )