You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

276 lines
8.3KB

  1. #!/bin/python3
  2. import re
  3. import argparse
  4. import logging
  5. import subprocess
  6. import json
  7. import copy
  8. from urllib import request
  9. from urllib.error import HTTPError
  10. from pathlib import Path
  11. from xml.etree import ElementTree as ET
# Register the Maven POM namespace under the empty prefix so that ElementTree
# serialises POM elements without a namespace prefix.
ET.register_namespace('', 'http://maven.apache.org/POM/4.0.0')
# Base URL used to build both the search-query and the POM-download URLs.
baseurl = 'https://search.maven.org'
  14. class PackagePOM:
  15. _dependencyManagement: list['Package'] = None
  16. def __init__(self, package: 'Package', pom: str):
  17. logger.debug(f'{package}: Parsing POM')
  18. self.raw_root = ET.fromstring(pom)
  19. packaging = self.raw_root.find('packaging')
  20. self.is_bom = True if packaging is not None and packaging.text == 'pom' else False
  21. if self.is_bom:
  22. root_copy = copy.deepcopy(self.raw_root)
  23. depman = root_copy.find('dependencyManagement')
  24. root_copy.extend(depman.findall('*'))
  25. root_copy.remove(depman)
  26. self.generated_root = root_copy
  27. else:
  28. self.generated_root = ET.fromstring(
  29. f"""
  30. <project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
  31. https://maven.apache.org/xsd/maven-4.0.0.xsd"
  32. xmlns="http://maven.apache.org/POM/4.0.0"
  33. xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  34. <modelVersion>4.0.0</modelVersion>
  35. <groupId>tmp.{package.groupId}</groupId>
  36. <artifactId>placeholder-{package.artifactId}</artifactId>
  37. <version>{package.version}</version>
  38. <name>Package {package.artifactId}</name>
  39. <dependencies>
  40. <dependency>
  41. <groupId>{package.groupId}</groupId>
  42. <artifactId>{package.artifactId}</artifactId>
  43. <version>{package.version}</version>
  44. </dependency>
  45. </dependencies>
  46. </project>
  47. """
  48. )
  49. def write(self, f):
  50. tree = ET.ElementTree(self.generated_root)
  51. ET.indent(tree)
  52. tree.write(f)
  53. def get_property(self, prop: str):
  54. elem = self.raw_root.find(f'.//properties/{prop}')
  55. import pdb; pdb.set_trace()
  56. if elem is not None:
  57. return elem.text
  58. else:
  59. return None
  60. @property
  61. def dependencyManagement(self) -> list['Package']:
  62. if self._dependencyManagement is not None:
  63. return self._dependencyManagement
  64. self._dependencyManagement = []
  65. def prop_replace(match):
  66. prop = match.group(1)
  67. value = self.get_property(match.group(1))
  68. logger.debug(f'Replacing property {prop} with {value}')
  69. return value
  70. for dep in self.raw_root.find('dependencyManagement/dependencies') or []:
  71. package = Package(
  72. *[
  73. re.sub(
  74. r'\$\{([^\}]*)\}',
  75. prop_replace,
  76. dep.find(tag).text,
  77. )
  78. for tag in [
  79. 'groupId',
  80. 'artifactId',
  81. 'version',
  82. ]
  83. ]
  84. )
  85. self._dependencyManagement.append(package)
  86. return self._dependencyManagement
  87. class Package:
  88. _pom: PackagePOM = None
  89. _verified: bool = False
  90. def __init__(self, groupId: str, artifactId: str, version: str = None):
  91. self.groupId = groupId
  92. self.artifactId = artifactId
  93. self.version = version
  94. def __str__(self) -> str:
  95. return f'{self.groupId}:{self.artifactId}:{self.version or "----"}'
  96. def __eq__(self, other) -> bool:
  97. return (
  98. self.groupId == other.groupId
  99. and self.artifactId == other.artifactId
  100. and self.version == other.version
  101. )
  102. def __hash__(self) -> str:
  103. return hash((self.groupId, self.artifactId, self.version))
  104. @property
  105. def pom(self) -> ET:
  106. if self._pom is not None:
  107. return self._pom
  108. if self.version is None:
  109. self._query_maven()
  110. group_path = self.groupId.replace(".", "/")
  111. pom_path = f'{self.artifactId}-{self.version}.pom'
  112. filepath = f'{group_path}/{self.artifactId}/{self.version}/{pom_path}'
  113. pom_url = f'{baseurl}/remotecontent?filepath={filepath}'
  114. logger.debug(f'{self}: Downloading pom from {pom_url}')
  115. try:
  116. response = request.urlopen(pom_url)
  117. except HTTPError as e:
  118. logger.warning(f'{self}: HTTP error downloading pom')
  119. logger.debug(e)
  120. return None
  121. status = response.status
  122. if status == 200:
  123. logger.debug(f'{self}: POM downloaded')
  124. self._pom = PackagePOM(self, response.read())
  125. else:
  126. logger.warning(f'{self}: HTTP error {status} downloading pom')
  127. return self._pom
  128. @property
  129. def _urlquery(self) -> str:
  130. q = f'g:{self.groupId}+AND+a:{self.artifactId}'
  131. if self.version is not None:
  132. q += f'+AND+v:{self.version}'
  133. return q
  134. def _query_maven(self) -> None:
  135. url = f'{baseurl}/solrsearch/select?q={self._urlquery}&rows=1&wt=json'
  136. logger.debug(f'{self}: Querying maven at url {url}')
  137. response = request.urlopen(url)
  138. status = response.status
  139. if status == 200:
  140. message = json.loads(response.read())
  141. num = message['response']['numFound']
  142. if num:
  143. logger.debug(f'{self}: Query successful')
  144. self._verified = True
  145. if self.version is None:
  146. version = message['response']['docs'][0]['latestVersion']
  147. logger.info(f'{self}: Using newest version {version}')
  148. self.version = version
  149. else:
  150. logger.warning(f'{self}: No matching packages found')
  151. self._verified = False
  152. else:
  153. self._verified = False
  154. logger.warning(f'{self}: HTTP error {status} downloading pom')
  155. def verify(self) -> bool:
  156. if not self._verified:
  157. self._query_maven()
  158. return self._verified
  159. def load_package_list(list_path: Path) -> list[Package]:
  160. packages = []
  161. logger.info(f'Parsing {list_path}')
  162. with list_path.open('r') as f:
  163. for line in f.readlines():
  164. sections = line.strip().split(':')
  165. if len(sections) < 2 or len(sections) > 3:
  166. logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"')
  167. continue
  168. query = Package(
  169. sections[0],
  170. sections[1],
  171. sections[2] if len(sections) == 3 else None,
  172. )
  173. packages.append(query)
  174. return packages
  175. def download(base_path: Path, package: Package, done: [str]) -> None:
  176. if str(package) in done:
  177. logger.info(f'{package}: Already downloaded. Skipping.')
  178. elif package.verify():
  179. pom_dir = base_path / str(package)
  180. pom_path = pom_dir / 'pom.xml'
  181. pom_dir.mkdir(exist_ok=True)
  182. if not package.pom:
  183. return
  184. package.pom.write(pom_path)
  185. done.append(str(package))
  186. logger.info(f'{package}: Downloaded')
  187. if not package.pom.is_bom:
  188. for dep in package.pom.dependencyManagement:
  189. logger.info(f'{package}: Handling transitive dependency {dep}')
  190. download(base_path, dep, done)
  191. else:
  192. logger.warning(f'{package}: Package not found. Check package name and internet connection')
  193. def main() -> None:
  194. packages = load_package_list(Path('package-list.txt'))
  195. base_pom_path = Path('poms')
  196. done = []
  197. for package in packages:
  198. download(base_pom_path, package, done)
  199. subprocess.call(['sh', 'generate_master_pom.sh'])
  200. logger = logging.getLogger(__name__)
  201. if __name__ == '__main__':
  202. parser = argparse.ArgumentParser()
  203. parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
  204. args = parser.parse_args()
  205. if args.verbosity == 0:
  206. log_level = 'WARNING'
  207. elif args.verbosity == 1:
  208. log_level = 'INFO'
  209. else:
  210. log_level = 'DEBUG'
  211. logging.basicConfig(level=log_level)
  212. main()