| @@ -1,6 +1,7 @@ | |||
| #!/bin/python3 | |||
| import re | |||
| import copy | |||
| import random | |||
| import argparse | |||
| import logging | |||
| @@ -28,13 +29,25 @@ mirrors = [ | |||
# Resolved property table of each parsed POM, keyed by str(package).
# A child POM starts from its parent's entry when resolving its own properties.
global_properties: dict[str, dict[str, str]] = {}

# Presumably the number of concurrent worker tasks -- TODO confirm at the use site.
num_workers = 50

# str(package) keys of packages that are finished; accessed under done_lock.
done_lock = asyncio.Lock()
done: set[str] = set()

# str(package) keys of packages currently being handled; accessed under
# in_progress_lock.
in_progress_lock = asyncio.Lock()
in_progress: set[str] = set()
class TooManyRequestsException(Exception):
    """Signals a "too many requests" condition.

    The worker loop handles it by logging and sleeping for a short random
    delay.
    """
class PackageError(Exception):
    """Raised when a package cannot be processed.

    For example, a POM declares a parent whose groupId, artifactId or
    version is missing.
    """
class WaitForPackage(Exception):
    """Raised when a package must wait for another package to be done first.

    POM parsing raises it when the parent POM has not been processed yet;
    the worker loop then re-queues both the awaited package and the package
    that was waiting.
    """

    def __init__(self, package):
        """Store the package that has to be processed first.

        Args:
            package: The package being waited for; also exposed as
                ``self.package``.
        """
        # Forward the package to Exception so that str(exc), exc.args and
        # tracebacks name it, and so the exception can be copied or pickled
        # (both rebuild the instance from exc.args).
        super().__init__(package)
        self.package = package
| class PackagePOM: | |||
| def __init__(self, package: 'Package', pom: str): | |||
| self._package = package | |||
| @@ -42,6 +55,38 @@ class PackagePOM: | |||
| logger.debug(f'{package}: Parsing POM') | |||
| self.raw_root = ET.fromstring(pom) | |||
| self.parent: Package | None = None | |||
| if (parent_tag := self.raw_root.find('parent', ns)) is not None: | |||
| parent_group_tag = parent_tag.find('groupId', ns) | |||
| parent_artifact_tag = parent_tag.find('artifactId', ns) | |||
| parent_version_tag = parent_tag.find('version', ns) | |||
| parent_group = parent_group_tag.text if parent_group_tag is not None else None | |||
| parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None | |||
| parent_version = parent_version_tag.text if parent_version_tag is not None else None | |||
| logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}') | |||
| if parent_group is not None and parent_artifact is not None and parent_version is not None: | |||
| parent = Package( | |||
| parent_group, | |||
| parent_artifact, | |||
| parent_version, | |||
| ) | |||
| if str(parent) in done: | |||
| self.parent = parent | |||
| else: | |||
| raise WaitForPackage(parent) | |||
| else: | |||
| raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}') | |||
| logger.debug(f'{package}: Parsing properties') | |||
| parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)] | |||
| self.properties = self.resolve_props(parent_props) | |||
| global_properties[str(package)] = self.properties | |||
| logger.debug(f'{package}: Parsing packaging') | |||
| if (packaging := self.raw_root.find('packaging', ns)) is not None: | |||
| self.packaging = packaging.text | |||
| else: | |||
| @@ -79,13 +124,11 @@ class PackagePOM: | |||
| ET.SubElement(root_copy, 'version').text = tmpVersion | |||
| # Add a dependency for the pom itself | |||
| if (dependencies := root_copy.find('dependencies', ns)) is not None: | |||
| self_dep = ET.SubElement(dependencies, 'dependency') | |||
| ET.SubElement(self_dep, 'groupId').text = package.groupId | |||
| ET.SubElement(self_dep, 'artifactId').text = package.artifactId | |||
| ET.SubElement(self_dep, 'version').text = package.version | |||
| else: | |||
| logger.warning(f"{package}: No dependencies tag in pom") | |||
| dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies') | |||
| self_dep = ET.SubElement(dependencies, 'dependency') | |||
| ET.SubElement(self_dep, 'groupId').text = package.groupId | |||
| ET.SubElement(self_dep, 'artifactId').text = package.artifactId | |||
| ET.SubElement(self_dep, 'version').text = package.version | |||
| self.generated_root = root_copy | |||
| else: | |||
| @@ -120,14 +163,31 @@ class PackagePOM: | |||
| ET.indent(tree) | |||
| tree.write(f) | |||
def get_property(self, prop: str):
    """Return the text of property ``prop`` from the raw POM.

    Searches ``self.raw_root`` for an element named ``prop`` directly under
    any ``properties`` element.

    Returns:
        The element's text (which may itself be ``None`` for an empty
        element), or ``None`` when no such element exists.
    """
    found = self.raw_root.find(f'.//properties/{prop}', ns)
    return None if found is None else found.text
| def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': | |||
def resolve_props(self, initial: dict[str, str]) -> dict[str, str]:
    """Build this POM's property table on top of inherited properties.

    Every element found directly under a ``properties`` element of the raw
    POM is added to a copy of ``initial`` (the POM's own values override
    inherited ones). ``${...}`` references inside the values are then
    expanded with ``prop_replace`` until a full pass changes nothing.

    Args:
        initial: Properties inherited from the parent POM. Not modified.

    Returns:
        A new dict mapping property names to their expanded values.
    """
    # Work on a copy: the caller passes in the parent's property table, and
    # writing into it would leak this POM's properties into the parent and
    # into every other POM that inherits from it.
    props = dict(initial)

    namespace_prefix = f'{{{ns[""]}}}'
    for prop_tag in self.raw_root.findall('.//properties/*', ns):
        prop = prop_tag.tag.replace(namespace_prefix, '')
        value = prop_tag.text if prop_tag.text is not None else ''
        logger.debug(f'{self._package}: Setting prop {prop}={value}')
        props[prop] = value

    # Plain reference chains settle within len(props) passes. A property
    # that references itself (e.g. a = x${a}) grows on every pass and would
    # otherwise loop forever, so the number of passes is bounded.
    max_passes = len(props) + 1
    passes_done = 0
    changed = True
    while changed:
        if passes_done >= max_passes:
            logger.warning(
                f'{self._package}: Properties still changing after {max_passes} passes. '
                'Stopping expansion (self-referencing property?)'
            )
            break
        passes_done += 1
        changed = False
        # Only values of existing keys are reassigned, so iterating the
        # dict while updating it is safe, and later properties in the same
        # pass see the already-expanded values.
        for prop, value in props.items():
            new_value = self.prop_replace(value, props)
            if new_value != value:
                changed = True
                logger.debug(f'{self._package}: Setting prop {prop}={new_value}')
                props[prop] = new_value
    return props
| def prop_replace(self, text, props: dict[str, str] | None = None) -> str: | |||
| def lookup_prop(match) -> str: | |||
| prop = match.group(1) | |||
| @@ -137,24 +197,27 @@ class PackagePOM: | |||
| value = str(self._package.artifactId) | |||
| elif prop == 'project.version': | |||
| value = str(self._package.version) | |||
| elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'): | |||
| value = '' | |||
| else: | |||
| value = self.get_property(prop) | |||
| logger.debug(f'{self._package}: Trying to recurse prop {value}') | |||
| value = prop_replace(value) | |||
| try: | |||
| value = props[prop] if props is not None else self.properties[prop] | |||
| except KeyError: | |||
| logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""') | |||
| value = '' | |||
| logger.debug(f'{self._package}: Replacing property {prop} with {value}') | |||
| return value | |||
| def prop_replace(text) -> str: | |||
| logger.debug(f'{self._package}: Getting prop {text}') | |||
| return re.sub( | |||
| r'\$\{([^\}]*)\}', | |||
| lookup_prop, | |||
| text, | |||
| ) | |||
| return re.sub( | |||
| r'\$\{([^\}]*)\}', | |||
| lookup_prop, | |||
| text, | |||
| ) | |||
| def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': | |||
| def prop_replace_tag(tag) -> str: | |||
| return prop_replace( | |||
| return self.prop_replace( | |||
| elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', | |||
| ) | |||
| @@ -319,30 +382,51 @@ async def download(package: Package, queue: asyncio.Queue) -> None: | |||
| async with done_lock: | |||
| skip = str(package) in done | |||
| async with in_progress_lock: | |||
| skip = skip or (str(package) in in_progress) | |||
| if skip: | |||
| logger.info(f'{package}: Already downloaded. Skipping.') | |||
| elif await package.verify(): | |||
| async with done_lock: | |||
| done.add(str(package)) | |||
| else: | |||
| async with in_progress_lock: | |||
| in_progress.add(str(package)) | |||
| pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}' | |||
| pom_path = pom_dir / 'pom.xml' | |||
| if await package.verify(): | |||
| pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}' | |||
| pom_path = pom_dir / 'pom.xml' | |||
| pom_dir.mkdir(exist_ok=True) | |||
| pom_dir.mkdir(exist_ok=True) | |||
| pom = await package.pom | |||
| pom = await package.pom | |||
| if not pom: | |||
| return | |||
| if not pom: | |||
| return | |||
| pom.write(pom_path) | |||
| logger.info(f'{package}: Downloaded') | |||
| pom.write(pom_path) | |||
| logger.info(f'{package}: Downloaded') | |||
| if not pom.is_bom: | |||
| for dep in pom.dependency_management: | |||
| logger.info(f'{package}: Handling transitive dependency {dep}') | |||
| await queue.put(dep) | |||
| if not pom.is_bom: | |||
| for dep in pom.dependency_management: | |||
| logger.info(f'{package}: Handling transitive dependency {dep}') | |||
| await queue.put(dep) | |||
| async with done_lock: | |||
| logger.debug(f'{package}: Marking done') | |||
| p = copy.copy(package) | |||
| p.version = None | |||
| done.add(str(package)) | |||
| done.add(str(p)) | |||
| async with in_progress_lock: | |||
| if str(package) in in_progress: | |||
| in_progress.remove(str(package)) | |||
| else: | |||
| p = copy.copy(package) | |||
| p.version = None | |||
| if str(p) in in_progress: | |||
| in_progress.remove(str(p)) | |||
| else: | |||
| logger.warning(f'{package}: Package is done, but not marked as in progress') | |||
| async def worker(queue: asyncio.Queue) -> None: | |||
| while True: | |||
| @@ -355,6 +439,19 @@ async def worker(queue: asyncio.Queue) -> None: | |||
| except TooManyRequestsException: | |||
| logger.info('Too many requests. Delaying next attempt') | |||
| await asyncio.sleep(3*random.random() + 0.2) | |||
| except WaitForPackage as e: | |||
| logger.info(f'{package}: Waiting for {e.package}') | |||
| await queue.put(e.package) | |||
| await queue.put(package) | |||
| break | |||
| except PackageError: | |||
| logger.exception(f'{package}: Error while processing package') | |||
| break | |||
| except Exception: | |||
| logger.exception(f'{package}: Unknown error while processing package') | |||
| logger.error(global_properties) | |||
| break | |||
| queue.task_done() | |||