| @@ -1,6 +1,7 @@ | |||||
| #!/bin/python3 | #!/bin/python3 | ||||
| import re | import re | ||||
| import copy | |||||
| import random | import random | ||||
| import argparse | import argparse | ||||
| import logging | import logging | ||||
| @@ -28,13 +29,25 @@ mirrors = [ | |||||
# Worker count. The code that reads this value is outside this excerpt;
# presumably it is the number of worker() tasks started -- confirm.
num_workers = 50

# str(package) keys of packages that have been fully handled.
# download() takes done_lock around its reads and writes of this set.
done: set[str] = set()
done_lock = asyncio.Lock()

# str(package) keys of packages that a download() call has claimed but not
# yet finished; accessed under in_progress_lock.
in_progress: set[str] = set()
in_progress_lock = asyncio.Lock()

# Resolved POM properties, keyed by str(package). A child POM looks up its
# parent's entry here when resolving its own properties.
global_properties: dict[str, dict[str, str]] = {}
class TooManyRequestsException(Exception):
    """Marker exception for a "too many requests" condition.

    The worker loop catches it, logs the event and sleeps for a short
    random interval.
    """
class PackageError(Exception):
    """Raised when a package cannot be processed.

    For example, a POM whose parent reference lacks a groupId, artifactId
    or version raises this error.
    """
class WaitForPackage(Exception):
    """Signals that another package must be handled before this one.

    Raised while parsing a POM whose parent is not yet in ``done``. The
    worker loop re-queues the awaited package and then the current one.

    Attributes are documented inline below.
    """

    def __init__(self, package: 'Package') -> None:
        super().__init__(package)
        # The package that has to be processed first.
        self.package = package
| class PackagePOM: | class PackagePOM: | ||||
| def __init__(self, package: 'Package', pom: str): | def __init__(self, package: 'Package', pom: str): | ||||
| self._package = package | self._package = package | ||||
| @@ -42,6 +55,38 @@ class PackagePOM: | |||||
| logger.debug(f'{package}: Parsing POM') | logger.debug(f'{package}: Parsing POM') | ||||
| self.raw_root = ET.fromstring(pom) | self.raw_root = ET.fromstring(pom) | ||||
| self.parent: Package | None = None | |||||
| if (parent_tag := self.raw_root.find('parent', ns)) is not None: | |||||
| parent_group_tag = parent_tag.find('groupId', ns) | |||||
| parent_artifact_tag = parent_tag.find('artifactId', ns) | |||||
| parent_version_tag = parent_tag.find('version', ns) | |||||
| parent_group = parent_group_tag.text if parent_group_tag is not None else None | |||||
| parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None | |||||
| parent_version = parent_version_tag.text if parent_version_tag is not None else None | |||||
| logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}') | |||||
| if parent_group is not None and parent_artifact is not None and parent_version is not None: | |||||
| parent = Package( | |||||
| parent_group, | |||||
| parent_artifact, | |||||
| parent_version, | |||||
| ) | |||||
| if str(parent) in done: | |||||
| self.parent = parent | |||||
| else: | |||||
| raise WaitForPackage(parent) | |||||
| else: | |||||
| raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}') | |||||
| logger.debug(f'{package}: Parsing properties') | |||||
| parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)] | |||||
| self.properties = self.resolve_props(parent_props) | |||||
| global_properties[str(package)] = self.properties | |||||
| logger.debug(f'{package}: Parsing packaging') | |||||
| if (packaging := self.raw_root.find('packaging', ns)) is not None: | if (packaging := self.raw_root.find('packaging', ns)) is not None: | ||||
| self.packaging = packaging.text | self.packaging = packaging.text | ||||
| else: | else: | ||||
| @@ -79,13 +124,11 @@ class PackagePOM: | |||||
| ET.SubElement(root_copy, 'version').text = tmpVersion | ET.SubElement(root_copy, 'version').text = tmpVersion | ||||
| # Add a dependency for the pom itself | # Add a dependency for the pom itself | ||||
| if (dependencies := root_copy.find('dependencies', ns)) is not None: | |||||
| self_dep = ET.SubElement(dependencies, 'dependency') | |||||
| ET.SubElement(self_dep, 'groupId').text = package.groupId | |||||
| ET.SubElement(self_dep, 'artifactId').text = package.artifactId | |||||
| ET.SubElement(self_dep, 'version').text = package.version | |||||
| else: | |||||
| logger.warning(f"{package}: No dependencies tag in pom") | |||||
| dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies') | |||||
| self_dep = ET.SubElement(dependencies, 'dependency') | |||||
| ET.SubElement(self_dep, 'groupId').text = package.groupId | |||||
| ET.SubElement(self_dep, 'artifactId').text = package.artifactId | |||||
| ET.SubElement(self_dep, 'version').text = package.version | |||||
| self.generated_root = root_copy | self.generated_root = root_copy | ||||
| else: | else: | ||||
| @@ -120,14 +163,31 @@ class PackagePOM: | |||||
| ET.indent(tree) | ET.indent(tree) | ||||
| tree.write(f) | tree.write(f) | ||||
def get_property(self, prop: str):
    """Look up a single property in the raw POM.

    Searches the raw XML root for ``.//properties/<prop>`` using the
    module's namespace map.

    Returns:
        The matching element's text, or ``None`` when no such element
        exists.
    """
    node = self.raw_root.find(f'.//properties/{prop}', ns)
    return None if node is None else node.text
| def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': | |||||
def resolve_props(self, initial: dict[str, str]) -> dict[str, str]:
    """Build this POM's property table on top of inherited properties.

    Every child element of a ``properties`` tag in the raw POM is added to
    a copy of *initial*, overriding inherited entries with the same name.
    ``${...}`` references inside the values are then expanded with
    ``self.prop_replace`` until a full pass changes nothing.

    Args:
        initial: Inherited properties to start from. The mapping is
            copied and never modified.

    Returns:
        A new dict mapping property names to their expanded values.
    """
    # Copy first: the constructor passes in the parent's entry of
    # global_properties. Writing into that dict directly would leak this
    # POM's properties into the parent and into every sibling sharing it.
    props = dict(initial)

    for prop_tag in self.raw_root.findall('.//properties/*', ns):
        # ElementTree reports tags as '{namespace-uri}name'; strip the
        # default-namespace prefix so only the bare property name is left.
        prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '')
        value = prop_tag.text if prop_tag.text is not None else ''
        logger.debug(f'{self._package}: Setting prop {prop}={value}')
        props[prop] = value

    # Expand references until nothing changes. A chain of references
    # without a cycle is at most len(props) long, so more passes than that
    # means a self-referencing or circular definition; stop instead of
    # looping forever.
    max_passes = len(props) + 1
    for _ in range(max_passes):
        changed = False
        # Only existing keys are reassigned, so the dict may be updated
        # while it is being iterated.
        for prop, value in props.items():
            new_value = self.prop_replace(value, props)
            if new_value != value:
                changed = True
                logger.debug(f'{self._package}: Setting prop {prop}={new_value}')
                props[prop] = new_value
        if not changed:
            break
    else:
        logger.error(
            f'{self._package}: Properties did not settle after {max_passes} passes. '
            'Possible circular property reference'
        )

    return props
| def prop_replace(self, text, props: dict[str, str] | None = None) -> str: | |||||
| def lookup_prop(match) -> str: | def lookup_prop(match) -> str: | ||||
| prop = match.group(1) | prop = match.group(1) | ||||
| @@ -137,24 +197,27 @@ class PackagePOM: | |||||
| value = str(self._package.artifactId) | value = str(self._package.artifactId) | ||||
| elif prop == 'project.version': | elif prop == 'project.version': | ||||
| value = str(self._package.version) | value = str(self._package.version) | ||||
| elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'): | |||||
| value = '' | |||||
| else: | else: | ||||
| value = self.get_property(prop) | |||||
| logger.debug(f'{self._package}: Trying to recurse prop {value}') | |||||
| value = prop_replace(value) | |||||
| try: | |||||
| value = props[prop] if props is not None else self.properties[prop] | |||||
| except KeyError: | |||||
| logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""') | |||||
| value = '' | |||||
| logger.debug(f'{self._package}: Replacing property {prop} with {value}') | logger.debug(f'{self._package}: Replacing property {prop} with {value}') | ||||
| return value | return value | ||||
| def prop_replace(text) -> str: | |||||
| logger.debug(f'{self._package}: Getting prop {text}') | |||||
| return re.sub( | |||||
| r'\$\{([^\}]*)\}', | |||||
| lookup_prop, | |||||
| text, | |||||
| ) | |||||
| return re.sub( | |||||
| r'\$\{([^\}]*)\}', | |||||
| lookup_prop, | |||||
| text, | |||||
| ) | |||||
| def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': | |||||
| def prop_replace_tag(tag) -> str: | def prop_replace_tag(tag) -> str: | ||||
| return prop_replace( | |||||
| return self.prop_replace( | |||||
| elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', | elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', | ||||
| ) | ) | ||||
| @@ -319,30 +382,51 @@ async def download(package: Package, queue: asyncio.Queue) -> None: | |||||
| async with done_lock: | async with done_lock: | ||||
| skip = str(package) in done | skip = str(package) in done | ||||
| async with in_progress_lock: | |||||
| skip = skip or (str(package) in in_progress) | |||||
| if skip: | if skip: | ||||
| logger.info(f'{package}: Already downloaded. Skipping.') | logger.info(f'{package}: Already downloaded. Skipping.') | ||||
| elif await package.verify(): | |||||
| async with done_lock: | |||||
| done.add(str(package)) | |||||
| else: | |||||
| async with in_progress_lock: | |||||
| in_progress.add(str(package)) | |||||
| pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}' | |||||
| pom_path = pom_dir / 'pom.xml' | |||||
| if await package.verify(): | |||||
| pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}' | |||||
| pom_path = pom_dir / 'pom.xml' | |||||
| pom_dir.mkdir(exist_ok=True) | |||||
| pom_dir.mkdir(exist_ok=True) | |||||
| pom = await package.pom | |||||
| pom = await package.pom | |||||
| if not pom: | |||||
| return | |||||
| if not pom: | |||||
| return | |||||
| pom.write(pom_path) | |||||
| logger.info(f'{package}: Downloaded') | |||||
| pom.write(pom_path) | |||||
| logger.info(f'{package}: Downloaded') | |||||
| if not pom.is_bom: | |||||
| for dep in pom.dependency_management: | |||||
| logger.info(f'{package}: Handling transitive dependency {dep}') | |||||
| await queue.put(dep) | |||||
| if not pom.is_bom: | |||||
| for dep in pom.dependency_management: | |||||
| logger.info(f'{package}: Handling transitive dependency {dep}') | |||||
| await queue.put(dep) | |||||
| async with done_lock: | |||||
| logger.debug(f'{package}: Marking done') | |||||
| p = copy.copy(package) | |||||
| p.version = None | |||||
| done.add(str(package)) | |||||
| done.add(str(p)) | |||||
| async with in_progress_lock: | |||||
| if str(package) in in_progress: | |||||
| in_progress.remove(str(package)) | |||||
| else: | |||||
| p = copy.copy(package) | |||||
| p.version = None | |||||
| if str(p) in in_progress: | |||||
| in_progress.remove(str(p)) | |||||
| else: | |||||
| logger.warning(f'{package}: Package is done, but not marked as in progress') | |||||
| async def worker(queue: asyncio.Queue) -> None: | async def worker(queue: asyncio.Queue) -> None: | ||||
| while True: | while True: | ||||
| @@ -355,6 +439,19 @@ async def worker(queue: asyncio.Queue) -> None: | |||||
| except TooManyRequestsException: | except TooManyRequestsException: | ||||
| logger.info('Too many requests. Delaying next attempt') | logger.info('Too many requests. Delaying next attempt') | ||||
| await asyncio.sleep(3*random.random() + 0.2) | await asyncio.sleep(3*random.random() + 0.2) | ||||
| except WaitForPackage as e: | |||||
| logger.info(f'{package}: Waiting for {e.package}') | |||||
| await queue.put(e.package) | |||||
| await queue.put(package) | |||||
| break | |||||
| except PackageError: | |||||
| logger.exception(f'{package}: Error while processing package') | |||||
| break | |||||
| except Exception: | |||||
| logger.exception(f'{package}: Unknown error while processing package') | |||||
| logger.error(global_properties) | |||||
| break | |||||
| queue.task_done() | queue.task_done() | ||||