From 50ab152fe4a518320fe7ef820db0940f2531ae0f Mon Sep 17 00:00:00 2001 From: Sindre Stephansen Date: Wed, 28 Dec 2022 16:13:21 +0100 Subject: [PATCH] Improve property handling --- update-poms.py | 177 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 137 insertions(+), 40 deletions(-) diff --git a/update-poms.py b/update-poms.py index 2fc4007..b60f908 100755 --- a/update-poms.py +++ b/update-poms.py @@ -1,6 +1,7 @@ #!/bin/python3 import re +import copy import random import argparse import logging @@ -28,13 +29,25 @@ mirrors = [ done: set[str] = set() done_lock = asyncio.Lock() +in_progress: set[str] = set() +in_progress_lock = asyncio.Lock() num_workers = 50 +global_properties: dict[str, dict[str, str]] = {} + class TooManyRequestsException(Exception): pass +class PackageError(Exception): + pass + +class WaitForPackage(Exception): + def __init__(self, package): + self.package = package + + class PackagePOM: def __init__(self, package: 'Package', pom: str): self._package = package @@ -42,6 +55,38 @@ class PackagePOM: logger.debug(f'{package}: Parsing POM') self.raw_root = ET.fromstring(pom) + self.parent: Package | None = None + + if (parent_tag := self.raw_root.find('parent', ns)) is not None: + parent_group_tag = parent_tag.find('groupId', ns) + parent_artifact_tag = parent_tag.find('artifactId', ns) + parent_version_tag = parent_tag.find('version', ns) + parent_group = parent_group_tag.text if parent_group_tag is not None else None + parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None + parent_version = parent_version_tag.text if parent_version_tag is not None else None + + logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}') + + if parent_group is not None and parent_artifact is not None and parent_version is not None: + parent = Package( + parent_group, + parent_artifact, + parent_version, + ) + + if str(parent) in done: + self.parent = parent + else: + raise WaitForPackage(parent) + else: + raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}') + + logger.debug(f'{package}: Parsing properties') + parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)] + self.properties = self.resolve_props(parent_props) + global_properties[str(package)] = self.properties + + logger.debug(f'{package}: Parsing packaging') if (packaging := self.raw_root.find('packaging', ns)) is not None: self.packaging = packaging.text else: @@ -79,13 +124,11 @@ class PackagePOM: ET.SubElement(root_copy, 'version').text = tmpVersion # Add a dependency for the pom itself - if (dependencies := root_copy.find('dependencies', ns)) is not None: - self_dep = ET.SubElement(dependencies, 'dependency') - ET.SubElement(self_dep, 'groupId').text = package.groupId - ET.SubElement(self_dep, 'artifactId').text = package.artifactId - ET.SubElement(self_dep, 'version').text = package.version - else: - logger.warning(f"{package}: No dependencies tag in pom") + dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies') + self_dep = ET.SubElement(dependencies, 'dependency') + ET.SubElement(self_dep, 'groupId').text = package.groupId + ET.SubElement(self_dep, 'artifactId').text = package.artifactId + ET.SubElement(self_dep, 'version').text = package.version self.generated_root = root_copy else: @@ -120,14 +163,31 @@ class PackagePOM: ET.indent(tree) tree.write(f) - def get_property(self, prop: str): - elem = self.raw_root.find(f'.//properties/{prop}', ns) - if elem is not None: - return elem.text - else: - return None - def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': + def resolve_props(self, initial: dict[str, str]): + props = initial + + for prop_tag in self.raw_root.findall('.//properties/*', ns): + prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '') + value = prop_tag.text if prop_tag.text is not None else '' + logger.debug(f'{self._package}: Setting prop {prop}={value}') + props[prop] = value + + changed = True + while changed: + changed = False + + for prop, value in props.items(): + new_value = self.prop_replace(value, props) + + if new_value != value: + changed = True + logger.debug(f'{self._package}: Setting prop {prop}={new_value}') + props[prop] = new_value + + return props + + def prop_replace(self, text, props: dict[str, str] | None = None) -> str: def lookup_prop(match) -> str: prop = match.group(1) @@ -137,24 +197,27 @@ class PackagePOM: value = str(self._package.artifactId) elif prop == 'project.version': value = str(self._package.version) + elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'): + value = '' else: - value = self.get_property(prop) - logger.debug(f'{self._package}: Trying to recurse prop {value}') - value = prop_replace(value) + try: + value = props[prop] if props is not None else self.properties[prop] + except KeyError: + logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""') + value = '' logger.debug(f'{self._package}: Replacing property {prop} with {value}') return value - def prop_replace(text) -> str: - logger.debug(f'{self._package}: Getting prop {text}') - return re.sub( - r'\$\{([^\}]*)\}', - lookup_prop, - text, - ) + return re.sub( + r'\$\{([^\}]*)\}', + lookup_prop, + text, + ) + def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': def prop_replace_tag(tag) -> str: - return prop_replace( + return self.prop_replace( elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', ) @@ -319,30 +382,51 @@ async def download(package: Package, queue: asyncio.Queue) -> None: async with done_lock: skip = str(package) in done + async with in_progress_lock: + skip = skip or (str(package) in in_progress) + if skip: logger.info(f'{package}: Already downloaded. Skipping.') - elif await package.verify(): - async with done_lock: - done.add(str(package)) + else: + async with in_progress_lock: + in_progress.add(str(package)) - pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}' - pom_path = pom_dir / 'pom.xml' + if await package.verify(): + pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}' + pom_path = pom_dir / 'pom.xml' - pom_dir.mkdir(exist_ok=True) + pom_dir.mkdir(exist_ok=True) - pom = await package.pom + pom = await package.pom - if not pom: - return + if not pom: + return - pom.write(pom_path) - logger.info(f'{package}: Downloaded') + pom.write(pom_path) + logger.info(f'{package}: Downloaded') - if not pom.is_bom: - for dep in pom.dependency_management: - logger.info(f'{package}: Handling transitive dependency {dep}') - await queue.put(dep) + if not pom.is_bom: + for dep in pom.dependency_management: + logger.info(f'{package}: Handling transitive dependency {dep}') + await queue.put(dep) + async with done_lock: + logger.debug(f'{package}: Marking done') + p = copy.copy(package) + p.version = None + done.add(str(package)) + done.add(str(p)) + + async with in_progress_lock: + if str(package) in in_progress: + in_progress.remove(str(package)) + else: + p = copy.copy(package) + p.version = None + if str(p) in in_progress: + in_progress.remove(str(p)) + else: + logger.warning(f'{package}: Package is done, but not marked as in progress') async def worker(queue: asyncio.Queue) -> None: while True: @@ -355,6 +439,19 @@ async def worker(queue: asyncio.Queue) -> None: except TooManyRequestsException: logger.info('Too many requests. Delaying next attempt') await asyncio.sleep(3*random.random() + 0.2) + except WaitForPackage as e: + logger.info(f'{package}: Waiting for {e.package}') + await queue.put(e.package) + await queue.put(package) + break + except PackageError: + logger.exception(f'{package}: Error while processing package') + break + except Exception: + logger.exception(f'{package}: Unknown error while processing package') + logger.error(global_properties) + break + queue.task_done()