From 27258ffee6280dc68bee2d5c556ff7fbe6e0bffd Mon Sep 17 00:00:00 2001 From: Sindre Stephansen Date: Tue, 7 Jun 2022 15:32:59 +0200 Subject: [PATCH] Add mirrors to pom fetch --- pom.xml | 1 + .../pom.xml | 14 +++++++ update-poms.py | 39 ++++++++++++++----- 3 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 poms/org.asciidoctor:asciidoctorj:2.5.4/pom.xml diff --git a/pom.xml b/pom.xml index e642f3c..9073b95 100644 --- a/pom.xml +++ b/pom.xml @@ -20,6 +20,7 @@ poms/org.apache.maven.plugins:maven-failsafe-plugin:3.0.0-M7/pom.xml poms/org.apache.maven.plugins:maven-shade-plugin:3.3.0/pom.xml poms/org.apache.maven.plugins:maven-surefire-plugin:3.0.0-M7/pom.xml + poms/org.asciidoctor:asciidoctorj:2.5.4/pom.xml poms/org.asciidoctor:asciidoctorj-api:2.5.4/pom.xml poms/org.asciidoctor:asciidoctorj-diagram:2.2.3/pom.xml poms/org.asciidoctor:asciidoctorj-diagram-plantuml:1.2022.5/pom.xml diff --git a/poms/org.asciidoctor:asciidoctorj:2.5.4/pom.xml b/poms/org.asciidoctor:asciidoctorj:2.5.4/pom.xml new file mode 100644 index 0000000..e49bfb7 --- /dev/null +++ b/poms/org.asciidoctor:asciidoctorj:2.5.4/pom.xml @@ -0,0 +1,14 @@ + + 4.0.0 + tmp.org.asciidoctor + placeholder-asciidoctorj + 2.5.4 + Package asciidoctorj + + + org.asciidoctor + asciidoctorj + 2.5.4 + + + \ No newline at end of file diff --git a/update-poms.py b/update-poms.py index 96a6d20..1a95c07 100755 --- a/update-poms.py +++ b/update-poms.py @@ -15,6 +15,15 @@ ET.register_namespace('', 'http://maven.apache.org/POM/4.0.0') baseurl = 'https://search.maven.org' base_pom_path = Path('poms') +mirrors = [ + "https://repo.maven.apache.org/maven2", + "https://repo1.maven.org/maven2", + "https://oss.sonatype.org/content/repositories/snapshots", + "https://packages.confluent.io/maven", + "https://registry.quarkus.io/maven", + "https://plugins.gradle.org/m2", +] + done: set[str] = set() done_lock = asyncio.Lock() num_workers = 50 @@ -67,7 +76,6 @@ class PackagePOM: def get_property(self, prop: str): elem = self.raw_root.find(f'.//properties/{prop}') - import pdb; pdb.set_trace() if elem is not None: return elem.text else: @@ -141,17 +149,21 @@ class Package: group_path = self.groupId.replace(".", "/") pom_path = f'{self.artifactId}-{self.version}.pom' filepath = f'{group_path}/{self.artifactId}/{self.version}/{pom_path}' - pom_url = f'{baseurl}/remotecontent?filepath={filepath}' - - logger.debug(f'{self}: Downloading pom from {pom_url}') async with aiohttp.ClientSession() as session: - async with session.get(pom_url) as response: - if response.status == 200: - logger.debug(f'{self}: POM downloaded') - self._pom = PackagePOM(self, await response.text()) - else: - logger.warning(f'{self}: HTTP error {response.status} downloading pom') + for mirror in mirrors: + pom_url = f'{mirror}/{filepath}' + logger.debug(f'{self}: Downloading pom from {pom_url}') + + async with session.get(pom_url) as response: + if response.status == 200: + logger.debug(f'{self}: POM downloaded') + self._pom = PackagePOM(self, await response.text()) + break + else: + logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}') + else: + logger.warning(f'{self}: Failed for all mirrors') return self._pom @@ -258,6 +270,7 @@ async def main() -> None: load_package_list(Path('package-list.txt'), queue) + logger.debug(f'Starting {num_workers} workers') for i in range(num_workers): tasks.append( asyncio.create_task( @@ -266,11 +279,14 @@ async def main() -> None: ) await queue.join() + + logger.debug('Queue is empty. Cancelling workers') for task in tasks: task.cancel() await asyncio.gather(*tasks, return_exceptions=True) + logger.info('Generating master POM') subprocess.call(['sh', 'generate_master_pom.sh']) @@ -278,6 +294,7 @@ logger = logging.getLogger(__name__) if __name__ == '__main__': parser = argparse.ArgumentParser() + parser.add_argument('-w', '--workers', type=int, default=num_workers) parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0) args = parser.parse_args() @@ -290,4 +307,6 @@ if __name__ == '__main__': logging.basicConfig(level=log_level) + num_workers = args.workers + asyncio.run(main())