diff --git a/docker-compose.yml b/docker-compose.yml index b60fd6b..d33df49 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,24 +10,13 @@ services: volumes: - ./repo/reposilite.json:/reposilite.json - ./data:/app/data - # Network added in docker-compose.override.yml - networks: - - nonet ports: - "9001:80" restart: unless-stopped - maven-sync-job: + + sync-job: build: ./sync depends_on: - repo volumes: - - ./poms:/poms - networks: - - nonet - extra_hosts: - - "repo.maven.apache.org:0.0.0.0" - - "registry.quarkus.io:0.0.0.0" - -networks: - nonet: - internal: true + - ./package-list.txt:/package-list.txt diff --git a/run.sh b/run.sh index 3c11316..5484e1d 100755 --- a/run.sh +++ b/run.sh @@ -9,7 +9,7 @@ if [ ! -d ./data ]; then chown "$user:$group" ./data fi -USER_ID=$user GROUP_ID=$group docker-compose up --build --exit-code-from maven-sync-job +USER_ID=$user GROUP_ID=$group docker-compose up --build --exit-code-from sync-job SYNCRET=$? diff --git a/sync/.dockerignore b/sync/.dockerignore index 2b3a6a2..dcc4d72 100644 --- a/sync/.dockerignore +++ b/sync/.dockerignore @@ -1,4 +1,4 @@ ** -!settings.xml -!resolve-deps-from-poms.sh -!wait-for-it.sh +!resolve-deps.sh +!generate-gradle.py +!requirements.txt diff --git a/sync/Dockerfile b/sync/Dockerfile index 59d29c7..959bc8a 100644 --- a/sync/Dockerfile +++ b/sync/Dockerfile @@ -1,10 +1,13 @@ -FROM maven:3-openjdk-11 +FROM gradle:8-jdk11-jammy -RUN mkdir /poms && mkdir -p /root/.m2 +WORKDIR /workdir -COPY resolve-deps-from-poms.sh /resolve-deps-from-poms.sh -COPY settings.xml /root/.m2/settings.xml +RUN apt-get update && apt-get install -y python3-pip -VOLUME [ "/poms" ] +COPY resolve-deps.sh . +COPY generate-gradle.py . +COPY requirements.txt . -CMD [ "/resolve-deps-from-poms.sh" ] +RUN python3 -m pip install -r requirements.txt + +CMD [ "./resolve-deps.sh" ] diff --git a/sync/generate-gradle.py b/sync/generate-gradle.py new file mode 100755 index 0000000..79d7942 --- /dev/null +++ b/sync/generate-gradle.py @@ -0,0 +1,550 @@ +#!/bin/python3 + +import re +import copy +import random +import argparse +import logging +import asyncio +import subprocess +import copy +import aiohttp +from pathlib import Path +from xml.etree import ElementTree as ET + + +ns = {'': 'http://maven.apache.org/POM/4.0.0'} +ET.register_namespace('', ns['']) + +baseurl = 'https://search.maven.org' +base_pom_path = Path('poms') +mirrors = [ + "https://repo.maven.apache.org/maven2", + "https://repo1.maven.org/maven2", + "https://oss.sonatype.org/content/repositories/snapshots", + "https://packages.confluent.io/maven", + "https://registry.quarkus.io/maven", + "https://plugins.gradle.org/m2", +] + +done: set[str] = set() +done_lock = asyncio.Lock() +in_progress: set[str] = set() +in_progress_lock = asyncio.Lock() +gradle_packages: set[str] = set() +gradle_packages_lock = asyncio.Lock() + +global_properties: dict[str, dict[str, str]] = {} + + +class TooManyRequestsException(Exception): + pass + + +class PackageError(Exception): + pass + +class WaitForPackage(Exception): + def __init__(self, package): + self.package = package + + +class PackagePOM: + def __init__(self, package: 'Package', pom: str): + self._package = package + + logger.debug(f'{package}: Parsing POM') + self.raw_root = ET.fromstring(pom) + + self.parent: Package | None = None + + if (parent_tag := self.raw_root.find('parent', ns)) is not None: + parent_group_tag = parent_tag.find('groupId', ns) + parent_artifact_tag = parent_tag.find('artifactId', ns) + parent_version_tag = parent_tag.find('version', ns) + parent_group = parent_group_tag.text if parent_group_tag is not None else None + parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None + parent_version = parent_version_tag.text if parent_version_tag is not None else None + + logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}') + + if parent_group is not None and parent_artifact is not None and parent_version is not None: + parent = Package( + parent_group, + parent_artifact, + parent_version, + ) + + if str(parent) in done: + self.parent = parent + else: + raise WaitForPackage(parent) + else: + raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}') + + logger.debug(f'{package}: Parsing properties') + parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)] + self.properties = self.resolve_props(parent_props) + global_properties[str(package)] = self.properties + + logger.debug(f'{package}: Parsing packaging') + if (packaging := self.raw_root.find('packaging', ns)) is not None: + self.packaging = packaging.text + else: + self.packaging = '??' + + self.is_bom = self.packaging == 'pom' + + self.gradle_packages = [str(package)] + + if self.packaging == 'pom': + root_copy = copy.deepcopy(self.raw_root) + dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies') + + self.gradle_packages.extend( + [ + f'{dep.find("groupId").text}:{dep.find("artifactId").text}:{dep.find("version").text}' + for dep in dependencies.findall('dependency') + ] + ) + + logger.debug(f'{package}: POM parsed') + + def resolve_props(self, initial: dict[str, str]): + props = initial + + for prop_tag in self.raw_root.findall('.//properties/*', ns): + prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '') + value = prop_tag.text if prop_tag.text is not None else '' + logger.debug(f'{self._package}: Setting prop {prop}={value}') + props[prop] = value + + changed = True + while changed: + changed = False + + for prop, value in props.items(): + new_value = self.prop_replace(value, props) + + if new_value != value: + changed = True + logger.debug(f'{self._package}: Setting prop {prop}={new_value}') + props[prop] = new_value + + return props + + def prop_replace(self, text, props: dict[str, str] | None = None) -> str: + def lookup_prop(match) -> str: + prop = match.group(1) + + if prop == 'project.groupId': + value = str(self._package.groupId) + elif prop == 'project.artifactId': + value = str(self._package.artifactId) + elif prop == 'project.version': + value = str(self._package.version) + elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'): + value = '' + elif prop in ['project.basedir', 'basedir', 'user.home', 'debug.port']: + value = '' + else: + try: + value = props[prop] if props is not None else self.properties[prop] + except KeyError: + logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""') + value = '' + + logger.debug(f'{self._package}: Replacing property {prop} with {value}') + return value + + return re.sub( + r'\$\{([^\}]*)\}', + lookup_prop, + text, + ) + + def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': + def prop_replace_tag(tag) -> str: + return self.prop_replace( + elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', + ) + + return Package( + groupId=prop_replace_tag('groupId'), + artifactId=prop_replace_tag('artifactId'), + version=prop_replace_tag('version'), + ) + + @property + def dependency_management(self) -> list['Package']: + dependencies: list[Package] = [] + + for dep in self.raw_root.find('dependencyManagement/dependencies', ns) or []: + package = self._package_from_xml_dep(dep) + dependencies.append(package) + + return dependencies + + +class Package: + _pom: PackagePOM | None = None + _verified: bool = False + + def __init__(self, groupId: str, artifactId: str, version: str | None = None, implicit: bool = False): + self.groupId = groupId + self.artifactId = artifactId + self.version = version if version and not version.isspace() else None + self.implicit = implicit + + def __str__(self) -> str: + return f'{self.groupId}:{self.artifactId}:{self.version or "----"}' + + def __eq__(self, other) -> bool: + return ( + self.groupId == other.groupId + and self.artifactId == other.artifactId + and self.version == other.version + ) + + def __hash__(self) -> int: + return hash((self.groupId, self.artifactId, self.version)) + + @property + def dir_path(self): + group_path = self.groupId.replace(".", "/") + return f'{group_path}/{self.artifactId}/{self.version}' + + @property + def base_filename(self): + return f'{self.artifactId}-{self.version}' + + async def download_file(self, extension): + filepath = f'{self.dir_path}/{self.base_filename}.{extension}' + + async with aiohttp.ClientSession() as session: + for mirror in mirrors: + pom_url = f'{mirror}/{filepath}' + logger.debug(f'{self}: Downloading {extension} from {pom_url}') + + async with session.get(pom_url) as response: + if response.status == 200: + logger.debug(f'{self}: {extension} downloaded') + return await response.text() + break + elif response.status == 429: + raise TooManyRequestsException() + else: + logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}') + else: + logger.warning(f'{self}: File download of {extension} failed for all mirrors') + return None + + @property + async def pom(self) -> PackagePOM: + if self._pom is not None: + return self._pom + + if self.version is None: + await self._query_maven() + + self._pom = PackagePOM(self, await self.download_file('pom')) + + return self._pom + + @property + def _urlquery(self) -> str: + q = f'g:{self.groupId}+AND+a:{self.artifactId}' + + if self.version is not None: + q += f'+AND+v:{self.version}' + + return q + + async def _query_maven(self) -> None: + self._verified = False + + async with aiohttp.ClientSession() as session: + for mirror in mirrors: + url = f'{mirror}/{self.groupId.replace(".", "/")}/{self.artifactId}/maven-metadata.xml' + logger.debug(f'{self}: Querying maven at url {url}') + + async with session.get(url) as response: + if response.status == 200: + response_text = await response.text() + metadata = ET.fromstring(response_text) + + if metadata is not None: + logger.debug(f'{self}: Metadata found') + + if self.version is None: + release_tag = metadata.find('./versioning/release') + latest_tag = metadata.find('./versioning/latest') + version = release_tag.text if release_tag is not None else latest_tag.text if latest_tag is not None else None + + if version is not None: + logger.debug(f'{self}: Using newest version {version}') + self.version = version + self._verified = True + return + else: + logger.info(f'{self}: Could not find latest version in metadata from mirror {mirror}') + else: + if metadata.find(f'./versioning/versions/version[.="{self.version}"]') is not None: + logger.debug(f'{self}: Version {self.version} is valid') + self._verified = True + return + else: + logger.info(f'{self}: Could not find version {self.version} in metadata from mirror {mirror}') + else: + logger.warning('{self}: Invalid XML for maven metadata: {response_text}') + elif response.status == 429: + raise TooManyRequestsException() + else: + logger.info(f'{self}: HTTP error {response.status} downloading maven metadata from {url}') + else: + if self.implicit: + logger.info(f'{self}: Package not found in any mirror') + else: + logger.warning(f'{self}: Package not found in any mirror') + + async def verify(self) -> bool: + if not self._verified: + await self._query_maven() + return self._verified + + +def load_package_list(list_path: Path, queue: asyncio.Queue) -> None: + logger.info(f'Parsing {list_path}') + + with list_path.open('r') as f: + for line in f.readlines(): + sections = line.strip().split(':') + + if len(sections) < 2 or len(sections) > 3: + logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"') + continue + + package = Package( + sections[0], + sections[1], + sections[2] if len(sections) == 3 else None, + ) + + queue.put_nowait(package) + + continue + if not package.artifactId.endswith('-jvm'): + queue.put_nowait( + Package( + package.groupId, + f'{package.artifactId}-jvm', + package.version, + True, + ) + ) + +async def create_gradle_build() -> str: + async with gradle_packages_lock: + return """// Generated, do not edit +plugins { + kotlin("jvm") version "1.7.20" +} + +repositories { + maven { + url=uri("http://repo:80/releases") + isAllowInsecureProtocol=true + } +} + +val deps = listOf( + """ + ',\n '.join(f'"{dep}"' for dep in sorted(gradle_packages)) + """ +).map { + configurations.create(it.replace(':', '_')) to it +} + + +dependencies { + deps.forEach { (conf, dep) -> + conf(dep) + } +} + +tasks.register("downloadDependencies") { + doLast { + deps.forEach { (conf, dep) -> + conf.files.forEach { file -> + copy { + from(file) + into("data/") + } + } + } + } +} +""" + +async def create_gradle_settings() -> str: + return """// Generated, do not edit +rootProject.name = "gradle sync job" + +pluginManagement { + repositories { + maven { + url=uri("http://repo:80/releases") + isAllowInsecureProtocol=true + } + } +} +""" + +async def download(package: Package, queue: asyncio.Queue) -> None: + async with done_lock: + is_done = str(package) in done + + async with in_progress_lock: + is_in_progress = str(package) in in_progress + + if is_done: + logger.info(f'{package}: Already downloaded. Skipping.') + elif is_in_progress: + logger.info(f'{package}: Already in progress. Skipping.') + else: + async with in_progress_lock: + in_progress.add(str(package)) + + for _ in range(50): + try: + verified = await package.verify() + break + except TooManyRequestsException: + logger.info(f'{package}: Too many requests. Delaying next attempt') + await asyncio.sleep(3*random.random() + 0.2) + else: + logger.error(f'{package}: Verification failed after 50 tries') + exit(1) + + if verified: + for _ in range(50): + try: + pom = await package.pom + break + except TooManyRequestsException: + logger.info(f'{package}: Too many requests. Delaying next attempt') + await asyncio.sleep(3*random.random() + 0.2) + except WaitForPackage as e: + logger.info(f'{package}: Waiting for {e.package}') + + async with in_progress_lock: + if str(package) in in_progress: + in_progress.remove(str(package)) + + if str(e.package) not in in_progress: + await queue.put(e.package) + + await queue.put(package) + + return + else: + logger.error(f'{package}: POM parsing failed after 50 tries') + exit(1) + + if not pom: + logger.warn(f'{package}: No pom') + return + + async with gradle_packages_lock: + gradle_packages.update(pom.gradle_packages) + + if not pom.is_bom: + for dep in pom.dependency_management: + logger.info(f'{package}: Handling transitive dependency {dep}') + await queue.put(dep) + + async with done_lock: + logger.debug(f'{package}: Marking done') + p = copy.copy(package) + p.version = None + done.add(str(package)) + done.add(str(p)) + + async with in_progress_lock: + if str(package) in in_progress: + in_progress.remove(str(package)) + else: + p = copy.copy(package) + p.version = None + if str(p) in in_progress: + in_progress.remove(str(p)) + else: + logger.warning(f'{package}: Package is done, but not marked as in progress') + +async def worker(queue: asyncio.Queue) -> None: + while True: + package = await queue.get() + + while True: + try: + await download(package, queue) + break + except PackageError: + logger.exception(f'{package}: Error while processing package') + break + except Exception: + logger.exception(f'{package}: Unknown error while processing package') + break + + + queue.task_done() + + +async def main(package_list: Path, output_dir: Path, num_workers: int) -> None: + queue: asyncio.Queue = asyncio.Queue() + tasks = [] + + load_package_list(package_list, queue) + + logger.debug(f'Starting {num_workers} workers') + for i in range(num_workers): + tasks.append( + asyncio.create_task( + worker(queue) + ) + ) + + await queue.join() + + logger.debug('Queue is empty. Cancelling workers') + for task in tasks: + task.cancel() + + await asyncio.gather(*tasks, return_exceptions=True) + + logger.info('Generating build.gradle.kts') + (output_dir / 'build.gradle.kts').write_text(await create_gradle_build()) + + logger.info('Generating settings.gradle.kts') + (output_dir / 'settings.gradle.kts').write_text(await create_gradle_settings()) + +logger = logging.getLogger(__name__) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-w', '--workers', type=int, default=20) + parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0) + parser.add_argument('--output_dir', type=Path, help="The directory to put the generated gradle files in", default=Path('.'), required=False) + parser.add_argument('package_list', type=Path, help="The list of packages to download") + args = parser.parse_args() + + if args.verbosity == 0: + log_level = 'WARNING' + elif args.verbosity == 1: + log_level = 'INFO' + else: + log_level = 'DEBUG' + + logging.basicConfig(level=log_level) + + asyncio.run( + main(args.package_list, args.output_dir, args.workers) + ) diff --git a/sync/requirements.txt b/sync/requirements.txt new file mode 100644 index 0000000..9fe29a9 --- /dev/null +++ b/sync/requirements.txt @@ -0,0 +1 @@ +aiohttp==3.8.4 diff --git a/sync/resolve-deps-from-poms.sh b/sync/resolve-deps-from-poms.sh deleted file mode 100755 index 861276c..0000000 --- a/sync/resolve-deps-from-poms.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -usage () -{ - echo "USAGE: $0 [FLAGS]" - echo "Flags:" - echo " -t: Skip download of dependencies. Used for testing that a previous download succeded." - echo " -h: Print this message" -} - -is_reposilite_up () -{ - curl -s -o /dev/null repo:80 - return $? -} - -wait_for_reposilite () -{ - i=0 - until is_reposilite_up; do - i=$((i+1)) - - if [ $i -gt 30 ]; then - return $i - fi - - sleep 1 - done - - return $i -} - -should_download=1 - -while [ $# -ne 0 ]; do - case "$1" in - -t) should_download=0 ;; - -h) usage; exit 0 ;; - esac - shift -done - -handle_parameters -wait_for_reposilite - -if [ $? -lt 30 ]; then - if [ $should_download -eq 1 ]; then - for pom in /poms/*; do - mvn -P central \ - -Dos.detected.classifier=linux-x86_64 \ - -f $pom \ - dependency:go-offline \ - dependency:resolve -Dclassifier=javadoc - mvn -P central \ - -Dos.detected.classifier=linux-x86_64 \ - -f $pom \ - dependency:sources - done - fi - - echo "Generating quarkus project" - mvn -P central \ - io.quarkus.platform:quarkus-maven-plugin::create \ - -DprojectGroupId=com.example \ - -DprojectArtifactId=quarkus-test - - RET=$? - if [ $RET -eq 0 ]; then - cd quarkus-test - echo "Building quarkus project" - mvn package - RET=$? - fi - exit $RET -else - echo "Can't connect to repository" - exit 255 -fi diff --git a/sync/resolve-deps.sh b/sync/resolve-deps.sh new file mode 100755 index 0000000..7d67e81 --- /dev/null +++ b/sync/resolve-deps.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +is_reposilite_up () +{ + curl -s -o /dev/null repo:80 + return $? +} + +wait_for_reposilite () +{ + i=0 + until is_reposilite_up; do + i=$((i+1)) + + if [ $i -gt 30 ]; then + return $i + fi + + sleep 1 + done + + return $i +} + +# Main scipt + +if [[ ! -f /package-list.txt ]]; then + echo "No /package-list.txt file. Aborting" + exit 255 +fi + +echo "Resolving packages and generating gradle config" +./generate-gradle.py /package-list.txt + +echo "Check that reposilite is running" +wait_for_reposilite + +if [ $? -lt 30 ]; then + echo "Running gradle" + gradle downloadDependencies + exit 0 +else + echo "Can't connect to repository" + exit 255 +fi