Browse source

Change the sync job to use gradle

wip/gradle-reposilite
Sindre Stephansen 2 years ago
parent
commit
0db241e723
Signed by: sindre <sindre@sindrestephansen.com> GPG key ID: B06FC67D17A46ADE
8 changed files with 612 additions and 102 deletions
  1. docker-compose.yml  +3 -14
  2. run.sh  +1 -1
  3. sync/.dockerignore  +3 -3
  4. sync/Dockerfile  +9 -6
  5. sync/generate-gradle.py  +550 -0
  6. sync/requirements.txt  +1 -0
  7. sync/resolve-deps-from-poms.sh  +0 -78
  8. sync/resolve-deps.sh  +45 -0

+3 -14  docker-compose.yml  View file

@@ -10,24 +10,13 @@ services:
volumes:
- ./repo/reposilite.json:/reposilite.json
- ./data:/app/data
# Network added in docker-compose.override.yml
networks:
- nonet
ports:
- "9001:80"
restart: unless-stopped
maven-sync-job:

sync-job:
build: ./sync
depends_on:
- repo
volumes:
- ./poms:/poms
networks:
- nonet
extra_hosts:
- "repo.maven.apache.org:0.0.0.0"
- "registry.quarkus.io:0.0.0.0"

networks:
nonet:
internal: true
- ./package-list.txt:/package-list.txt
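
The renamed sync-job now mounts a plain-text package list instead of the POM directory. A minimal sketch of such a file, in the groupId:artifactId[:version] format that generate-gradle.py expects (the coordinates below are only illustrative):

    io.quarkus.platform:quarkus-bom:3.2.0.Final
    org.apache.kafka:kafka-clients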

+1 -1  run.sh  View file

@@ -9,7 +9,7 @@ if [ ! -d ./data ]; then
chown "$user:$group" ./data
fi

USER_ID=$user GROUP_ID=$group docker-compose up --build --exit-code-from maven-sync-job
USER_ID=$user GROUP_ID=$group docker-compose up --build --exit-code-from sync-job

SYNCRET=$?
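
Only the service name passed to --exit-code-from changes here; assuming the rest of run.sh still propagates $SYNCRET as before, the wrapper is invoked the same way:

    ./run.sh        # builds the images and runs the sync job
    echo $?         # mirrors the sync-job container's exit status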



+3 -3  sync/.dockerignore  View file

@@ -1,4 +1,4 @@
**
!settings.xml
!resolve-deps-from-poms.sh
!wait-for-it.sh
!resolve-deps.sh
!generate-gradle.py
!requirements.txt

+9 -6  sync/Dockerfile  View file

@@ -1,10 +1,13 @@
FROM maven:3-openjdk-11
FROM gradle:8-jdk11-jammy

RUN mkdir /poms && mkdir -p /root/.m2
WORKDIR /workdir

COPY resolve-deps-from-poms.sh /resolve-deps-from-poms.sh
COPY settings.xml /root/.m2/settings.xml
RUN apt-get update && apt-get install -y python3-pip

VOLUME [ "/poms" ]
COPY resolve-deps.sh .
COPY generate-gradle.py .
COPY requirements.txt .

CMD [ "/resolve-deps-from-poms.sh" ]
RUN python3 -m pip install -r requirements.txt

CMD [ "./resolve-deps.sh" ]

+550 -0  sync/generate-gradle.py  View file

@@ -0,0 +1,550 @@
#!/bin/python3

import re
import copy
import random
import argparse
import logging
import asyncio
import subprocess
import aiohttp
from pathlib import Path
from xml.etree import ElementTree as ET


ns = {'': 'http://maven.apache.org/POM/4.0.0'}
ET.register_namespace('', ns[''])

baseurl = 'https://search.maven.org'
base_pom_path = Path('poms')
mirrors = [
"https://repo.maven.apache.org/maven2",
"https://repo1.maven.org/maven2",
"https://oss.sonatype.org/content/repositories/snapshots",
"https://packages.confluent.io/maven",
"https://registry.quarkus.io/maven",
"https://plugins.gradle.org/m2",
]

done: set[str] = set()
done_lock = asyncio.Lock()
in_progress: set[str] = set()
in_progress_lock = asyncio.Lock()
gradle_packages: set[str] = set()
gradle_packages_lock = asyncio.Lock()

global_properties: dict[str, dict[str, str]] = {}


class TooManyRequestsException(Exception):
pass


class PackageError(Exception):
pass

class WaitForPackage(Exception):
def __init__(self, package):
self.package = package


class PackagePOM:
def __init__(self, package: 'Package', pom: str):
self._package = package

logger.debug(f'{package}: Parsing POM')
self.raw_root = ET.fromstring(pom)

self.parent: Package | None = None

if (parent_tag := self.raw_root.find('parent', ns)) is not None:
parent_group_tag = parent_tag.find('groupId', ns)
parent_artifact_tag = parent_tag.find('artifactId', ns)
parent_version_tag = parent_tag.find('version', ns)
parent_group = parent_group_tag.text if parent_group_tag is not None else None
parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None
parent_version = parent_version_tag.text if parent_version_tag is not None else None

logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}')

if parent_group is not None and parent_artifact is not None and parent_version is not None:
parent = Package(
parent_group,
parent_artifact,
parent_version,
)

if str(parent) in done:
self.parent = parent
else:
raise WaitForPackage(parent)
else:
raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}')

logger.debug(f'{package}: Parsing properties')
parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)]
self.properties = self.resolve_props(parent_props)
global_properties[str(package)] = self.properties

logger.debug(f'{package}: Parsing packaging')
if (packaging := self.raw_root.find('packaging', ns)) is not None:
self.packaging = packaging.text
else:
self.packaging = '??'

self.is_bom = self.packaging == 'pom'

self.gradle_packages = [str(package)]

if self.packaging == 'pom':
root_copy = copy.deepcopy(self.raw_root)
dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies')

self.gradle_packages.extend(
[
f'{dep.find("groupId").text}:{dep.find("artifactId").text}:{dep.find("version").text}'
for dep in dependencies.findall('dependency')
]
)

logger.debug(f'{package}: POM parsed')

def resolve_props(self, initial: dict[str, str]):
props = initial

for prop_tag in self.raw_root.findall('.//properties/*', ns):
prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '')
value = prop_tag.text if prop_tag.text is not None else ''
logger.debug(f'{self._package}: Setting prop {prop}={value}')
props[prop] = value

changed = True
while changed:
changed = False

for prop, value in props.items():
new_value = self.prop_replace(value, props)

if new_value != value:
changed = True
logger.debug(f'{self._package}: Setting prop {prop}={new_value}')
props[prop] = new_value

return props

def prop_replace(self, text, props: dict[str, str] | None = None) -> str:
def lookup_prop(match) -> str:
prop = match.group(1)

if prop == 'project.groupId':
value = str(self._package.groupId)
elif prop == 'project.artifactId':
value = str(self._package.artifactId)
elif prop == 'project.version':
value = str(self._package.version)
elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'):
value = ''
elif prop in ['project.basedir', 'basedir', 'user.home', 'debug.port']:
value = ''
else:
try:
value = props[prop] if props is not None else self.properties[prop]
except KeyError:
logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""')
value = ''

logger.debug(f'{self._package}: Replacing property {prop} with {value}')
return value

return re.sub(
r'\$\{([^\}]*)\}',
lookup_prop,
text,
)

def _package_from_xml_dep(self, dep: ET.Element) -> 'Package':
def prop_replace_tag(tag) -> str:
return self.prop_replace(
elem.text or '' if (elem := dep.find(tag, ns)) is not None else '',
)

return Package(
groupId=prop_replace_tag('groupId'),
artifactId=prop_replace_tag('artifactId'),
version=prop_replace_tag('version'),
)

@property
def dependency_management(self) -> list['Package']:
dependencies: list[Package] = []

for dep in self.raw_root.find('dependencyManagement/dependencies', ns) or []:
package = self._package_from_xml_dep(dep)
dependencies.append(package)

return dependencies


class Package:
_pom: PackagePOM | None = None
_verified: bool = False

def __init__(self, groupId: str, artifactId: str, version: str | None = None, implicit: bool = False):
self.groupId = groupId
self.artifactId = artifactId
self.version = version if version and not version.isspace() else None
self.implicit = implicit

def __str__(self) -> str:
return f'{self.groupId}:{self.artifactId}:{self.version or "----"}'

def __eq__(self, other) -> bool:
return (
self.groupId == other.groupId
and self.artifactId == other.artifactId
and self.version == other.version
)

def __hash__(self) -> int:
return hash((self.groupId, self.artifactId, self.version))

@property
def dir_path(self):
group_path = self.groupId.replace(".", "/")
return f'{group_path}/{self.artifactId}/{self.version}'

@property
def base_filename(self):
return f'{self.artifactId}-{self.version}'

async def download_file(self, extension):
filepath = f'{self.dir_path}/{self.base_filename}.{extension}'

async with aiohttp.ClientSession() as session:
for mirror in mirrors:
pom_url = f'{mirror}/{filepath}'
logger.debug(f'{self}: Downloading {extension} from {pom_url}')

async with session.get(pom_url) as response:
if response.status == 200:
logger.debug(f'{self}: {extension} downloaded')
return await response.text()
break
elif response.status == 429:
raise TooManyRequestsException()
else:
logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}')
else:
logger.warning(f'{self}: File download of {extension} failed for all mirrors')
return None

@property
async def pom(self) -> PackagePOM:
if self._pom is not None:
return self._pom

if self.version is None:
await self._query_maven()

self._pom = PackagePOM(self, await self.download_file('pom'))

return self._pom

@property
def _urlquery(self) -> str:
q = f'g:{self.groupId}+AND+a:{self.artifactId}'

if self.version is not None:
q += f'+AND+v:{self.version}'

return q

async def _query_maven(self) -> None:
self._verified = False

async with aiohttp.ClientSession() as session:
for mirror in mirrors:
url = f'{mirror}/{self.groupId.replace(".", "/")}/{self.artifactId}/maven-metadata.xml'
logger.debug(f'{self}: Querying maven at url {url}')

async with session.get(url) as response:
if response.status == 200:
response_text = await response.text()
metadata = ET.fromstring(response_text)

if metadata is not None:
logger.debug(f'{self}: Metadata found')

if self.version is None:
release_tag = metadata.find('./versioning/release')
latest_tag = metadata.find('./versioning/latest')
version = release_tag.text if release_tag is not None else latest_tag.text if latest_tag is not None else None

if version is not None:
logger.debug(f'{self}: Using newest version {version}')
self.version = version
self._verified = True
return
else:
logger.info(f'{self}: Could not find latest version in metadata from mirror {mirror}')
else:
if metadata.find(f'./versioning/versions/version[.="{self.version}"]') is not None:
logger.debug(f'{self}: Version {self.version} is valid')
self._verified = True
return
else:
logger.info(f'{self}: Could not find version {self.version} in metadata from mirror {mirror}')
else:
logger.warning(f'{self}: Invalid XML for maven metadata: {response_text}')
elif response.status == 429:
raise TooManyRequestsException()
else:
logger.info(f'{self}: HTTP error {response.status} downloading maven metadata from {url}')
else:
if self.implicit:
logger.info(f'{self}: Package not found in any mirror')
else:
logger.warning(f'{self}: Package not found in any mirror')

async def verify(self) -> bool:
if not self._verified:
await self._query_maven()
return self._verified


def load_package_list(list_path: Path, queue: asyncio.Queue) -> None:
logger.info(f'Parsing {list_path}')

with list_path.open('r') as f:
for line in f.readlines():
sections = line.strip().split(':')

if len(sections) < 2 or len(sections) > 3:
logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"')
continue

package = Package(
sections[0],
sections[1],
sections[2] if len(sections) == 3 else None,
)

queue.put_nowait(package)

continue
if not package.artifactId.endswith('-jvm'):
queue.put_nowait(
Package(
package.groupId,
f'{package.artifactId}-jvm',
package.version,
True,
)
)

async def create_gradle_build() -> str:
async with gradle_packages_lock:
return """// Generated, do not edit
plugins {
kotlin("jvm") version "1.7.20"
}

repositories {
maven {
url=uri("http://repo:80/releases")
isAllowInsecureProtocol=true
}
}

val deps = listOf(
""" + ',\n '.join(f'"{dep}"' for dep in sorted(gradle_packages)) + """
).map {
configurations.create(it.replace(':', '_')) to it
}


dependencies {
deps.forEach { (conf, dep) ->
conf(dep)
}
}

tasks.register("downloadDependencies") {
doLast {
deps.forEach { (conf, dep) ->
conf.files.forEach { file ->
copy {
from(file)
into("data/")
}
}
}
}
}
"""

async def create_gradle_settings() -> str:
return """// Generated, do not edit
rootProject.name = "gradle sync job"

pluginManagement {
repositories {
maven {
url=uri("http://repo:80/releases")
isAllowInsecureProtocol=true
}
}
}
"""

async def download(package: Package, queue: asyncio.Queue) -> None:
async with done_lock:
is_done = str(package) in done

async with in_progress_lock:
is_in_progress = str(package) in in_progress

if is_done:
logger.info(f'{package}: Already downloaded. Skipping.')
elif is_in_progress:
logger.info(f'{package}: Already in progress. Skipping.')
else:
async with in_progress_lock:
in_progress.add(str(package))

for _ in range(50):
try:
verified = await package.verify()
break
except TooManyRequestsException:
logger.info(f'{package}: Too many requests. Delaying next attempt')
await asyncio.sleep(3*random.random() + 0.2)
else:
logger.error(f'{package}: Verification failed after 50 tries')
exit(1)

if verified:
for _ in range(50):
try:
pom = await package.pom
break
except TooManyRequestsException:
logger.info(f'{package}: Too many requests. Delaying next attempt')
await asyncio.sleep(3*random.random() + 0.2)
except WaitForPackage as e:
logger.info(f'{package}: Waiting for {e.package}')

async with in_progress_lock:
if str(package) in in_progress:
in_progress.remove(str(package))

if str(e.package) not in in_progress:
await queue.put(e.package)

await queue.put(package)

return
else:
logger.error(f'{package}: POM parsing failed after 50 tries')
exit(1)

if not pom:
logger.warning(f'{package}: No pom')
return

async with gradle_packages_lock:
gradle_packages.update(pom.gradle_packages)

if not pom.is_bom:
for dep in pom.dependency_management:
logger.info(f'{package}: Handling transitive dependency {dep}')
await queue.put(dep)

async with done_lock:
logger.debug(f'{package}: Marking done')
p = copy.copy(package)
p.version = None
done.add(str(package))
done.add(str(p))

async with in_progress_lock:
if str(package) in in_progress:
in_progress.remove(str(package))
else:
p = copy.copy(package)
p.version = None
if str(p) in in_progress:
in_progress.remove(str(p))
else:
logger.warning(f'{package}: Package is done, but not marked as in progress')

async def worker(queue: asyncio.Queue) -> None:
while True:
package = await queue.get()

while True:
try:
await download(package, queue)
break
except PackageError:
logger.exception(f'{package}: Error while processing package')
break
except Exception:
logger.exception(f'{package}: Unknown error while processing package')
break


queue.task_done()


async def main(package_list: Path, output_dir: Path, num_workers: int) -> None:
queue: asyncio.Queue = asyncio.Queue()
tasks = []

load_package_list(package_list, queue)

logger.debug(f'Starting {num_workers} workers')
for i in range(num_workers):
tasks.append(
asyncio.create_task(
worker(queue)
)
)

await queue.join()

logger.debug('Queue is empty. Cancelling workers')
for task in tasks:
task.cancel()

await asyncio.gather(*tasks, return_exceptions=True)

logger.info('Generating build.gradle.kts')
(output_dir / 'build.gradle.kts').write_text(await create_gradle_build())

logger.info('Generating settings.gradle.kts')
(output_dir / 'settings.gradle.kts').write_text(await create_gradle_settings())

logger = logging.getLogger(__name__)

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--workers', type=int, default=20)
parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
parser.add_argument('--output_dir', type=Path, help="The directory to put the generated gradle files in", default=Path('.'), required=False)
parser.add_argument('package_list', type=Path, help="The list of packages to download")
args = parser.parse_args()

if args.verbosity == 0:
log_level = 'WARNING'
elif args.verbosity == 1:
log_level = 'INFO'
else:
log_level = 'DEBUG'

logging.basicConfig(level=log_level)

asyncio.run(
main(args.package_list, args.output_dir, args.workers)
)
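
Going by the argparse block above, the generator can also be run by hand outside the container; a minimal sketch with illustrative paths:

    python3 -m pip install -r sync/requirements.txt
    python3 sync/generate-gradle.py -vv --output_dir sync/ package-list.txt
    # writes build.gradle.kts and settings.gradle.kts into sync/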

+1 -0  sync/requirements.txt  View file

@@ -0,0 +1 @@
aiohttp==3.8.4

+0 -78  sync/resolve-deps-from-poms.sh  View file

@@ -1,78 +0,0 @@
#!/bin/bash

usage ()
{
echo "USAGE: $0 [FLAGS]"
echo "Flags:"
echo " -t: Skip download of dependencies. Used for testing that a previous download succeded."
echo " -h: Print this message"
}

is_reposilite_up ()
{
curl -s -o /dev/null repo:80
return $?
}

wait_for_reposilite ()
{
i=0
until is_reposilite_up; do
i=$((i+1))

if [ $i -gt 30 ]; then
return $i
fi

sleep 1
done

return $i
}

should_download=1

while [ $# -ne 0 ]; do
case "$1" in
-t) should_download=0 ;;
-h) usage; exit 0 ;;
esac
shift
done

handle_parameters
wait_for_reposilite

if [ $? -lt 30 ]; then
if [ $should_download -eq 1 ]; then
for pom in /poms/*; do
mvn -P central \
-Dos.detected.classifier=linux-x86_64 \
-f $pom \
dependency:go-offline \
dependency:resolve -Dclassifier=javadoc
mvn -P central \
-Dos.detected.classifier=linux-x86_64 \
-f $pom \
dependency:sources
done
fi

echo "Generating quarkus project"
mvn -P central \
io.quarkus.platform:quarkus-maven-plugin::create \
-DprojectGroupId=com.example \
-DprojectArtifactId=quarkus-test

RET=$?
if [ $RET -eq 0 ]; then
cd quarkus-test
echo "Building quarkus project"
mvn package
RET=$?
fi
exit $RET
else
echo "Can't connect to repository"
exit 255
fi

+45 -0  sync/resolve-deps.sh  View file

@@ -0,0 +1,45 @@
#!/bin/bash

is_reposilite_up ()
{
curl -s -o /dev/null repo:80
return $?
}

wait_for_reposilite ()
{
i=0
until is_reposilite_up; do
i=$((i+1))

if [ $i -gt 30 ]; then
return $i
fi

sleep 1
done

return $i
}

# Main script

if [[ ! -f /package-list.txt ]]; then
echo "No /package-list.txt file. Aborting"
exit 255
fi

echo "Resolving packages and generating gradle config"
./generate-gradle.py /package-list.txt

echo "Check that reposilite is running"
wait_for_reposilite

if [ $? -lt 30 ]; then
echo "Running gradle"
gradle downloadDependencies
exit 0
else
echo "Can't connect to repository"
exit 255
fi
