|
|
|
@@ -0,0 +1,550 @@ |
|
|
|
#!/bin/python3 |
|
|
|
|
|
|
|
import re |
|
|
|
import copy |
|
|
|
import random |
|
|
|
import argparse |
|
|
|
import logging |
|
|
|
import asyncio |
|
|
|
import subprocess |
|
|
|
import copy |
|
|
|
import aiohttp |
|
|
|
from pathlib import Path |
|
|
|
from xml.etree import ElementTree as ET |
|
|
|
|
|
|
|
|
|
|
|
ns = {'': 'http://maven.apache.org/POM/4.0.0'} |
|
|
|
ET.register_namespace('', ns['']) |
|
|
|
|
|
|
|
baseurl = 'https://search.maven.org' |
|
|
|
base_pom_path = Path('poms') |
|
|
|
mirrors = [ |
|
|
|
"https://repo.maven.apache.org/maven2", |
|
|
|
"https://repo1.maven.org/maven2", |
|
|
|
"https://oss.sonatype.org/content/repositories/snapshots", |
|
|
|
"https://packages.confluent.io/maven", |
|
|
|
"https://registry.quarkus.io/maven", |
|
|
|
"https://plugins.gradle.org/m2", |
|
|
|
] |
|
|
|
|
|
|
|
done: set[str] = set() |
|
|
|
done_lock = asyncio.Lock() |
|
|
|
in_progress: set[str] = set() |
|
|
|
in_progress_lock = asyncio.Lock() |
|
|
|
gradle_packages: set[str] = set() |
|
|
|
gradle_packages_lock = asyncio.Lock() |
|
|
|
|
|
|
|
global_properties: dict[str, dict[str, str]] = {} |
|
|
|
|
|
|
|
|
|
|
|
class TooManyRequestsException(Exception): |
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
class PackageError(Exception): |
|
|
|
pass |
|
|
|
|
|
|
|
class WaitForPackage(Exception): |
|
|
|
def __init__(self, package): |
|
|
|
self.package = package |
|
|
|
|
|
|
|
|
|
|
|
class PackagePOM: |
|
|
|
def __init__(self, package: 'Package', pom: str): |
|
|
|
self._package = package |
|
|
|
|
|
|
|
logger.debug(f'{package}: Parsing POM') |
|
|
|
self.raw_root = ET.fromstring(pom) |
|
|
|
|
|
|
|
self.parent: Package | None = None |
|
|
|
|
|
|
|
if (parent_tag := self.raw_root.find('parent', ns)) is not None: |
|
|
|
parent_group_tag = parent_tag.find('groupId', ns) |
|
|
|
parent_artifact_tag = parent_tag.find('artifactId', ns) |
|
|
|
parent_version_tag = parent_tag.find('version', ns) |
|
|
|
parent_group = parent_group_tag.text if parent_group_tag is not None else None |
|
|
|
parent_artifact = parent_artifact_tag.text if parent_artifact_tag is not None else None |
|
|
|
parent_version = parent_version_tag.text if parent_version_tag is not None else None |
|
|
|
|
|
|
|
logger.debug(f'{package}: Parsing parent {parent_group}:{parent_artifact}:{parent_version}') |
|
|
|
|
|
|
|
if parent_group is not None and parent_artifact is not None and parent_version is not None: |
|
|
|
parent = Package( |
|
|
|
parent_group, |
|
|
|
parent_artifact, |
|
|
|
parent_version, |
|
|
|
) |
|
|
|
|
|
|
|
if str(parent) in done: |
|
|
|
self.parent = parent |
|
|
|
else: |
|
|
|
raise WaitForPackage(parent) |
|
|
|
else: |
|
|
|
raise PackageError(f'Invalid parent {parent_group}:{parent_artifact}:{parent_version}') |
|
|
|
|
|
|
|
logger.debug(f'{package}: Parsing properties') |
|
|
|
parent_props: dict[str, str] = {} if self.parent is None else global_properties[str(self.parent)] |
|
|
|
self.properties = self.resolve_props(parent_props) |
|
|
|
global_properties[str(package)] = self.properties |
|
|
|
|
|
|
|
logger.debug(f'{package}: Parsing packaging') |
|
|
|
if (packaging := self.raw_root.find('packaging', ns)) is not None: |
|
|
|
self.packaging = packaging.text |
|
|
|
else: |
|
|
|
self.packaging = '??' |
|
|
|
|
|
|
|
self.is_bom = self.packaging == 'pom' |
|
|
|
|
|
|
|
self.gradle_packages = [str(package)] |
|
|
|
|
|
|
|
if self.packaging == 'pom': |
|
|
|
root_copy = copy.deepcopy(self.raw_root) |
|
|
|
dependencies = root_copy.find('dependencies', ns) or ET.SubElement(root_copy, 'dependencies') |
|
|
|
|
|
|
|
self.gradle_packages.extend( |
|
|
|
[ |
|
|
|
f'{dep.find("groupId").text}:{dep.find("artifactId").text}:{dep.find("version").text}' |
|
|
|
for dep in dependencies.findall('dependency') |
|
|
|
] |
|
|
|
) |
|
|
|
|
|
|
|
logger.debug(f'{package}: POM parsed') |
|
|
|
|
|
|
|
def resolve_props(self, initial: dict[str, str]): |
|
|
|
props = initial |
|
|
|
|
|
|
|
for prop_tag in self.raw_root.findall('.//properties/*', ns): |
|
|
|
prop = prop_tag.tag.replace(f'{{{ns[""]}}}', '') |
|
|
|
value = prop_tag.text if prop_tag.text is not None else '' |
|
|
|
logger.debug(f'{self._package}: Setting prop {prop}={value}') |
|
|
|
props[prop] = value |
|
|
|
|
|
|
|
changed = True |
|
|
|
while changed: |
|
|
|
changed = False |
|
|
|
|
|
|
|
for prop, value in props.items(): |
|
|
|
new_value = self.prop_replace(value, props) |
|
|
|
|
|
|
|
if new_value != value: |
|
|
|
changed = True |
|
|
|
logger.debug(f'{self._package}: Setting prop {prop}={new_value}') |
|
|
|
props[prop] = new_value |
|
|
|
|
|
|
|
return props |
|
|
|
|
|
|
|
def prop_replace(self, text, props: dict[str, str] | None = None) -> str: |
|
|
|
def lookup_prop(match) -> str: |
|
|
|
prop = match.group(1) |
|
|
|
|
|
|
|
if prop == 'project.groupId': |
|
|
|
value = str(self._package.groupId) |
|
|
|
elif prop == 'project.artifactId': |
|
|
|
value = str(self._package.artifactId) |
|
|
|
elif prop == 'project.version': |
|
|
|
value = str(self._package.version) |
|
|
|
elif prop.startswith('project.build') or prop.startswith('env.') or prop.startswith('maven.'): |
|
|
|
value = '' |
|
|
|
elif prop in ['project.basedir', 'basedir', 'user.home', 'debug.port']: |
|
|
|
value = '' |
|
|
|
else: |
|
|
|
try: |
|
|
|
value = props[prop] if props is not None else self.properties[prop] |
|
|
|
except KeyError: |
|
|
|
logger.error(f'{self._package}: Could not find property {prop}. Setting it to ""') |
|
|
|
value = '' |
|
|
|
|
|
|
|
logger.debug(f'{self._package}: Replacing property {prop} with {value}') |
|
|
|
return value |
|
|
|
|
|
|
|
return re.sub( |
|
|
|
r'\$\{([^\}]*)\}', |
|
|
|
lookup_prop, |
|
|
|
text, |
|
|
|
) |
|
|
|
|
|
|
|
def _package_from_xml_dep(self, dep: ET.Element) -> 'Package': |
|
|
|
def prop_replace_tag(tag) -> str: |
|
|
|
return self.prop_replace( |
|
|
|
elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', |
|
|
|
) |
|
|
|
|
|
|
|
return Package( |
|
|
|
groupId=prop_replace_tag('groupId'), |
|
|
|
artifactId=prop_replace_tag('artifactId'), |
|
|
|
version=prop_replace_tag('version'), |
|
|
|
) |
|
|
|
|
|
|
|
@property |
|
|
|
def dependency_management(self) -> list['Package']: |
|
|
|
dependencies: list[Package] = [] |
|
|
|
|
|
|
|
for dep in self.raw_root.find('dependencyManagement/dependencies', ns) or []: |
|
|
|
package = self._package_from_xml_dep(dep) |
|
|
|
dependencies.append(package) |
|
|
|
|
|
|
|
return dependencies |
|
|
|
|
|
|
|
|
|
|
|
class Package: |
|
|
|
_pom: PackagePOM | None = None |
|
|
|
_verified: bool = False |
|
|
|
|
|
|
|
def __init__(self, groupId: str, artifactId: str, version: str | None = None, implicit: bool = False): |
|
|
|
self.groupId = groupId |
|
|
|
self.artifactId = artifactId |
|
|
|
self.version = version if version and not version.isspace() else None |
|
|
|
self.implicit = implicit |
|
|
|
|
|
|
|
def __str__(self) -> str: |
|
|
|
return f'{self.groupId}:{self.artifactId}:{self.version or "----"}' |
|
|
|
|
|
|
|
def __eq__(self, other) -> bool: |
|
|
|
return ( |
|
|
|
self.groupId == other.groupId |
|
|
|
and self.artifactId == other.artifactId |
|
|
|
and self.version == other.version |
|
|
|
) |
|
|
|
|
|
|
|
def __hash__(self) -> int: |
|
|
|
return hash((self.groupId, self.artifactId, self.version)) |
|
|
|
|
|
|
|
@property |
|
|
|
def dir_path(self): |
|
|
|
group_path = self.groupId.replace(".", "/") |
|
|
|
return f'{group_path}/{self.artifactId}/{self.version}' |
|
|
|
|
|
|
|
@property |
|
|
|
def base_filename(self): |
|
|
|
return f'{self.artifactId}-{self.version}' |
|
|
|
|
|
|
|
async def download_file(self, extension): |
|
|
|
filepath = f'{self.dir_path}/{self.base_filename}.{extension}' |
|
|
|
|
|
|
|
async with aiohttp.ClientSession() as session: |
|
|
|
for mirror in mirrors: |
|
|
|
pom_url = f'{mirror}/{filepath}' |
|
|
|
logger.debug(f'{self}: Downloading {extension} from {pom_url}') |
|
|
|
|
|
|
|
async with session.get(pom_url) as response: |
|
|
|
if response.status == 200: |
|
|
|
logger.debug(f'{self}: {extension} downloaded') |
|
|
|
return await response.text() |
|
|
|
break |
|
|
|
elif response.status == 429: |
|
|
|
raise TooManyRequestsException() |
|
|
|
else: |
|
|
|
logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}') |
|
|
|
else: |
|
|
|
logger.warning(f'{self}: File download of {extension} failed for all mirrors') |
|
|
|
return None |
|
|
|
|
|
|
|
@property |
|
|
|
async def pom(self) -> PackagePOM: |
|
|
|
if self._pom is not None: |
|
|
|
return self._pom |
|
|
|
|
|
|
|
if self.version is None: |
|
|
|
await self._query_maven() |
|
|
|
|
|
|
|
self._pom = PackagePOM(self, await self.download_file('pom')) |
|
|
|
|
|
|
|
return self._pom |
|
|
|
|
|
|
|
@property |
|
|
|
def _urlquery(self) -> str: |
|
|
|
q = f'g:{self.groupId}+AND+a:{self.artifactId}' |
|
|
|
|
|
|
|
if self.version is not None: |
|
|
|
q += f'+AND+v:{self.version}' |
|
|
|
|
|
|
|
return q |
|
|
|
|
|
|
|
async def _query_maven(self) -> None: |
|
|
|
self._verified = False |
|
|
|
|
|
|
|
async with aiohttp.ClientSession() as session: |
|
|
|
for mirror in mirrors: |
|
|
|
url = f'{mirror}/{self.groupId.replace(".", "/")}/{self.artifactId}/maven-metadata.xml' |
|
|
|
logger.debug(f'{self}: Querying maven at url {url}') |
|
|
|
|
|
|
|
async with session.get(url) as response: |
|
|
|
if response.status == 200: |
|
|
|
response_text = await response.text() |
|
|
|
metadata = ET.fromstring(response_text) |
|
|
|
|
|
|
|
if metadata is not None: |
|
|
|
logger.debug(f'{self}: Metadata found') |
|
|
|
|
|
|
|
if self.version is None: |
|
|
|
release_tag = metadata.find('./versioning/release') |
|
|
|
latest_tag = metadata.find('./versioning/latest') |
|
|
|
version = release_tag.text if release_tag is not None else latest_tag.text if latest_tag is not None else None |
|
|
|
|
|
|
|
if version is not None: |
|
|
|
logger.debug(f'{self}: Using newest version {version}') |
|
|
|
self.version = version |
|
|
|
self._verified = True |
|
|
|
return |
|
|
|
else: |
|
|
|
logger.info(f'{self}: Could not find latest version in metadata from mirror {mirror}') |
|
|
|
else: |
|
|
|
if metadata.find(f'./versioning/versions/version[.="{self.version}"]') is not None: |
|
|
|
logger.debug(f'{self}: Version {self.version} is valid') |
|
|
|
self._verified = True |
|
|
|
return |
|
|
|
else: |
|
|
|
logger.info(f'{self}: Could not find version {self.version} in metadata from mirror {mirror}') |
|
|
|
else: |
|
|
|
logger.warning('{self}: Invalid XML for maven metadata: {response_text}') |
|
|
|
elif response.status == 429: |
|
|
|
raise TooManyRequestsException() |
|
|
|
else: |
|
|
|
logger.info(f'{self}: HTTP error {response.status} downloading maven metadata from {url}') |
|
|
|
else: |
|
|
|
if self.implicit: |
|
|
|
logger.info(f'{self}: Package not found in any mirror') |
|
|
|
else: |
|
|
|
logger.warning(f'{self}: Package not found in any mirror') |
|
|
|
|
|
|
|
async def verify(self) -> bool: |
|
|
|
if not self._verified: |
|
|
|
await self._query_maven() |
|
|
|
return self._verified |
|
|
|
|
|
|
|
|
|
|
|
def load_package_list(list_path: Path, queue: asyncio.Queue) -> None: |
|
|
|
logger.info(f'Parsing {list_path}') |
|
|
|
|
|
|
|
with list_path.open('r') as f: |
|
|
|
for line in f.readlines(): |
|
|
|
sections = line.strip().split(':') |
|
|
|
|
|
|
|
if len(sections) < 2 or len(sections) > 3: |
|
|
|
logger.warning(f'Invalid package format "{line}". It should be "groupID:artifactID" or "groupID:artifactID:version"') |
|
|
|
continue |
|
|
|
|
|
|
|
package = Package( |
|
|
|
sections[0], |
|
|
|
sections[1], |
|
|
|
sections[2] if len(sections) == 3 else None, |
|
|
|
) |
|
|
|
|
|
|
|
queue.put_nowait(package) |
|
|
|
|
|
|
|
continue |
|
|
|
if not package.artifactId.endswith('-jvm'): |
|
|
|
queue.put_nowait( |
|
|
|
Package( |
|
|
|
package.groupId, |
|
|
|
f'{package.artifactId}-jvm', |
|
|
|
package.version, |
|
|
|
True, |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
async def create_gradle_build() -> str: |
|
|
|
async with gradle_packages_lock: |
|
|
|
return """// Generated, do not edit |
|
|
|
plugins { |
|
|
|
kotlin("jvm") version "1.7.20" |
|
|
|
} |
|
|
|
|
|
|
|
repositories { |
|
|
|
maven { |
|
|
|
url=uri("http://repo:80/releases") |
|
|
|
isAllowInsecureProtocol=true |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
val deps = listOf( |
|
|
|
""" + ',\n '.join(f'"{dep}"' for dep in sorted(gradle_packages)) + """ |
|
|
|
).map { |
|
|
|
configurations.create(it.replace(':', '_')) to it |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
dependencies { |
|
|
|
deps.forEach { (conf, dep) -> |
|
|
|
conf(dep) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
tasks.register("downloadDependencies") { |
|
|
|
doLast { |
|
|
|
deps.forEach { (conf, dep) -> |
|
|
|
conf.files.forEach { file -> |
|
|
|
copy { |
|
|
|
from(file) |
|
|
|
into("data/") |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
""" |
|
|
|
|
|
|
|
async def create_gradle_settings() -> str: |
|
|
|
return """// Generated, do not edit |
|
|
|
rootProject.name = "gradle sync job" |
|
|
|
|
|
|
|
pluginManagement { |
|
|
|
repositories { |
|
|
|
maven { |
|
|
|
url=uri("http://repo:80/releases") |
|
|
|
isAllowInsecureProtocol=true |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
""" |
|
|
|
|
|
|
|
async def download(package: Package, queue: asyncio.Queue) -> None: |
|
|
|
async with done_lock: |
|
|
|
is_done = str(package) in done |
|
|
|
|
|
|
|
async with in_progress_lock: |
|
|
|
is_in_progress = str(package) in in_progress |
|
|
|
|
|
|
|
if is_done: |
|
|
|
logger.info(f'{package}: Already downloaded. Skipping.') |
|
|
|
elif is_in_progress: |
|
|
|
logger.info(f'{package}: Already in progress. Skipping.') |
|
|
|
else: |
|
|
|
async with in_progress_lock: |
|
|
|
in_progress.add(str(package)) |
|
|
|
|
|
|
|
for _ in range(50): |
|
|
|
try: |
|
|
|
verified = await package.verify() |
|
|
|
break |
|
|
|
except TooManyRequestsException: |
|
|
|
logger.info(f'{package}: Too many requests. Delaying next attempt') |
|
|
|
await asyncio.sleep(3*random.random() + 0.2) |
|
|
|
else: |
|
|
|
logger.error(f'{package}: Verification failed after 50 tries') |
|
|
|
exit(1) |
|
|
|
|
|
|
|
if verified: |
|
|
|
for _ in range(50): |
|
|
|
try: |
|
|
|
pom = await package.pom |
|
|
|
break |
|
|
|
except TooManyRequestsException: |
|
|
|
logger.info(f'{package}: Too many requests. Delaying next attempt') |
|
|
|
await asyncio.sleep(3*random.random() + 0.2) |
|
|
|
except WaitForPackage as e: |
|
|
|
logger.info(f'{package}: Waiting for {e.package}') |
|
|
|
|
|
|
|
async with in_progress_lock: |
|
|
|
if str(package) in in_progress: |
|
|
|
in_progress.remove(str(package)) |
|
|
|
|
|
|
|
if str(e.package) not in in_progress: |
|
|
|
await queue.put(e.package) |
|
|
|
|
|
|
|
await queue.put(package) |
|
|
|
|
|
|
|
return |
|
|
|
else: |
|
|
|
logger.error(f'{package}: POM parsing failed after 50 tries') |
|
|
|
exit(1) |
|
|
|
|
|
|
|
if not pom: |
|
|
|
logger.warn(f'{package}: No pom') |
|
|
|
return |
|
|
|
|
|
|
|
async with gradle_packages_lock: |
|
|
|
gradle_packages.update(pom.gradle_packages) |
|
|
|
|
|
|
|
if not pom.is_bom: |
|
|
|
for dep in pom.dependency_management: |
|
|
|
logger.info(f'{package}: Handling transitive dependency {dep}') |
|
|
|
await queue.put(dep) |
|
|
|
|
|
|
|
async with done_lock: |
|
|
|
logger.debug(f'{package}: Marking done') |
|
|
|
p = copy.copy(package) |
|
|
|
p.version = None |
|
|
|
done.add(str(package)) |
|
|
|
done.add(str(p)) |
|
|
|
|
|
|
|
async with in_progress_lock: |
|
|
|
if str(package) in in_progress: |
|
|
|
in_progress.remove(str(package)) |
|
|
|
else: |
|
|
|
p = copy.copy(package) |
|
|
|
p.version = None |
|
|
|
if str(p) in in_progress: |
|
|
|
in_progress.remove(str(p)) |
|
|
|
else: |
|
|
|
logger.warning(f'{package}: Package is done, but not marked as in progress') |
|
|
|
|
|
|
|
async def worker(queue: asyncio.Queue) -> None: |
|
|
|
while True: |
|
|
|
package = await queue.get() |
|
|
|
|
|
|
|
while True: |
|
|
|
try: |
|
|
|
await download(package, queue) |
|
|
|
break |
|
|
|
except PackageError: |
|
|
|
logger.exception(f'{package}: Error while processing package') |
|
|
|
break |
|
|
|
except Exception: |
|
|
|
logger.exception(f'{package}: Unknown error while processing package') |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
queue.task_done() |
|
|
|
|
|
|
|
|
|
|
|
async def main(package_list: Path, output_dir: Path, num_workers: int) -> None: |
|
|
|
queue: asyncio.Queue = asyncio.Queue() |
|
|
|
tasks = [] |
|
|
|
|
|
|
|
load_package_list(package_list, queue) |
|
|
|
|
|
|
|
logger.debug(f'Starting {num_workers} workers') |
|
|
|
for i in range(num_workers): |
|
|
|
tasks.append( |
|
|
|
asyncio.create_task( |
|
|
|
worker(queue) |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
await queue.join() |
|
|
|
|
|
|
|
logger.debug('Queue is empty. Cancelling workers') |
|
|
|
for task in tasks: |
|
|
|
task.cancel() |
|
|
|
|
|
|
|
await asyncio.gather(*tasks, return_exceptions=True) |
|
|
|
|
|
|
|
logger.info('Generating build.gradle.kts') |
|
|
|
(output_dir / 'build.gradle.kts').write_text(await create_gradle_build()) |
|
|
|
|
|
|
|
logger.info('Generating settings.gradle.kts') |
|
|
|
(output_dir / 'settings.gradle.kts').write_text(await create_gradle_settings()) |
|
|
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
parser = argparse.ArgumentParser() |
|
|
|
parser.add_argument('-w', '--workers', type=int, default=20) |
|
|
|
parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0) |
|
|
|
parser.add_argument('--output_dir', type=Path, help="The directory to put the generated gradle files in", default=Path('.'), required=False) |
|
|
|
parser.add_argument('package_list', type=Path, help="The list of packages to download") |
|
|
|
args = parser.parse_args() |
|
|
|
|
|
|
|
if args.verbosity == 0: |
|
|
|
log_level = 'WARNING' |
|
|
|
elif args.verbosity == 1: |
|
|
|
log_level = 'INFO' |
|
|
|
else: |
|
|
|
log_level = 'DEBUG' |
|
|
|
|
|
|
|
logging.basicConfig(level=log_level) |
|
|
|
|
|
|
|
asyncio.run( |
|
|
|
main(args.package_list, args.output_dir, args.workers) |
|
|
|
) |