Explorar el Código

Update update-poms.py to output a list of dependencies

wip/coursier
Sindre Stephansen hace 3 años
padre
commit
43e0ee230e
Se ha modificado 1 fichero con 51 adiciones y 85 borrados
  1. +51
    -85
      update-poms.py

+ 51
- 85
update-poms.py Ver fichero

@@ -1,6 +1,7 @@
#!/bin/python3

import re
import random
import argparse
import logging
import asyncio
@@ -15,7 +16,7 @@ ns = {'': 'http://maven.apache.org/POM/4.0.0'}
ET.register_namespace('', ns[''])

baseurl = 'https://search.maven.org'
base_pom_path = Path('poms')
output_path: Path = Path()
mirrors = [
"https://repo.maven.apache.org/maven2",
"https://repo1.maven.org/maven2",
@@ -33,6 +34,7 @@ num_workers = 50
class PackagePOM:
def __init__(self, package: 'Package', pom: str):
logger.debug(f'{package}: Parsing POM')
self._package = package
self.raw_root = ET.fromstring(pom)

if (packaging := self.raw_root.find('packaging', ns)) is not None:
@@ -43,64 +45,12 @@ class PackagePOM:
self.is_bom = self.packaging == 'pom'

if self.packaging == 'pom':
root_copy = copy.deepcopy(self.raw_root)
depman = root_copy.find('dependencyManagement', ns)
if depman is not None:
root_copy.extend(depman.findall('*'))
root_copy.remove(depman)

if (groupId := root_copy.find('groupId', ns)) is not None:
groupId.text = f'tmp.{package.groupId}'
else:
logger.warning(f"{package}: No groupId tag in pom")

if (artifactId := root_copy.find('groupId', ns)) is not None:
artifactId.text = f'placeholder.{package.artifactId}'
else:
logger.warning(f"{package}: No artifactId tag in pom")

# Add a dependency for the pom itself
if (dependencies := root_copy.find('dependencies', ns)) is not None:
self_dep = ET.SubElement(dependencies, 'dependency')
ET.SubElement(self_dep, 'groupId').text = package.groupId
ET.SubElement(self_dep, 'artifactId').text = package.artifactId
ET.SubElement(self_dep, 'version').text = package.version
else:
logger.warning(f"{package}: No dependencies tag in pom")

self.generated_root = root_copy
self.packages = [package, *self.dependency_management]
else:
self.generated_root = ET.fromstring(
f"""
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<modelVersion>4.0.0</modelVersion>
<groupId>tmp.{package.groupId}</groupId>
<artifactId>placeholder-{package.artifactId}</artifactId>
<version>{package.version}</version>
<name>Package {package.artifactId}</name>

<dependencies>
<dependency>
<groupId>{package.groupId}</groupId>
<artifactId>{package.artifactId}</artifactId>
<version>{package.version}</version>
</dependency>
</dependencies>
</project>
"""
)
self.packages = [package]

logger.debug(f'{package}: POM parsed')

# Serialize this POM's generated XML tree to ``f``.
#
# ``f`` is passed unchanged to ``tree.write``; the method returns nothing.
# NOTE(review): ``ET`` is presumably ``xml.etree.ElementTree`` (its import is
# not visible in this view) -- confirm. If so, ``f`` may be a file path or a
# writable file object.
def write(self, f):
# Wrap the root element (``self.generated_root``) so it can be written as a document.
tree = ET.ElementTree(self.generated_root)
# Re-indent the tree in place before it is written out.
ET.indent(tree)
tree.write(f)

def get_property(self, prop: str):
elem = self.raw_root.find(f'.//properties/{prop}', ns)
if elem is not None:
@@ -108,18 +58,33 @@ class PackagePOM:
else:
return None

def _package_from_xml_dep(self, dep: ET.Element):
def prop_replace(match):
def _package_from_xml_dep(self, dep: ET.Element) -> 'Package':
def lookup_prop(match) -> str:
prop = match.group(1)
value = self.get_property(match.group(1))
logger.debug(f'Replacing property {prop} with {value}')

if prop == 'project.groupId':
value = str(self._package.groupId)
elif prop == 'project.artifactId':
value = str(self._package.artifactId)
elif prop == 'project.version':
value = str(self._package.version)
else:
value = prop_replace(self.get_property(prop))

logger.debug(f'{self._package}: Replacing property {prop} with {value}')
return value

def prop_replace(text) -> str:
return re.sub(
r'\$\{([^\}]*)\}',
lookup_prop,
text,
)


return Package(
*[
re.sub(
r'\$\{([^\}]*)\}',
prop_replace,
prop_replace(
elem.text or '' if (elem := dep.find(tag, ns)) is not None else '',
)

@@ -146,7 +111,7 @@ class Package:
_pom: PackagePOM | None = None
_verified: bool = False

def __init__(self, groupId: str, artifactId: str, version: str = None):
def __init__(self, groupId: str, artifactId: str, version: str | None = None):
self.groupId = groupId
self.artifactId = artifactId
self.version = version
@@ -186,8 +151,10 @@ class Package:
logger.debug(f'{self}: {extension} downloaded')
return await response.text()
break
elif response.status == 429:
logger.error(f'{self}: HTTP error 429 (Too many requests). Retry after {response.headers["Retry-After"]}')
else:
logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}')
logger.error(f'{self}: HTTP error {response.status} from mirror {mirror}')
else:
logger.warning(f'{self}: File download of {extension} failed for all mirrors')
return None
@@ -228,14 +195,14 @@ class Package:
self._verified = True
if self.version is None:
version = message['response']['docs'][0]['latestVersion']
logger.debug(f'{self}: Using newest version {version}')
self.version = version
logger.debug(f'{self}: Using newest version {version}')
else:
logger.warning(f'{self}: No matching packages found')
self._verified = False
else:
self._verified = False
logger.warning(f'{self}: HTTP error {response.status} downloading pom')
logger.error(f'{self}: HTTP error {response.status} downloading pom')

async def verify(self) -> bool:
if not self._verified:
@@ -270,26 +237,19 @@ async def download(package: Package, queue: asyncio.Queue) -> None:
if skip:
logger.info(f'{package}: Already downloaded. Skipping.')
elif await package.verify():
async with done_lock:
done.add(str(package))

pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}'
pom_path = pom_dir / 'pom.xml'

pom_dir.mkdir(exist_ok=True)

pom = await package.pom

if not pom:
return

pom.write(pom_path)
logger.info(f'{package}: Downloaded')
if pom:
logger.info(f'{package}: Done')
async with done_lock:
for p in pom.packages:
if not p.version:
logger.warning(f'{p}: No version found!')

if not pom.is_bom:
for dep in pom.dependency_management:
logger.info(f'{package}: Handling transitive dependency {dep}')
await queue.put(dep)
logger.debug(f'{p}: Adding from BOM')
done.add(str(p))
else:
logger.warning(f'{package}: No POM for package')
else:
logger.warning(f'{package}: Package not found. Check package name and internet connection')

@@ -298,6 +258,7 @@ async def worker(queue: asyncio.Queue) -> None:
while True:
package = await queue.get()
await download(package, queue)
await asyncio.sleep(random.random())
queue.task_done()


@@ -323,8 +284,11 @@ async def main() -> None:

await asyncio.gather(*tasks, return_exceptions=True)

logger.info('Generating master POM')
subprocess.call(['sh', 'generate_master_pom.sh'])
logger.info('Generating list of all packages')
async with done_lock:
with open(output_path, 'w') as f:
for p in done:
f.write(p + '\n')


logger = logging.getLogger(__name__)
@@ -333,6 +297,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--workers', type=int, default=num_workers)
parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
parser.add_argument('-o', '--output', type=Path, default=Path('full-package-list.txt'))
args = parser.parse_args()

if args.verbosity == 0:
@@ -345,5 +310,6 @@ if __name__ == '__main__':
logging.basicConfig(level=log_level)

num_workers = args.workers
output_path = args.output

asyncio.run(main())

Cargando…
Cancelar
Guardar