浏览代码

Update update-poms.py to output a list of dependencies

wip/coursier
父节点
当前提交
43e0ee230e
共有 1 个文件被更改,包括 51 次插入 和 85 次删除
  1. +51
    -85
      update-poms.py

+ 51
- 85
update-poms.py 查看文件

@@ -1,6 +1,7 @@
#!/bin/python3 #!/bin/python3


import re import re
import random
import argparse import argparse
import logging import logging
import asyncio import asyncio
@@ -15,7 +16,7 @@ ns = {'': 'http://maven.apache.org/POM/4.0.0'}
ET.register_namespace('', ns['']) ET.register_namespace('', ns[''])


baseurl = 'https://search.maven.org' baseurl = 'https://search.maven.org'
base_pom_path = Path('poms')
output_path: Path = Path()
mirrors = [ mirrors = [
"https://repo.maven.apache.org/maven2", "https://repo.maven.apache.org/maven2",
"https://repo1.maven.org/maven2", "https://repo1.maven.org/maven2",
@@ -33,6 +34,7 @@ num_workers = 50
class PackagePOM: class PackagePOM:
def __init__(self, package: 'Package', pom: str): def __init__(self, package: 'Package', pom: str):
logger.debug(f'{package}: Parsing POM') logger.debug(f'{package}: Parsing POM')
self._package = package
self.raw_root = ET.fromstring(pom) self.raw_root = ET.fromstring(pom)


if (packaging := self.raw_root.find('packaging', ns)) is not None: if (packaging := self.raw_root.find('packaging', ns)) is not None:
@@ -43,64 +45,12 @@ class PackagePOM:
self.is_bom = self.packaging == 'pom' self.is_bom = self.packaging == 'pom'


if self.packaging == 'pom': if self.packaging == 'pom':
root_copy = copy.deepcopy(self.raw_root)
depman = root_copy.find('dependencyManagement', ns)
if depman is not None:
root_copy.extend(depman.findall('*'))
root_copy.remove(depman)

if (groupId := root_copy.find('groupId', ns)) is not None:
groupId.text = f'tmp.{package.groupId}'
else:
logger.warning(f"{package}: No groupId tag in pom")

if (artifactId := root_copy.find('groupId', ns)) is not None:
artifactId.text = f'placeholder.{package.artifactId}'
else:
logger.warning(f"{package}: No artifactId tag in pom")

# Add a dependency for the pom itself
if (dependencies := root_copy.find('dependencies', ns)) is not None:
self_dep = ET.SubElement(dependencies, 'dependency')
ET.SubElement(self_dep, 'groupId').text = package.groupId
ET.SubElement(self_dep, 'artifactId').text = package.artifactId
ET.SubElement(self_dep, 'version').text = package.version
else:
logger.warning(f"{package}: No dependencies tag in pom")

self.generated_root = root_copy
self.packages = [package, *self.dependency_management]
else: else:
self.generated_root = ET.fromstring(
f"""
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<modelVersion>4.0.0</modelVersion>
<groupId>tmp.{package.groupId}</groupId>
<artifactId>placeholder-{package.artifactId}</artifactId>
<version>{package.version}</version>
<name>Package {package.artifactId}</name>

<dependencies>
<dependency>
<groupId>{package.groupId}</groupId>
<artifactId>{package.artifactId}</artifactId>
<version>{package.version}</version>
</dependency>
</dependencies>
</project>
"""
)
self.packages = [package]


logger.debug(f'{package}: POM parsed') logger.debug(f'{package}: POM parsed')


def write(self, f):
tree = ET.ElementTree(self.generated_root)
ET.indent(tree)
tree.write(f)

def get_property(self, prop: str): def get_property(self, prop: str):
elem = self.raw_root.find(f'.//properties/{prop}', ns) elem = self.raw_root.find(f'.//properties/{prop}', ns)
if elem is not None: if elem is not None:
@@ -108,18 +58,33 @@ class PackagePOM:
else: else:
return None return None


def _package_from_xml_dep(self, dep: ET.Element):
def prop_replace(match):
def _package_from_xml_dep(self, dep: ET.Element) -> 'Package':
def lookup_prop(match) -> str:
prop = match.group(1) prop = match.group(1)
value = self.get_property(match.group(1))
logger.debug(f'Replacing property {prop} with {value}')

if prop == 'project.groupId':
value = str(self._package.groupId)
elif prop == 'project.artifactId':
value = str(self._package.artifactId)
elif prop == 'project.version':
value = str(self._package.version)
else:
value = prop_replace(self.get_property(prop))

logger.debug(f'{self._package}: Replacing property {prop} with {value}')
return value return value


def prop_replace(text) -> str:
return re.sub(
r'\$\{([^\}]*)\}',
lookup_prop,
text,
)


return Package( return Package(
*[ *[
re.sub(
r'\$\{([^\}]*)\}',
prop_replace,
prop_replace(
elem.text or '' if (elem := dep.find(tag, ns)) is not None else '', elem.text or '' if (elem := dep.find(tag, ns)) is not None else '',
) )


@@ -146,7 +111,7 @@ class Package:
_pom: PackagePOM | None = None _pom: PackagePOM | None = None
_verified: bool = False _verified: bool = False


def __init__(self, groupId: str, artifactId: str, version: str = None):
def __init__(self, groupId: str, artifactId: str, version: str | None = None):
self.groupId = groupId self.groupId = groupId
self.artifactId = artifactId self.artifactId = artifactId
self.version = version self.version = version
@@ -186,8 +151,10 @@ class Package:
logger.debug(f'{self}: {extension} downloaded') logger.debug(f'{self}: {extension} downloaded')
return await response.text() return await response.text()
break break
elif response.status == 429:
logger.error(f'{self}: HTTP error 429 (Too many requests). Retry after {response.headers["Retry-After"]}')
else: else:
logger.debug(f'{self}: HTTP error {response.status} from mirror {mirror}')
logger.error(f'{self}: HTTP error {response.status} from mirror {mirror}')
else: else:
logger.warning(f'{self}: File download of {extension} failed for all mirrors') logger.warning(f'{self}: File download of {extension} failed for all mirrors')
return None return None
@@ -228,14 +195,14 @@ class Package:
self._verified = True self._verified = True
if self.version is None: if self.version is None:
version = message['response']['docs'][0]['latestVersion'] version = message['response']['docs'][0]['latestVersion']
logger.debug(f'{self}: Using newest version {version}')
self.version = version self.version = version
logger.debug(f'{self}: Using newest version {version}')
else: else:
logger.warning(f'{self}: No matching packages found') logger.warning(f'{self}: No matching packages found')
self._verified = False self._verified = False
else: else:
self._verified = False self._verified = False
logger.warning(f'{self}: HTTP error {response.status} downloading pom')
logger.error(f'{self}: HTTP error {response.status} downloading pom')


async def verify(self) -> bool: async def verify(self) -> bool:
if not self._verified: if not self._verified:
@@ -270,26 +237,19 @@ async def download(package: Package, queue: asyncio.Queue) -> None:
if skip: if skip:
logger.info(f'{package}: Already downloaded. Skipping.') logger.info(f'{package}: Already downloaded. Skipping.')
elif await package.verify(): elif await package.verify():
async with done_lock:
done.add(str(package))

pom_dir = base_pom_path / f'{package.groupId}-{package.artifactId}-{package.version}'
pom_path = pom_dir / 'pom.xml'

pom_dir.mkdir(exist_ok=True)

pom = await package.pom pom = await package.pom


if not pom:
return

pom.write(pom_path)
logger.info(f'{package}: Downloaded')
if pom:
logger.info(f'{package}: Done')
async with done_lock:
for p in pom.packages:
if not p.version:
logger.warning(f'{p}: No version found!')


if not pom.is_bom:
for dep in pom.dependency_management:
logger.info(f'{package}: Handling transitive dependency {dep}')
await queue.put(dep)
logger.debug(f'{p}: Adding from BOM')
done.add(str(p))
else:
logger.warning(f'{package}: No POM for package')
else: else:
logger.warning(f'{package}: Package not found. Check package name and internet connection') logger.warning(f'{package}: Package not found. Check package name and internet connection')


@@ -298,6 +258,7 @@ async def worker(queue: asyncio.Queue) -> None:
while True: while True:
package = await queue.get() package = await queue.get()
await download(package, queue) await download(package, queue)
await asyncio.sleep(random.random())
queue.task_done() queue.task_done()




@@ -323,8 +284,11 @@ async def main() -> None:


await asyncio.gather(*tasks, return_exceptions=True) await asyncio.gather(*tasks, return_exceptions=True)


logger.info('Generating master POM')
subprocess.call(['sh', 'generate_master_pom.sh'])
logger.info('Generating list of all packages')
async with done_lock:
with open(output_path, 'w') as f:
for p in done:
f.write(p + '\n')




logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -333,6 +297,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-w', '--workers', type=int, default=num_workers) parser.add_argument('-w', '--workers', type=int, default=num_workers)
parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0) parser.add_argument('-v', '--verbose', dest='verbosity', action='count', default=0)
parser.add_argument('-o', '--output', type=Path, default=Path('full-package-list.txt'))
args = parser.parse_args() args = parser.parse_args()


if args.verbosity == 0: if args.verbosity == 0:
@@ -345,5 +310,6 @@ if __name__ == '__main__':
logging.basicConfig(level=log_level) logging.basicConfig(level=log_level)


num_workers = args.workers num_workers = args.workers
output_path = args.output


asyncio.run(main()) asyncio.run(main())

正在加载...
取消
保存