Skip to content

Instantly share code, notes, and snippets.

@eclipseo
Last active December 22, 2019 21:33
Show Gist options
  • Save eclipseo/fbc52aeebccb7f560221bd40ec28b6af to your computer and use it in GitHub Desktop.
Save eclipseo/fbc52aeebccb7f560221bd40ec28b6af to your computer and use it in GitHub Desktop.
Tool to register packages with Anitya
import aiohttp
import asyncio
import os
import sys
import time
from subprocess import CalledProcessError, check_output
# Anitya access token
TOKEN = ''
# LOGS
# The timestamp is taken once so that all four log files written by a single
# run share the same suffix, even if start-up straddles a second boundary.
_RUN_STAMP = time.strftime('%Y-%m-%d_%H%M%S')
TODOFILE = 'Anitya_TODO_' + _RUN_STAMP + '.txt'
ADDEDFILE = 'Anitya_ADDED_' + _RUN_STAMP + '.txt'
LINKEDFILE = 'Anitya_LINKED_' + _RUN_STAMP + '.txt'
DONEFILE = 'Anitya_DONE_' + _RUN_STAMP + '.txt'
def _unknown_project(name, ecosystem=''):
    """Build the record used for packages that need manual review."""
    return {'name': name, 'backend': 'unknown', 'ecosystem': ecosystem}


def _spec_field(rpmspec, *tags):
    """Return the value of the first line of *rpmspec* containing a tag.

    The value is the second whitespace-separated token of that line, after
    slashes have been stripped from both ends of the line.

    Raises:
        IndexError: no line contains any of *tags*, or the matching line has
            no second token.
    """
    for line in rpmspec.split('\n'):
        if any(tag in line for tag in tags):
            return line.strip('/').split()[1]
    raise IndexError('no spec line contains any of: ' + ', '.join(tags))


def _classify_upstream(name, source, url, domain):
    """Map a package's Source/URL onto an Anitya backend record.

    The checks run in a fixed order and the first matching domain wins.

    Raises:
        IndexError: the Source or URL has fewer path components than the
            matched backend's layout requires.
    """
    src = source.split('/')
    src_trimmed = source.strip('/').split('/')
    url_parts = url.split('/')
    url_trimmed = url.strip('/').split('/')

    def registry(backend, ecosystem, project):
        # Language registries: fixed ecosystem name, homepage is the spec URL.
        return {
            'backend': backend,
            'ecosystem': ecosystem,
            'name': project,
            'homepage': url,
        }

    def hosted(backend, project, homepage=url):
        # Every other backend: the homepage doubles as the ecosystem.
        return {
            'backend': backend,
            'name': project,
            'homepage': homepage,
            'ecosystem': homepage,
        }

    def forge(backend):
        # Expects Source shaped like scheme://host/owner/repo/...; the
        # homepage and version_url are both rebuilt from those components.
        record = hosted(backend, src[4], 'https://' + '/'.join(src[2:5]))
        record['version_url'] = '/'.join(src[3:5])
        return record

    def tarball_stem(filename, sep):
        # Drop the trailing "<sep>version..." part of an archive file name.
        return sep.join(filename.split(sep)[:-1])

    if 'crates.io' in domain:
        return registry('crates.io', 'crates.io', url_parts[4])
    if 'maven.org' in domain:
        return registry('Maven Central', 'maven', url_parts[-1])
    if 'npmjs.org' in domain:
        return registry('npmjs', 'npm', src[3])
    if ('pythonhosted.org' in domain or 'pypi.python.org' in domain
            or 'pypi.io' in domain or 'pypi.org' in domain):
        return registry('PyPI', 'pypi', src[6])
    if 'rubygems.org' in domain:
        return registry('Rubygems', 'rubygems',
                        tarball_stem(src_trimmed[4], '-'))
    if 'bitbucket.org' in domain:
        return forge('BitBucket')
    if 'metacpan.org' in domain or 'cpan.org' in domain:
        return hosted('CPAN (perl)', url_parts[4])
    if 'r-project.org' in domain:
        project = tarball_stem(src_trimmed[5], '_')
        return hosted('CRAN (R)', project,
                      'https://www.rforge.net/' + project)
    if 'debian.org' in domain:
        return hosted('Debian project', src_trimmed[7])
    if 'drupal.org' in domain and 'drupal6' in name:
        return hosted('Drupal6', url_trimmed[4])
    if 'drupal.org' in domain and 'drupal7' in name:
        return hosted('Drupal7', url_trimmed[4])
    if 'freecode.com' in domain:
        return hosted('Freshmeat', url_trimmed[4])
    if 'github.com' in domain:
        return forge('GitHub')
    if 'gitlab' in domain:
        project = source.split('-')[0].strip('/').split('/')[-1]
        homepage = 'https://' + '/'.join(
            url.split('-')[0].strip('/').split('/')[2:])
        return hosted('GitLab', project, homepage)
    if 'gnome.org' in domain:
        return hosted('GNOME', src_trimmed[4])
    if 'gnu.org' in domain:
        return hosted('GNU project', src_trimmed[4])
    if 'google.com' in domain:
        return hosted('Google code', url_trimmed[4])
    if 'haskell.org' in domain:
        return hosted('Hackage', src_trimmed[4])
    if 'launchpad.net' in domain:
        return hosted('Launchpad', src_trimmed[3])
    if 'packagist.org' in domain:
        return hosted('Packagist', url_trimmed[5])
    if 'pagure.io' in domain:
        return hosted('pagure', src_trimmed[4])
    if 'pear.php.net' in domain:
        return hosted('PEAR', tarball_stem(src_trimmed[4], '-'))
    if 'pecl.php.net' in domain:
        return hosted('PECL', tarball_stem(src_trimmed[4], '-'))
    if 'sourceforge.net' in domain:
        # sourceforge is a mess. do them manually
        return _unknown_project(name, url)
    if 'stackage.org' in domain:
        return hosted('Stackage', url_trimmed[4])
    return _unknown_project(name, url)


def get_data(pkg):
    """Derive Anitya project data for the package directory *pkg*.

    Runs ``rpmspec -P`` on ``<pkg>/<basename of pkg>.spec`` and reads the
    Name, Source/Source0 and URL lines of the expanded spec. The host part
    of the Source decides which backend record is produced.

    Args:
        pkg: path of the package directory; its last ``/``-separated
            component is used as the spec file's base name.

    Returns:
        A dict with at least ``name``, ``backend`` and ``ecosystem``.
        Recognised backends also carry ``homepage``, and the GitHub and
        BitBucket records carry ``version_url``. When the spec cannot be
        expanded or does not have the expected shape, ``backend`` is
        ``'unknown'`` so that callers can route the package to manual review.

    A missing ``rpmspec`` executable is not handled here and propagates as
    the OSError raised by ``check_output``.
    """
    spec = os.path.join(pkg, pkg.split('/')[-1] + '.spec')
    try:
        rpmspec = check_output(['rpmspec', '-P', spec],
                               universal_newlines=True).strip()
    except CalledProcessError:
        return _unknown_project(pkg)

    try:
        name = _spec_field(rpmspec, 'Name:')
    except IndexError:
        # Without a Name line the directory path is the only identifier
        # available, mirroring the rpmspec-failure fallback above.
        return _unknown_project(pkg)

    try:
        source = _spec_field(rpmspec, 'Source:', 'Source0:')
        url = _spec_field(rpmspec, 'URL:', 'Url:')
        domain = source.split('/')[2]
    except IndexError:
        # No Source (e.g. a metapackage), no URL, or a Source that is a bare
        # file name rather than a URL.
        return _unknown_project(name)

    try:
        return _classify_upstream(name, source, url, domain)
    except IndexError:
        # The URL/Source layout did not match what the backend expects; one
        # odd spec must not abort the whole run.
        return _unknown_project(name)
def get_subdirectories(dir):
    """Return the paths of the immediate sub-directories of *dir*.

    Each returned path is *dir* joined with the entry name; entries that are
    not directories are left out. The order is whatever ``os.listdir``
    yields.
    """
    found = []
    for entry in os.listdir(dir):
        candidate = os.path.join(dir, entry)
        if os.path.isdir(candidate):
            found.append(candidate)
    return found
async def add(session, pkg, pkg_data):
    """Create the Anitya project for *pkg*, then link it to Fedora.

    Args:
        session: aiohttp client session used for the POST request.
        pkg: package directory path; its last ``/``-separated component is
            the Fedora package name.
        pkg_data: record produced by ``get_data``. ``backend``, ``homepage``
            and ``name`` are sent to Anitya, plus ``version_url`` if present.

    Packages whose backend is unknown are not submitted; they are appended to
    TODOFILE for manual review. The same happens when Anitya answers with an
    HTTP error status, so that a rejected request is not reported as added.
    On success the package name is appended to ADDEDFILE and ``link`` is
    awaited.
    """
    pkgname = pkg.split('/')[-1]

    # Unknown backend: we add it to a TODO list for manual review
    if 'unknown' in pkg_data['backend']:
        print(f'{pkgname} to review manually.')
        with open(TODOFILE, 'a') as file:
            file.write(pkgname + '\n')
        return

    apiurl = 'https://release-monitoring.org/api/v2/projects/'
    pdata = {
        'backend': pkg_data['backend'],
        'homepage': pkg_data['homepage'],
        'name': pkg_data['name'],
        'check_release': True
    }
    if 'version_url' in pkg_data:
        pdata['version_url'] = pkg_data['version_url']

    async with session.post(
            apiurl, json=pdata,
            headers={'Authorization': 'token ' + TOKEN}) as resp:
        print(await resp.json())
        status = resp.status

    if status >= 400:
        # Rejected (bad token, conflict, validation error, ...): linking
        # would have nothing to point at, so hand the package to a human.
        print(f'{pkgname} could not be added to Anitya (HTTP {status}).')
        with open(TODOFILE, 'a') as file:
            file.write(pkgname + '\n')
        return

    print(f'{pkgname} added to Anitya.')
    with open(ADDEDFILE, 'a') as file:
        file.write(pkgname + '\n')
    await link(session, pkg, pkg_data)
async def link(session, pkg, pkg_data):
    """Map the Fedora package *pkg* onto its Anitya project.

    Args:
        session: aiohttp client session used for the POST request.
        pkg: package directory path; its last ``/``-separated component is
            the Fedora package name.
        pkg_data: record whose ``name`` and ``ecosystem`` identify the
            Anitya project.

    On success the package name is appended to LINKEDFILE. When Anitya
    answers with an HTTP error status the package is appended to TODOFILE
    instead, so that a rejected request is not reported as linked.
    """
    pkgname = pkg.split('/')[-1]
    apiurl = 'https://release-monitoring.org/api/v2/packages/'
    pdata = {
        'distribution': 'Fedora',
        'package_name': pkgname,
        'project_name': pkg_data['name'],
        'project_ecosystem': pkg_data['ecosystem']
    }

    async with session.post(
            apiurl, json=pdata,
            headers={'Authorization': 'token ' + TOKEN}) as resp:
        print(await resp.json())
        status = resp.status

    if status >= 400:
        print(f'{pkgname} could not be linked to Fedora (HTTP {status}).')
        with open(TODOFILE, 'a') as file:
            file.write(pkgname + '\n')
        return

    print(f'{pkgname} linked to Fedora.')
    with open(LINKEDFILE, 'a') as file:
        file.write(pkgname + '\n')
# Backends whose projects are looked up by (name, ecosystem) instead of by
# name alone.
_ECOSYSTEM_BACKENDS = ('crates.io', 'Maven Central', 'npmjs', 'PyPI',
                       'Rubygems')

# Fedora package-name prefixes and the additional Anitya backends that are
# accepted as a match for packages carrying that prefix.
_PREFIX_BACKENDS = (
    ('python-', ('PyPI',)),
    ('ghc-', ('Stackage', 'Hackage')),
    ('nodejs-', ('npmjs',)),
    ('php-', ('Packagist', 'PEAR', 'PECL')),
)

# Attempts per package before a persistent connection failure is given up on.
_MAX_ATTEMPTS = 5


def _flag_for_review(pkgname):
    """Record *pkgname* in TODOFILE for manual review."""
    print(f'{pkgname} to review manually.')
    with open(TODOFILE, 'a') as file:
        file.write(pkgname + '\n')


def _projects_for_backend(pkgname, backend, items):
    """Return the projects in *items* whose backend is acceptable for *pkgname*."""
    accepted = {backend}
    for prefix, extra in _PREFIX_BACKENDS:
        if pkgname.startswith(prefix):
            accepted.update(extra)
            break
    return [p for p in items if p['backend'] in accepted]


async def _get_json(session, url, params=None):
    """GET *url* and return the decoded JSON body, or None if it is not JSON."""
    async with session.get(url, params=params) as resp:
        try:
            return await resp.json()
        except (aiohttp.ContentTypeError, ValueError):
            # Wrong content type or an undecodable body. Connection-level
            # errors are deliberately not caught here so the caller can retry.
            return None


async def _process_package(session, pkg):
    """Check one package against Anitya and add and/or link it as needed."""
    pkgname = pkg.split('/')[-1]

    # Check if the package is tracked *for Fedora*
    mapping = await _get_json(
        session, f'https://release-monitoring.org/api/project/fedora/{pkgname}')
    if mapping is None:
        _flag_for_review(pkgname)
        return
    if 'error' not in mapping:
        print(f'{pkgname} is already set up.')
        with open(DONEFILE, 'a') as file:
            file.write(pkgname + '\n')
        return
    print(mapping['error'])

    # Check if the project is already tracked. The query goes through
    # ``params`` so that names containing '+', '&' or '/' are URL-encoded.
    pkg_data = get_data(pkg)
    query = {'name': pkg_data['name']}
    if pkg_data['backend'] in _ECOSYSTEM_BACKENDS:
        query['ecosystem'] = pkg_data['ecosystem']
    found = await _get_json(
        session, 'https://release-monitoring.org/api/v2/projects/', query)
    if found is None or 'items' not in found:
        # Not a project listing (e.g. an error payload): nothing to decide on.
        _flag_for_review(pkgname)
        return

    # There should be only one match for a given backend
    matches = _projects_for_backend(pkgname, pkg_data['backend'],
                                    found['items'])
    if not matches:
        # Not tracked
        await add(session, pkg, pkg_data)
    elif len(matches) == 1:
        # tracked: we link it to the Fedora package
        pkg_data['ecosystem'] = matches[0]['ecosystem']
        await link(session, pkg, pkg_data)
    else:
        # Should not happen: we add it to a TODO list for manual review
        _flag_for_review(pkgname)


async def main(paths):
    """Register every package directory in *paths* with Anitya.

    Only directories containing ``<basename>.spec`` are considered. For each
    one, the Fedora mapping is looked up; packages that are already mapped
    are written to DONEFILE, the others are added and/or linked, or written
    to TODOFILE when no automatic decision can be made.

    Timeouts and connection errors are retried up to ``_MAX_ATTEMPTS`` times
    per package, with a growing pause in between. A package that still fails
    is written to TODOFILE and processing moves on to the next one.
    """
    pkgs = [
        pkg for pkg in paths
        if os.path.exists(os.path.join(pkg, pkg.split('/')[-1] + '.spec'))
    ]
    for pkg in pkgs:
        pkgname = pkg.split('/')[-1]
        print(pkgname)
        for attempt in range(1, _MAX_ATTEMPTS + 1):
            try:
                async with aiohttp.ClientSession() as session:
                    await _process_package(session, pkg)
                break
            except (asyncio.TimeoutError, aiohttp.ServerDisconnectedError,
                    aiohttp.ClientResponseError,
                    aiohttp.ClientConnectorError) as err:
                print(f'Client disconnected: {err}')
                if attempt < _MAX_ATTEMPTS:
                    # Give the server a moment before trying again.
                    await asyncio.sleep(attempt)
        else:
            # Every attempt failed: do not lose track of the package.
            _flag_for_review(pkgname)
if __name__ == '__main__':
    # Expects one argument: a directory whose sub-directories are package
    # checkouts, each holding <name>/<name>.spec.
    if len(sys.argv) > 1 and os.path.isdir(sys.argv[1]):
        log_headers = (
            (TODOFILE, 'Packages to add to Anitya manually'),
            (ADDEDFILE, 'Projects added to Anitya'),
            (LINKEDFILE, 'Packages linked to Fedora'),
            (DONEFILE, 'Packages already linked to Fedora'),
        )
        for logfile, title in log_headers:
            with open(logfile, 'a') as file:
                file.write(title + '\n')
                # Underline sized to the title so every heading is complete.
                file.write('=' * len(title) + '\n')
        asyncio.run(main(get_subdirectories(sys.argv[1])))
    else:
        # Fail loudly instead of exiting silently with status 0.
        sys.exit(f'usage: {sys.argv[0]} DIRECTORY  '
                 '(DIRECTORY must exist and contain the package directories)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment