Skip to content

Instantly share code, notes, and snippets.

@DieHertz
Created April 21, 2014 10:07
Show Gist options
  • Save DieHertz/11138283 to your computer and use it in GitHub Desktop.
Save DieHertz/11138283 to your computer and use it in GitHub Desktop.
arch-pkg-fake-fetcher.py
from htmldom import htmldom
import string
import re
import sys
import queue
def get_pkg_name(pkg_url):
    """Return the package name embedded in a package page path.

    The name is the first capture group of the module-level
    ``pkg_name_regex`` searched against ``pkg_url``; for example
    ``'/packages/community/any/dkms/'`` yields ``'dkms'``.

    Raises AttributeError when the pattern does not match, because the
    search result is then None.
    """
    found = pkg_name_regex.search(pkg_url)
    return found.group(1)
def fetch_with_deps():
    """Drain ``pkg_queue``, following dependencies, then print wget commands.

    Each queued package path is processed once: its name is recorded in
    ``pkg_processed``, its download path (the package path plus
    ``'download/'``) is stored in ``pkg_address``, and every entry of the
    page's ``#pkgdepslist`` that is not marked as an optional, make or check
    dependency is queued in turn. When the queue is empty, one ``wget`` line
    per processed package is printed, in processing order.

    Works entirely on module-level state and returns nothing.
    """
    # Substrings searched for in a dependency entry's markup; presumably the
    # CSS classes the site puts on non-runtime dependencies - confirm.
    skipped_markers = ('opt-dep', 'make-dep', 'check-dep')

    while not pkg_queue.empty():
        current_url = pkg_queue.get()
        current_name = get_pkg_name(current_url)

        # The same package can be queued by several dependents.
        if current_name in pkg_processed:
            print(current_name, '- already processed')
            continue

        print(current_name)
        pkg_processed.append(current_name)
        pkg_address[current_name] = current_url + 'download/'

        # NOTE(review): HtmlDom(url).createDom() presumably downloads and
        # parses the page - verify against the htmldom documentation.
        page = htmldom.HtmlDom(base_url + current_url).createDom()
        for entry in page.find('#pkgdepslist > li'):
            link = entry.find('a').attr('href')
            markup = entry.html()
            if not any(marker in markup for marker in skipped_markers):
                pkg_queue.put(link)
                print(' ', get_pkg_name(link))

        # Blank line separates one package's dependency listing from the next.
        print()

    for done_name in pkg_processed:
        print('wget', base_url + pkg_address[done_name])
# Script entry: expects exactly one argument, the site-relative path of the
# package page to start from.
if len(sys.argv) != 2:
    print('usage:\t', sys.argv[0], '<pkg-address>')
    print('\t', sys.argv[0], '/packages/community/any/dkms/')
    # sys.exit instead of the bare exit(): the latter is injected by the
    # site module for interactive use and is missing under `python -S`.
    # A non-zero status lets calling shells detect the wrong invocation.
    sys.exit(1)

# Captures the text between the last relevant '/' and any trailing slashes,
# e.g. 'dkms' in '/packages/community/any/dkms/'.
pkg_name_regex = re.compile(r'/([^/]*)/*$')
base_url = 'https://www.archlinux.org'

# Shared state used by fetch_with_deps().
pkg_processed = []          # package names, in the order they were handled
pkg_queue = queue.Queue()   # package paths still to be visited
pkg_address = {}            # package name -> site-relative download path

pkg_queue.put(sys.argv[1])
fetch_with_deps()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment