Skip to content

Instantly share code, notes, and snippets.

@jaraco
Created September 5, 2017 15:34
Show Gist options
  • Save jaraco/e73a52b9cface23783b0fa0cd2e18ba4 to your computer and use it in GitHub Desktop.
Save jaraco/e73a52b9cface23783b0fa0cd2e18ba4 to your computer and use it in GitHub Desktop.
"""
Given a scrape of the HTML page for a Kiln project, print the clone
commands needed to download every Git and Mercurial repository it lists.
"""
# Third-party packages this script needs at runtime.
__requires__ = [
    'lxml',
]

import lxml.etree

parser = lxml.etree.HTMLParser()
# Open in binary mode so the HTML parser determines the document's encoding
# itself instead of depending on the platform's default text encoding.
with open('archive.html', 'rb') as stream:
    tree = lxml.etree.parse(stream, parser)

# XPath selecting the href of every "main" link nested inside a <div> whose
# class attribute contains "repoDiv <type>".
path_tmpl = "//div[contains(@class, 'repoDiv {type}')]//a[@class='main']/@href"

# Printing is a side effect, so use plain loops rather than building
# throwaway lists of None with comprehensions.
nodes = tree.xpath(path_tmpl.format(type='git'))
for node in nodes:
    print(f'git clone --mirror https://yougov.kilnhg.com{node}.git')

nodes = tree.xpath(path_tmpl.format(type='hg'))
for node in nodes:
    print(f'hg clone --noupdate https://yougov.kilnhg.com{node}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment