Skip to content

Instantly share code, notes, and snippets.

@phred
Created November 1, 2012 18:10
Show Gist options
  • Save phred/3995467 to your computer and use it in GitHub Desktop.
Save phred/3995467 to your computer and use it in GitHub Desktop.
Extract Ubuntu Vulnerabilities into an easy to copy/paste text form
#!/usr/bin/env python
# Usage: python extract_vulns.py <url>
# Bog-simple screen scraping, will fail unless URL is something like this:
# http://www.ubuntu.com/usn/lucid/
# http://www.ubuntu.com/usn/lucid/?page=3
#
from BeautifulSoup import BeautifulSoup
import requests, time, sys
def extract_date(tag):
    """Parse the date at the end of a tag's text into a ``time.struct_time``.

    The last three whitespace-separated words of ``tag.text`` are taken to be
    the day (optionally with an ordinal suffix such as ``3rd``), the full
    month name and the year.

    Args:
        tag: Any object exposing the heading text as a ``text`` attribute.

    Returns:
        time.struct_time: The parsed date.

    Raises:
        ValueError: If the text has fewer than three words or the trailing
            words do not form a ``%d/%B/%Y`` date.
    """
    # split() with no argument also swallows newlines and repeated spaces,
    # which would otherwise end up glued to the year or day token.
    words = tag.text.split()
    if len(words) < 3:
        raise ValueError("no trailing day/month/year in %r" % (tag.text,))
    day, month, year = words[-3:]
    # Strip the ordinal suffix ("3rd" -> "3").  str.isdigit works on both
    # byte and unicode strings, unlike the Python-2-only unicode.isnumeric.
    day = "".join(c for c in day if c.isdigit())
    # NOTE: %B matches month names in the current locale.
    return time.strptime("/".join((day, month, year)), "%d/%B/%Y")  # e.g. "3/May/2012"
def extract_content(soup):
    """Return the ``<h3>`` headings of the page's content area.

    Looks up the ``<div id="content">`` element and collects every ``<h3>``
    beneath it, discarding the first two.  (Presumably those two are page
    headings rather than advisories -- confirm against the live markup.)
    """
    content_div = soup.find('div', {'id': 'content'})
    headings = content_div.findAll('h3')
    return headings[2:]
def extract_title(tag):
    """Return the text of the ``<a>`` element found inside *tag*."""
    link = tag.find('a')
    return link.text
def extract_vulns(soup):
    """Collect a ``(date, title)`` pair for each heading found in *soup*.

    Headings are processed in page order; the resulting list is returned
    reversed, i.e. the last heading on the page comes first.
    """
    pairs = []
    for heading in extract_content(soup):
        pairs.append((extract_date(heading), extract_title(heading)))
    return pairs[::-1]
def scrape_vulns(url, timeout=30):
    """Download *url* and return its advisories as ``(date, title)`` pairs.

    Args:
        url: Address of the listing page to scrape.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The list produced by ``extract_vulns`` for the downloaded page.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout
            or an HTTP error status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text)
    return extract_vulns(soup)
def format_vulns(vulns):
    """Render vulnerabilities as tab-separated ``MM/DD/YYYY<TAB>title`` lines.

    Each entry of *vulns* supplies a time tuple at index 0 and the title
    string at index 1.  Lines are joined with newlines, with no trailing
    newline.
    """
    lines = []
    for entry in vulns:
        stamp = time.strftime("%m/%d/%Y", entry[0])
        lines.append(stamp + "\t" + entry[1])
    return "\n".join(lines)
if __name__ == '__main__':
    # Without a URL there is nothing to scrape; show the usage line instead
    # of dying with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("Usage: python extract_vulns.py <url>")
    vulns = scrape_vulns(sys.argv[1])
    print(format_vulns(vulns))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment