Created
November 1, 2012 18:10
-
-
Save phred/3995467 to your computer and use it in GitHub Desktop.
Extract Ubuntu vulnerabilities into an easy-to-copy/paste text form
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Usage: python extract_vulns.py <url> | |
# Bog-simple screen scraping, will fail unless URL is something like this: | |
# http://www.ubuntu.com/usn/lucid/ | |
# http://www.ubuntu.com/usn/lucid/?page=3 | |
# | |
from BeautifulSoup import BeautifulSoup | |
import requests, time, sys | |
def extract_date(tag):
    """Parse the date found at the end of a tag's text.

    The last three whitespace-separated words of ``tag.text`` are read as
    the day (optionally carrying an ordinal suffix such as "3rd"), the full
    month name and the four-digit year.

    Args:
        tag: Any object exposing a ``text`` string attribute.

    Returns:
        The ``time.struct_time`` produced by ``time.strptime``.

    Raises:
        ValueError: If the text has fewer than three words, or the last
            three words do not form a "day/MonthName/year" date.
    """
    # split() without an argument tolerates trailing or repeated whitespace,
    # which split(' ') would turn into empty tokens.
    words = tag.text.split()[-3:]
    if len(words) != 3:
        raise ValueError(
            "expected text ending in 'day month year', got %r" % (tag.text,))
    day, month, year = words
    # Strip the ordinal suffix. str.isdigit works on byte and unicode strings
    # alike, whereas the `unicode` builtin only exists on Python 2.
    day = "".join(c for c in day if c.isdigit())
    time_str = "/".join((day, month, year))
    return time.strptime(time_str, "%d/%B/%Y")  # for example, "3/May/2012"
def extract_content(soup):
    """Return the <h3> tags inside the div with id "content", minus the first two.

    Args:
        soup: Parsed document offering ``find`` (whose result offers
            ``findAll``).

    Returns:
        Every <h3> found under the content div except the first two.
    """
    content_div = soup.find('div', {'id': 'content'})
    headings = content_div.findAll('h3')
    # The first two headings are skipped (presumably they are page headings
    # rather than vulnerability entries -- confirm against the live page).
    return headings[2:]
def extract_title(tag):
    """Return the text of the <a> element that ``tag.find('a')`` yields."""
    link = tag.find('a')
    return link.text
def extract_vulns(soup):
    """Build the list of (date, title) pairs for the entries in *soup*.

    Args:
        soup: Parsed document passed on to ``extract_content``.

    Returns:
        A list of ``(date, title)`` tuples, in the reverse of the order in
        which ``extract_content`` returned the tags.
    """
    entries = []
    for heading in extract_content(soup):
        entries.append((extract_date(heading), extract_title(heading)))
    # Flip the order so the last entry on the page comes first.
    return entries[::-1]
def scrape_vulns(url, timeout=30):
    """Download *url* and extract the vulnerabilities listed on the page.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The list of ``(date, title)`` tuples produced by ``extract_vulns``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text)
    return extract_vulns(soup)
def format_vulns(vulns):
    """Render vulnerabilities as text, one "MM/DD/YYYY<TAB>title" line each.

    Args:
        vulns: Iterable of sequences whose first item is a time tuple
            accepted by ``time.strftime`` and whose second is the title.

    Returns:
        The lines joined with newlines (an empty string for no entries).
    """
    lines = []
    for entry in vulns:
        stamp = time.strftime("%m/%d/%Y", entry[0])
        lines.append(stamp + "\t" + entry[1])
    return "\n".join(lines)
if __name__ == '__main__':
    # The page URL is the first command-line argument; without it, print
    # the usage line and exit instead of crashing with an IndexError.
    if len(sys.argv) < 2:
        sys.exit("Usage: python extract_vulns.py <url>")
    vulns = scrape_vulns(sys.argv[1])
    print(format_vulns(vulns))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment