Skip to content

Instantly share code, notes, and snippets.

@grafuls
Last active August 29, 2015 14:15
Show Gist options
  • Save grafuls/913e379b52d5977479a0 to your computer and use it in GitHub Desktop.
Save grafuls/913e379b52d5977479a0 to your computer and use it in GitHub Desktop.
oVirt feature status scraper
from bs4 import BeautifulSoup
from xtermcolor import colorize
import requests
import re
URL = 'http://www.ovirt.org'
FEATURES_PREFIX = "/Category:Feature"
def fetch_source(source_url):
    """Issue an HTTP GET for *source_url* and return the response body as text."""
    return requests.get(source_url).text
def get_status(status_url):
    """Print the status of every feature page linked from *status_url*.

    The page at *status_url* is fetched and every anchor inside
    ``div.mw-content-ltr`` whose ``href`` starts with ``/Features`` is
    collected.  Each of those pages (``URL`` + href) is fetched in turn and
    searched for a ``<b>`` element whose text matches ``Status``; the node
    immediately following that element is upper-cased and printed next to
    the colorized link.  When no usable status text is found the link is
    printed in a different color with ``status: N/A``.

    Args:
        status_url: Absolute URL of the page that lists the feature links.

    Returns:
        None.  Results are written to standard output.
    """
    # Naming the parser keeps the parse tree independent of which optional
    # parser libraries happen to be installed.
    listing_soup = BeautifulSoup(fetch_source(status_url), "html.parser")

    # The attribute value must be quoted: a bare value starting with "/" is
    # not a valid CSS identifier and is rejected by current Beautiful Soup
    # releases.
    anchors = listing_soup.select('div.mw-content-ltr a[href^="/Features"]')

    # Loop-invariant, so compile it once.
    status_label = re.compile(r'Status')

    for status_link in (anchor.attrs.get('href') for anchor in anchors):
        page_soup = BeautifulSoup(fetch_source(URL + status_link),
                                  "html.parser")
        try:
            # AttributeError: no matching <b>, or nothing follows it.
            # TypeError: the following node is an element rather than text,
            # so it has no callable ``upper``.
            status = page_soup.find('b', text=status_label).next_sibling.upper()
        except (AttributeError, TypeError):
            print("\t" + colorize(status_link, ansi=196) + " | status: N/A")
        else:
            # A single pre-built string prints the same text under both the
            # Python 2 print statement and the print function.
            print("\t" + colorize(status_link, ansi=12)
                  + " " + "| status{}".format(status))
if __name__ == '__main__':
    # Entry point: fetch the feature category page, then for every
    # ``a.external`` link on it print the link and the status of the
    # feature pages it lists.
    feature_soup = BeautifulSoup(fetch_source(URL + FEATURES_PREFIX),
                                 "html.parser")

    # Links are displayed relative to the site root; derive the prefix from
    # URL instead of hard-coding its length.
    site_prefix = URL + '/'

    for anchor in feature_soup.select('a.external'):
        feature_link = anchor.attrs.get('href')
        if not feature_link:
            # An anchor without an href has nothing to fetch.
            continue
        if feature_link.startswith(site_prefix):
            print(feature_link[len(site_prefix):])
        else:
            # Not under the site root: show the link unmodified rather than
            # chopping off an unrelated prefix.
            print(feature_link)
        get_status(feature_link)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment