Last active
August 29, 2015 14:15
-
-
Save grafuls/913e379b52d5977479a0 to your computer and use it in GitHub Desktop.
oVirt feature status scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
from xtermcolor import colorize | |
import requests | |
import re | |
# Base address of the site being scraped; relative hrefs found on its pages
# are appended to it to form absolute URLs.
URL = 'http://www.ovirt.org'
# Path, relative to URL, of the category page that lists the feature pages.
FEATURES_PREFIX = "/Category:Feature"
def fetch_source(source_url, timeout=30):
    """Download a page with HTTP GET and return its body as text.

    The HTTP status code is not inspected: the body of an error response is
    returned like any other body.

    :param source_url: URL to request.
    :param timeout: seconds to wait for the server, passed straight through
        to ``requests.get``. Defaults to 30.
    :returns: the response body decoded to text (``Response.text``).
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    source_response = requests.get(source_url, timeout=timeout)
    return source_response.text
def get_status(status_url):
    """Print the status of every feature page linked from ``status_url``.

    Downloads ``status_url`` and collects the ``href`` of each link inside
    ``div.mw-content-ltr`` whose target starts with ``/Features``. Each of
    those pages (``URL`` + href) is then downloaded, and one line per link is
    printed: the colorized link followed by the upper-cased text node that
    comes right after the first ``<b>`` element whose text contains
    ``Status``. When that text cannot be located, the link is printed with
    ``status: N/A`` instead.

    :param status_url: absolute URL of the page that lists the feature links.
    :returns: None; results are written to standard output.
    """
    # Compiled once instead of on every iteration. A plain raw string replaces
    # the ur'' prefix, which is a syntax error outside Python 2.
    status_pattern = re.compile(r'Status')

    # The parser is named explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    listing_soup = BeautifulSoup(fetch_source(status_url), 'html.parser')
    # The attribute value must be quoted: an unquoted value containing "/" is
    # not a valid CSS selector and is rejected by current selector engines.
    anchors = listing_soup.select('div.mw-content-ltr a[href^="/Features"]')
    status_links = [anchor.attrs.get('href') for anchor in anchors]

    for status_link in status_links:
        try:
            page_soup = BeautifulSoup(fetch_source(URL + status_link),
                                      'html.parser')
            label = page_soup.find('b', text=status_pattern)
            # AttributeError covers a missing label or a missing sibling.
            # TypeError is caught defensively for a sibling that is an element
            # rather than a text node and so has no callable upper().
            status = label.next_sibling.upper()
        except (AttributeError, TypeError):
            # Each print receives one pre-formatted unicode string, so the
            # output is identical under the Python 2 print statement and the
            # Python 3 print function.
            print(u'\t{} | status: N/A'.format(
                colorize(status_link, ansi=196)))
        else:
            print(u'\t{} | status{}'.format(
                colorize(status_link, ansi=12), status))
if __name__ == '__main__':
    # Script entry point: fetch the feature category index, then report the
    # status of the features listed under each linked category page.
    feature_soup = BeautifulSoup(fetch_source(URL + FEATURES_PREFIX),
                                 'html.parser')
    # Anchors without an href are skipped; they cannot be followed and would
    # otherwise fail when sliced below.
    feature_links = [
        anchor['href']
        for anchor in feature_soup.select('a.external')
        if anchor.get('href')
    ]

    # Derived from URL rather than hard-coded as a character count, so the
    # heading stays correct if URL changes.
    site_prefix = URL + '/'
    for feature_link in feature_links:
        if feature_link.startswith(site_prefix):
            heading = feature_link[len(site_prefix):]
        else:
            # Link points outside the site: show it unmodified.
            heading = feature_link
        # Single-argument print works as both a Python 2 statement and a
        # Python 3 function call.
        print(heading)
        get_status(feature_link)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment