Skip to content

Instantly share code, notes, and snippets.

@lmorchard
Created April 8, 2013 16:57
Show Gist options
  • Save lmorchard/5338423 to your computer and use it in GitHub Desktop.
Save lmorchard/5338423 to your computer and use it in GitHub Desktop.
This is the Python scraper I use to push MDN chief log data to an MDN wiki page.
#!/usr/bin/env python
import sys
sys.path.append('/home/lorchard/.local/lib/python2.6/')
import json
import requests
from requests.auth import HTTPBasicAuth
from bs4 import BeautifulSoup
# Scrape the chief deployment history table at LOG_URL and upload it, encoded
# as a JSON list of dicts, to the wiki page at PUT_URL.

# Keys for each scraped log entry, matched positionally to the <td> cells of
# a table row.
FIELD_NAMES = ['time', 'who', 'commit', 'status']
LOG_URL = "http://developeradm.private.scl3.mozilla.com/chief/developer.prod/history"
PUT_URL = 'https://developer.mozilla.org/en-US/docs/User:lmorchard/ChiefHistory'
# Credentials passed to HTTP basic auth on the PUT request.
KEY_ID = "YOUR_KEY_HERE"
SECRET = "YOUR_SECRET_HERE"
# Seconds to wait on each HTTP request; without a timeout requests can block
# forever on a stalled server.
HTTP_TIMEOUT = 30

log_response = requests.get(LOG_URL, timeout=HTTP_TIMEOUT)
# Stop on an HTTP error: parsing an error page would yield an empty log and
# overwrite the wiki page with it.
log_response.raise_for_status()
log_html = log_response.text

# No parser is named here, so bs4 picks from whichever parsers are installed.
doc = BeautifulSoup(log_html)

log = []
for row in doc.find_all('tr'):
    # Rows containing <th> cells are headers and carry no log data.
    if row.find_all('th'):
        continue
    cells = row.find_all('td')
    # Skip rows that do not have a cell for every field (empty or spanning
    # rows) instead of failing with an IndexError.
    if len(cells) < len(FIELD_NAMES):
        continue
    # zip() stops at the shorter sequence, so any extra cells are ignored.
    log.append(dict(zip(FIELD_NAMES, [cell.get_text() for cell in cells])))

# The body is JSON but is sent with a text/html content type.
# NOTE(review): presumably the wiki page endpoint expects HTML content - confirm.
put_response = requests.put(
    PUT_URL,
    headers={'Content-Type': 'text/html'},
    auth=HTTPBasicAuth(KEY_ID, SECRET),
    data=json.dumps(log),
    timeout=HTTP_TIMEOUT)
# Raise on an HTTP error so a rejected upload does not pass silently.
put_response.raise_for_status()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment