Skip to content

Instantly share code, notes, and snippets.

@vdavez
Created January 21, 2014 22:30
Show Gist options
  • Save vdavez/8549798 to your computer and use it in GitHub Desktop.
Save vdavez/8549798 to your computer and use it in GitHub Desktop.
The process by which I built the openlims list of enrolled bills
#!/usr/bin/env python
import re
import os
import glob
import json
import pymongo
from pymongo import MongoClient
import shutil
def bill_number(path):
    """Return the bill number encoded in an enrolled-bill PDF file name.

    The base name must start with ``B<digits>-<1 to 4 digits>_``; for
    example ``./pdfs/B20-0123_Enrollment.pdf`` yields ``'B20-0123'``.
    Returns ``None`` when the base name does not follow that pattern, so
    callers can skip stray files instead of failing on ``None.group``.
    """
    found = re.match(r'(B\d+-\d{1,4})_', os.path.basename(path))
    return found.group(1) if found else None


def law_pdf_name(law_no):
    """Return the file stem for a law number with its zero padding removed.

    Every ``L0`` becomes ``L`` and up to four zeros directly after a
    hyphen are dropped, e.g. ``'L020-0045'`` becomes ``'L20-45'``.
    """
    trimmed = re.sub(r'L0', r'L', law_no)
    return re.sub(r'-(0){1,4}', '-', trimmed)


def copy_enrolled_bills(collection, pdf_glob='./pdfs/B*_Enr*.pdf',
                        out_dir='./enr/'):
    """Copy enrolled-bill PDFs into ``out_dir``, renamed by law number.

    collection -- object with a ``find_one(query)`` method that returns a
                  mapping holding a ``LawNo`` entry, or ``None`` when the
                  bill is unknown (the script passes a pymongo collection).
    pdf_glob   -- glob pattern selecting the source PDFs.
    out_dir    -- destination directory; created when it does not exist.

    Files whose name carries no bill number, bills without a matching
    record, and records with an empty or missing ``LawNo`` are skipped.
    Returns the list of destination paths that were written.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    copied = []
    for pdf in glob.glob(pdf_glob):
        bill = bill_number(pdf)
        if bill is None:
            # The glob is looser than the bill-number pattern.
            continue
        record = collection.find_one({"LegislationNo": bill})
        law_no = record.get('LawNo') if record else None
        if not law_no:
            continue
        target = os.path.join(out_dir, law_pdf_name(law_no) + '.pdf')
        shutil.copy(pdf, target)
        copied.append(target)
    return copied


if __name__ == '__main__':
    # The original script also parsed ./out.json here but never used the
    # result, so that load has been dropped.
    # NOTE(review): this URI embeds credentials, and its "[email protected]"
    # part looks like the residue of e-mail obfuscation -- confirm the real
    # value and consider reading it from configuration instead.
    client = MongoClient('mongodb://user:[email protected]:10049/limsydb')
    copy_enrolled_bills(client.limsydb.measures)
#!/usr/bin/env python
import json
import re
import os
import urllib
# Host substituted for the ".." placeholder found in each version's URL.
bname = "http://dcclims1.dccouncil.us"


def absolute_url(url, base=bname):
    """Return ``url`` with every ``..`` occurrence replaced by ``base``."""
    return url.replace('..', base)


def pdf_path(measure_id, version, pdf_dir='./pdfs/'):
    """Return the local path of one version of a measure.

    Naming convention: "B12-0144_[version].pdf" inside ``pdf_dir``, which
    is prepended verbatim and therefore must end with a path separator.
    """
    return pdf_dir + measure_id + "_" + version + '.pdf'


def _urlretrieve(url, filename):
    """Download ``url`` to ``filename`` on either Python 3 or Python 2."""
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2
    return urlretrieve(url, filename)


def _remove_partial(path):
    """Delete ``path`` if a failed download left anything behind."""
    try:
        os.remove(path)
    except OSError:
        # Nothing was written (or it is already gone); nothing to clean up.
        pass


def download_missing_pdfs(measures, log, fetch=None, pdf_dir='./pdfs/'):
    """Download every version PDF that is not yet present in ``pdf_dir``.

    measures -- iterable of mappings with a ``LegislationNo`` entry and a
                ``versions`` list whose items hold ``version`` and ``url``.
    log      -- object with ``write``; gets one line per attempted file.
    fetch    -- callable ``fetch(url, filename)``; defaults to urlretrieve.
    pdf_dir  -- existing directory prefix, ending with a path separator.

    A failed download is logged and skipped; any partial file is removed
    so that the next run retries it instead of treating it as complete.
    Returns the list of local paths downloaded during this call.
    """
    if fetch is None:
        fetch = _urlretrieve
    downloaded = []
    for measure in measures:
        measure_id = measure['LegislationNo']
        for doc in measure['versions']:
            fname = pdf_path(measure_id, doc['version'], pdf_dir)
            if os.path.exists(fname):
                continue
            url = absolute_url(doc['url'])
            try:
                fetch(url, fname)
            except Exception as exc:
                # "Exception" rather than a bare except, so Ctrl-C and
                # SystemExit still stop the run.
                _remove_partial(fname)
                log.write("ERROR: There was an error here for: " + fname +
                          " url: " + url + " (" + str(exc) + ")\n")
            else:
                log.write(fname + ' added.\n')
                downloaded.append(fname)
    return downloaded


if __name__ == '__main__':
    with open('./out.json', 'r') as source:
        measures = json.load(source)
    if not os.path.isdir('./pdfs/'):
        os.makedirs('./pdfs/')
    with open('./pdfs/log.txt', 'a') as f_log:
        download_missing_pdfs(measures, f_log)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment