Created
December 18, 2018 17:04
-
-
Save vdavez/5e6b232313e053b5a07d88e589083318 to your computer and use it in GitHub Desktop.
Parsing through the Federal Register
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count Federal Register rules per CFR title.
#
# Walks the current directory tree, parses every ``*.xml`` file found, and for
# each CFR title tallies the <RULE> elements that contain a <REGTEXT> whose
# TITLE attribute equals that title number. The totals are then emitted as
# ``title,count`` CSV rows.
import csv
import os
import sys

from lxml import etree

# The CFR is divided into titles 1 through 50. range() excludes its stop
# value, so the stop must be 51 for title 50 to be counted.
CFR_TITLES = range(1, 51)

# Compiled once and reused for every file/title pair; the title number is
# supplied through the $title XPath variable instead of string concatenation.
RULES_FOR_TITLE = etree.XPath("//REGTEXT[@TITLE=$title]/ancestor::RULE")

# Running total of matching <RULE> elements, keyed by CFR title number.
titles = {title: 0 for title in CFR_TITLES}

for froot, dirs, files in os.walk('.', topdown=True):
    for name in files:
        if not name.lower().endswith('.xml'):
            continue
        fname = os.path.join(froot, name)  # This is the filename for the XML file
        # A zero-byte file is not parseable XML and would make etree.parse
        # raise, aborting the whole run; skip it instead of requiring the
        # file to be deleted by hand first.
        if os.path.getsize(fname) == 0:
            continue
        root = etree.parse(fname)
        for title in CFR_TITLES:
            # The XPath result is a node-set, so a <RULE> containing several
            # <REGTEXT> elements for the same title is counted once.
            rules = RULES_FOR_TITLE(root, title=str(title))
            titles[title] += len(rules)

# NOTE(review): the CSV is written to stderr, exactly as the original script
# did; confirm whether stdout was actually intended.
w = csv.writer(sys.stderr)
w.writerows(titles.items())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Data comes from the zip file here: https://www.govinfo.gov/bulkdata/FR