banterability · October 7, 2010 22:22
diff --git a/eml_parser.py b/eml_parser.py
 import os
 import shutil
 import urllib2
 import zipfile
 from lxml import etree  # requires lxml & friends

 # TODO:
 # - Total vote counts
 # - County-level results

 # Where the results archive lives, which member of it holds the data, and
 # the scratch directory used while downloading and extracting.
 DOWNLOAD_URL = "http://www.sos.ca.gov/media/10gg/november2010-sample-xml.zip"
 DATA_FILE = "X10GG_510.xml"
 TMP_DIR = "election_tmp"

 # Maps the first four characters of a contest ID to a description of the
 # contest and to the kind of handler it needs ('candidate' or 'measure').
 CONTEST_TYPES = dict(
    (prefix, {'description': description, 'type': kind})
    for prefix, description, kind in (
        ('0200', 'Governor', 'candidate'),
        ('0300', 'Lieutenant Governor', 'candidate'),
        ('0400', 'Secretary of State', 'candidate'),
        ('0500', 'State Controller', 'candidate'),
        ('0600', 'State Treasurer', 'candidate'),
        ('0700', 'Attorney General', 'candidate'),
        ('0800', 'Insurance Commissioner', 'candidate'),
        ('0900', 'Board of Equalization', 'candidate'),
        ('1000', 'U.S. Senate', 'candidate'),
        ('1100', 'U.S. Representative in Congress', 'candidate'),
        ('1200', 'State Senate', 'candidate'),
        ('1300', 'State Assembly', 'candidate'),
        ('1400', 'Supreme Court Justices', 'measure'),
        ('1500', 'Courts of Appeal Justices', 'measure'),
        ('1600', 'Superintendent of Public Instruction', 'candidate'),
        ('1900', 'Ballot Measures', 'measure'),
    )
 )

 #### Helpers ####


 def download_file(url):
    """
    Download `url` into TMP_DIR and return the path of the saved file.

    The file is stored under the last path segment of the URL.  TMP_DIR is
    created when it does not exist yet.  Errors raised by urllib2 or by the
    filesystem propagate to the caller.
    """
    # A run that died before cleaning up may have left TMP_DIR behind;
    # reuse it instead of crashing in os.mkdir.
    if not os.path.isdir(TMP_DIR):
        os.mkdir(TMP_DIR)
    local_path = os.path.join(TMP_DIR, url.split('/')[-1])

    request = urllib2.Request(url)
    request.add_header('User-Agent', 'kpcc-hancock/1.0 +http://www.scpr.org')
    remote_file = urllib2.build_opener().open(request)
    try:
        # Binary mode: the payload must be written byte-for-byte, text mode
        # can mangle it on platforms that translate line endings.
        with open(local_path, 'wb') as local_file:
            shutil.copyfileobj(remote_file, local_file)
    finally:
        # Release the connection even if the copy fails part-way.
        remote_file.close()
    return local_path


 def unzip_and_extract(data_archive):
    """
    Extract the member named DATA_FILE from the zip at `data_archive`.

    The member is written to TMP_DIR under its own name.  Nothing is written
    when the archive has no member called DATA_FILE.
    """
    # Passing the path lets zipfile open the archive itself, in binary mode.
    zfobj = zipfile.ZipFile(data_archive)
    try:
        if DATA_FILE in zfobj.namelist():
            with open(os.path.join(TMP_DIR, DATA_FILE), 'wb') as outfile:
                outfile.write(zfobj.read(DATA_FILE))
    finally:
        # Always release the archive handle, even if the write fails.
        zfobj.close()


 def process_candidate(contest_package):
    """
    Print a contest that is decided between candidates.

    Reads the contest element from contest_package["contest"] and the heading
    from contest_package["name"], then prints one entry per selection: the
    candidate name with party affiliation, followed by the percentage of the
    vote and the vote count.
    TODO: Fix return type.
    """
    race = contest_package["contest"]
    print("%s:" % contest_package["name"])

    # One Selection element per available candidate
    for selection in race.findall('TotalVotes/Selection'):
        candidate_name = selection.findtext(
            "Candidate/CandidateIdentifier/CandidateName")
        party = selection.findtext("Candidate/Affiliation/Type")

        # TODO: Get incumbency status

        print("  %s (%s)" % (candidate_name, party))

        # Raw vote count, and this candidate's percentage of the contest
        vote_count = selection.findtext("ValidVotes")
        vote_share = selection.findtext("CountMetric[@Id='PVR']")
        print("   - %s%% (%s)" % (vote_share, vote_count))


 def process_measure(contest_package):
    """
    Print a contest that is decided by yes/no options.

    Ballot measures are headed "Prop <number>: <name>", the number being read
    from characters 7-11 of the contest ID; any other yes/no contest is
    headed by its name alone.  Every selection is then printed with its
    percentage and its vote count.

    An option that is neither "Yes" nor "No", or a selection without a
    ProposalItem element, is still listed; its percentage prints as None
    instead of raising or reusing the previous option's figure.
    TODO: Fix return type.
    """
    contest = contest_package["contest"]
    # If contest is a prop, extract the proposition number from the contest ID
    if contest_package["info_dict"]["description"] == "Ballot Measures":
        print("Prop %s: %s" % (
            int(contest_package["id"][7:12]),
            contest_package["name"]))
    else:
        print("%s:" % contest_package["name"])

    # Percentage of yes & no votes, keyed by the option they belong to
    percentages = {
        "Yes": contest.findtext('TotalVotes/CountMetric[@Id="PYV"]'),
        "No": contest.findtext('TotalVotes/CountMetric[@Id="PNV"]'),
    }

    # Get all available responses (Should just be yes and no)
    for s in contest.findall('TotalVotes/Selection'):
        item = s.find("Candidate/ProposalItem")
        cname = item.get("ReferendumOptionIdentifier") if item is not None else None
        print("  %s" % cname)

        votes = s.findtext("ValidVotes")

        # Looked up per option, so an unexpected identifier can never pick
        # up a value left over from an earlier iteration.
        print("   - %s%% (%s)" % (percentages.get(cname), votes))

 #### Main #####

 # get the file
 # Download the archive, extract the XML and parse it.  TMP_DIR is this
 # script's scratch directory: it is removed whether or not those steps
 # succeed, so a failed run cannot leave a stale directory behind.
 try:
    unzip_and_extract(download_file(DOWNLOAD_URL))
    with open(os.path.join(TMP_DIR, DATA_FILE), 'rb') as response:
        results = etree.fromstring(response.read())
 finally:
    # ignore_errors: if the directory was never created, cleanup must not
    # hide the exception that actually caused the failure.
    shutil.rmtree(TMP_DIR, ignore_errors=True)

 contests = results.findall('.//Contests/Contest')

 for contest in contests:
    # Extract the contest ID to decide how to handle things
    identifier = contest.find('ContestIdentifier')
    if identifier is None:
        contest_id = "Not defined"
    else:
        contest_id = identifier.get("Id", "Not defined")

    # Compare the first four digits against mapping dictionary; a contest
    # whose prefix is not in the table is reported and skipped rather than
    # aborting the whole run with a KeyError.
    contest_type_dict = CONTEST_TYPES.get(contest_id[0:4])
    if contest_type_dict is None:
        print("Skipping contest with unrecognised ID: %s" % contest_id)
        continue
    contest_type = contest_type_dict["type"]

    # Package contest plus previously accessed data for helper functions
    contest_package = {
        'contest': contest,
        'id': contest_id,
        'info_dict': contest_type_dict,
        'name': contest.findtext('ContestIdentifier/ContestName')}

    # Hand off...
    if contest_type == "measure":
        process_measure(contest_package)
    elif contest_type == "candidate":
        process_candidate(contest_package)
    print("\n")
	import os
	import shutil
	import urllib2
	import zipfile
	from lxml import etree # requires lxml & friends

	# TODO:
	# - Total vote counts
	# - County-level results

	# Where the results archive lives, which member of it holds the data, and
	# the scratch directory used while downloading and extracting.
	DOWNLOAD_URL = "http://www.sos.ca.gov/media/10gg/november2010-sample-xml.zip"
	DATA_FILE = "X10GG_510.xml"
	TMP_DIR = "election_tmp"

	# Maps the first four characters of a contest ID to a description of the
	# contest and to the kind of handler it needs ('candidate' or 'measure').
	CONTEST_TYPES = dict(
	    (prefix, {'description': description, 'type': kind})
	    for prefix, description, kind in (
	        ('0200', 'Governor', 'candidate'),
	        ('0300', 'Lieutenant Governor', 'candidate'),
	        ('0400', 'Secretary of State', 'candidate'),
	        ('0500', 'State Controller', 'candidate'),
	        ('0600', 'State Treasurer', 'candidate'),
	        ('0700', 'Attorney General', 'candidate'),
	        ('0800', 'Insurance Commissioner', 'candidate'),
	        ('0900', 'Board of Equalization', 'candidate'),
	        ('1000', 'U.S. Senate', 'candidate'),
	        ('1100', 'U.S. Representative in Congress', 'candidate'),
	        ('1200', 'State Senate', 'candidate'),
	        ('1300', 'State Assembly', 'candidate'),
	        ('1400', 'Supreme Court Justices', 'measure'),
	        ('1500', 'Courts of Appeal Justices', 'measure'),
	        ('1600', 'Superintendent of Public Instruction', 'candidate'),
	        ('1900', 'Ballot Measures', 'measure'),
	    )
	)

	#### Helpers ####


	def download_file(url):
	    """
	    Download `url` into TMP_DIR and return the path of the saved file.

	    The file is stored under the last path segment of the URL.  TMP_DIR is
	    created when it does not exist yet.  Errors raised by urllib2 or by the
	    filesystem propagate to the caller.
	    """
	    # A run that died before cleaning up may have left TMP_DIR behind;
	    # reuse it instead of crashing in os.mkdir.
	    if not os.path.isdir(TMP_DIR):
	        os.mkdir(TMP_DIR)
	    local_path = os.path.join(TMP_DIR, url.split('/')[-1])

	    request = urllib2.Request(url)
	    request.add_header('User-Agent', 'kpcc-hancock/1.0 +http://www.scpr.org')
	    remote_file = urllib2.build_opener().open(request)
	    try:
	        # Binary mode: the payload must be written byte-for-byte, text mode
	        # can mangle it on platforms that translate line endings.
	        with open(local_path, 'wb') as local_file:
	            shutil.copyfileobj(remote_file, local_file)
	    finally:
	        # Release the connection even if the copy fails part-way.
	        remote_file.close()
	    return local_path


	def unzip_and_extract(data_archive):
	    """
	    Extract the member named DATA_FILE from the zip at `data_archive`.

	    The member is written to TMP_DIR under its own name.  Nothing is written
	    when the archive has no member called DATA_FILE.
	    """
	    # Passing the path lets zipfile open the archive itself, in binary mode.
	    zfobj = zipfile.ZipFile(data_archive)
	    try:
	        if DATA_FILE in zfobj.namelist():
	            with open(os.path.join(TMP_DIR, DATA_FILE), 'wb') as outfile:
	                outfile.write(zfobj.read(DATA_FILE))
	    finally:
	        # Always release the archive handle, even if the write fails.
	        zfobj.close()


	def process_candidate(contest_package):
	    """
	    Print a contest that is decided between candidates.

	    Reads the contest element from contest_package["contest"] and the heading
	    from contest_package["name"], then prints one entry per selection: the
	    candidate name with party affiliation, followed by the percentage of the
	    vote and the vote count.
	    TODO: Fix return type.
	    """
	    race = contest_package["contest"]
	    print("%s:" % contest_package["name"])

	    # One Selection element per available candidate
	    for selection in race.findall('TotalVotes/Selection'):
	        candidate_name = selection.findtext(
	            "Candidate/CandidateIdentifier/CandidateName")
	        party = selection.findtext("Candidate/Affiliation/Type")

	        # TODO: Get incumbency status

	        print(" %s (%s)" % (candidate_name, party))

	        # Raw vote count, and this candidate's percentage of the contest
	        vote_count = selection.findtext("ValidVotes")
	        vote_share = selection.findtext("CountMetric[@Id='PVR']")
	        print(" - %s%% (%s)" % (vote_share, vote_count))


	def process_measure(contest_package):
	    """
	    Print a contest that is decided by yes/no options.

	    Ballot measures are headed "Prop <number>: <name>", the number being read
	    from characters 7-11 of the contest ID; any other yes/no contest is
	    headed by its name alone.  Every selection is then printed with its
	    percentage and its vote count.

	    An option that is neither "Yes" nor "No", or a selection without a
	    ProposalItem element, is still listed; its percentage prints as None
	    instead of raising or reusing the previous option's figure.
	    TODO: Fix return type.
	    """
	    contest = contest_package["contest"]
	    # If contest is a prop, extract the proposition number from the contest ID
	    if contest_package["info_dict"]["description"] == "Ballot Measures":
	        print("Prop %s: %s" % (
	            int(contest_package["id"][7:12]),
	            contest_package["name"]))
	    else:
	        print("%s:" % contest_package["name"])

	    # Percentage of yes & no votes, keyed by the option they belong to
	    percentages = {
	        "Yes": contest.findtext('TotalVotes/CountMetric[@Id="PYV"]'),
	        "No": contest.findtext('TotalVotes/CountMetric[@Id="PNV"]'),
	    }

	    # Get all available responses (Should just be yes and no)
	    for s in contest.findall('TotalVotes/Selection'):
	        item = s.find("Candidate/ProposalItem")
	        cname = item.get("ReferendumOptionIdentifier") if item is not None else None
	        print(" %s" % cname)

	        votes = s.findtext("ValidVotes")

	        # Looked up per option, so an unexpected identifier can never pick
	        # up a value left over from an earlier iteration.
	        print(" - %s%% (%s)" % (percentages.get(cname), votes))

	#### Main #####

	# get the file
	# Download the archive, extract the XML and parse it.  TMP_DIR is this
	# script's scratch directory: it is removed whether or not those steps
	# succeed, so a failed run cannot leave a stale directory behind.
	try:
	    unzip_and_extract(download_file(DOWNLOAD_URL))
	    with open(os.path.join(TMP_DIR, DATA_FILE), 'rb') as response:
	        results = etree.fromstring(response.read())
	finally:
	    # ignore_errors: if the directory was never created, cleanup must not
	    # hide the exception that actually caused the failure.
	    shutil.rmtree(TMP_DIR, ignore_errors=True)

	contests = results.findall('.//Contests/Contest')

	for contest in contests:
	    # Extract the contest ID to decide how to handle things
	    identifier = contest.find('ContestIdentifier')
	    if identifier is None:
	        contest_id = "Not defined"
	    else:
	        contest_id = identifier.get("Id", "Not defined")

	    # Compare the first four digits against mapping dictionary; a contest
	    # whose prefix is not in the table is reported and skipped rather than
	    # aborting the whole run with a KeyError.
	    contest_type_dict = CONTEST_TYPES.get(contest_id[0:4])
	    if contest_type_dict is None:
	        print("Skipping contest with unrecognised ID: %s" % contest_id)
	        continue
	    contest_type = contest_type_dict["type"]

	    # Package contest plus previously accessed data for helper functions
	    contest_package = {
	        'contest': contest,
	        'id': contest_id,
	        'info_dict': contest_type_dict,
	        'name': contest.findtext('ContestIdentifier/ContestName')}

	    # Hand off...
	    if contest_type == "measure":
	        process_measure(contest_package)
	    elif contest_type == "candidate":
	        process_candidate(contest_package)
	    print("\n")
No results found