harrisoncramer · October 8, 2020 12:19
diff --git a/parseCsv.py b/parseCsv.py
 # SCRIPT FOR SCRAPING SCOTUS HEARING TRANSCRIPT
 # DATA COMPILED BY R STREET INSTITUTE: https://www.rstreet.org/2019/04/04/supreme-court-confirmation-hearing-transcripts-as-data/
 # This script parses the CSV, searches the comments of senators and chairmen for a search term, writes the matching rows to results.csv, and prints a summary

 import csv
 import sys
 import re

 def print_inventory(dct):
     """Print a "Mentions:" header, then one "key (value)" line per entry of dct.

     Entries are printed in the mapping's own iteration order. Returns None.
     """
     print("Mentions:")
     template = "{} ({})"
     for entry in dct.items():  # each entry is a (key, value) pair
         print(template.format(*entry))

 # Command-line driver: reads the transcript CSV named by the first argument,
 # counts case-insensitive regex matches of the second argument in comments
 # made by a 'Senator' or 'Chairman', copies each matching row to results.csv,
 # and prints totals overall, by party, and by hearing.
 args = sys.argv

 if len(args) < 3:
     print("USAGE: python3 parseCsv.py <FILENAME> <SEARCHTERM>")
     raise Exception("Must provide file name, then search term.")

 fileName = args[1]
 term = args[2]

 # The search term is used as a regular expression. Compile it once, up
 # front, so an invalid pattern fails before any file is opened or truncated.
 pattern = re.compile(term, re.IGNORECASE)

 # Times mentioned by party
 total = 0
 totalR = 0
 totalD = 0

 # Times mentioned per justice (keyed by the hearing name found in column 3)
 justices = {
     "Neil M. Gorsuch": 0,
     "Elena Kagan": 0,
     "Sonia Sotomayor": 0,
     "Samuel A. Alito, Jr.": 0,
     "John G. Roberts, Jr.": 0,
     "Stephen G. Breyer": 0,
     "Ruth Bader Ginsburg": 0,
     "Clarence Thomas": 0,
     "David H. Souter": 0,
     "Anthony M. Kennedy": 0,
     "William H. Rehnquist (to be Associate Justice) and Lewis F. Powell Jr.": 0,
     "William H. Rehnquist (to be chief)": 0,
     "Antonin Scalia": 0,
     "Sandra Day O'Connor": 0,
     "John Paul Stevens": 0,
     "Judge Robert Bork": 0
 }

 # Open the input first so a bad file name does not clobber an existing
 # results.csv. Both handles are closed by the with-blocks, which also
 # guarantees the output is flushed to disk.
 with open(fileName, newline='', encoding="utf8", errors="ignore") as csvfile:
     # newline='' is what the csv module asks for on files it writes to;
     # utf8 matches the input so every comment read can be written back.
     with open('results.csv', 'w', newline='', encoding="utf8") as results:
         writer = csv.writer(results, delimiter=',', quotechar='"')
         writer.writerow(['HEARING', 'SPEAKER', 'PARTY', 'COMMENT'])

         data = csv.reader(csvfile, delimiter=',', quotechar='"')
         for row in data:
             # Blank or truncated rows have no comment column; skip them
             # instead of crashing with an IndexError.
             if len(row) < 8:
                 continue

             title = row[4]
             if title != 'Senator' and title != 'Chairman':
                 continue

             comments = row[7]
             num = len(pattern.findall(comments))
             if num == 0:
                 continue

             # Credit the matches to the hearing, if it is one we track.
             hearing = row[3]
             if hearing in justices:
                 justices[hearing] += num

             # Log the party values
             party = row[5]
             name = row[6]
             total += num
             writer.writerow([hearing, name, party, comments])
             # NOTE(review): every party value other than 'D' is tallied as
             # GOP, as before - confirm the data contains no other parties.
             if party == 'D':
                 totalD += num
             else:
                 totalR += num

 print("-- SUMMARY --")
 print("\n")
 print(str(total) + ' mentions of ' + '"' + term + '"')
 print("\n")
 print("Total Dem Instances: " + str(totalD))
 print("Total GOP Instances: " + str(totalR))
 print("\n")
 print_inventory(justices)
	# SCRIPT FOR SCRAPING SCOTUS HEARING TRANSCRIPT
	# DATA COMPILED BY R STREET INSTITUTE: https://www.rstreet.org/2019/04/04/supreme-court-confirmation-hearing-transcripts-as-data/
	# This script parses the CSV, searches the comments of senators and chairmen for a search term, writes the matching rows to results.csv, and prints a summary

	import csv
	import sys
	import re

	def print_inventory(dct):
	    """Print a "Mentions:" header, then one "key (value)" line per entry of dct.

	    Entries are printed in the mapping's own iteration order. Returns None.
	    """
	    print("Mentions:")
	    for item, amount in dct.items():  # dct.iteritems() in Python 2
	        print("{} ({})".format(item, amount))

	# Command-line driver: reads the transcript CSV named by the first argument,
	# counts case-insensitive regex matches of the second argument in comments
	# made by a 'Senator' or 'Chairman', copies each matching row to results.csv,
	# and prints totals overall, by party, and by hearing.
	args = sys.argv

	if len(args) < 3:
	    print("USAGE: python3 parseCsv.py <FILENAME> <SEARCHTERM>")
	    raise Exception("Must provide file name, then search term.")

	fileName = args[1]
	term = args[2]

	# The search term is used as a regular expression. Compile it once, up
	# front, so an invalid pattern fails before any file is opened or truncated.
	pattern = re.compile(term, re.IGNORECASE)

	# Times mentioned by party
	total = 0
	totalR = 0
	totalD = 0

	# Times mentioned per justice (keyed by the hearing name found in column 3)
	justices = {
	    "Neil M. Gorsuch": 0,
	    "Elena Kagan": 0,
	    "Sonia Sotomayor": 0,
	    "Samuel A. Alito, Jr.": 0,
	    "John G. Roberts, Jr.": 0,
	    "Stephen G. Breyer": 0,
	    "Ruth Bader Ginsburg": 0,
	    "Clarence Thomas": 0,
	    "David H. Souter": 0,
	    "Anthony M. Kennedy": 0,
	    "William H. Rehnquist (to be Associate Justice) and Lewis F. Powell Jr.": 0,
	    "William H. Rehnquist (to be chief)": 0,
	    "Antonin Scalia": 0,
	    "Sandra Day O'Connor": 0,
	    "John Paul Stevens": 0,
	    "Judge Robert Bork": 0
	}

	# Open the input first so a bad file name does not clobber an existing
	# results.csv. Both handles are closed by the with-blocks, which also
	# guarantees the output is flushed to disk.
	with open(fileName, newline='', encoding="utf8", errors="ignore") as csvfile:
	    # newline='' is what the csv module asks for on files it writes to;
	    # utf8 matches the input so every comment read can be written back.
	    with open('results.csv', 'w', newline='', encoding="utf8") as results:
	        writer = csv.writer(results, delimiter=',', quotechar='"')
	        writer.writerow(['HEARING', 'SPEAKER', 'PARTY', 'COMMENT'])

	        data = csv.reader(csvfile, delimiter=',', quotechar='"')
	        for row in data:
	            # Blank or truncated rows have no comment column; skip them
	            # instead of crashing with an IndexError.
	            if len(row) < 8:
	                continue

	            title = row[4]
	            if title != 'Senator' and title != 'Chairman':
	                continue

	            comments = row[7]
	            num = len(pattern.findall(comments))
	            if num == 0:
	                continue

	            # Credit the matches to the hearing, if it is one we track.
	            hearing = row[3]
	            if hearing in justices:
	                justices[hearing] += num

	            # Log the party values
	            party = row[5]
	            name = row[6]
	            total += num
	            writer.writerow([hearing, name, party, comments])
	            # NOTE(review): every party value other than 'D' is tallied as
	            # GOP, as before - confirm the data contains no other parties.
	            if party == 'D':
	                totalD += num
	            else:
	                totalR += num

	print("-- SUMMARY --")
	print("\n")
	print(str(total) + ' mentions of ' + '"' + term + '"')
	print("\n")
	print("Total Dem Instances: " + str(totalD))
	print("Total GOP Instances: " + str(totalR))
	print("\n")
	print_inventory(justices)