Created
October 8, 2020 12:19
-
-
Save harrisoncramer/9e206bb90c438b65c9adb6a91d1474c7 to your computer and use it in GitHub Desktop.
SCOTUS_Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SCRIPT FOR SCRAPING SCOTUS HEARING TRANSCRIPTS
# DATA COMPILED BY R STREET INSTITUTE: https://www.rstreet.org/2019/04/04/supreme-court-confirmation-hearing-transcripts-as-data/
# This script parses the data, searches for a key term, writes the matching rows to a results.csv file, and prints a summary.
import csv
import sys
import re
def print_inventory(dct):
    """Print a "Mentions:" header, then one ``key (value)`` line per entry.

    Args:
        dct: Mapping to print; entries appear in the mapping's iteration
            order.
    """
    print("Mentions:")
    for item, amount in dct.items():
        print(f"{item} ({amount})")
# Column positions read from each row of the transcript CSV.
COL_HEARING = 3
COL_TITLE = 4
COL_PARTY = 5
COL_NAME = 6
COL_COMMENTS = 7

# Only comments made by speakers with one of these titles are searched.
SEARCHED_TITLES = ('Senator', 'Chairman')

# Hearing names that get a per-justice tally. A row's hearing column must
# match one of these strings exactly to be counted per justice.
HEARINGS = (
    "Neil M. Gorsuch",
    "Elena Kagan",
    "Sonia Sotomayor",
    "Samuel A. Alito, Jr.",
    "John G. Roberts, Jr.",
    "Stephen G. Breyer",
    "Ruth Bader Ginsburg",
    "Clarence Thomas",
    "David H. Souter",
    "Anthony M. Kennedy",
    "William H. Rehnquist (to be Associate Justice) and Lewis F. Powell Jr.",
    "William H. Rehnquist (to be chief)",
    "Antonin Scalia",
    "Sandra Day O'Connor",
    "John Paul Stevens",
    "Judge Robert Bork",
)


def count_mentions(rows, term, writer=None):
    """Count case-insensitive matches of *term* in senators' comments.

    Args:
        rows: Iterable of row sequences (e.g. a ``csv.reader``). Rows with
            fewer than eight columns are skipped instead of raising
            ``IndexError``.
        term: Search term. It is used as a regular expression, so regex
            metacharacters in it are interpreted, not matched literally.
        writer: Optional object with a ``writerow`` method (e.g. a
            ``csv.writer``). Every row with at least one match is written to
            it as ``[hearing, speaker, party, comment]``.

    Returns:
        A tuple ``(total, totalD, totalR, justices)`` where ``justices`` maps
        each name in ``HEARINGS`` to its number of matches.

    Raises:
        re.error: If *term* is not a valid regular expression.
    """
    # Compile once instead of re-resolving the pattern for every row.
    pattern = re.compile(term, re.IGNORECASE)
    justices = dict.fromkeys(HEARINGS, 0)
    total = 0
    totalD = 0
    totalR = 0

    for row in rows:
        # Blank or truncated lines do not have all the columns read below.
        if len(row) <= COL_COMMENTS:
            continue
        if row[COL_TITLE] not in SEARCHED_TITLES:
            continue

        comments = row[COL_COMMENTS]
        num = len(pattern.findall(comments))
        if num == 0:
            continue

        # Hearings not listed in HEARINGS still count toward the totals and
        # the output rows, but get no per-justice entry.
        hearing = row[COL_HEARING]
        if hearing in justices:
            justices[hearing] += num

        party = row[COL_PARTY]
        name = row[COL_NAME]
        total += num
        if writer is not None:
            writer.writerow([hearing, name, party, comments])

        # NOTE(review): every party value other than 'D' (including blank or
        # independent, if the data has any) is counted as GOP -- confirm
        # that this is intended.
        if party == 'D':
            totalD += num
        else:
            totalR += num

    return total, totalD, totalR, justices


def main(argv=None):
    """Search the transcript CSV for a term, write results.csv, print a summary.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name, file
            name, search term). Defaults to ``sys.argv``.

    Raises:
        Exception: If fewer than two arguments follow the program name.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        print("USAGE: python3 readCsv.py <FILENAME> <SEARCHTERM>")
        raise Exception("Must provide file name, then search term.")
    fileName = args[1]
    term = args[2]

    # Both files are closed (and results.csv flushed) when the block exits.
    # newline='' is what the csv module requires for files it reads or writes.
    with open('results.csv', 'w', newline='', encoding="utf8") as results, \
            open(fileName, newline='', encoding="utf8", errors="ignore") as csvfile:
        writer = csv.writer(results, delimiter=',', quotechar='"')
        writer.writerow(['HEARING', 'SPEAKER', 'PARTY', 'COMMENT'])
        data = csv.reader(csvfile, delimiter=',', quotechar='"')
        total, totalD, totalR, justices = count_mentions(data, term, writer)

    print("-- SUMMARY --")
    print("\n")
    print(f'{total} mentions of "{term}"')
    print("\n")
    print(f"Total Dem Instances: {totalD}")
    print(f"Total GOP Instances: {totalR}")
    print("\n")
    print_inventory(justices)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment