Skip to content

Instantly share code, notes, and snippets.

@harrisoncramer
Created October 8, 2020 12:19
Show Gist options
  • Save harrisoncramer/9e206bb90c438b65c9adb6a91d1474c7 to your computer and use it in GitHub Desktop.
Save harrisoncramer/9e206bb90c438b65c9adb6a91d1474c7 to your computer and use it in GitHub Desktop.
SCOTUS_Analysis
# SCRIPT FOR SCRAPING SCOTUS HEARING TRANSCRIPT
# DATA COMPILED BY R STREET INSTITUTE: https://www.rstreet.org/2019/04/04/supreme-court-confirmation-hearing-transcripts-as-data/
# This script parses the data, searches the comments of senators and chairmen for a search term, writes every matching row to results.csv, and prints a summary of the mention counts.
import csv
import sys
import re
def print_inventory(dct):
    """Print a "Mentions:" header followed by one "item (amount)" line per entry.

    Args:
        dct: Mapping to report; its items are printed in iteration order.
    """
    print("Mentions:")
    for item, amount in dct.items():  # dct.iteritems() in Python 2
        print("{} ({})".format(item, amount))
# Hearing names reported in the per-justice summary; used as the key set for
# the counters built on each run.
justices = {
    "Neil M. Gorsuch": 0,
    "Elena Kagan": 0,
    "Sonia Sotomayor": 0,
    "Samuel A. Alito, Jr.": 0,
    "John G. Roberts, Jr.": 0,
    "Stephen G. Breyer": 0,
    "Ruth Bader Ginsburg": 0,
    "Clarence Thomas": 0,
    "David H. Souter": 0,
    "Anthony M. Kennedy": 0,
    "William H. Rehnquist (to be Associate Justice) and Lewis F. Powell Jr.": 0,
    "William H. Rehnquist (to be chief)": 0,
    "Antonin Scalia": 0,
    "Sandra Day O'Connor": 0,
    "John Paul Stevens": 0,
    "Judge Robert Bork": 0
}

# Values of the title column whose comments are searched.
QUESTIONER_TITLES = ('Senator', 'Chairman')

# Highest column index read from a row is 7, so shorter rows cannot be used.
MIN_COLUMNS = 8


def tally_mentions(rows, pattern, writer, counts):
    """Count matches of *pattern* in questioners' comments and log matching rows.

    Args:
        rows: Iterable of CSV rows (sequences of strings). Column 3 is read as
            the hearing, 4 as the speaker's title, 5 as the party, 6 as the
            speaker's name and 7 as the comment text.
        pattern: Compiled regular expression to search for.
        writer: Object with a ``writerow`` method; it receives
            ``[hearing, name, party, comment]`` for every row with a match.
        counts: Dict keyed by hearing name. The entry for a row's hearing is
            incremented in place when that hearing is already a key; other
            hearings still contribute to the returned totals.

    Returns:
        A ``(total, total_dem, total_gop)`` tuple of match counts.
    """
    total = 0
    total_dem = 0
    total_gop = 0
    for row in rows:
        # Blank lines come back from csv.reader as empty lists; skip anything
        # too short to index instead of crashing with IndexError.
        if len(row) < MIN_COLUMNS:
            continue
        if row[4] not in QUESTIONER_TITLES:
            continue
        comments = row[7]
        mentions = len(pattern.findall(comments))
        if not mentions:
            continue

        hearing = row[3]
        if hearing in counts:
            counts[hearing] += mentions

        party = row[5]
        name = row[6]
        total += mentions
        writer.writerow([hearing, name, party, comments])
        # NOTE(review): every party value other than 'D' is counted under the
        # GOP total (unchanged from the original logic) -- confirm the data
        # contains no independents or blank party values.
        if party == 'D':
            total_dem += mentions
        else:
            total_gop += mentions
    return total, total_dem, total_gop


def main(argv=None):
    """Search a transcript CSV for a term, write results.csv, print a summary.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name, file
            name, search term). Defaults to ``sys.argv``.

    Raises:
        Exception: If the file name or the search term is missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        print("USAGE: python3 readCsv.py <FILENAME> <SEARCHTERM>")
        raise Exception("Must provide file name, then search term.")
    fileName = args[1]
    term = args[2]

    # The term is used as a regular expression, matched case-insensitively.
    # Compile it once up front so it is not looked up again for every row.
    pattern = re.compile(term, re.IGNORECASE)
    counts = dict.fromkeys(justices, 0)

    # Open the input first so a bad path fails before results.csv is truncated.
    with open(fileName, newline='', encoding="utf8", errors="ignore") as csvfile:
        # newline='' is what the csv module expects for files it writes; the
        # context manager guarantees the rows are flushed and the file closed.
        # utf8 matches the encoding the comments were read with.
        with open('results.csv', 'w', newline='', encoding="utf8") as results:
            writer = csv.writer(results, delimiter=',', quotechar='"')
            writer.writerow(['HEARING', 'SPEAKER', 'PARTY', 'COMMENT'])
            data = csv.reader(csvfile, delimiter=',', quotechar='"')
            total, totalD, totalR = tally_mentions(data, pattern, writer, counts)

    print("-- SUMMARY --")
    print("\n")
    print(str(total) + ' mentions of ' + '"' + term + '"')
    print("\n")
    print("Total Dem Instances: " + str(totalD))
    print("Total GOP Instances: " + str(totalR))
    print("\n")
    print_inventory(counts)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment