Created
April 16, 2016 02:22
-
-
Save mandlar/26817665c3c9b935785caf728edf1ff6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
def social_data(var):
    """ Converts one scraped share-count cell to a float.
    var: raw cell text from the CSV, e.g. '1,234' or ''
    return: the count as a float; 0.0 when the cell is None, empty or
        contains only whitespace
    raises: ValueError when the remaining text is not a valid number
    """
    if var is None:
        return 0.0
    # Thousands separators are removed and surrounding whitespace dropped so
    # that a blank-looking cell such as '  ' counts as "not present" too.
    cleaned = str(var).replace(',', '').strip()
    if not cleaned:
        return 0.0  # value of zero when not present in scraped data
    return float(cleaned)
def clean_social_data(fin, n, o, p):
    """ Creates a Python list with values from three social data columns.

    The file is read in a single pass. csv.reader is a one-shot iterator, so
    looping over it once per column would leave the second and third columns
    empty; the rows are therefore kept in memory and each column is taken
    from them in turn.

    fin: file in (path of the CSV file; its first line is the header row)
    n: column index of the first column; its header becomes the first item
    o: column index of the second column
    p: column index of the third column
    return: list with the name of column n as first item, followed by all
        values of column n, then all values of column o, then all values of
        column p, each converted with social_data. Every column contributes
        one value per data row, so with r data rows the three columns are
        the slices [1:r+1], [r+1:2*r+1] and [2*r+1:3*r+1].
    """
    with open(fin, newline='') as csvfile_in:
        datareader = csv.reader(csvfile_in)
        title = next(datareader)[n]  # gets column title, then skips headers line
        # csv.reader yields an empty list for a blank line; such lines carry
        # no data and would only raise IndexError below, so they are dropped.
        rows = [row for row in datareader if row]
    data = [title]
    for column in (n, o, p):
        data.extend(social_data(row[column]) for row in rows)
    return data
# Input CSV. The name is a relative path, so it is resolved against the
# current working directory when the file is opened.
f = '2015_buzz_scrape_Jan.csv'  # File name
# Runs at import/execution time: passes column indexes 8, 9 and 11 as the
# n, o and p arguments of clean_social_data and keeps the returned list.
data = clean_social_data(f, 8, 9, 11)  # All three column indexes are passed in one call
# pseudo c# code | |
var oneShare = 0; | |
var oneHundredShares = 0; | |
var oneThousandShares = 0; | |
var tenThousandShares = 0; | |
var totalFacebookShares = 0; | |
var totalTwitterShares = 0; | |
var totalEmailShares = 0; | |
for each (var row in data) | |
{ | |
totalFacebookShares = totalFacebookShares + row.get("facebookShares"); # add this row's facebook count to the running total | |
totalTwitterShares = totalTwitterShares + row.get("twitterShares"); | |
totalEmailShares = totalEmailShares + row.get("emailShares"); | |
if(row.get("facebookShares") >= 1) | |
{ | |
oneShare = oneShare + 1; #increment counter by one
} | |
if(row.get("facebookShares") >= 100) | |
{ | |
oneHundredShares = oneHundredShares + 1; | |
} | |
if(row.get("facebookShares") >= 1000) | |
{ | |
oneThousandShares = oneThousandShares + 1; | |
} | |
if(row.get("facebookShares") >= 10000) | |
{ | |
tenThousandShares = tenThousandShares + 1; | |
} | |
# repeat the above for twitter and email shares, e.g. | |
if(row.get("twitterShares") >= 1) | |
{ | |
oneShare = oneShare + 1; | |
} | |
# etc. it could probably be pulled out into a function if you don't want to repeat it three times over | |
} | |
print(totalFacebookShares) | |
print(totalTwitterShares) | |
print(totalEmailShares) | |
print(oneShare) | |
print(oneHundredShares) | |
print(oneThousandShares) | |
print(tenThousandShares) | |
# data[0] is the column title placed first by clean_social_data, so the sum
# starts at index 1 and adds up every numeric entry in the returned list.
data_total = sum(data[1:])  # TODO: also need, for each of the 3 columns, the totals for at least 1, 100, 1000 and 10000 shares
# NOTE(review): the label is hard-coded and the two string literals put two
# spaces between "Shares" and "is" - confirm the label matches the data summed.
print('Total Twitter Shares ' + ' is ' + str(data_total))  # TODO: only one total is printed; the other two columns still need their own lines
# TODO: add if statements for the >= 100, >= 1000 and >= 10000 thresholds, then print those totals.
# Open question: does passing all of those column arguments in one call work?
# Open question: how to print each of the requested results separately (presumably on separate lines - the original note is cut off).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment