Skip to content

Instantly share code, notes, and snippets.

@mandlar
Created April 16, 2016 02:22
Show Gist options
  • Save mandlar/26817665c3c9b935785caf728edf1ff6 to your computer and use it in GitHub Desktop.
Save mandlar/26817665c3c9b935785caf728edf1ff6 to your computer and use it in GitHub Desktop.
import csv
def social_data(var):
    """Convert one scraped share-count cell to a float.

    Args:
        var: Raw cell text such as ``'1,234'``. May be empty,
            whitespace-only or ``None`` when no value was scraped.

    Returns:
        The count as a float with comma separators removed, or ``0.0``
        when no value is present.

    Raises:
        ValueError: If the cell holds text that is not a number.
    """
    if var is None:
        return 0.0
    # Drop comma separators and stray padding before parsing; a cell that
    # is blank after cleaning counts as "no shares" rather than an error.
    cleaned = str(var).replace(',', '').strip()
    if not cleaned:
        return 0.0
    return float(cleaned)
def clean_social_data(fin, n, o=None, p=None):
    """Create a flat list of numeric values from up to three CSV columns.

    Args:
        fin: Path of the CSV file to read.
        n: Index of the first column; its header is used as the title.
        o: Optional index of a second column (skipped when ``None``).
        p: Optional index of a third column (skipped when ``None``).

    Returns:
        A list whose first item is the header text of column ``n``,
        followed by every value of column ``n``, then every value of
        column ``o``, then every value of column ``p``, each converted
        with ``social_data``.

    Raises:
        StopIteration: If the file has no header line.
        IndexError: If a non-blank row is shorter than a requested column.
    """
    with open(fin, newline='') as csvfile_in:
        datareader = csv.reader(csvfile_in)
        header = next(datareader)  # consume the header line
        # csv.reader can only be walked once, so keep the rows in memory;
        # looping over the reader a second time would yield nothing.
        # Blank lines come back as empty lists and are skipped.
        rows = [row for row in datareader if row]

    data = [header[n]]
    for column in (n, o, p):
        if column is None:
            continue
        data.extend(social_data(row[column]) for row in rows)
    return data
# Name of the input CSV file; it is opened as given, so it is resolved
# relative to the current working directory.
f = '2015_buzz_scrape_Jan.csv'
# Pass three column indices in a single call.
# NOTE(review): which share counts live at indices 8, 9 and 11 is not
# visible here -- confirm against the CSV header row.
data = clean_social_data(f, 8, 9, 11)
# pseudo c# code
var oneShare = 0;
var oneHundredShares = 0;
var oneThousandShares = 0;
var tenThousandShares = 0;
var totalFacebookShares = 0;
var totalTwitterShares = 0;
var totalEmailShares = 0;
for each (var row in data)
{
totalFacebookShares = totalFacebookShares + row.get("facebookShares"); # add this row's facebook count to the running total
totalTwitterShares = totalTwitterShares + row.get("twitterShares");
totalEmailShares = totalEmailShares + row.get("emailShares");
if(row.get("facebookShares") >= 1)
{
oneShare = oneShare + 1; #increment counter by one
}
if(row.get("facebookShares") >= 100)
{
oneHundredShares = oneHundredShares + 1;
}
if(row.get("facebookShares") >= 1000)
{
oneThousandShares = oneThousandShares + 1;
}
if(row.get("facebookShares") >= 10000)
{
tenThousandShares = tenThousandShares + 1;
}
# repeat the above for twitter and email shares, e.g.
if(row.get("twitterShares") >= 1)
{
oneShare = oneShare + 1;
}
# etc. it could probably be pulled out into a function if you don't want to repeat it three times over
}
print(totalFacebookShares)
print(totalTwitterShares)
print(totalEmailShares)
print(oneShare)
print(oneHundredShares)
print(oneThousandShares)
print(tenThousandShares)
# Add up every numeric entry in `data`, skipping the column title at index 0.
# TODO: also count, for each of the 3 columns, the rows with at least 1,
# at least 100, at least 1000 and at least 10000 shares.
data_total = sum(data[1:])
# NOTE(review): the label says Twitter, but the figure is the sum of whatever
# clean_social_data returned -- confirm the label matches the columns read.
# TODO: split the report up so each column is displayed on its own line.
print('Total Twitter Shares is ' + str(data_total))
# TODO: add if statements for >= 100, >= 1000 and >= 10000, then print those totals.
# Open question: does passing all the column indices as separate arguments work?
# Open question: how to print each of the requested figures on separate lines?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment