Created
April 16, 2016 01:26
-
-
Save mandlar/2473303a61396f641470b6f26217d2d4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
def social_data(var):
    """Convert one scraped social-count cell to a float.

    var: cell text from the CSV, e.g. '1,234', or '' when no count was scraped
    return: the count as a float; 0.0 for an empty or whitespace-only cell
    """
    # Scraped cells can carry stray padding; strip it so a whitespace-only
    # cell is treated as "not present" instead of crashing float().
    var = var.strip()
    if not var:
        return 0.0  # value of zero when not present in scraped data
    return float(var.replace(',', ''))  # drop thousands separators, then convert
def clean_social_data(fin, n, o, p):
    """Create a Python list with the values from three social data columns.

    fin: file in (path to the CSV file; its first line is the header row)
    n: index of the first column; its header text becomes the list's first item
    o: index of the second column
    p: index of the third column
    return: list with the title of column n as first item, followed by every
        value of column n, then every value of column o, then every value of
        column p, each converted with social_data()
    """
    with open(fin, newline='') as csvfile_in:
        datareader = csv.reader(csvfile_in)
        title = next(datareader)[n]  # gets column title, then skips headers line
        # A csv.reader can be consumed only once: keep the rows in memory so
        # each column can be walked in turn. Looping over the reader three
        # times would leave the second and third columns unread.
        rows = list(datareader)
    data = [title]
    for column in (n, o, p):
        data.extend(social_data(row[column]) for row in rows)
    return data
f = '2015_buzz_scrape_Jan.csv'  # CSV file to read
data = clean_social_data(f, 8, 9, 11)  # pass column indexes 8, 9 and 11 in a single call
import csv
def social_data(var):
    """Convert one scraped social-count cell to a float.

    var: cell text from the CSV, e.g. '1,234', or '' when no count was scraped
    return: the count as a float; 0.0 for an empty or whitespace-only cell
    """
    # Scraped cells can carry stray padding; strip it so a whitespace-only
    # cell is treated as "not present" instead of crashing float().
    var = var.strip()
    if not var:
        return 0.0  # value of zero when not present in scraped data
    return float(var.replace(',', ''))  # drop thousands separators, then convert
def clean_social_data(fin, n, o, p):
    """Create a Python list with the values from three social data columns.

    fin: file in (path to the CSV file; its first line is the header row)
    n: index of the first column; its header text becomes the list's first item
    o: index of the second column
    p: index of the third column
    return: list with the title of column n as first item, followed by every
        value of column n, then every value of column o, then every value of
        column p, each converted with social_data()
    """
    with open(fin, newline='') as csvfile_in:
        datareader = csv.reader(csvfile_in)
        title = next(datareader)[n]  # gets column title, then skips headers line
        # A csv.reader can be consumed only once: keep the rows in memory so
        # each column can be walked in turn. Looping over the reader three
        # times would leave the second and third columns unread.
        rows = list(datareader)
    data = [title]
    for column in (n, o, p):
        data.extend(social_data(row[column]) for row in rows)
    return data
f = '2015_buzz_scrape_Jan.csv'  # CSV file to read
data = clean_social_data(f, 8, 9, 11)  # pass column indexes 8, 9 and 11 in a single call
# Planned threshold counters (pseudocode only, nothing below is implemented yet).
# NOTE(review): the pseudocode compares with ">" while the notes further down
# say "at least" / ">="; confirm which comparison is intended.
# variable for 1 shares
# variable for 100 shares
# variable for 1000 shares
# variable for 10000 shares
# for each row in data
#     if shares > 1
#         increment 1 shares variable by one
#     if shares > 100
#         increment 100 shares variable by one
#     if shares > 1000
#         increment 1000 shares variable by one
#     if shares > 10000
#         increment 10000 shares variable by one
data_total = sum(data[1:])  # data[0] is the column title, so the sum starts at index 1. TODO: also need totals for all 3 columns at >= 1, >= 100, >= 1000 and >= 10000 shares
print('Total Twitter Shares ' + ' is ' + str(data_total))  # prints a single grand total; TODO: print the other columns on separate lines
# TODO: add if statements for >= 100, 1000, and 10000, then print those totals.
# Open question: does passing all three column indexes as arguments work?
# Open question: how to print each of the requested results on its own line?
data_total = sum(data[1:])  # data[0] is the column title, so the sum starts at index 1. TODO: also need totals for all 3 columns at >= 1, >= 100, >= 1000 and >= 10000 shares
print('Total Twitter Shares ' + ' is ' + str(data_total))  # prints a single grand total; TODO: print the other columns on separate lines
# TODO: add if statements for >= 100, 1000, and 10000, then print those totals.
# Open question: does passing all three column indexes as arguments work?
# Open question: how to print each of the requested results on its own line?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment