Skip to content

Instantly share code, notes, and snippets.

@mandlar
Created April 16, 2016 01:26
Show Gist options
  • Save mandlar/2473303a61396f641470b6f26217d2d4 to your computer and use it in GitHub Desktop.
Save mandlar/2473303a61396f641470b6f26217d2d4 to your computer and use it in GitHub Desktop.
import csv
def social_data(var):
    """Convert one scraped social-count cell to a float.

    var: cell text as read from the CSV, e.g. '1,234'
    return: the count as a float; 0.0 when the cell is empty or holds
            nothing but whitespace/commas (value missing in scraped data)
    raises: ValueError when the remaining text is not a valid number
    """
    # Drop thousands separators and surrounding whitespace before converting.
    cleaned = var.replace(',', '').strip()
    if not cleaned:
        return 0.0  # value was not present in the scraped data
    return float(cleaned)
def clean_social_data(fin, n, o, p):
    """Create a Python list with the values from three social data columns.

    A csv.reader can only be iterated once, so the data rows are read into
    memory a single time and each requested column is then taken from those
    stored rows. (Looping over the reader itself three times would leave the
    second and third columns empty.)

    fin: file in (path of the CSV file)
    n: index of the first column; its header cell supplies the title
    o: index of the second column
    p: index of the third column
    return: list with the title of column n as first item, followed by all
            values of column n, then all values of column o, then all
            values of column p, each converted by social_data()
    raises: StopIteration when the file has no header line;
            IndexError when a row is shorter than a requested column index
    """
    with open(fin, newline='') as csvfile_in:
        datareader = csv.reader(csvfile_in)
        title = next(datareader)[n]  # gets column title, then skips headers line
        rows = list(datareader)  # keep the rows so every column can be read
    data = [title]
    for column in (n, o, p):
        data.extend(social_data(row[column]) for row in rows)
    return data
# Name of the CSV file holding the scraped data.
f = '2015_buzz_scrape_Jan.csv'
# A single call hands over the file name together with column indexes 8, 9 and 11.
data = clean_social_data(fin=f, n=8, o=9, p=11)
import csv
def social_data(var):
    """Convert one scraped social-count cell to a float.

    var: cell text as read from the CSV, e.g. '1,234'
    return: the count as a float; 0.0 when the cell is empty or holds
            nothing but whitespace/commas (value missing in scraped data)
    raises: ValueError when the remaining text is not a valid number
    """
    # Drop thousands separators and surrounding whitespace before converting.
    cleaned = var.replace(',', '').strip()
    if not cleaned:
        return 0.0  # value was not present in the scraped data
    return float(cleaned)
def clean_social_data(fin, n, o, p):
    """Create a Python list with the values from three social data columns.

    A csv.reader can only be iterated once, so the data rows are read into
    memory a single time and each requested column is then taken from those
    stored rows. (Looping over the reader itself three times would leave the
    second and third columns empty.)

    fin: file in (path of the CSV file)
    n: index of the first column; its header cell supplies the title
    o: index of the second column
    p: index of the third column
    return: list with the title of column n as first item, followed by all
            values of column n, then all values of column o, then all
            values of column p, each converted by social_data()
    raises: StopIteration when the file has no header line;
            IndexError when a row is shorter than a requested column index
    """
    with open(fin, newline='') as csvfile_in:
        datareader = csv.reader(csvfile_in)
        title = next(datareader)[n]  # gets column title, then skips headers line
        rows = list(datareader)  # keep the rows so every column can be read
    data = [title]
    for column in (n, o, p):
        data.extend(social_data(row[column]) for row in rows)
    return data
# Name of the CSV file holding the scraped data.
f = '2015_buzz_scrape_Jan.csv'
# A single call hands over the file name together with column indexes 8, 9 and 11.
data = clean_social_data(fin=f, n=8, o=9, p=11)
# variable for 1 shares
# variable for 100 shares
# variable for 1000 shares
# variable for 10000 shares
#for each row in data
# if shares > 1
# increment 1 shares variable by one
# if shares > 100
# increment 100 shares variable by one
# if shares > 1000
# increment 1000 shares variable by one
# if shares > 10000
# increment 10000 shares variable by one
# data[0] holds the column title, so only the items after it are summed.
# Still needed: the total for all 3 columns for at least 1 share, at least
# 100 shares, at least 1000 shares, and at least 10000 shares.
data_total = sum(data[1:])
# The sum total of one column can be shown today; the other two still have to
# be split out and displayed on separate lines.
print('Total Twitter Shares ', ' is ', data_total, sep='')
# Needs if statements saying: if >= 100, 1000, and 10000, then print the totals.
# Open question: does passing all of those arguments work?
# Open question: how to print all of these results on separate lines?
# data[0] holds the column title, so only the items after it are summed.
# Still needed: the total for all 3 columns for at least 1 share, at least
# 100 shares, at least 1000 shares, and at least 10000 shares.
data_total = sum(data[1:])
# The sum total of one column can be shown today; the other two still have to
# be split out and displayed on separate lines.
print('Total Twitter Shares ', ' is ', data_total, sep='')
# Needs if statements saying: if >= 100, 1000, and 10000, then print the totals.
# Open question: does passing all of those arguments work?
# Open question: how to print all of these results on separate lines?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment