Skip to content

Instantly share code, notes, and snippets.

@stephaniemdavis
Last active August 22, 2018 15:55
Show Gist options
  • Select an option

  • Save stephaniemdavis/3725d05606c8c2ec4b7983277f6c2032 to your computer and use it in GitHub Desktop.

Select an option

Save stephaniemdavis/3725d05606c8c2ec4b7983277f6c2032 to your computer and use it in GitHub Desktop.
Lesson 1 of Coursera's Data Analysis with Python course. Datasets housed in ~/Code/intro_data_anal on local host
import os,unicodecsv
# write the csv reader function
def read_csv(filename):
    """Read a CSV file and return its rows as a list.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list holding every row yielded by ``unicodecsv.DictReader``,
        in the order the reader produced them.
    """
    # The file is opened in binary mode ('rb') and handed straight to
    # unicodecsv. The rows are materialised while the file is still open,
    # because the reader pulls from the file lazily.
    with open(filename, 'rb') as csv_file:
        rows = list(unicodecsv.DictReader(csv_file))
    return rows
# Load the three course datasets, in this order, from the working directory.
enrolled, engaged, submitted = map(
    read_csv,
    ('enrollments.csv', 'daily_engagement.csv', 'project_submissions.csv'),
)
def uniqueness(list_object, key='account_key'):
    """Count the distinct values of one field across a list of rows.

    Args:
        list_object: sequence of dictionaries (one per row); every row
            must contain ``key`` and its value must be hashable.
        key: name of the field whose distinct values are counted.
            Defaults to ``'account_key'``.

    Returns:
        A ``(unique_count, total_rows)`` tuple: the number of distinct
        values found under ``key`` and the number of rows examined.

    Raises:
        KeyError: if a row does not contain ``key``.
    """
    # A set keeps a single entry per distinct value, so a key that shows up
    # again on a later, non-adjacent row is still counted only once. Comparing
    # each row with the previous one would count every run separately.
    distinct_values = {row[key] for row in list_object}
    return len(distinct_values), len(list_object)
# Report the counts that uniqueness() returns for the enrollments table.
enrollment_account_key, number_of_rows = uniqueness(enrolled)
# Each print call receives one pre-formatted string: that form is valid under
# both Python 2 and Python 3 and prints the same "<count> <label>" text.
print('{0} {1}'.format(enrollment_account_key, 'unique Enrollments account keys.'))
print('{0} {1}'.format(number_of_rows, 'total rows in the Enrollments csv file.\n'))

# Same report for the project submissions table.
submissions_account_key, submissions_number_of_rows = uniqueness(submitted)
print('{0} {1}'.format(submissions_account_key, 'unique Project Submissions account keys.'))
print('{0} {1}'.format(submissions_number_of_rows, 'total rows in the Project Submissions csv file.\n'))
# A separate function is required for `engaged` because its dictionary key is different ('acct' versus 'account_key'). (August 22, 2018)
def uniqueness(list_object, key='acct'):
    """Count the distinct values of one field across a list of rows.

    Args:
        list_object: sequence of dictionaries (one per row); every row
            must contain ``key`` and its value must be hashable.
        key: name of the field whose distinct values are counted.
            Defaults to ``'acct'``.

    Returns:
        A ``(unique_count, total_rows)`` tuple: the number of distinct
        values found under ``key`` and the number of rows examined.

    Raises:
        KeyError: if a row does not contain ``key``.
    """
    # A set keeps a single entry per distinct value, so a key that shows up
    # again on a later, non-adjacent row is still counted only once. Comparing
    # each row with the previous one would count every run separately.
    distinct_values = {row[key] for row in list_object}
    return len(distinct_values), len(list_object)
# Report the counts that uniqueness() returns for the daily engagement table.
engagement_account_keys, engagement_rows = uniqueness(engaged)
# Each print call receives one pre-formatted string: that form is valid under
# both Python 2 and Python 3 and prints the same "<count> <label>" text.
print('{0} {1}'.format(engagement_account_keys, 'unique Engagement account keys.'))  # unique account keys
print('{0} {1}'.format(engagement_rows, 'total Engagement rows.'))  # number of rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment