Created
July 17, 2015 03:20
-
-
Save cudevmaxwell/66febccf568c13e9815b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Let's explore the MoMA Artworks.csv file!

In this toy example, let's see how many artworks have a title starting with
Untitled, or are titled Untitled.
"""
import csv
import sys


def count_untitled(artworks, progress_stream=None):
    """Count artworks titled 'Untitled' or whose title starts with it.

    The two counts are mutually exclusive: a title that is exactly
    "Untitled" is counted only as "is Untitled", never as "starts with".
    The comparison is case-sensitive.

    Args:
        artworks: Iterable of mappings that each have a 'Title' key, such
            as the rows produced by csv.DictReader.
        progress_stream: Optional file-like object. When given, the current
            row number followed by a carriage return is written and flushed
            for every row, so the counter overwrites itself on a console.

    Returns:
        A tuple (title_starts_with_untitled, title_is_untitled).

    Raises:
        KeyError: If a row has no 'Title' key at all.
    """
    title_starts_with_untitled = 0
    title_is_untitled = 0

    # enumerate() gives us the rows plus a running count, which we use to
    # report progress through the file.
    #
    # Documentation here: https://docs.python.org/3/library/functions.html#enumerate
    #
    for row_number, artwork in enumerate(artworks, start=1):
        if progress_stream is not None:
            # Write the current row number, then use the carriage return
            # character to move the cursor to the beginning of the line so
            # the next write overwrites it. The stream may be buffered, so
            # flush() makes sure the output is really pushed out.
            progress_stream.write("{}\r".format(row_number))
            progress_stream.flush()

        # csv.DictReader fills fields missing from a short row with None;
        # treat that like an empty title instead of crashing on .startswith.
        title = artwork['Title'] or ""

        # Here's our toy 'analysis'.
        if title == "Untitled":
            title_is_untitled += 1
        elif title.startswith("Untitled"):
            title_starts_with_untitled += 1

    return title_starts_with_untitled, title_is_untitled


def main(path='Artworks.csv'):
    """Run the 'Untitled' analysis on the CSV file at *path* and print it."""
    # Open the Artworks.csv file. Let the CSV module handle newlines, use
    # utf-8 encoding.
    with open(path, newline='', encoding='utf-8') as artworks_file:
        # DictReader is cool. When we iterate through the file, we will get
        # dictionaries instead of plain lists. The first line of the
        # CSV is used to define the fields of the dictionaries.
        #
        # Documentation here: https://docs.python.org/3/library/csv.html#csv.DictReader
        #
        artworks = csv.DictReader(artworks_file)
        artworks_title_starts_with_untitled, artworks_title_is_untitled = (
            count_untitled(artworks, progress_stream=sys.stdout)
        )

    print("Number of artworks with a title that starts with 'Untitled': {}".format(artworks_title_starts_with_untitled))
    print("Number of artworks titled 'Untitled': {}".format(artworks_title_is_untitled))


if __name__ == "__main__":
    main()

# Number of artworks with a title that starts with 'Untitled': 2088
# Number of artworks titled 'Untitled': 2351
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Using Python 3.4