csessig86 · June 29, 2012 21:00
diff --git a/crime_scraper.py b/crime_scraper.py
 # We will be using the Python library Beautiful Soup
 # To scrape the information
 import urllib2
 from bs4 import BeautifulSoup
 import re

 # Note: This arrest log is available at:
 # http://chrisessig.com/arrestlog.PDF
 # It was taken from the Waterloo Police Department's website:
 # http://www.waterloopolice.com/images/arrestlog.PDF

 # Create a CSV where we'll save our data.
 # NOTE(review): the header has separate last_name/first_name columns, but each
 # row writes a single name field and never strips its commas -- presumably
 # names look like "LAST, FIRST" so the comma splits them; confirm against the log.
 CSV_COLUMNS = [
     "last_name", "first_name", "age", "address", "city", "arrest_date",
     "location", "charge_one", "charge_two", "charge_three", "charge_four",
     "charge_five", "charge_six", "charge_seven", "charge_eight",
     "charge_nine", "charge_ten",
 ]

 # Use PDFtoHTML to convert the PDF into HTML
 # For Mac users, go to command line and type: brew install pdftohtml
 # Command to convert PDF pages to HTML pages: pdftohtml -c arrestlog.pdf

 # Folder holding the pdftohtml output. The index page in it links to the
 # individual pages of arrest records.
 BASE_URL = 'file:///Users/Essig/Desktop/crime_map/'

 # The CSV has room for this many charges per person; unused slots hold 'x'.
 MAX_CHARGES = 10
 NO_CHARGE = 'x'
 # Text of the div that comes right before each person's list of charges.
 CHARGE_HEADER = 'Charge Description'

 # These patterns are the same for every page, so compile them once up front.
 # "Name: " is included in the divs containing names of those arrested.
 name_regex = re.compile(r'.*Name: *')
 # Ages are the only field expected to be just two digits.
 age_regex = re.compile(r'\d{2}$')
 # Format of the arrest dates: 00/00/2012
 arrest_date_regex = re.compile(r'\d{2}/\d{2}/\d{4}$')
 # Charges (and the 'Charge Description' headers) carry 'left:167' in their
 # style attribute, which is how far from the left they are positioned.
 charges_regex_css = re.compile(r'[a-z]*\d*\;left:167')


 def _split_charges(charges_div):
    """Group one page's flat list of charge texts into one row per person.

    charges_div looks like:
        ['Charge Description', 'SALE/MFG: MARIJUANA', 'DRUG STAMP VIOL',
         'Charge Description', 'SALE/MFG: MARIJUANA', ...]
    Every 'Charge Description' starts a new person. Anything before the
    first one is ignored.

    Returns a list with one entry per person. Each entry is a list of
    exactly MAX_CHARGES strings: the person's charges in order, padded
    with 'x' where there is no charge.
    """
    per_person = []
    current = None
    for text in charges_div:
        if text == CHARGE_HEADER:
            current = []
            per_person.append(current)
        elif current is not None:
            current.append(text)

    rows = []
    for person_charges in per_person:
        if len(person_charges) > MAX_CHARGES:
            # Still emit a row (first MAX_CHARGES charges only). Skipping it
            # would shift every later person on the page onto the wrong charges.
            print("WARNING: LOTS OF CHARGES FOR ONE PERSON. ADD MORE OPTIONS!!!!")
        kept = person_charges[:MAX_CHARGES]
        rows.append(kept + [NO_CHARGE] * (MAX_CHARGES - len(kept)))
    return rows


 # 'with' closes the CSV for us, even if one of the pages fails to load.
 with open('crime_data.csv', 'w') as f:
    # Add headers
    f.write(",".join(CSV_COLUMNS) + "\n")

    # URL of the arrest log index, which includes links to several pages
    # with arrest records on them
    url = BASE_URL + 'arrestlog_ind.html'
    page = urllib2.urlopen(url)
    soup = BeautifulSoup(page)

    # PDFtoHTML gives us several pages of arrests, depending on how many
    # people were arrested, so we loop through each linked page.
    for a in soup.find_all('a', href=True):
        # Create new URL for each page and pass that to Beautiful Soup
        new_url = BASE_URL + a['href']
        new_page = urllib2.urlopen(new_url)
        new_soup = BeautifulSoup(new_page)

        # Now pull names, ages, addresses, arrest dates, etc.
        # Note: each page gets its own set of lists, matched up by position.
        divs = new_soup.find_all('div')

        # Text of every div containing "Name: " (removed when we write the row)
        names = [div.get_text() for div in divs if name_regex.match(div.get_text())]

        # Ages, and the addresses found in the div right after each age div
        age_divs = [div for div in divs if age_regex.match(div.get_text())]
        ages = [div.get_text() for div in age_divs]
        addresses = [div.find_next('div').get_text() for div in age_divs]

        # City is the div after the div with the word 'Age:' in it.
        city = [post.find_next('div').get_text() for post in new_soup.find_all('div', text="Age:")]

        # Dates are in the divs previous to the divs containing
        # "WATERLOO POLICE DEPARTMENT"; keep only those that look like dates.
        arrest_date = []
        for post in new_soup.find_all('div', text="WATERLOO POLICE DEPARTMENT"):
            previous_text = post.find_previous('div').get_text()
            if arrest_date_regex.match(previous_text):
                arrest_date.append(previous_text)

        # Location is in the div after the div with 'Arresting Agency:' in it.
        location = [post.find_next('div').get_text() for post in new_soup.find_all('div', text="Arresting Agency:")]

        # Charges are the trickiest to pull: several people have multiple
        # charges. We keep the 'Charge Description' entries in the list so
        # the charges can be divided up by person.
        charges_div = [post.get_text() for post in new_soup.find_all(attrs={'style': charges_regex_css})]
        charges = _split_charges(charges_div)

        # We're almost done!
        # Loop through the lists we made and write one CSV row per person.
        for x, name in enumerate(names):
            # Replace "Name: " with nothing so we have just the name
            row = [name.replace("Name: ", "")]

            # Replace commas, so we don't screw up the CSV
            for value in (ages[x], addresses[x], city[x], arrest_date[x]):
                row.append(value.replace(",", " -"))

            # Funky symbols in locations will screw up Google, so change
            # them to 'and'. Beautiful Soup normally decodes '&amp;' to '&'
            # before get_text(), so handle both spellings.
            new_location = location[x].replace(",", " -")
            new_location = new_location.replace("&amp;", "and").replace("&", "and")
            new_location = new_location.replace("/", " and ")
            row.append(new_location + " Waterloo IA")

            # The ten charge columns
            for charge in charges[x]:
                row.append(charge.replace(",", " -"))

            # Write to CSV
            f.write(",".join(row) + "\n")
	# We will be using the Python library Beautiful Soup
	# To scrape the information
	import urllib2
	from bs4 import BeautifulSoup
	import re

	# Note: This arrest log is available at:
	# http://chrisessig.com/arrestlog.PDF
	# It was taken from the Waterloo Police Department's website:
	# http://www.waterloopolice.com/images/arrestlog.PDF

	# Create a CSV where we'll save our data.
	# NOTE(review): the header has separate last_name/first_name columns, but each
	# row writes a single name field and never strips its commas -- presumably
	# names look like "LAST, FIRST" so the comma splits them; confirm against the log.
	CSV_COLUMNS = [
	    "last_name", "first_name", "age", "address", "city", "arrest_date",
	    "location", "charge_one", "charge_two", "charge_three", "charge_four",
	    "charge_five", "charge_six", "charge_seven", "charge_eight",
	    "charge_nine", "charge_ten",
	]

	# Use PDFtoHTML to convert the PDF into HTML
	# For Mac users, go to command line and type: brew install pdftohtml
	# Command to convert PDF pages to HTML pages: pdftohtml -c arrestlog.pdf

	# Folder holding the pdftohtml output. The index page in it links to the
	# individual pages of arrest records.
	BASE_URL = 'file:///Users/Essig/Desktop/crime_map/'

	# The CSV has room for this many charges per person; unused slots hold 'x'.
	MAX_CHARGES = 10
	NO_CHARGE = 'x'
	# Text of the div that comes right before each person's list of charges.
	CHARGE_HEADER = 'Charge Description'

	# These patterns are the same for every page, so compile them once up front.
	# The '*' quantifiers below are required: without them the patterns only
	# match text with exactly one character before "Name: " / "left:167".
	# "Name: " is included in the divs containing names of those arrested.
	name_regex = re.compile(r'.*Name: *')
	# Ages are the only field expected to be just two digits.
	age_regex = re.compile(r'\d{2}$')
	# Format of the arrest dates: 00/00/2012
	arrest_date_regex = re.compile(r'\d{2}/\d{2}/\d{4}$')
	# Charges (and the 'Charge Description' headers) carry 'left:167' in their
	# style attribute, which is how far from the left they are positioned.
	charges_regex_css = re.compile(r'[a-z]*\d*\;left:167')


	def _split_charges(charges_div):
	    """Group one page's flat list of charge texts into one row per person.

	    charges_div looks like:
	        ['Charge Description', 'SALE/MFG: MARIJUANA', 'DRUG STAMP VIOL',
	         'Charge Description', 'SALE/MFG: MARIJUANA', ...]
	    Every 'Charge Description' starts a new person. Anything before the
	    first one is ignored.

	    Returns a list with one entry per person. Each entry is a list of
	    exactly MAX_CHARGES strings: the person's charges in order, padded
	    with 'x' where there is no charge.
	    """
	    per_person = []
	    current = None
	    for text in charges_div:
	        if text == CHARGE_HEADER:
	            current = []
	            per_person.append(current)
	        elif current is not None:
	            current.append(text)

	    rows = []
	    for person_charges in per_person:
	        if len(person_charges) > MAX_CHARGES:
	            # Still emit a row (first MAX_CHARGES charges only). Skipping it
	            # would shift every later person on the page onto the wrong charges.
	            print("WARNING: LOTS OF CHARGES FOR ONE PERSON. ADD MORE OPTIONS!!!!")
	        kept = person_charges[:MAX_CHARGES]
	        rows.append(kept + [NO_CHARGE] * (MAX_CHARGES - len(kept)))
	    return rows


	# 'with' closes the CSV for us, even if one of the pages fails to load.
	with open('crime_data.csv', 'w') as f:
	    # Add headers
	    f.write(",".join(CSV_COLUMNS) + "\n")

	    # URL of the arrest log index, which includes links to several pages
	    # with arrest records on them
	    url = BASE_URL + 'arrestlog_ind.html'
	    page = urllib2.urlopen(url)
	    soup = BeautifulSoup(page)

	    # PDFtoHTML gives us several pages of arrests, depending on how many
	    # people were arrested, so we loop through each linked page.
	    for a in soup.find_all('a', href=True):
	        # Create new URL for each page and pass that to Beautiful Soup
	        new_url = BASE_URL + a['href']
	        new_page = urllib2.urlopen(new_url)
	        new_soup = BeautifulSoup(new_page)

	        # Now pull names, ages, addresses, arrest dates, etc.
	        # Note: each page gets its own set of lists, matched up by position.
	        divs = new_soup.find_all('div')

	        # Text of every div containing "Name: " (removed when we write the row)
	        names = [div.get_text() for div in divs if name_regex.match(div.get_text())]

	        # Ages, and the addresses found in the div right after each age div
	        age_divs = [div for div in divs if age_regex.match(div.get_text())]
	        ages = [div.get_text() for div in age_divs]
	        addresses = [div.find_next('div').get_text() for div in age_divs]

	        # City is the div after the div with the word 'Age:' in it.
	        city = [post.find_next('div').get_text() for post in new_soup.find_all('div', text="Age:")]

	        # Dates are in the divs previous to the divs containing
	        # "WATERLOO POLICE DEPARTMENT"; keep only those that look like dates.
	        arrest_date = []
	        for post in new_soup.find_all('div', text="WATERLOO POLICE DEPARTMENT"):
	            previous_text = post.find_previous('div').get_text()
	            if arrest_date_regex.match(previous_text):
	                arrest_date.append(previous_text)

	        # Location is in the div after the div with 'Arresting Agency:' in it.
	        location = [post.find_next('div').get_text() for post in new_soup.find_all('div', text="Arresting Agency:")]

	        # Charges are the trickiest to pull: several people have multiple
	        # charges. We keep the 'Charge Description' entries in the list so
	        # the charges can be divided up by person.
	        charges_div = [post.get_text() for post in new_soup.find_all(attrs={'style': charges_regex_css})]
	        charges = _split_charges(charges_div)

	        # We're almost done!
	        # Loop through the lists we made and write one CSV row per person.
	        for x, name in enumerate(names):
	            # Replace "Name: " with nothing so we have just the name
	            row = [name.replace("Name: ", "")]

	            # Replace commas, so we don't screw up the CSV
	            for value in (ages[x], addresses[x], city[x], arrest_date[x]):
	                row.append(value.replace(",", " -"))

	            # Funky symbols in locations will screw up Google, so change
	            # them to 'and'. Beautiful Soup normally decodes '&amp;' to '&'
	            # before get_text(), so handle both spellings.
	            new_location = location[x].replace(",", " -")
	            new_location = new_location.replace("&amp;", "and").replace("&", "and")
	            new_location = new_location.replace("/", " and ")
	            row.append(new_location + " Waterloo IA")

	            # The ten charge columns
	            for charge in charges[x]:
	                row.append(charge.replace(",", " -"))

	            # Write to CSV
	            f.write(",".join(row) + "\n")