Created
July 15, 2022 00:10
-
-
Save phreakin/623f0fec08559e8d8aae9ff645f15310 to your computer and use it in GitHub Desktop.
Simple python script to grab the crime data for Mesa, Arizona
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""Download the Mesa, Arizona police-incident CSV and write a cleaned copy.

The script downloads the raw CSV once (skipping the download when the file
is already on disk), then re-reads it, removes duplicate and unusable rows
and saves the result next to the raw file.
"""
import os
import sys

import pandas as pd

path = 'D:/IdeaProjects/Dara.gov/data/'  # Directory where the data is stored
url = 'https://data.mesaaz.gov/resource/39rt-2rfj.csv'  # URL of the data
filename = 'mesa_police_incidents.csv'  # Name of the file to save the data
clean_filename = 'mesa_police_incidents_clean.csv'  # Name of the cleaned file

# Exit status used when the download fails. Kept at the script's historical
# value; note that POSIX shells only see the low 8 bits of an exit status.
DOWNLOAD_ERROR_EXIT_CODE = 500


def download_if_missing(source_url, directory, name):
    """Fetch the CSV at *source_url* into ``directory + name`` if absent.

    Args:
        source_url: Anything ``pandas.read_csv`` can read (URL or path).
        directory: Target directory, including its trailing separator.
        name: File name to create inside *directory*.

    Returns:
        True when the file was downloaded, False when it already existed.
    """
    target = directory + name
    if os.path.exists(target):
        # If both exist, print a message and continue
        print('File already exists. Skipping Download.')
        print('File is located at ' + target)
        return False

    # Announce the download before starting it, not after it finished.
    print('File not found. Downloading...')
    # NOTE(review): the endpoint may page its results by default - confirm
    # that this URL really returns the complete dataset.
    raw = pd.read_csv(source_url)
    # Create the directory first; to_csv does not create missing parents.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)
    raw.to_csv(target, index=False)
    print('File downloaded and saved to ' + target)
    return True


def clean_incidents(df):
    """Return a copy of *df* without duplicate or unusable incident rows.

    Rows are removed when they repeat an earlier (crime_id, crime_type)
    pair, when crime_id is missing, or when latitude or longitude is
    missing. The input frame is left untouched.

    Args:
        df: Frame with ``crime_id``, ``crime_type``, ``latitude`` and
            ``longitude`` columns.

    Returns:
        The cleaned DataFrame.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    # drop_duplicates returns a new frame; the result must be kept,
    # otherwise the duplicates silently survive.
    cleaned = df.drop_duplicates(subset=['crime_id', 'crime_type'], keep='first')
    print('Duplicate crime_id dropped...')
    # crime_id is the main identifier for each crime, so if empty the row
    # is worthless
    cleaned = cleaned.dropna(subset=['crime_id'])
    # This is done because we need the coordinates to plot crimes on a map
    cleaned = cleaned.dropna(subset=['latitude', 'longitude'])
    print('NaN crime_id and latitude,longitude dropped...')
    print('Missing values removed...')
    return cleaned


def main():
    """Run the download step followed by the cleaning step."""
    try:
        download_if_missing(url, path, filename)
    except Exception as e:
        # Top-level boundary: report the problem and stop with an error code
        print(e)
        print('Error downloading file. Please try again.')
        sys.exit(DOWNLOAD_ERROR_EXIT_CODE)

    # Only reached when the raw file is available on disk
    print('Processing Data...')
    print('Reading file...')
    df = pd.read_csv(path + filename)
    print('File read...')

    df = clean_incidents(df)

    # Save the cleaned dataframe to a new csv file with the same path as raw data
    df.to_csv(path + clean_filename, index=False)
    print('Clean data file saved to ' + path + clean_filename)
    # We're done!
    print('Processing complete.')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment