phreakin · July 15, 2022 00:10
diff --git a/getter.py b/getter.py
 #!/usr/bin/python

 import pandas as pd
 import os

 path = 'D:/IdeaProjects/Dara.gov/data/'                     # Directory where the data is stored
 url = 'https://data.mesaaz.gov/resource/39rt-2rfj.csv'      # URL of the data
 filename = 'mesa_police_incidents.csv'                      # Name of the file to save the data

 raw_file = path + filename                                  # Full path of the raw download
 clean_file = path + 'mesa_police_incidents_clean.csv'       # Full path of the cleaned output

 # Step 1: make sure the raw CSV is on disk, downloading it only when it is missing.
 try:
     if not os.path.exists(raw_file):
         print('File not found. Downloading...')
         # to_csv() does not create missing folders, so create the data directory first
         os.makedirs(path, exist_ok=True)
         pd.read_csv(url).to_csv(raw_file, index=False)
         print('File downloaded and saved to ' + raw_file)
     else:
         # Raw file is already present: reuse it instead of downloading again
         print('File already exists. Skipping Download.')
         print('File is located at ' + raw_file)

 # Download (or saving) failed: report the error and stop with a non-zero status.
 except Exception as e:
     print(e)
     print('Error downloading file. Please try again.')
     # SystemExit is always available (the `exit` helper is only injected by `site`),
     # and 1 is a portable failure status (POSIX statuses are limited to 0-255).
     raise SystemExit(1)

 # Step 2: clean the data. This lives in `else`, not `finally`, so it only runs
 # when step 1 succeeded and there is actually a raw file to read.
 else:
     print('Processing Data...')
     print('Reading file...')
     df = pd.read_csv(raw_file)
     print('File read...')

     # Keep the first row for every (crime_id, crime_type) pair.
     # drop_duplicates() returns a new frame, so the result must be assigned back.
     # NOTE(review): de-duplicating before the NaN filter means a duplicate with valid
     # coordinates is lost if the first occurrence lacks them - confirm this order is intended.
     df = df.drop_duplicates(subset=['crime_id', 'crime_type'], keep='first')
     print('Duplicate crime_id dropped...')

     # crime_id is the main identifier for each crime, so a row without it is worthless.
     # Rows missing latitude or longitude are dropped too, because the coordinates
     # are needed to place the crime on a map.
     df = df.dropna(subset=['crime_id', 'latitude', 'longitude'])
     print('NaN crime_id and latitude,longitude dropped...')
     print('Missing values removed...')

     # Save the cleaned dataframe to a new csv file next to the raw data
     df.to_csv(clean_file, index=False)
     print('Clean data file saved to ' + clean_file)

     # We're done!
     print('Processing complete.')
	#!/usr/bin/python

	import pandas as pd
	import os

	path = 'D:/IdeaProjects/Dara.gov/data/' # Directory where the data is stored
	url = 'https://data.mesaaz.gov/resource/39rt-2rfj.csv' # URL of the data
	filename = 'mesa_police_incidents.csv' # Name of the file to save the data

	raw_file = path + filename # Full path of the raw download
	clean_file = path + 'mesa_police_incidents_clean.csv' # Full path of the cleaned output

	# Step 1: make sure the raw CSV is on disk, downloading it only when it is missing.
	try:
	    if not os.path.exists(raw_file):
	        print('File not found. Downloading...')
	        # to_csv() does not create missing folders, so create the data directory first
	        os.makedirs(path, exist_ok=True)
	        pd.read_csv(url).to_csv(raw_file, index=False)
	        print('File downloaded and saved to ' + raw_file)
	    else:
	        # Raw file is already present: reuse it instead of downloading again
	        print('File already exists. Skipping Download.')
	        print('File is located at ' + raw_file)

	# Download (or saving) failed: report the error and stop with a non-zero status.
	except Exception as e:
	    print(e)
	    print('Error downloading file. Please try again.')
	    # SystemExit is always available (the `exit` helper is only injected by `site`),
	    # and 1 is a portable failure status (POSIX statuses are limited to 0-255).
	    raise SystemExit(1)

	# Step 2: clean the data. This lives in `else`, not `finally`, so it only runs
	# when step 1 succeeded and there is actually a raw file to read.
	else:
	    print('Processing Data...')
	    print('Reading file...')
	    df = pd.read_csv(raw_file)
	    print('File read...')

	    # Keep the first row for every (crime_id, crime_type) pair.
	    # drop_duplicates() returns a new frame, so the result must be assigned back.
	    # NOTE(review): de-duplicating before the NaN filter means a duplicate with valid
	    # coordinates is lost if the first occurrence lacks them - confirm this order is intended.
	    df = df.drop_duplicates(subset=['crime_id', 'crime_type'], keep='first')
	    print('Duplicate crime_id dropped...')

	    # crime_id is the main identifier for each crime, so a row without it is worthless.
	    # Rows missing latitude or longitude are dropped too, because the coordinates
	    # are needed to place the crime on a map.
	    df = df.dropna(subset=['crime_id', 'latitude', 'longitude'])
	    print('NaN crime_id and latitude,longitude dropped...')
	    print('Missing values removed...')

	    # Save the cleaned dataframe to a new csv file next to the raw data
	    df.to_csv(clean_file, index=False)
	    print('Clean data file saved to ' + clean_file)

	    # We're done!
	    print('Processing complete.')