Created
April 17, 2017 15:59
-
-
Save meg-codes/46f80b249031c228e237c9cab96f789b to your computer and use it in GitHub Desktop.
Code to take a spreadsheet with a variety of lat/long formats, convert them to decimal values, and then write a fixed CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import pandas as pd | |
import LatLon as latlon | |
import re | |
from LatLon import string2latlon | |
# Requires: pandas, LatLon, py2.7 | |
# Patterns and LatLon format strings are built once at import time instead
# of on every call.
# Whitespace and commas carry no information once the hemisphere letters
# are used as separators, so they are dropped first.
_REMOVE_CHARS = re.compile(r'[\s,]')
_HEMISPHERE_CHARS = re.compile(r'([NSEWnsew])')
# N.B. Smart quotes and normal quote marks both grabbed (relies on the
# utf-8 coding declaration at the top of the file)
_DEGREE_CHARS = re.compile(r'([\'\"°″′])')
# Two or more consecutive spaces left behind by the substitutions
_SPACE_RUNS = re.compile(r' {2,}')
# lat_lon patterns in LatLon format (https://pypi.python.org/pypi/LatLon)
_DMS_FORMAT = 'd% %m% %S% %H'
_DM_FORMAT = 'd% %m% %H'
_DEGREE_FORMAT = 'D% %H'


def makefloatlat(data):
    '''Convert a combined latitude/longitude value to a decimal string.

    The value is stringified, stripped of whitespace and commas, and cut
    into a latitude part and a longitude part after each hemisphere
    letter. Degree, minute and second marks are replaced by spaces so that
    each part reads like "12 34 56 N", "12 34 N" or "12.34 N"; the number
    of fields in the latitude part selects the LatLon format string used
    to parse both parts.

    :param data: value holding both coordinates; it is passed through
        ``str()`` before any processing.
    :returns: the items of ``location.to_string('D')`` joined with
        ``', '`` (presumably decimal-degree latitude and longitude --
        confirm against the installed LatLon version).
    :raises ValueError: if the normalized value does not split into
        exactly one latitude and one longitude part.
    '''
    # String my data
    string = str(data)
    # Remove notation
    string = _REMOVE_CHARS.sub('', string)
    # notate with a space before and a comma after the hemisphere chars
    string = _HEMISPHERE_CHARS.sub(r' \1,', string)
    # Substitute spaces for degree chars
    string = _DEGREE_CHARS.sub(' ', string)
    # clean up stray comma introduced after the last hemisphere char
    string = string.strip(',')
    # Collapse runs of spaces left by the substitutions in a single pass;
    # a replace-in-a-loop keyed on a lone space would never terminate.
    string = _SPACE_RUNS.sub(' ', string)
    # split into lat, lon
    parts = string.split(',')
    if len(parts) != 2:
        raise ValueError(
            'expected one latitude and one longitude in {0!r}, '
            'got {1} part(s)'.format(data, len(parts)))
    lat, lon = parts
    # Thankfully the degree of specificity is the same for both parts, so
    # the latitude's field count picks the format for the pair.
    field_count = len(lat.split())
    if field_count > 3:
        # i.e. "12 34 56 N"
        pattern = _DMS_FORMAT
    elif field_count > 2:
        # i.e. "12 34 N"
        pattern = _DM_FORMAT
    else:
        # i.e. "12.34 N"
        pattern = _DEGREE_FORMAT
    location = string2latlon(lat, lon, pattern)
    return ', '.join(str(i) for i in location.to_string('D'))
# Spreadsheet to read and the corrected copy to write
_SOURCE_CSV = 'Chesapeake-Leopard-Resolutions.csv'
_FIXED_CSV = 'Chesapeake-Leopard-Resolutions-fixedlatlon.csv'

# Read the sheet into a pandas DataFrame and discard every row that
# contains a missing value
df = pd.read_csv(_SOURCE_CSV).dropna()
# Run each 'lat/long' cell through makefloatlat() and store the result
# back in the same column
converted = df['lat/long'].apply(makefloatlat)
df['lat/long'] = converted
# Save the corrected table as a new CSV
df.to_csv(_FIXED_CSV)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment