Last active
August 29, 2015 14:04
-
-
Save jimr/ec8b8cc6c9e04fca84fd to your computer and use it in GitHub Desktop.
Helper functions for cleaning up user-entered geographic coordinates. Call get_coords(lat, lon) with the raw latitude and longitude strings to get cleaned decimal coordinates. No guarantees; check your results.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def clean_point(point):
    """Normalise one raw coordinate string into a dot-separated number string.

    The result is either a plain decimal such as ``-1.23`` or a
    degrees.minutes.seconds string such as ``65.42.07.14`` in which the
    degree and minute symbols have been replaced by dots.

    Steps, in order:

    1. Strip whitespace and remove junk characters (BOM, spaces, no-break
       spaces, en dashes).
    2. Strip leading ``+``, ``'`` and ``,``; trailing ``,`` and ``.``; and
       ``N`` / ``E`` markers at either end.
    3. Turn an ``S`` / ``W`` marker into a leading minus sign.
    4. Drop a trailing degree symbol from a plain number, then replace any
       remaining degree symbols with dots.
    5. Drop a trailing minute symbol from ``D.M`` / ``D.M.m`` strings, then
       replace any remaining minute symbols with dots.
    6. Strip trailing second symbols.

    :param point: raw coordinate text (a string).
    :returns: the cleaned string; it is not guaranteed to be parseable.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import re`` being present.
    import re

    point = point.strip()

    # Junk that may appear anywhere: BOM, space, no-break space, en dash.
    for symbol in (u'\ufeff', ' ', u'\xa0', u'\u2013'):
        point = point.replace(symbol, '')

    # Leading noise; a leading quote is sometimes added to fool Excel.
    for char in ('+', "'", ','):
        point = point.lstrip(char)
    # Trailing noise.
    for char in (',', '.'):
        point = point.rstrip(char)
    # North / east markers carry no sign information, so just drop them.
    point = point.strip('N').strip('E')

    # South / west markers mean a negative value, e.g. 1.23 deg S -> -1.23 deg.
    # A leading marker is tried first, then a marker later in the string.
    # NOTE(review): the second pattern is greedy, so anything after the last
    # S/W in the string is discarded.
    for pattern in (r'[SW](.*)', r'(.*)[SW]'):
        match = re.match(pattern, point)
        if match:
            coord = match.group(1)
            # Always drop the letter; only add a minus sign when the value
            # is not already negative (so "-1.23S" becomes "-1.23").
            point = coord if coord.startswith('-') else u'-%s' % coord

    # Degrees: the degree sign and the masculine ordinal indicator.
    degree_symbols = u'\xb0\xba'
    # If all we have is a number with a trailing degree symbol then we can
    # just strip it because the value is already decimal.
    match = re.match(r'(-?\d+(\.\d+)?)[%s]$' % degree_symbols, point)
    if match:
        point = match.group(1)
    for symbol in degree_symbols:
        point = point.replace(symbol, '.')

    # Minutes: right single quote, prime, acute accent, apostrophe, colon.
    minute_symbols = u'\u2019\u2032\xb4\':'
    # Degrees and decimal minutes (e.g. "65 deg 42.00714'", which by now is
    # "65.42.00714'") already have their dots, so the trailing minute symbol
    # is removed instead of being turned into another dot.
    # NOTE(review): this also turns "65.42'" into "65.42", which float()
    # then accepts as decimal degrees -- confirm that is intended.
    match = re.match(r'(-?\d+\.\d+(\.\d+)?)[%s]$' % minute_symbols, point)
    if match:
        point = match.group(1)
    for symbol in minute_symbols:
        point = point.replace(symbol, '.')

    # Seconds: diaeresis, double prime, right double quote, straight quote.
    for symbol in (u'\xa8', u'\u2033', u'\u201d', '"'):
        point = point.rstrip(symbol)

    return point
def dms_to_dec(point):
    """Convert a dot-separated degrees.minutes.seconds string to a float.

    The string is split on the first two dots only, so the third component
    may itself contain a decimal point (``'65.42.07.14'`` is 65 degrees,
    42 minutes, 7.14 seconds). Missing components count as zero.

    A leading minus sign applies to the whole value, not just the degrees:
    ``'-1.30.0'`` is -1.5, not -0.5.

    NOTE(review): a third component such as ``'00714'`` is read as 714
    seconds, so a degrees + decimal-minutes string like ``'65.42.00714'``
    is not interpreted as 42.00714 minutes.

    :param point: string of the form ``D``, ``D.M`` or ``D.M.S``.
    :returns: the value in decimal degrees.
    :raises ValueError: if any component is not a valid number.
    """
    # Take the sign off first so minutes and seconds increase the magnitude
    # instead of pulling a negative value back towards zero.
    negative = point.startswith('-')
    if negative:
        point = point[1:]

    components = point.split('.', 2)
    divisors = (1.0, 60.0, 3600.0)
    magnitude = sum(
        float(value) / divisor for value, divisor in zip(components, divisors)
    )
    return -magnitude if negative else magnitude
def get_coords(lat, lon):
    """Clean a raw latitude / longitude pair and convert both to floats.

    Each value is stripped, passed through ``clean_point`` and parsed with
    ``float``; if that fails it is handed to ``dms_to_dec``.

    :param lat: raw latitude text, or ``None``.
    :param lon: raw longitude text, or ``None``.
    :returns: a ``(lat, lon)`` tuple; an element is ``None`` when the input
        is ``None``, empty, or one of the markers ``TBC``, ``N/A``, ``??``.
    :raises ValueError: propagated from ``dms_to_dec`` when a value cannot
        be parsed in either form.
    """
    missing_markers = ('TBC', 'N/A', '??', '')

    def _get_coord(point):
        # Treat a missing value the same way as an explicit "missing" marker.
        if point is None:
            return None
        point = point.strip()
        if point in missing_markers:
            return None
        point = clean_point(point)
        try:
            return float(point)
        except ValueError:
            # Not a plain decimal, so treat it as dot-separated DMS.
            return dms_to_dec(point)

    return _get_coord(lat), _get_coord(lon)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment