Skip to content

Instantly share code, notes, and snippets.

@n-bar
Last active August 10, 2016 12:26
Show Gist options
  • Save n-bar/d6626775efbeb38222e6556c929f44c0 to your computer and use it in GitHub Desktop.
Save n-bar/d6626775efbeb38222e6556c929f44c0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
__version__ = "0.0.4"
import sys
import argparse
from collections import namedtuple
from multiprocessing.dummy import Pool
import requests
Address = namedtuple("Address", ["idx", "address_req", "address_res", "point"])


def raw_input_(input_file):
    """
    Read addresses from a line-oriented file, one address per line.

    input_file::file - open file object (text or binary); it is used as a
        context manager, so it is closed once the generator is exhausted
    generator::Address(idx::int, address_req::unicode, None, None)

    idx is the zero-based line number; address_req is the line with
    surrounding whitespace and then surrounding ";" characters removed.
    """
    with input_file as src:
        for idx, row in enumerate(src):
            # Python 2 files (and binary files on Python 3) yield byte strings
            # that have to be decoded; Python 3 text files already yield str,
            # which has no .decode() method.
            if isinstance(row, bytes):
                row = row.decode("utf-8")
            yield Address(idx, row.strip().strip(";"), None, None)


# Maps an input format name to the reader that produces Address tasks.
input_dispatch = {"raw": raw_input_}
def encode_address(host, task):
    """
    Geocode one address through the HTTP API at `host`.

    Params:
        host::str - API endpoint URL
        task::Address - namedtuple(idx::int, address_req::str, address_res::str?, point::str?)
    Return:
        result::Address
    Notes:
        idx - task number, copied from `task`;
        address_req - the requested address (on success, the request string
            echoed back in the response metadata);
        address_res - the address that was found, None if something went wrong;
        point - longitude and latitude separated by a space, the geocoder's
            native string representation, None if something went wrong.

    Never raises: any failure is reported on stderr and returned as an
    Address with address_res and point set to None, so the caller can retry.
    """
    failed = Address(task.idx, task.address_req, None, None)
    try:
        res = requests.get(host,
                           params={'geocode': task.address_req, 'format': 'json', 'result': '1'},
                           timeout=5)
        # Check the status before parsing: an error response is not
        # guaranteed to carry a JSON body.
        if not res.ok:
            print(u"encode_address: HTTP %s" % (res.status_code,), file=sys.stderr)
            return failed
        collection = res.json()['response']['GeoObjectCollection']
        # Only the first match is used (the request asks for a single result).
        geo_object = collection[u'featureMember'][0]['GeoObject']
        return Address(task.idx,
                       collection['metaDataProperty']['GeocoderResponseMetaData']['request'],
                       geo_object['metaDataProperty']['GeocoderMetaData']['text'],
                       geo_object['Point']['pos'])
    except Exception as err:
        # Deliberately broad: this runs inside a pool worker and a failed
        # lookup must not abort the whole batch. Format the exception itself;
        # `err.message` does not exist on Python 3.
        print(u"encode_address: %s" % (err,), file=sys.stderr)
        return failed
def debug_output(result, output_file):
    """
    Write the results as a tab-separated table preceded by a header row.

    result::[Address]
    output_file::file - used as a context manager, so it is closed on return

    Columns: idx, request, response, longlat. A field that is None is
    rendered as the string "None".
    """
    # On Python 2 `str is bytes` and plain files expect byte strings, so each
    # row is UTF-8 encoded. On Python 3 text files take str directly; encoding
    # there would print the bytes repr (b'...') instead of the text.
    needs_encoding = str is bytes
    with output_file as dst:
        print(u"idx\trequest\tresponse\tlonglat", file=dst)
        for address in result:
            row = u"{0.idx}\t{0.address_req}\t{0.address_res}\t{0.point}".format(address)
            print(row.encode("utf-8") if needs_encoding else row, file=dst)
def raw_output(result, output_file):
    """
    Write only the coordinates, one point per line.

    result::[Address]
    output_file::file - used as a context manager, so it is closed on return
    """
    with output_file as sink:
        for point in (item.point for item in result):
            print(point, file=sink)
def qc_output(result, output_file):
    """
    Write all points as a single list of [lat, lon] pairs on one line.

    result::[Address] - every item must have a non-None point
    output_file::file - used as a context manager, so it is closed on return
    """
    def convert_point_4qc(point_str):
        """
        Takes a point string of the form "lon<SPC>lat", returns [lat::float, lon::float]
        """
        # Build a real list rather than map(): on Python 3 map() is lazy and
        # would be printed as "<map object ...>" instead of the coordinates.
        return [float(part) for part in point_str.strip().split(" ", 1)[::-1]]

    with output_file as dst:
        print([convert_point_4qc(address.point) for address in result], file=dst)
# Maps an output format name to the function that writes results in that format.
output_dispatch = dict(raw=raw_output,
                       debug=debug_output,
                       qc=qc_output)
def main():
    """
    Command-line entry point.

    Reads addresses from the input file, geocodes them concurrently in a
    thread pool, retries the ones that failed up to --max-attempt times,
    reports addresses that could not be resolved on stderr, and writes the
    resolved ones (ordered by input position) in the selected output format.

    Raises:
        RuntimeError - when stdin is an interactive terminal or the input
            contains no lines.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--host', default='https://geocode-maps.yandex.ru/1.x/', help="default: %(default)s", metavar="")
    argparser.add_argument('-i', '--input', metavar="", type=argparse.FileType('r'), default=sys.stdin, help="default: stdin")
    argparser.add_argument('-o', '--output', metavar="", type=argparse.FileType('w'), default=sys.stdout, help="default: stdout")
    argparser.add_argument('--input-format', default="raw", choices=['raw'],
                           help="default: %(default)s, разделитель строк \\n")
    argparser.add_argument('--output-format', default="raw", choices=["raw", "qc", "debug"],
                           help="""default: %(default)s, список пар lon<SPC>lat через <\\n>;
qc - список пар [lat, lon] через <,>""")
    # type=int: without it a value given on the command line stays a string,
    # which breaks the attempt comparison and the pool size.
    argparser.add_argument('--max-attempt', metavar="", type=int, default=10, help="default: %(default)s")
    argparser.add_argument('--n-proc', metavar="", type=int, default=10, help="default: %(default)s")
    args = argparser.parse_args()

    if args.input is sys.stdin and args.input.isatty():
        raise RuntimeError("input is empty")

    # Materialize the tasks: the reader is a generator, which is always
    # truthy, so the emptiness check below only works on a list.
    task = list(input_dispatch[args.input_format](args.input))
    if not task:
        raise RuntimeError("input is empty")

    result = []
    attempt = 0
    # One pool for all attempts, shut down even if an attempt raises.
    pool = Pool(args.n_proc)
    try:
        while task and attempt < args.max_attempt:
            attempt += 1
            tmp_res = pool.map(lambda x: encode_address(args.host, x), task)
            # Unresolved addresses (point is None) go into the next attempt.
            task = [x for x in tmp_res if x.point is None]
            result.extend(x for x in tmp_res if x.point is not None)
            if task:
                print("attempt: #%d; done: %d; task: %d" % (attempt, len(result), len(task)), file=sys.stderr)
    finally:
        pool.close()
        pool.join()

    if task:
        print("MISSED ADDRESSES: %d" % len(task), file=sys.stderr)
        for x in sorted(task, key=lambda x: x.idx):
            print(u"idx: %d, address: %s" % (x.idx, x.address_req), file=sys.stderr)

    output_dispatch[args.output_format](sorted(result, key=lambda x: x.idx), args.output)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment