Last active
August 10, 2016 12:26
-
-
Save n-bar/d6626775efbeb38222e6556c929f44c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import print_function | |
__version__ = "0.0.4" | |
import sys | |
import argparse | |
from collections import namedtuple | |
from multiprocessing.dummy import Pool | |
import requests | |
# One geocoding task/result record: the input row number, the requested
# address, and the resolved address and point (both None until geocoded).
Address = namedtuple("Address", "idx address_req address_res point")
def raw_input_(input_file):
    """Yield one ``Address`` task per line of *input_file*.

    Params:
        input_file::file - open file object; it is closed once iteration ends.
    Return:
        generator::Address(idx::int, address_req::unicode, None, None)
    Notes:
        idx is the zero-based line number. Surrounding whitespace is stripped
        first, then any leading/trailing ";" characters.
    """
    with input_file as src:
        for idx, row in enumerate(src):
            # Byte rows (Python 2 files, binary-mode files) must be decoded;
            # text rows (Python 3 text mode, io.open) are already unicode and
            # have no usable .decode().
            if isinstance(row, bytes):
                row = row.decode("utf-8")
            yield Address(idx, row.strip().strip(";"), None, None)
# Maps an input format name to the reader that turns the input file into tasks.
input_dispatch = dict(raw=raw_input_)
def encode_address(host, task):
    """Geocode a single address and return it as an ``Address`` record.

    Params:
        host::str - geocoder API endpoint
        task::Address - namedtuple(idx::int, address_req::str, address_res::str?, point::str?)
    Return:
        result::Address
    Notes:
        idx - task number;
        address_req - the requested address;
        address_res - the address that was found, None if something went wrong;
        point - longitude and latitude separated by a space (the geocoder's
                native string representation), None if something went wrong.

        This function never raises for request/parse problems: any failure is
        reported on stderr and signalled by ``point is None`` so the caller can
        retry the task.
    """
    failed = Address(task.idx, task.address_req, None, None)
    try:
        res = requests.get(host, params={'geocode': task.address_req, 'format': 'json', 'result': '1'}, timeout=5)
        status = res.ok
        payload = res.json()
        if not status:
            return failed
        collection = payload['response']['GeoObjectCollection']
        # Only the first match is used; an empty featureMember list raises
        # IndexError, which is treated like any other failure below.
        geo_object = collection['featureMember'][0]['GeoObject']
        return Address(task.idx,
                       collection['metaDataProperty']['GeocoderResponseMetaData']['request'],
                       geo_object['metaDataProperty']['GeocoderMetaData']['text'],
                       geo_object['Point']['pos'])
    except Exception as err:
        # Deliberately broad: this runs inside a pool worker and a failure must
        # become a retryable result rather than abort the whole batch.
        # repr() is used instead of err.message, which does not exist on
        # Python 3 and is frequently empty on Python 2.
        print(u"encode_address: %r" % (err,), file=sys.stderr)
        return failed
def debug_output(result, output_file):
    """Write *result* as a tab-separated table with a header row.

    Params:
        result::[Address]
        output_file::file - open, writable file object; closed on return.
    Notes:
        Columns: idx, request, response, longlat. Missing values are written
        as the string "None".
    """
    # Python 2 file objects are byte-oriented, so non-ASCII rows have to be
    # encoded by hand; on Python 3 encoding here would print a b'...' repr.
    encode_rows = sys.version_info[0] < 3
    with output_file as dst:
        print(u"idx\trequest\tresponse\tlonglat", file=dst)
        for address in result:
            row = u"{0.idx}\t{0.address_req}\t{0.address_res}\t{0.point}".format(address)
            if encode_rows:
                row = row.encode("utf-8")
            print(row, file=dst)
def raw_output(result, output_file):
    """Write the point of every record in *result*, one per line.

    Params:
        result::[Address]
        output_file::file - open, writable file object; closed on return.
    """
    with output_file as sink:
        points = (record.point for record in result)
        for point in points:
            print(point, file=sink)
def qc_output(result, output_file):
    """Write all points of *result* as one list of [lat, lon] float pairs.

    Params:
        result::[Address]
        output_file::file - open, writable file object; closed on return.
    Notes:
        The whole list is printed on a single line using Python's list repr.
    """
    def convert_point_4qc(point_str):
        """
        Takes a point given as the string "lon<SPC>lat" and returns
        [lat::float, lon::float].
        """
        # A real list is built on purpose: map() is lazy on Python 3 and would
        # be printed as "<map object ...>" instead of the coordinates.
        return [float(part) for part in point_str.strip().split(" ", 1)[::-1]]

    with output_file as dst:
        print([convert_point_4qc(address.point) for address in result], file=dst)
# Maps an --output-format name to the writer that serializes the results.
output_dispatch = dict(
    raw=raw_output,
    debug=debug_output,
    qc=qc_output,
)
def main():
    """Command-line entry point: geocode every input address and write the points.

    Reads addresses from ``--input``, geocodes them in a thread pool, retries
    the failed ones for up to ``--max-attempt`` rounds, reports the addresses
    that are still unresolved on stderr and writes the resolved ones (ordered
    by input line number) to ``--output`` in the chosen format.

    Raises:
        RuntimeError - if stdin is an interactive terminal or the input
                       contains no rows.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--host', default='https://geocode-maps.yandex.ru/1.x/', help="default: %(default)s", metavar="")
    argparser.add_argument('-i', '--input', metavar="", type=argparse.FileType('r'), default=sys.stdin, help="default: stdin")
    argparser.add_argument('-o', '--output', metavar="", type=argparse.FileType('w'), default=sys.stdout, help="default: stdout")
    argparser.add_argument('--input-format', default="raw", choices=['raw'],
                           help="default: %(default)s, разделитель строк \\n")
    argparser.add_argument('--output-format', default="raw", choices=["raw", "qc", "debug"],
                           help="""default: %(default)s, список пар lon<SPC>lat через <\\n>;
                           qc - список пар [lat, lon] через <,>""")
    # type=int: without it, values given on the command line arrive as strings,
    # which breaks both the attempt comparison and the pool size.
    argparser.add_argument('--max-attempt', metavar="", type=int, default=10, help="default: %(default)s")
    argparser.add_argument('--n-proc', metavar="", type=int, default=10, help="default: %(default)s")
    args = argparser.parse_args()

    if args.input is sys.stdin and args.input.isatty():
        raise RuntimeError("input is empty")

    # Materialize the reader's generator: a generator object is always truthy,
    # so the emptiness check below would otherwise never fire.
    task = list(input_dispatch[args.input_format](args.input))
    if not task:
        raise RuntimeError("input is empty")

    result = []
    attempt = 0
    # One pool is shared by all retry rounds and released afterwards, instead
    # of leaking a fresh set of worker threads on every attempt.
    pool = Pool(args.n_proc)
    try:
        while task and attempt < args.max_attempt:
            attempt = attempt + 1
            tmp_res = pool.map(lambda x: encode_address(args.host, x), task)
            # Records without a point failed and are queued for the next round.
            task = [x for x in tmp_res if x.point is None]
            result.extend(x for x in tmp_res if x.point is not None)
            if task:
                print("attempt: #%d; done: %d; task: %d" % (attempt, len(result), len(task)), file=sys.stderr)
    finally:
        pool.close()
        pool.join()

    if task:
        print("MISSED ADDRESSES: %d" % len(task), file=sys.stderr)
        for x in sorted(task, key=lambda x: x.idx):
            print(u"idx: %d, address: %s" % (x.idx, x.address_req), file=sys.stderr)

    output_dispatch[args.output_format](sorted(result, key=lambda x: x.idx), args.output)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment