Created
December 18, 2015 13:42
-
-
Save riordan/cdb90df5829d8a69d476 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import glob\n",
"import json\n",
"import os\n",
"\n",
"from postal.expand import expand_address\n",
"from postal.parser import parse_address"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def expandparse(query):\n",
"    \"\"\"Parse every expansion of ``query``.\n",
"\n",
"    ``query`` is handed to ``expand_address``; each expansion it produces is\n",
"    then handed to ``parse_address``.\n",
"\n",
"    Returns a list with one ``parse_address`` result per expansion, in the\n",
"    order ``expand_address`` produced them. A real list (rather than a bare\n",
"    ``map``) is returned so the result prints readably and can be iterated\n",
"    more than once on Python 3 as well as Python 2.\n",
"    \"\"\"\n",
"    return [parse_address(expansion) for expansion in expand_address(query)]\n",
"\n", | |
"def print_autocomplete(query):\n",
"    \"\"\"Print the parse of every leading prefix of ``query``, shortest first.\n",
"\n",
"    Mimics a user typing ``query`` one character at a time: for each prefix,\n",
"    print the prefix, then the ``expandparse`` result for it, then a blank\n",
"    separator.\n",
"    \"\"\"\n",
"    for end in range(1, len(query) + 1):\n",
"        typed = query[:end]\n",
"        print(typed)\n",
"        print(expandparse(typed))\n",
"        print('\\n')"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"TEST SET: labels\n", | |
"TEST: San Francisco, San Francisco County, CA\n", | |
"[(u'san francisco san francisco county', u'road'), (u'california', u'state')]\n", | |
"[(u'san francisco san francisco', u'road'), (u'county', u'state_district'), (u'ca', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 30 West 26th Street, Manhattan, NY\n", | |
"[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')]\n", | |
"[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: New South Wales, Australia\n", | |
"[(u'new south wales', u'state'), (u'australia', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: California, CA\n", | |
"[(u'california', u'city'), (u'california', u'state')]\n", | |
"[(u'california', u'state'), (u'ca', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: West Bengal, India\n", | |
"[(u'west bengal', u'state'), (u'india', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: North West, Singapore\n", | |
"[(u'northwest', u'road'), (u'singapore', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: Arbil, Iraq\n", | |
"[(u'arbil', u'road'), (u'iraq', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: Madrid, Spain\n", | |
"[(u'madrid', u'city'), (u'spain', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: 1 Main St, Dungannon, United Kingdom\n", | |
"[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united kingdom', u'country')]\n", | |
"[(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united kingdom', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: Hackney City Farm, Haggerston, Greater London\n", | |
"[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater london', u'state_district')]\n", | |
"\n", | |
"\n", | |
"TEST: 1 Grolmanstraße, Berlin, Germany\n", | |
"[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city'), (u'germany', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: New Zealand\n", | |
"[(u'new zealand', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: McDonald's, Central Singapore, Singapore\n", | |
"[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'city'), (u'singapore', u'country')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: autocomplete_focus\n", | |
"TEST: DiDi dumpling\n", | |
"[(u'didi dumpling', u'house')]\n", | |
"[(u'501', u'house_number'), (u'dumpling', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: union square\n", | |
"[(u'union', u'city'), (u'square', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: union square\n", | |
"[(u'union', u'city'), (u'square', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: hard rock cafe\n", | |
"[(u'hard rock cafe', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: hard rock cafe\n", | |
"[(u'hard rock cafe', u'house')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: exact_matches\n", | |
"TEST: 100 20th street\n", | |
"[(u'100', u'house_number'), (u'20th street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 40 20th street\n", | |
"[(u'40', u'house_number'), (u'20th street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 120 42nd Street\n", | |
"[(u'120', u'house_number'), (u'42nd street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 358 41st street, ny\n", | |
"[(u'358', u'house_number'), (u'41st street', u'road'), (u'new york', u'state')]\n", | |
"[(u'358', u'house_number'), (u'41st street', u'road'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 1359 54 street, ny\n", | |
"[(u'1359', u'house_number'), (u'54 street', u'road'), (u'new york', u'state')]\n", | |
"[(u'1359', u'house_number'), (u'54 street', u'road'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 310 7 street, ny\n", | |
"[(u'310', u'house_number'), (u'7 street', u'road'), (u'new york', u'state')]\n", | |
"[(u'310', u'house_number'), (u'7 street', u'road'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 921 83 street, ny\n", | |
"[(u'921 83 street', u'road'), (u'new york', u'state')]\n", | |
"[(u'921 83 street', u'road'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 518 3 street, ny\n", | |
"[(u'518', u'house_number'), (u'3 street', u'road'), (u'new york', u'state')]\n", | |
"[(u'518', u'house_number'), (u'3 street', u'road'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: landmarks\n", | |
"TEST: statue of liberty\n", | |
"[(u'statue of liberty', u'house')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: search_coarse\n", | |
"TEST: brooklyn\n", | |
"[(u'brooklyn', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: new york\n", | |
"[(u'new york', u'state')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: address_matching\n", | |
"TEST: 30 w 26 st\n", | |
"[(u'30', u'house_number'), (u'west', u'road'), (u'26', u'house_number'), (u'saint', u'road')]\n", | |
"[(u'30', u'house_number'), (u'west 26 street', u'road')]\n", | |
"[(u'30', u'house_number'), (u'w', u'road'), (u'26', u'house_number'), (u'saint', u'road')]\n", | |
"[(u'30', u'house_number'), (u'w 26 street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 507 hackney rd\n", | |
"[(u'507', u'house_number'), (u'hackney road', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 49 Kay Street\n", | |
"[(u'49', u'house_number'), (u'kay street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 339 W Main St, Cheshire, 06410\n", | |
"[(u'339', u'house_number'), (u'west main saint cheshire', u'road'), (u'06410', u'postcode')]\n", | |
"[(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshire', u'city'), (u'06410', u'postcode')]\n", | |
"[(u'339', u'house_number'), (u'w main saint cheshire', u'road'), (u'06410', u'postcode')]\n", | |
"[(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshire', u'city'), (u'06410', u'postcode')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: confidence_score\n", | |
"TEST: 1 West 72nd St, New York, NY, 10023\n", | |
"[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'10023', u'postcode')]\n", | |
"[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'10023', u'postcode')]\n", | |
"[(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'10023', u'postcode')]\n", | |
"[(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'10023', u'postcode')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: address_parsing\n", | |
"TEST: 101 saint mark pl 10009\n", | |
"[(u'101', u'house_number'), (u'saint mark place', u'road'), (u'10009', u'postcode')]\n", | |
"[(u'101', u'house_number'), (u'saint mark plain', u'road'), (u'10009', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: 1 water st manhattan ny\n", | |
"[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')]\n", | |
"[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]\n", | |
"[(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')]\n", | |
"[(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 1 water st manhattan ny\n", | |
"[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')]\n", | |
"[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]\n", | |
"[(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')]\n", | |
"[(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 450 w 37th st, new york, ny 11232\n", | |
"[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: starbucks 10010\n", | |
"[(u'starbucks', u'house'), (u'10010', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: 455 43rd st new york ny 11232\n", | |
"[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')]\n", | |
"[(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: 1 main st ny 11201\n", | |
"[(u'1', u'house_number'), (u'main saint', u'road'), (u'new york', u'state'), (u'11201', u'postcode')]\n", | |
"[(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state'), (u'11201', u'postcode')]\n", | |
"[(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state'), (u'11201', u'postcode')]\n", | |
"[(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state'), (u'11201', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: 186 Tuskegee St SE Atlanta GA\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint southeast atlanta gate', u'road')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta', u'city'), (u'ga', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta', u'city'), (u'georgia', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta gate', u'road')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city'), (u'ga', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city'), (u'georgia', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city'), (u'gate', u'road')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city'), (u'ga', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city'), (u'georgia', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee street southeast', u'road'), (u'atlanta', u'city'), (u'gate', u'road')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'suburb'), (u'company', u'house'), (u'atlanta gate', u'road')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city'), (u'gate', u'road')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city'), (u'ga', u'state')]\n", | |
"[(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city'), (u'georgia', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 3122 16th St San Francisco, CA 94103\n", | |
"[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94103', u'postcode')]\n", | |
"[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94103', u'postcode')]\n", | |
"[(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94103', u'postcode')]\n", | |
"[(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94103', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: 3010 20th St San Francisco CA 94110\n", | |
"[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94110', u'postcode')]\n", | |
"[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94110', u'postcode')]\n", | |
"[(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94110', u'postcode')]\n", | |
"[(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94110', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: 3577 Jackson St San Francisco, CA 94118\n", | |
"[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94118', u'postcode')]\n", | |
"[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94118', u'postcode')]\n", | |
"[(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94118', u'postcode')]\n", | |
"[(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94118', u'postcode')]\n", | |
"\n", | |
"\n", | |
"TEST: whole foods NY\n", | |
"[(u'whole foods', u'house'), (u'new york', u'state')]\n", | |
"[(u'whole foods', u'house'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: autocomplete_admin_areas\n", | |
"TEST: brooklyn\n", | |
"[(u'brooklyn', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: new york\n", | |
"[(u'new york', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: london\n", | |
"[(u'london', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: san francisco\n", | |
"[(u'san francisco', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: victoria\n", | |
"[(u'victoria', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: wales\n", | |
"[(u'wales', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: new south wales\n", | |
"[(u'new south wales', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: california\n", | |
"[(u'california', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: west bengal\n", | |
"[(u'west bengal', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: madrid\n", | |
"[(u'madrid', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: malmo\n", | |
"[(u'malmo', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: Singarpuram\n", | |
"[(u'singarpuram', u'suburb')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: address_type\n", | |
"TEST: 102 Fleet Street\n", | |
"[(u'102', u'house_number'), (u'fleet street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: Shepherd and Flock\n", | |
"[(u'shepherd', u'house'), (u'0', u'house_number'), (u'flock', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: 22 Moor Park Lane\n", | |
"[(u'22', u'house_number'), (u'moor park lane', u'road')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: search\n", | |
"TEST: brooklyn\n", | |
"[(u'brooklyn', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: brooklyn, ny\n", | |
"[(u'brooklyn', u'state_district'), (u'new york', u'state')]\n", | |
"[(u'brooklyn', u'city'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: philadelphia\n", | |
"[(u'philadelphia', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: philadelphia, pa\n", | |
"[(u'philadelphia', u'city'), (u'pa', u'state')]\n", | |
"[(u'philadelphia', u'city'), (u'pennsylvania', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: new york, new york\n", | |
"[(u'new york', u'city'), (u'new york', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: new york city\n", | |
"[(u'new york city', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: new york city, usa\n", | |
"[(u'new york city', u'city'), (u'usa', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: 130 dean street brooklyn, ny\n", | |
"[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'state_district'), (u'new york', u'state')]\n", | |
"[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'state_district'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: billerica\n", | |
"[(u'billerica', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: billerica, ma\n", | |
"[(u'billerica', u'city'), (u'massachusetts', u'state')]\n", | |
"[(u'billerica', u'city'), (u'ma', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: 15 call street billerica, ma\n", | |
"[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city'), (u'massachusetts', u'state')]\n", | |
"[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city'), (u'ma', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: union sq\n", | |
"[(u'union', u'city'), (u'square', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: portland\n", | |
"[(u'portland', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: portland, oregon\n", | |
"[(u'portland', u'city'), (u'oregon', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: paris\n", | |
"[(u'paris', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: france\n", | |
"[(u'france', u'country')]\n", | |
"\n", | |
"\n", | |
"TEST: london\n", | |
"[(u'london', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: chelsea, new york\n", | |
"[(u'chelsea', u'suburb'), (u'new york', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: soho, new york\n", | |
"[(u'soho', u'suburb'), (u'new york', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: perugia airport\n", | |
"[(u'perugia airport', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: 101 saint marks pl, new york\n", | |
"[(u'101', u'house_number'), (u'saint marks place', u'road'), (u'new york', u'state')]\n", | |
"[(u'101', u'house_number'), (u'saint marks plain', u'road'), (u'new york', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: newark airport\n", | |
"[(u'newark airport', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: 7 Simon-Dach-Straße\n", | |
"[(u'7', u'house'), (u'simon-dach-strasse', u'road')]\n", | |
"[(u'7', u'house_number'), (u'simon dach strasse', u'road')]\n", | |
"[(u'7', u'house'), (u'simondachstrasse', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: Simon-Dach-Straße 7\n", | |
"[(u'simon-dach-strasse', u'road'), (u'7', u'house_number')]\n", | |
"[(u'simon', u'house'), (u'dach strasse', u'road'), (u'7', u'house_number')]\n", | |
"[(u'simondachstrasse', u'road'), (u'7', u'house_number')]\n", | |
"\n", | |
"\n", | |
"TEST: 30 West 26th Street\n", | |
"[(u'30', u'house_number'), (u'west 26th street', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: statue of liberty\n", | |
"[(u'statue of liberty', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: statue of liberty\n", | |
"[(u'statue of liberty', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: 4th and King\n", | |
"[(u'4th', u'road'), (u'0', u'house_number'), (u'king', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: Lancaster\n", | |
"[(u'lancaster', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: Paris\n", | |
"[(u'paris', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: Manchester\n", | |
"[(u'manchester', u'city')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: autocomplete_venues\n", | |
"TEST: DiDi Dumpling\n", | |
"[(u'didi dumpling', u'house')]\n", | |
"[(u'501', u'house_number'), (u'dumpling', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: hackney city farm\n", | |
"[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: 1 Grolmanstrasse\n", | |
"[(u'1', u'house'), (u'grolmanstrasse', u'road')]\n", | |
"\n", | |
"\n", | |
"TEST: Beach Bablyon\n", | |
"[(u'beach bablyon', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: Waiotapu\n", | |
"[(u'waiotapu', u'house')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n", | |
"TEST SET: quattroshapes_popularity\n", | |
"TEST: chelsea\n", | |
"[(u'chelsea', u'city')]\n", | |
"\n", | |
"\n", | |
"TEST: chelsea, ny\n", | |
"[(u'chelsea', u'suburb'), (u'new york', u'state')]\n", | |
"[(u'chelsea', u'suburb'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: williamsburg\n", | |
"[(u'williamsburg', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: williamsburg, ny\n", | |
"[(u'williamsburg', u'suburb'), (u'new york', u'state')]\n", | |
"[(u'williamsburg', u'suburb'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"TEST: ridgewood\n", | |
"[(u'ridgewood', u'house')]\n", | |
"\n", | |
"\n", | |
"TEST: ridgewood, ny\n", | |
"[(u'ridgewood', u'suburb'), (u'new york', u'state')]\n", | |
"[(u'ridgewood', u'suburb'), (u'ny', u'state')]\n", | |
"\n", | |
"\n", | |
"__________________________________________________\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Run every acceptance-test suite through expandparse and print the parses.\n",
"# These suites are skipped -- presumably because their test inputs carry no\n",
"# 'text' field to parse (TODO confirm against the test-case files).\n",
"SKIPPED_TEST_SETS = (\"place\", \"admin_lookup\", \"reverse_coordinate_wrapping\")\n",
"\n",
"for path in glob.glob(\"acceptance-tests/test_cases/*.json\"):\n",
"    # Take the suite name from the file name itself instead of slicing at a\n",
"    # hard-coded offset, so this keeps working if the directory is renamed or\n",
"    # the platform uses a different path separator.\n",
"    filename = os.path.splitext(os.path.basename(path))[0]\n",
"    if filename in SKIPPED_TEST_SETS:\n",
"        continue\n",
"\n",
"    # The context manager closes the file handle as soon as it has been read.\n",
"    with open(path) as handle:\n",
"        suite = json.load(handle)\n",
"\n",
"    print(\"TEST SET: %s\" % filename)\n",
"    for test in suite['tests']:\n",
"        test_text = test['in']['text']\n",
"        print(\"TEST: %s\" % test_text)\n",
"        for parsed in expandparse(test_text):\n",
"            print(parsed)\n",
"        print('\\n')\n",
"    print('__________________________________________________\\n')"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment