Skip to content

Instantly share code, notes, and snippets.

@sangheestyle
Last active October 25, 2018 21:35
Show Gist options
  • Save sangheestyle/2b0c0338780d10c2770b6f5b5e1aaa39 to your computer and use it in GitHub Desktop.
Save sangheestyle/2b0c0338780d10c2770b6f5b5e1aaa39 to your computer and use it in GitHub Desktop.
Query geo position on ES

Query geo position on ElasticSearch

Install ES

This assumes you are on a Mac and use Homebrew. Elasticsearch requires Java 8, so run the following:

brew cask install homebrew/cask-versions/java8
brew search [email protected]
elasticsearch

Then open http://localhost:9200/ in your browser. If you see the following, you are done.

{
name: "fsny2mM",
cluster_name: "elasticsearch_sangheekim",
cluster_uuid: "M6W6pX6jTMeVLI3BaAG11w",
version: {
number: "6.4.2",
build_flavor: "oss",
build_type: "tar",
build_hash: "04711c2",
build_date: "2018-09-26T13:34:09.098244Z",
build_snapshot: false,
lucene_version: "7.4.0",
minimum_wire_compatibility_version: "5.6.0",
minimum_index_compatibility_version: "5.0.0"
},
tagline: "You Know, for Search"
}
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datetime import date\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"es = Elasticsearch()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# prepare index, doc type, and mappings\n",
"current_index = \"geo_cards\"\n",
"current_doc_type = \"geo_card\"\n",
"\n",
"mappings = {\n",
" \"mappings\": {\n",
" current_doc_type: {\n",
" \"properties\": {\n",
" \"text\": {\"type\": \"text\"},\n",
" \"created\": {\n",
" \"type\":\"date\",\n",
" \"format\": \"strict_date_optional_time||epoch_millis\"\n",
" },\n",
" \"pin\": {\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"geo_point\"\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'acknowledged': True, 'shards_acknowledged': True, 'index': 'geo_cards'}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create index with mappings\n",
"es.indices.create(index=current_index, body=mappings)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"incheon - daejeon: 137.102451597155\n",
"incheon - busan: 332.3640155092398\n",
"daejeon - busan: 201.40962795589516\n"
]
}
],
"source": [
"# prepare three different geo codes\n",
"from geopy.distance import geodesic\n",
"\n",
"# those are real geo codes from google maps\n",
"geo_incheon_city_hall = (37.4576112, 126.7000163)\n",
"geo_daejeon_city_hall = (36.350461, 127.38263)\n",
"geo_busan_city_hall = (35.1342236,129.0356489)\n",
"\n",
"# show distances\n",
"print(\"incheon - daejeon: \", geodesic(geo_incheon_city_hall, geo_daejeon_city_hall).km)\n",
"print(\"incheon - busan: \", geodesic(geo_incheon_city_hall, geo_busan_city_hall).km)\n",
"print(\"daejeon - busan: \", geodesic(geo_daejeon_city_hall, geo_busan_city_hall).km)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# ES needs a geo code in string (I am not sure)\n",
"def geo_string(geo):\n",
" return ','.join(str(x) for x in geo)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"37.4576112,126.7000163\n",
"36.350461,127.38263\n",
"35.1342236,129.0356489\n"
]
}
],
"source": [
"str_geo_incheon_city_hall = geo_string(geo_incheon_city_hall)\n",
"str_geo_daejeon_city_hall = geo_string(geo_daejeon_city_hall)\n",
"str_geo_busan_city_hall = geo_string(geo_busan_city_hall)\n",
"\n",
"print(str_geo_incheon_city_hall)\n",
"print(str_geo_daejeon_city_hall)\n",
"print(str_geo_busan_city_hall)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# 9 different geo cards with different geo codes and dates\n",
"docs = dict()\n",
"docs[\"incheon_doc1\"] = {\n",
" \"text\": \"incheon_doc1: date(2018, 1, 1)\",\n",
" \"created\": date(2018, 1, 1),\n",
" \"pin\": {\n",
" \"location\": str_geo_incheon_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"incheon_doc2\"] = {\n",
" \"text\": \"incheon_doc2: date(2018, 2, 1)\",\n",
" \"created\": date(2018, 2, 1),\n",
" \"pin\": {\n",
" \"location\": str_geo_incheon_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"incheon_doc3\"] = {\n",
" \"text\": \"incheon_doc3: date(2018, 3, 1)\",\n",
" \"created\": date(2018, 3, 1),\n",
" \"pin\": {\n",
" \"location\": str_geo_incheon_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"daejeon_doc1\"] = {\n",
" \"text\": \"daejeon_doc1: date(2018, 1, 3)\",\n",
" \"created\": date(2018, 1, 3),\n",
" \"pin\": {\n",
" \"location\": str_geo_daejeon_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"daejeon_doc2\"] = {\n",
" \"text\": \"daejeon_doc2: date(2018, 2, 3)\",\n",
" \"created\": date(2018, 2, 3),\n",
" \"pin\": {\n",
" \"location\": str_geo_daejeon_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"daejeon_doc3\"] = {\n",
" \"text\": \"daejeon_doc3: date(2018, 3, 3)\",\n",
" \"created\": date(2018, 3, 3),\n",
" \"pin\": {\n",
" \"location\": str_geo_daejeon_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"busan_doc1\"] = {\n",
" \"text\": \"busan_doc1: date(2018, 1, 2)\",\n",
" \"created\": date(2018, 1, 2),\n",
" \"pin\": {\n",
" \"location\": str_geo_busan_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"busan_doc2\"] = {\n",
" \"text\": \"busan_doc2: date(2018, 2, 2)\",\n",
" \"created\": date(2018, 2, 2),\n",
" \"pin\": {\n",
" \"location\": str_geo_busan_city_hall\n",
" }\n",
"}\n",
"\n",
"docs[\"busan_doc3\"] = {\n",
" \"text\": \"busan_doc3: date(2018, 3, 2)\",\n",
" \"created\": date(2018, 3, 2),\n",
" \"pin\": {\n",
" \"location\": str_geo_busan_city_hall\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# index(create) docs to the index\n",
"for doc in docs.values():\n",
" es.index(index=current_index, doc_type=current_doc_type, body=doc)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'took': 2,\n",
" 'timed_out': False,\n",
" '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},\n",
" 'hits': {'total': 9,\n",
" 'max_score': 1.0,\n",
" 'hits': [{'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'UPsfrWYBn8tW-Aw5FprT',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'incheon_doc1: date(2018, 1, 1)',\n",
" 'created': '2018-01-01',\n",
" 'pin': {'location': '37.4576112,126.7000163'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'V_sfrWYBn8tW-Aw5Fprw',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'busan_doc2: date(2018, 2, 2)',\n",
" 'created': '2018-02-02',\n",
" 'pin': {'location': '35.1342236,129.0356489'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'VfsfrWYBn8tW-Aw5Fpro',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'daejeon_doc3: date(2018, 3, 3)',\n",
" 'created': '2018-03-03',\n",
" 'pin': {'location': '36.350461,127.38263'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'WPsfrWYBn8tW-Aw5Fprz',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'busan_doc3: date(2018, 3, 2)',\n",
" 'created': '2018-03-02',\n",
" 'pin': {'location': '35.1342236,129.0356489'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'UfsfrWYBn8tW-Aw5FprY',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'incheon_doc2: date(2018, 2, 1)',\n",
" 'created': '2018-02-01',\n",
" 'pin': {'location': '37.4576112,126.7000163'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'U_sfrWYBn8tW-Aw5Fprj',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'daejeon_doc1: date(2018, 1, 3)',\n",
" 'created': '2018-01-03',\n",
" 'pin': {'location': '36.350461,127.38263'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'VPsfrWYBn8tW-Aw5Fprm',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'daejeon_doc2: date(2018, 2, 3)',\n",
" 'created': '2018-02-03',\n",
" 'pin': {'location': '36.350461,127.38263'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'VvsfrWYBn8tW-Aw5Fprt',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'busan_doc1: date(2018, 1, 2)',\n",
" 'created': '2018-01-02',\n",
" 'pin': {'location': '35.1342236,129.0356489'}}},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'UvsfrWYBn8tW-Aw5Fpre',\n",
" '_score': 1.0,\n",
" '_source': {'text': 'incheon_doc3: date(2018, 3, 1)',\n",
" 'created': '2018-03-01',\n",
" 'pin': {'location': '37.4576112,126.7000163'}}}]}}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# show all docs\n",
"search_body = {\n",
" 'size' : 10000,\n",
" 'query': {\n",
" 'match_all' : {}\n",
" }\n",
"}\n",
"\n",
"es.search(index=current_index, doc_type=current_doc_type, body = search_body)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" build search boy\n",
"\n",
" * query all docs\n",
" * filter by geo distance from incheon within 300km\n",
" * including daejeon not busan\n",
" * sort by created and geo distance from incheon\n",
" * you can reverse sort order\n",
" \n",
" reference:\n",
" * https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-point.html\n",
"\"\"\"\n",
"search_body = {\n",
" \"sort\": [\n",
" {\n",
" \"created\": {\n",
" \"order\": \"asc\"\n",
" }\n",
" },\n",
" {\n",
" \"_geo_distance\": {\n",
" \"pin.location\" : {\n",
" \"lat\" : 37.4576112,\n",
" \"lon\" : 126.7000163\n",
" },\n",
" \"order\": \"asc\",\n",
" \"unit\": \"km\",\n",
" \"mode\": \"min\",\n",
" \"distance_type\": \"arc\",\n",
" \"ignore_unmapped\": True\n",
" }\n",
" },\n",
" ],\n",
" \"query\": {\n",
" \"bool\" : {\n",
" \"must\" : {\n",
" \"match_all\" : {}\n",
" },\n",
" \"filter\" : {\n",
" \"geo_distance\" : {\n",
" \"distance\" : \"300km\",\n",
" \"pin.location\" : {\n",
" \"lat\" : 37.4576112,\n",
" \"lon\" : 126.7000163\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'took': 5,\n",
" 'timed_out': False,\n",
" '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},\n",
" 'hits': {'total': 6,\n",
" 'max_score': None,\n",
" 'hits': [{'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'UPsfrWYBn8tW-Aw5FprT',\n",
" '_score': None,\n",
" '_source': {'text': 'incheon_doc1: date(2018, 1, 1)',\n",
" 'created': '2018-01-01',\n",
" 'pin': {'location': '37.4576112,126.7000163'}},\n",
" 'sort': [1514764800000, 0.0]},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'U_sfrWYBn8tW-Aw5Fprj',\n",
" '_score': None,\n",
" '_source': {'text': 'daejeon_doc1: date(2018, 1, 3)',\n",
" 'created': '2018-01-03',\n",
" 'pin': {'location': '36.350461,127.38263'}},\n",
" 'sort': [1514937600000, 137.2574397592]},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'UfsfrWYBn8tW-Aw5FprY',\n",
" '_score': None,\n",
" '_source': {'text': 'incheon_doc2: date(2018, 2, 1)',\n",
" 'created': '2018-02-01',\n",
" 'pin': {'location': '37.4576112,126.7000163'}},\n",
" 'sort': [1517443200000, 0.0]},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'VPsfrWYBn8tW-Aw5Fprm',\n",
" '_score': None,\n",
" '_source': {'text': 'daejeon_doc2: date(2018, 2, 3)',\n",
" 'created': '2018-02-03',\n",
" 'pin': {'location': '36.350461,127.38263'}},\n",
" 'sort': [1517616000000, 137.2574397592]},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'UvsfrWYBn8tW-Aw5Fpre',\n",
" '_score': None,\n",
" '_source': {'text': 'incheon_doc3: date(2018, 3, 1)',\n",
" 'created': '2018-03-01',\n",
" 'pin': {'location': '37.4576112,126.7000163'}},\n",
" 'sort': [1519862400000, 0.0]},\n",
" {'_index': 'geo_cards',\n",
" '_type': 'geo_card',\n",
" '_id': 'VfsfrWYBn8tW-Aw5Fpro',\n",
" '_score': None,\n",
" '_source': {'text': 'daejeon_doc3: date(2018, 3, 3)',\n",
" 'created': '2018-03-03',\n",
" 'pin': {'location': '36.350461,127.38263'}},\n",
" 'sort': [1520035200000, 137.2574397592]}]}}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"es.search(index=current_index, doc_type=current_doc_type, body=search_body)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nReference:\\n\\n * https://elasticsearch-py.readthedocs.io/en/master/index.html\\n * https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\\n * https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-sort.html\\n'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"Reference:\n",
"\n",
" * https://elasticsearch-py.readthedocs.io/en/master/index.html\n",
" * https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\n",
" * https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-sort.html\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment