Last active
March 4, 2020 22:25
-
-
Save jaklinger/6a644956f32e3e8b0d5e41c543ee49e1 to your computer and use it in GitHub Desktop.
How to get keyword expansion using Elasticsearch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
def make_query(url, q, alg, field, shard_size=1000, size=25, timeout=None):
    """Get keywords relating to the input query, directly from Elasticsearch.

    Sends a ``match`` query for ``q`` on ``field`` to ``{url}/_search`` and asks
    for a ``significant_text`` aggregation on the same field, nested inside a
    ``sampler`` aggregation. The keys of the returned buckets are the keywords.

    Args:
        url (str): The Elasticsearch endpoint you want to query; ``/_search``
            is appended to it.
        q (str): The query you want to retrieve keywords for.
        alg (str): An algorithm from https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html#_parameters
            It is inserted as a key with empty settings (``{}``) in the
            ``significant_text`` body.
        field (str): The name of the field in your Elasticsearch data to query.
        shard_size (int): The shard sample size to build your keywords on.
            Basically big means slow but good.
        size (int): The number of results to return.
        timeout (float or tuple, optional): Passed straight to
            ``requests.post``. The default ``None`` keeps the previous
            behaviour of not setting any timeout.

    Returns:
        list: The ``key`` of every bucket in the ``keywords`` aggregation, in
        the order Elasticsearch returned them.

    Raises:
        requests.HTTPError: If Elasticsearch answers with a 4xx/5xx status.
        KeyError: If the response body lacks the expected aggregation path.
    """
    query = {
        "query": {"match": {field: q}},
        "size": 0,  # no hits are requested, only the aggregation output
        "aggregations": {
            "my_sample": {
                "sampler": {"shard_size": shard_size},
                "aggregations": {
                    "keywords": {
                        "significant_text": {
                            "size": size,
                            "field": field,
                            alg: {},
                        }
                    }
                },
            }
        },
    }
    response = requests.post(
        f'{url}/_search',
        data=json.dumps(query),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    # Fail with the real HTTP error instead of a confusing KeyError further
    # down when Elasticsearch rejects the request.
    response.raise_for_status()
    buckets = response.json()['aggregations']['my_sample']['keywords']['buckets']
    return [row['key'] for row in buckets]
# Demo: print the expanded keywords for a handful of example queries, one
# query per paragraph of output.
# NOTE(review): URL is not defined in the visible part of this file; it has to
# be set to the Elasticsearch endpoint before this loop runs.
example_queries = ('graphene', 'pandas python', 'meat', 'playstation', 'raspberry')
for term in example_queries:
    # Echo the query first so it is shown even if the request below fails.
    print(term)
    keywords = make_query(url=URL, q=term, alg='jlh', field='textBody_abstract_article')
    print(keywords)
    print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment