jaklinger · March 4, 2020 22:25
diff --git a/es_keyword_expansion.py b/es_keyword_expansion.py
 import requests
 import json

 def make_query(url, q, alg, field, shard_size=1000, size=25, timeout=None):
    """Get keywords relating to the input query, directly from Elasticsearch.

    Sends a ``match`` query for ``q`` on ``field`` to ``{url}/_search`` and
    requests a ``significant_text`` aggregation (named ``keywords``) nested
    inside a ``sampler`` aggregation (named ``my_sample``). Only the
    aggregation buckets are read from the response.

    Args:
        url (str): The Elasticsearch endpoint you want to query. A trailing
            slash is tolerated; ``/_search`` is appended.
        q (str): The query you want to retrieve keywords for.
        alg (str): An algorithm from https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html#_parameters
            It is sent as a key with empty settings (``{alg: {}}``).
        field (str): The name of the field in your Elasticsearch data to
            query. The same field is used for the match and the aggregation.
        shard_size (int): The shard sample size to build your keywords on.
            Basically big means slow but good.
        size (int): The number of results to return.
        timeout (float or tuple, optional): Forwarded to ``requests.post``.
            The default ``None`` waits indefinitely, as before.
    Returns:
        a list of keywords (the ``key`` of each returned bucket, in the
        order the buckets appear in the response)
    Raises:
        requests.HTTPError: If Elasticsearch answers with a 4xx/5xx status.
    """
    keywords_agg = {
        "significant_text": {
            "size": size,
            "field": field,
            alg: {},  # select the scoring heuristic by name, default settings
        }
    }
    query = {
        "query": {"match": {field: q}},
        "size": 0,  # no search hits requested; only the aggregation is used
        "aggregations": {
            "my_sample": {
                "sampler": {"shard_size": shard_size},
                "aggregations": {"keywords": keywords_agg},
            }
        },
    }
    response = requests.post(
        f"{url.rstrip('/')}/_search",  # avoid '//_search' for 'http://host/'
        data=json.dumps(query),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    # Fail with the HTTP error itself rather than a KeyError on 'aggregations'
    # when the error payload lacks that key.
    response.raise_for_status()
    buckets = response.json()['aggregations']['my_sample']['keywords']['buckets']
    return [row['key'] for row in buckets]

 # Demo: print each sample search term followed by the keywords returned for
 # it and a blank separator line. URL is not defined in this excerpt; it is
 # expected to be set elsewhere before this loop runs.
 for q in ('graphene', 'pandas python', 'meat', 'playstation', 'raspberry'):
    print(q)
    keywords = make_query(url=URL, q=q, alg='jlh', field='textBody_abstract_article')
    print(keywords, end='\n\n')
	import requests
	import json

	def make_query(url, q, alg, field, shard_size=1000, size=25, timeout=None):
	    """Get keywords relating to the input query, directly from Elasticsearch.

	    Sends a ``match`` query for ``q`` on ``field`` to ``{url}/_search`` and
	    requests a ``significant_text`` aggregation (named ``keywords``) nested
	    inside a ``sampler`` aggregation (named ``my_sample``). Only the
	    aggregation buckets are read from the response.

	    Args:
	        url (str): The Elasticsearch endpoint you want to query. A trailing
	            slash is tolerated; ``/_search`` is appended.
	        q (str): The query you want to retrieve keywords for.
	        alg (str): An algorithm from https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html#_parameters
	            It is sent as a key with empty settings (``{alg: {}}``).
	        field (str): The name of the field in your Elasticsearch data to
	            query. The same field is used for the match and the aggregation.
	        shard_size (int): The shard sample size to build your keywords on.
	            Basically big means slow but good.
	        size (int): The number of results to return.
	        timeout (float or tuple, optional): Forwarded to ``requests.post``.
	            The default ``None`` waits indefinitely, as before.
	    Returns:
	        a list of keywords (the ``key`` of each returned bucket, in the
	        order the buckets appear in the response)
	    Raises:
	        requests.HTTPError: If Elasticsearch answers with a 4xx/5xx status.
	    """
	    keywords_agg = {
	        "significant_text": {
	            "size": size,
	            "field": field,
	            alg: {},  # select the scoring heuristic by name, default settings
	        }
	    }
	    query = {
	        "query": {"match": {field: q}},
	        "size": 0,  # no search hits requested; only the aggregation is used
	        "aggregations": {
	            "my_sample": {
	                "sampler": {"shard_size": shard_size},
	                "aggregations": {"keywords": keywords_agg},
	            }
	        },
	    }
	    response = requests.post(
	        f"{url.rstrip('/')}/_search",  # avoid '//_search' for 'http://host/'
	        data=json.dumps(query),
	        headers={'Content-Type': 'application/json'},
	        timeout=timeout,
	    )
	    # Fail with the HTTP error itself rather than a KeyError on 'aggregations'
	    # when the error payload lacks that key.
	    response.raise_for_status()
	    buckets = response.json()['aggregations']['my_sample']['keywords']['buckets']
	    return [row['key'] for row in buckets]

	# Demo: print each sample search term followed by the keywords returned for
	# it and a blank separator line. URL is not defined in this excerpt; it is
	# expected to be set elsewhere before this loop runs.
	for q in ('graphene', 'pandas python', 'meat', 'playstation', 'raspberry'):
	    print(q)
	    keywords = make_query(url=URL, q=q, alg='jlh', field='textBody_abstract_article')
	    print(keywords, end='\n\n')