Last active
December 15, 2015 14:29
-
-
Save versae/5274430 to your computer and use it in GitHub Desktop.
Playing with Parsley to generate grammars on the fly to parse graph queries in a subset of Natural Language.
The resulting structure can be transformed into Cypher or Gremlin without much effort.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pprint import pprint

import parsley
class Types(dict):
    """Per-type occurrence counter used to number aliases.

    Although this subclasses ``dict``, the counters are kept in the separate
    ``self.types`` mapping, and ``get`` is overridden with counter semantics
    instead of the usual ``dict.get`` lookup.
    """

    def __init__(self, *args, **kwargs):
        super(Types, self).__init__(*args, **kwargs)
        # Maps a type name to the last index handed out for it.
        self.types = dict()

    def get(self, type, default=None):
        """Return the next zero-based index for ``type``.

        The first call for a given ``type`` returns 0, the next 1, and so on.

        :param type: name of the type being counted (the parameter name
            shadows the builtin but is kept for keyword-call compatibility).
        :param default: when truthy and ``type`` has not been counted yet,
            it is returned as-is and nothing is recorded. A falsy value
            (``None`` or ``0``) never short-circuits, so
            ``get("Person", 0)`` still counts.
        :returns: ``default`` in the short-circuit case, otherwise the new
            index for ``type``.
        """
        if default and type not in self.types:
            return default
        index = self.types.get(type, -1) + 1
        self.types[type] = index
        return index
# Shared alias counter; the grammar actions below call ``types.get(...)`` to
# number each matched entity ("Person0", "Place0", ...).
types = Types()

# Parsley grammar for a tiny natural-language subset describing
# "<person facet> lives in <place facet>" queries. The entry rule is ``dict``
# (invoked as ``grammar(text).dict()``).
#
# Rule names start at column 0 and continuation lines are indented.
#
# Fixes relative to the previous revision:
#   * ``place_facet`` now uses ``place_properties`` (it used
#     ``person_properties``, leaving ``place_properties`` unused).
#   * ``place`` listed 'places' twice; the first is now 'Places', and the
#     longer alternatives come first so the ordered choice can reach them.
#   * ``place_value`` requires at least one digit (``digit+``); ``digit*``
#     could match nothing and then evaluate ``int("")``.
#
# NOTE(review): ``conditions`` maps "or" to "and"; the bound value is not
# used by ``person_lives_in_place``, so it is left as-is -- confirm intent.
grammar = parsley.makeGrammar("""
facet = (('with' | 'that' ws ('has' | 'have') | 'who' ws ('has' | 'have')) (ws ('a' | 'the'))?) -> ""
conditions = "and" -> "and"
    | "or" -> "and"
    | -> "and"
op = "that" ws "starts" ws "with" ws -> 'startswith'
    | "that" ws "ends" ws "with" ws -> 'endswith'
    | "greater" ws "than" ws -> 'gt'
    | "lower" ws "than" ws -> 'lt'
    | -> 'iexact'
person_value = ('James' | 'John')
person = ('Person' | 'People' | 'person' | 'people')
    -> {'type': 'Person', 'alias': "Person{0}".format(types.get('Person', 0))}
person_property = ('name' | 'names' | 'age' | 'ages')
person_properties = person_property:first (ws (',' | "and") ws person_property)*:rest
    -> [first] + rest
    | -> []
person_facet = (person_properties:r ws ("of" | "from") ws ("the" ws)?)? person:t ws facet ws person_property:p "of"? ws (op)?:f ws person_value:v ws
    -> {'conditions': [(f, ('property', t['alias'], p), v)], 'origin': t, 'result': {"alias": t['alias'], "properties": r}}
    | person_facet:left ws 'and' ws person_facet:right
    -> ('and', left, right)
    | person_facet:left ws 'or' ws person_facet:right
    -> ('or', left, right)
place_value = ('California' | 'Texas' | digit+:n -> int("".join(n)))
place = ('Places' | 'Place' | 'places' | 'place')
    -> {'type': 'Place', 'alias': "Place{0}".format(types.get('Place', 0))}
place_property = ('name' | 'names' | 'population' | 'populations')
place_properties = place_property:first (ws (',' | "and") ws place_property)*:rest
    -> [first] + rest
    | -> []
place_facet = (place_properties:r ws ("of" | "from") ws ("the" ws)?)? place:t ws facet ws place_property:p ws "of"? (op)?:f ws place_value:v ws
    -> {'conditions': [(f, ('property', t['alias'], p), v)], 'origin': t, 'result': {"alias": t['alias'], "properties": r}}
    | place_facet:left ws 'and' ws place_facet:right
    -> ('and', left, right)
    | place_facet:left ws 'or' ws place_facet:right
    -> ('or', left, right)
lives_in = ("lives in" | "live in")
    -> {'type': 'LivesIn', 'alias': "LivesIn{0}".format(types.get('LivesIn', 0))}
person_lives_in_place = person_facet:source (conditions:cond)? (ws ("who" | "that")?)? ws lives_in:rel ws ("a" | "the")? ws place_facet:target
    -> {
        "origin": [source["origin"], target["origin"]],
        "pattern": {"source": source["origin"], "target": target["origin"], "relation": rel},
        "conditions": source["conditions"] + target["conditions"],
        "result": [source["result"], target["result"]],
    }
dict = person_lives_in_place
""", {"types": types})
# Lookup name -> template that turns the raw match value into a regular
# expression for Cypher's ``=~`` operator.
_REGEX_LOOKUPS = {
    "iexact": u"(?i){0}",
    "contains": u".*{0}.*",
    "icontains": u"(?i).*{0}.*",
    "startswith": u"{0}.*",
    "istartswith": u"(?i){0}.*",
    "endswith": u".*{0}",
    "iendswith": u"(?i).*{0}",
    "regex": u"{0}",
    "iregex": u"(?i){0}",
}

# Lookup name -> comparison operator; the match value is emitted unquoted.
_COMPARISON_LOOKUPS = {
    "gt": u">",
    "gte": u">=",
    "lt": u"<",
    "lte": u"<=",
}


def _render_lookup(lookup, match):
    """Return the ``(operator, operand)`` text pair for one condition."""
    if lookup == "exact":
        return u"=", u"'{0}'".format(match)
    if lookup in _REGEX_LOOKUPS:
        pattern = _REGEX_LOOKUPS[lookup].format(match)
        # The regex operand must be a quoted string literal, exactly like
        # the "iexact" case always was.
        return u"=~", u"'{0}'".format(pattern)
    if lookup in _COMPARISON_LOOKUPS:
        return _COMPARISON_LOOKUPS[lookup], u"{0}".format(match)
    # Unknown lookups are passed through verbatim with an empty operand.
    return lookup, u""


def query_generator(query_dict):
    """Render a parsed query structure as a ``START ... WHERE ... RETURN`` string.

    :param query_dict: mapping with three keys that are read here:

        * ``"conditions"`` -- iterable of ``(lookup, property_tuple, match)``
          where ``property_tuple[1:]`` is ``(alias, property_name)``;
        * ``"origin"`` -- iterable of dicts with ``"alias"`` and ``"type"``;
        * ``"result"`` -- iterable of dicts with ``"alias"`` and a
          ``"properties"`` list.
    :returns: the query text. Conditions are joined with ``AND``; origins
        and returned properties are comma separated.

    Supported lookups: ``exact``, the regex family (``iexact``,
    ``[i]contains``, ``[i]startswith``, ``[i]endswith``, ``[i]regex``) and
    the comparisons ``gt``, ``gte``, ``lt``, ``lte``. Any other lookup name
    is emitted as-is with no operand.
    """
    conditions_list = []
    for lookup, property_tuple, match in query_dict["conditions"]:
        # Skip the leading tag of the tuple; render "<alias>.<property>".
        type_property = u"{0}.{1}".format(*property_tuple[1:])
        operator, operand = _render_lookup(lookup, match)
        conditions_list.append(
            u"{0} {1} {2}".format(type_property, operator, operand)
        )
    conditions = u" AND ".join(conditions_list)

    origins = u", ".join(
        u"{alias}=node(\"label:{type}\")".format(**origin_dict)
        for origin_dict in query_dict["origin"]
    )

    results = u", ".join(
        u"{0}.{1}".format(result_dict["alias"], property_name)
        for result_dict in query_dict["result"]
        for property_name in result_dict["properties"]
    )

    return u"START {0} WHERE {1} RETURN {2}".format(origins, conditions, results)
# Demo: parse one sentence, show the intermediate structure, then show the
# query generated from it.
query = (
    'age of the person who has a name that starts with John '
    'and lives in a place with the name of California'
)
query_dict = grammar(query).dict()
pprint(query_dict)
# Expected structure, as recorded by the original author.
# NOTE(review): the recorded output showed 'equals' for the second condition,
# but the grammar's ``op`` rule falls back to 'iexact' -- confirm by running.
# {'conditions': [('startswith', ('property', 'Person0', 'name'), 'John'),
#                 ('iexact', ('property', 'Place0', 'name'), 'California')],
#  'origin': [{'alias': 'Person0', 'type': 'Person'},
#             {'alias': 'Place0', 'type': 'Place'}],
#  'pattern': {'relation': {'alias': 'LivesIn0', 'type': 'LivesIn'},
#              'source': {'alias': 'Person0', 'type': 'Person'},
#              'target': {'alias': 'Place0', 'type': 'Place'}},
#  'result': [{'alias': 'Person0', 'properties': ['age']},
#             {'alias': 'Place0', 'properties': []}]}
# The generated query was previously computed and discarded; print it so the
# script actually shows its result.
print(query_generator(query_dict))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment