Skip to content

Instantly share code, notes, and snippets.

@versae
Last active December 15, 2015 14:29
Show Gist options
  • Save versae/5274430 to your computer and use it in GitHub Desktop.
Save versae/5274430 to your computer and use it in GitHub Desktop.
Playing with Parsley to generate grammars on the fly to parse graph queries written in a subset of natural language. The resulting structure can be transformed into Cypher or Gremlin without much effort.
import parsley
from pprint import pprint
class Types(dict):
    """Dict subclass that hands out a running counter per type name.

    The counters live in the separate ``types`` attribute, not in the
    dict contents themselves, and ``get`` is overridden to advance them.
    """

    def __init__(self, *args, **kwargs):
        super(Types, self).__init__(*args, **kwargs)
        # Maps a type name to the last counter value returned for it.
        self.types = {}

    def get(self, type, default=None):
        """Return the next counter value for ``type``.

        The first counted call for a name returns 0, the next 1, and so on.
        If ``default`` is truthy and ``type`` has never been counted,
        ``default`` is returned instead and nothing is recorded.
        """
        counters = self.types
        if type in counters or not default:
            # Unseen names start from -1 so that the first value is 0.
            counters[type] = counters.get(type, -1) + 1
            return counters[type]
        return default
# Shared counter object exposed to the grammar actions below; every matched
# entity or relation asks it for the next index to build a unique alias
# such as "Person0" or "Place0".
types = Types()

# Parsley grammar for a small natural-language query subset.
#
# Rule overview:
#   facet / conditions / op  - connective words and the comparison lookup
#                              ('startswith', 'endswith', 'gt', 'lt', with
#                              'iexact' as the fallback when none is given).
#   person*, place*          - entity names, their properties and values.
#   *_facet                  - "<properties> of the <entity> with <property>
#                              <op> <value>"; yields a dict with the keys
#                              'conditions', 'origin' and 'result'.
#   lives_in                 - the relation between a person and a place.
#   person_lives_in_place    - combines both facets into one query dict.
#   dict                     - entry rule used by callers: grammar(q).dict()
#
# Layout: every rule name starts at column 0 and the continuation lines of
# a multi-line rule are indented.  Within a choice, longer alternatives that
# share a prefix with a shorter one ('Places' / 'Place') are listed first,
# because the first alternative that matches wins.
grammar = parsley.makeGrammar("""
facet = (('with' | 'that' ws ('has' | 'have') | 'who' ws ('has' | 'have')) (ws ('a' | 'the'))?) -> ""
conditions = "and" -> "and"
           | "or" -> "or"
           | -> "and"
op = "that" ws "starts" ws "with" ws -> 'startswith'
   | "that" ws "ends" ws "with" ws -> 'endswith'
   | "greater" ws "than" ws -> 'gt'
   | "lower" ws "than" ws -> 'lt'
   | -> 'iexact'
person_value = ('James' | 'John')
person = ('Person' | 'People' | 'person' | 'people')
    -> {'type': 'Person', 'alias': "Person{0}".format(types.get('Person', 0))}
person_property = ('name' | 'names' | 'age' | 'ages')
person_properties = person_property:first (ws (',' | "and") ws person_property)*:rest
        -> [first] + rest
    | -> []
person_facet = (person_properties:r ws ("of" | "from") ws ("the" ws)?)? person:t ws facet ws person_property:p "of"? ws (op)?:f ws person_value:v ws
        -> {'conditions': [(f, ('property', t['alias'], p), v)], 'origin': t, 'result': {"alias": t['alias'], "properties": r}}
    | person_facet:left ws 'and' ws person_facet:right
        -> ('and', left, right)
    | person_facet:left ws 'or' ws person_facet:right
        -> ('or', left, right)
place_value = ('California' | 'Texas' | digit+:n -> int("".join(n)))
place = ('Places' | 'Place' | 'places' | 'place')
    -> {'type': 'Place', 'alias': "Place{0}".format(types.get('Place', 0))}
place_property = ('name' | 'names' | 'population' | 'populations')
place_properties = place_property:first (ws (',' | "and") ws place_property)*:rest
        -> [first] + rest
    | -> []
place_facet = (place_properties:r ws ("of" | "from") ws ("the" ws)?)? place:t ws facet ws place_property:p ws "of"? (op)?:f ws place_value:v ws
        -> {'conditions': [(f, ('property', t['alias'], p), v)], 'origin': t, 'result': {"alias": t['alias'], "properties": r}}
    | place_facet:left ws 'and' ws place_facet:right
        -> ('and', left, right)
    | place_facet:left ws 'or' ws place_facet:right
        -> ('or', left, right)
lives_in = ("lives in" | "live in")
    -> {'type': 'LivesIn', 'alias': "LivesIn{0}".format(types.get('LivesIn', 0))}
person_lives_in_place = person_facet:source (conditions:cond)? (ws ("who" | "that")?)? ws lives_in:rel ws ("a" | "the")? ws place_facet:target
    -> {
        "origin": [source["origin"], target["origin"]],
        "pattern": {"source": source["origin"], "target": target["origin"], "relation": rel},
        "conditions": source["conditions"] + target["conditions"],
        "result": [source["result"], target["result"]],
    }
dict = person_lives_in_place
""", {"types": types})
# Maps each supported lookup name to a pair of
# (comparison operator, format template applied to the match value).
# Patterns used with the regex operator "=~" are emitted as quoted string
# literals, the same way "exact" and "iexact" quote their operand; the
# numeric comparisons leave the value bare.
_LOOKUP_TEMPLATES = {
    "exact": (u"=", u"'{0}'"),
    "iexact": (u"=~", u"'(?i){0}'"),
    "contains": (u"=~", u"'.*{0}.*'"),
    "icontains": (u"=~", u"'(?i).*{0}.*'"),
    "startswith": (u"=~", u"'{0}.*'"),
    "istartswith": (u"=~", u"'(?i){0}.*'"),
    "endswith": (u"=~", u"'.*{0}'"),
    "iendswith": (u"=~", u"'(?i).*{0}'"),
    "regex": (u"=~", u"'{0}'"),
    "iregex": (u"=~", u"'(?i){0}'"),
    "gt": (u">", u"{0}"),
    "gte": (u">=", u"{0}"),
    "lt": (u"<", u"{0}"),
    "lte": (u"<=", u"{0}"),
}


def query_generator(query_dict):
    """Render a parsed query dictionary as a ``START ... WHERE ... RETURN`` string.

    Args:
        query_dict: mapping with three keys that are read here:
            ``"conditions"`` - iterable of ``(lookup, property_tuple, match)``
            where ``property_tuple`` is ``(kind, alias, property_name)``;
            ``"origin"`` - iterable of dicts with ``"alias"`` and ``"type"``;
            ``"result"`` - iterable of dicts with ``"alias"`` and
            ``"properties"`` (a list of property names to return).

    Returns:
        The query as a unicode string.  Conditions are joined with ``AND``.

    Raises:
        KeyError: if one of the keys listed above is missing.

    Note:
        Match values are interpolated as-is (no escaping of quotes), so this
        is only suitable for values already constrained by the caller.
    """
    conditions_list = []
    for lookup, property_tuple, match in query_dict["conditions"]:
        # property_tuple[0] is the kind tag; only alias and name are rendered.
        type_property = u"{0}.{1}".format(*property_tuple[1:])
        # Unknown lookups are passed through verbatim with an empty operand.
        operator, template = _LOOKUP_TEMPLATES.get(lookup, (lookup, u""))
        conditions_list.append(
            u"{0} {1} {2}".format(type_property, operator,
                                  template.format(match))
        )
    conditions = u" AND ".join(conditions_list)

    origins = u", ".join(
        u"{alias}=node(\"label:{type}\")".format(**origin_dict)
        for origin_dict in query_dict["origin"]
    )

    results = u", ".join(
        u"{0}.{1}".format(result_dict["alias"], property_name)
        for result_dict in query_dict["result"]
        for property_name in result_dict["properties"]
    )
    return u"START {0} WHERE {1} RETURN {2}".format(origins, conditions,
                                                    results)
# Example run: parse one natural-language query into its intermediate
# structure, show that structure, then show the generated query string.
query = 'age of the person who has a name that starts with John and lives in a place with the name of California'
query_dict = grammar(query).dict()
pprint(query_dict)
# Expected shape of the parsed structure.  The place condition carries no
# explicit comparison, so it gets the fallback lookup of the grammar's `op`
# rule, 'iexact'.
# {'conditions': [('startswith', ('property', 'Person0', 'name'), 'John'),
#                 ('iexact', ('property', 'Place0', 'name'), 'California')],
#  'origin': [{'alias': 'Person0', 'type': 'Person'},
#             {'alias': 'Place0', 'type': 'Place'}],
#  'pattern': {'relation': {'alias': 'LivesIn0', 'type': 'LivesIn'},
#              'source': {'alias': 'Person0', 'type': 'Person'},
#              'target': {'alias': 'Place0', 'type': 'Place'}},
#  'result': [{'alias': 'Person0', 'properties': ['age']},
#             {'alias': 'Place0', 'properties': []}]}
# Print the generated query; calling query_generator() without using its
# return value shows nothing when the file is run as a script.
print(query_generator(query_dict))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment