Last active
August 11, 2019 17:41
-
-
Save jarble/3eb4abaea6c7b4cbcf4bdafc5a6403fa to your computer and use it in GitHub Desktop.
This Python script generates a "pseudocode parser" for the Nearley parser generator. It's based on an adaptive parsing algorithm or "adaptive grammar."
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import copy | |
import sys | |
import subprocess | |
def matches_pattern(string,pattern): | |
if len(string) != len(pattern): | |
return False | |
for i in range(0,len(string)): | |
if is_expr_(pattern[i]): | |
for j in range(0,len(string)): | |
if pattern[i] == pattern[j] and string[i] != string[j]: | |
return False | |
elif string[i] != pattern[i]: | |
return False | |
return True | |
def rewrite_rule(input_string,input_pattern,output_pattern): | |
output_string = copy.deepcopy(output_pattern) | |
if not matches_pattern(input_string,input_pattern): | |
return False | |
for i in range(0, len(input_pattern)): | |
if input_pattern[i][0].islower() and input_pattern[i] != input_string[i]: | |
return False | |
else: | |
for j in range(0,len(output_pattern)): | |
if output_pattern[j] == input_pattern[i]: | |
output_string[j] = input_string[i] | |
return output_string | |
def replace_matching_subsequence(input_string,input_pattern,output_pattern): | |
output_string = copy.deepcopy(input_string) | |
for i in range(0,len(input_string)-len(input_pattern)+1): | |
output_substring = rewrite_rule(input_string[i:len(input_pattern)+i],input_pattern,output_pattern) | |
if output_substring != False: | |
if type_of(output_substring) != None: | |
output_string[i : i+len(input_pattern)] = [output_substring] | |
else: | |
output_string[i : i+len(input_pattern)] = output_substring | |
return output_string | |
return False | |
def matches_patterns(a,patterns): | |
for pattern in patterns: | |
if matches_pattern(a,pattern): | |
return True | |
return False | |
types = {} | |
def simplify_pattern(pattern): | |
if type(pattern) == list: | |
return pattern | |
output = [] | |
for current in pattern.split(" "): | |
if current == ":number": | |
output += [is_number] | |
elif current[0] == ":" and (current[1:len(current)] in ["bool","string","set","list","imperative"]): | |
output += [is_type_pattern(current[1:len(current)])] | |
elif current == ":expr": | |
output += [is_expr] | |
else: | |
output += [current] | |
return output | |
def simplify_patterns(patterns): | |
return [simplify_pattern(x) for x in [b.strip() for b in patterns.strip().split("\n")]] | |
def is_expr_(a): | |
if type(a) == str and (a[0].isupper()): | |
if not a in types: | |
types[a] = "expr" | |
return True | |
return False | |
def is_expr(a): | |
return type_of(a) != None | |
def matches_type_pattern(a,pattern): | |
if(len(a) != len(pattern)): | |
return False | |
elif type(a) == str and a == pattern: | |
return True | |
for i in range(0,len(a)): | |
if(type(pattern[i]) == str and pattern[i] != a[i] or callable(pattern[i]) and not pattern[i](a[i])): | |
return False | |
return True | |
global type_patterns | |
type_patterns = {} | |
def matches_type_patterns(a,patterns): | |
for pattern in patterns: | |
if matches_type_pattern(a,pattern): | |
return True | |
return False | |
def is_number(a): | |
global type_patterns | |
if (type(a) == str and (a.isdigit() or a[0].isupper())) or matches_type_patterns(a,type_patterns["number"]): | |
if(type(a) == str): | |
types[a] = "number" | |
return True | |
else: | |
return False | |
def is_type_pattern(the_type): | |
def to_return(a): | |
global type_patterns | |
if ( | |
is_expr_(a) | |
or matches_type_patterns(a,type_patterns[the_type]) | |
): | |
if(type(a) == str): | |
types[a] = the_type | |
return True | |
else: | |
return False | |
return to_return | |
type_patterns["list"] = simplify_patterns(''' | |
:expr in :list | |
[ :expr for :expr in :list ] | |
[ :expr for :expr in :list if :bool ] | |
type of :expr | |
dot product of :list and :list | |
cross product of :list and :list | |
determinant of :list | |
largest number in :list | |
smallest number in :list | |
list ( itertools . permutations ( :list ) ) | |
:list [ :: -1 ] | |
sorted ( :list ) | |
:list + :list | |
''') | |
type_patterns["number"] = simplify_patterns(''' | |
- :number | |
Math . PI | |
:number % :number | |
:number + :number | |
:number - :number | |
:number * :number | |
:number / :number | |
:number ** :number | |
maximize :number subject to :bool | |
absolute value of :number | |
sqrt ( :number ) | |
cbrt :number | |
diff ( :number , :number ) | |
integral of :number with respect to :number | |
integral of :number | |
integral of :number from :number to :number | |
integral of :number with respect to :number from :number to :number | |
greatest common factor of :number and :number | |
all solutions of :number | |
sin ( :number ) | |
cos ( :number ) | |
tan ( :number ) | |
ln :number | |
limit of :number as :number -> :number | |
domain of the function :number | |
range of the function :number | |
a random number between :number and :number | |
distance from :list to :list | |
''') | |
type_patterns["bool"] = simplify_patterns(''' | |
:number == :number | |
forall :expr ( :bool ) | |
:bool == :bool | |
:list == :list | |
:set == :set | |
:number != :number | |
:bool != :bool | |
:list != :list | |
:set != :set | |
:number > :number | |
:number < :number | |
:number >= :number | |
:number <= :number | |
:number is a number | |
:number is a string | |
:number is a list | |
:bool or :bool | |
:bool and :bool | |
issubsetof ( :set , :set ) | |
there exists :expr such that :bool | |
true | |
false | |
''') | |
type_patterns["set"] = simplify_patterns(''' | |
intersection of :set and :set | |
union of :set and :set''') | |
type_patterns["string"] = simplify_patterns(''' | |
substring of :string from :number to :number | |
longest match of the regex :string in the string :string | |
:string is a substring of :string | |
:string matches the regex :string | |
:replace :string in the string :string with :string''') | |
type_patterns["imperative"] = simplify_patterns(''' | |
console . log ( :expr ) ; | |
while ( :bool ) { :imperative } | |
var :expr = :expr ; | |
:expr = :expr ; | |
input ( :string ) ; | |
return :expr ; | |
if :bool then :imperative | |
if :bool then :imperative else :imperative | |
''') | |
def pattern_to_string(pattern): | |
str1 = "" | |
num = 0 | |
for i in pattern: | |
if(callable(i)): | |
num += 1 | |
str1 += " A" + str(num) | |
else: | |
str1 += " " + i | |
return str1 + " --> " + str1 | |
rewrite_rule_string = "" | |
for current in type_patterns.values(): | |
for i in current: | |
rewrite_rule_string += "\n" + pattern_to_string(i) | |
global rewrite_rules | |
global synonyms | |
synonyms = [] | |
rewrite_rules = [[list(filter(None,i.split(" "))),list(filter(None,j.split(" ")))] for [i,j] in [a.split("-->") for a in [b.strip() for b in | |
(rewrite_rule_string+''' | |
first A letters of B --> substring of B from 0 to A | |
A is a substring of B --> A is a substring of B | |
A is a superset of B --> A is a subset of B | |
A contains B --> B is in A | |
the string A contains the string B --> B is a substring of A | |
A is more than B --> A > B | |
A and B --> A and B | |
A squared --> A to the power of 2 | |
A cubed --> A to the power of 2 | |
although --> though | |
greater --> more | |
but --> and | |
A though B --> A but B | |
A is greater than or equal to B --> A >= B | |
A is not equal to B --> A != B | |
A is less than or equal to B --> A <= B | |
A divided by B --> A / B | |
A is less than B --> B > A | |
A is C than B and D than E --> ( A is C than B ) and ( A is D than E ) | |
( A is C than B ) and D than E --> ( A is C than B ) and ( A is D than E ) | |
A minus B --> A - B | |
sum of A and B --> A plus B | |
A is not more than B --> A <= B | |
A is not less than B --> A >= B | |
A is not one of B --> A is not in B | |
A is not in B --> (A is in B) == false | |
( A ) --> A | |
∞ --> infinity | |
sum of A and B --> A + B | |
A must be B --> A == B | |
A must not be B --> A != B | |
A cannot be B --> A != B | |
the A --> A | |
A is not B --> ( A != B ) | |
A is neither B nor C --> ( A is not B ) and ( A is not C ) | |
A is not a B --> ( A is a B ) == false | |
A multiplied by B --> A * B | |
A to the power of B --> A ** B | |
[ A for A in B where D ] --> [ A for A in B if D ] | |
[ A in B where D ] --> [ A for A in B where D ] | |
every number in L that is greater than B --> [ a in B where a > B ] | |
all permutations of X --> list ( itertools . permutations ( X ) ) | |
every permutation of X --> all permutations of X | |
product of A and B --> A * B | |
A mod B --> A % B | |
A is divisible by B --> ( A % B ) == 0 | |
A is not X by Y --> ( A is X by Y ) == false | |
A is not a number --> A is not a number | |
if A then B otherwise C --> if A then B else C | |
not A --> ( A == false ) | |
unless A then B otherwise C --> if ( not A ) then B else C | |
A is a match of the regex B --> A matches the regex B | |
the string A matches the regex B --> A is a match of the regex B | |
A are in B --> A is a subset of B | |
plus --> + | |
A is not a B of C --> ( A is a B of C ) == false | |
none of A are in B --> A is not a subset of B | |
derivative of A with respect to B --> diff ( A , B ) | |
A percent of B --> A * 0.10 * B | |
A does not equal B --> A != B | |
A ≠ B --> A != B | |
A if B --> if B then A | |
B implies A --> A if B | |
A unless B --> if ( not A ) then B | |
distance between A and B --> distance from A to B | |
sin A --> sin ( A ) | |
cos A --> cos ( A ) | |
tan A --> tan ( A ) | |
sine of A --> sin A | |
cosine of A --> cos A | |
tangent of A --> tan A | |
cotangent of A --> 1 / ( tan A ) | |
secant of A --> 1 / ( cos A ) | |
secant of A --> 1 / ( cos A ) | |
A = B --> A == B | |
=/= --> != | |
A ÷ B --> A / B | |
A % of B --> A percent of B | |
quotient of A and B --> A / B | |
A iff B --> ( A implies B ) and ( B implies A ) | |
⇔ --> iff | |
[ A | B <- C ] --> [ A for B in C ] | |
| A | --> absolute value of A | |
square root of A --> sqrt ( A ) | |
cube root of A --> cbrt A | |
^ --> ** | |
one --> 1 | |
two --> 2 | |
three --> 3 | |
four --> 4 | |
five --> 5 | |
six --> 6 | |
seven --> 7 | |
eight --> 8 | |
nine --> 9 | |
ten --> 10 | |
minimize A subject to B --> maximize A subject to - ( B ) | |
maximize A so that B --> maximize A subject to B | |
maximize A such that B --> maximize A so that B | |
maximize A where B --> maximize A so that B | |
A ∪ B --> union of A and B | |
A ∩ B --> intersection of A and B | |
A || B --> A or B | |
A && B --> A and B | |
larger --> greater | |
largest --> greatest | |
maximum --> greatest | |
minimum --> smallest | |
natural logarithm of A --> ln A | |
regex --> regular expression | |
¬ A --> not A | |
A ≥ B --> A >= B | |
A ≤ B --> A <= B | |
A ⊃ B --> A is a superset of B | |
A ⊂ B --> A is a subset of B | |
A is a subset of B --> issubsetof ( A , B ) | |
iff --> if and only if | |
A is implied by B --> B implies A | |
∀ A ( B ) --> forall A ( B ) | |
↔ --> iff | |
A if B --> if B then A | |
A ∨ B --> A or B | |
∃ A ( B ) --> there exists A such that B | |
if ( A ) { B } --> if A then B | |
A is any of B --> A is in B | |
A is in B --> A in B | |
A equals B --> A == B | |
A times B --> A * B | |
A is a factor of B --> B is divisible by A | |
longest substring of A that matches the regex B --> longest match of the regex B in the string A | |
A is B --> A == B | |
A reversed --> A [ :: -1 ] | |
A spelled backwards --> A reversed | |
A sorted --> sorted ( A ) | |
A in alphabetical order --> sorted ( A ) | |
until A { B } --> while ( A == false ) { B } | |
A while B --> while ( B ) { A } | |
! A --> A == false | |
A := B --> A = B ; | |
set A to B --> A := B | |
initialize A to B --> var A = B ; | |
A += B --> A := A + B | |
A -= B --> A := A - B | |
A *= B --> A := A * B | |
A /= B --> A := A / B | |
add A to B --> B += A | |
divide B by A --> B /= A | |
subtract B from A --> A -= B | |
multiply A by B --> A *= B | |
let A = B --> initialize A to B | |
let A be B --> let A = B | |
A until B --> until B { A } | |
print A --> console . log ( A ) ; | |
π --> Math . PI | |
pi --> π | |
if A { B } --> if A then B | |
while A { B } --> while ( A ) { B } | |
while A do B end --> while A { B } | |
if A then B end --> if A { B } | |
A <- 1 --> A := 1 | |
while A < B do C endwhile --> while A < B do C end | |
''').strip().split("\n")]]] | |
for current in rewrite_rules: | |
if len(current[0]) == 1 and current[0] != current[1]: | |
is_added = False | |
if is_added == False: | |
synonyms += [[current[0],current[1]]] | |
#print(synonyms) | |
def token_with_whitespace(a): | |
return a[0] == "\"" and a[1].isalpha() or a[0].isalpha() | |
def join_tokens(token_list): | |
result = "" | |
for i in range(0,len(token_list)): | |
if i == 0: | |
result += token_list[i] | |
elif token_with_whitespace(token_list[i]) and token_with_whitespace(token_list[i-1]): | |
result += " __ " + token_list[i] | |
else: | |
result += " _ " + token_list[i] | |
return result | |
def get_synonyms_list(synonym): | |
global synonyms | |
to_return = [[synonym]] | |
changed = True | |
while changed: | |
changed = False | |
for current in synonyms: | |
if current[1] in to_return and not current[0] in to_return: | |
to_return += [current[0]] | |
changed = True | |
elif current[0] in to_return and not current[1] in to_return: | |
to_return += [current[1]] | |
changed = True | |
to_return = [["\""+b+"\"" for b in a] for a in to_return] | |
if len(to_return) == 1: | |
return to_return[0][0] | |
#print(to_return) | |
return "("+" | ".join([join_tokens(a) for a in to_return])+")" | |
#print(get_synonyms_list("though")) | |
def recursive_rewrite(input_string): | |
global rewrite_rules | |
outputs = [input_string] | |
changed = True | |
while changed: | |
changed = False | |
for current in outputs: | |
new_outputs = [] | |
for [a,b] in rewrite_rules: | |
output = replace_matching_subsequence(current,a,b) | |
if output != False and output not in outputs and output not in new_outputs: | |
new_outputs += [output] | |
changed = True | |
if(len(output) == 1 and type_of(output[0]) != None): | |
return [output] | |
outputs = new_outputs | |
return [] | |
index_of = {} | |
def unparse(text): | |
if type(text) == str: | |
if text.isupper(): | |
return index_of[text] | |
else: | |
return "\""+text+"\""; | |
if type(text) == list: | |
return "("+" + \" \" + ".join([unparse(a) for a in text])+")" | |
else: | |
raise Exception("Cannot parse "+str(text)) | |
def type_of(a): | |
if is_expr_(a): | |
return "expr" | |
elif is_number(a): | |
return "number" | |
else: | |
for current in type_patterns.keys(): | |
if not current in ["number","expr"] and is_type_pattern(current)(a): | |
return current | |
def print_grammar_rule(split_text): | |
rewritten = recursive_rewrite(split_text) | |
type_of(rewritten) | |
if rewritten == []: | |
return "" | |
input_pattern = [] | |
for i in range(0,len(split_text)): | |
if is_expr_(split_text[i]): | |
index_of[split_text[i]] = "d["+str((i+1)*2-2)+"]" | |
input_pattern += [types[split_text[i]]] | |
else: | |
input_pattern += [get_synonyms_list(split_text[i])] | |
return type_of(rewritten[0][0])+" -> "+join_tokens(input_pattern)+"{% function(d) {return "+unparse(rewritten[0][0])+";}%}\n" | |
nearley_output_string = ''' | |
start -> _ (statements | expr) _ {% function(d) {return d[1][0]; } %} | |
imperative -> "(" _ statements _ ")" {% function(d) {return d[2];}%} | |
statements -> imperative | statements ( _ | _ (";" | "and" | "and" __ "then") _ ) imperative {% function(d) {return d[0] + d[2]; } %} | |
expr --> (bool|number|list|set|string) {% function(d) {return d[0]; } %} | |
set -> _name {% function(d) {return d[0]; } %} | "(" _ set _ ")" {% function(d) {return "("+d[2]+")";}%} | |
list -> _name {% function(d) {return d[0]; } %} | "(" _ list _ ")" {% function(d) {return "("+d[2]+")";}%} | |
number -> _name {% function(d) {return d[0]; } %} | "(" _ number _ ")" {% function(d) {return "("+d[2]+")";}%} | |
bool -> _name {% function(d) {return d[0]; } %} | "(" _ bool _ ")" {% function(d) {return "("+d[2]+")";}%} | |
string -> _name {% function(d) {return d[0]; } %} | "(" _ string _ ")" {% function(d) {return "("+d[2]+")";}%} | |
expr -> _name {% function(d) {return d[0]; } %} | |
_name -> [a-zA-Z_] {% id %} | |
| _name [\w_] {% function(d) {return d[0] + d[1]; } %} | |
number -> _number {% function(d) {return d[0]} %} | |
_float -> | |
_int {% id %} | |
| _int "." _posint {% function(d) {return d[0] + d[1] + d[2]; }%} | |
_posint -> | |
[0-9] {% id %} | |
| _posint [0-9] {% function(d) {return d[0] + d[1]} %} | |
_int -> | |
"-" _posint {% function(d) {return d[0] + d[1]; }%} | |
| _posint {% id %} | |
_number -> | |
_float {% id %} | |
| _float "e" _int {% function(d){return d[0] + d[1] + d[2]; } %} | |
_ -> null | _ [\s] {% function() {} %} | |
__ -> [\s] | __ [\s] {% function() {} %} | |
''' | |
marpa_output_string = '''use strict; | |
use warnings; | |
use Marpa::R2; | |
use Data::Dump;''' | |
for current in rewrite_rules: | |
types = {} | |
nearley_output_string += print_grammar_rule(current[0]) | |
print(nearley_output_string) | |
with open("parser.ne", "wb") as file: | |
file.write(nearley_output_string.encode('utf8')) | |
thing = "hello" | |
print(thing and thing[0].isalpha()) | |
#subprocess.run("python -m lark.tools.nearley parser.ne main nearley > parser.py".split(" "),shell=True) | |
#subprocess.run("nearleyc parser.ne -o parser.js".split(" "),shell=True) | |
import parser | |
#print(parser.parse("A is not less than B")) | |
print("type something to test the parser, or type exit to quit") | |
text_input = "" | |
while text_input != "exit": | |
text_input = input() | |
print(recursive_rewrite(text_input.split(" "))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment