Skip to content

Instantly share code, notes, and snippets.

View kelvingakuo's full-sized avatar

Kelvin Gakuo kelvingakuo

View GitHub Profile
class Parser(self):
def advance(self):
""" Advance the tokens in use
"""
self.current_token, self.next_token = self.next_token, next(self.tokens, None)
def accept(self, expected, raise_err = True):
""" Helper function to check if the next token is what we expect
def show_tree(tree):
""" (Level order) Traverse and print parse tree
"""
queue = []
queue.append(tree)
while(len(queue) != 0):
items = len(queue)
while(items > 0):
class Node(object):
def __init__(self, val):
self.value = val # Value of node
self.children = [] # Can have multiple children
def add_child(self, node):
self.children.append(node)
return node
class ParsingErrorException(Exception):
__module__ = 'builtins'
class LexingErrorException(Exception):
__module__ = 'builtins'
# Usage
raise LexingErrorException(f"Syntax error occured near: {}")
<query> ::= "SELECT " <columns> " FROM " <name> <terminal> | "SELECT " <columns> " FROM " <name> " WHERE " <conditionList> <terminal>
<columns> ::= (<name> ", ")+ | "*"
<name> ::= <letter>+ | <letter>+ "_" | <letter>+ "_" <digit>+
<conditionList> ::= <condition> <comparator> <condition>
<comparator> ::= " AND " | " OR "
<condition> ::= <name> <operator> <term>
<operator> ::= " = " | " > " | " >= " | " < " | " <= "
<term> ::= <digit> | <digit> "." <digit> | <name>
<letter> ::= [a-z]+ | [A-Z]+
<digit> ::= [1-9]+
import re
acceptable = re.Scanner(patterns)
query = "SELECT * FROM table WHERE col_a = 1.5 AND col_b = 250;"
matched_tokens, uknown_tokens = acceptable.scan(query)
import re
keywords = r"(SELECT|WHERE|FROM|AND|OR|NOT)"
patterns = [
(keywords, lambda scanner, token: {"token_type": "keyword", "token": token}),
(r"[a-zA-Z_][a-zA-Z_0-9]*", lambda s, t: {"token_type": "name", "token": t}),
(r"\*", lambda s, t: {"token_type": "all_cols","token": t}),
(r">=|>|<=|<|=", lambda s, t: {"token_type": "operator","token": t}),
(r"[-+]?\d*\.\d+", lambda s, t: {"token_type": "float","token": t}),
(r"\d+", lambda s, t: {"token_type": "integer","token": t}),
def inner_merge_join(left: List[dict], right: List[dict], on: str) -> List[dict]:
""" (Sort) Merge JOIN: Sort both by JOIN key. Loop through both simulateneously.
If matching, advance inner till no match. If no match, advance the smaller attr
INNER JOIN only implemented
"""
output = []
sorted_left = sorted(left, key = lambda dic: dic[on])
def hash_func(ind: int, mp_size: int):
""" A simple hash function (uses a version of the mid-square method and modulus)
Params:
ind (int) - The index to hash
mp_size (int) - The size of the hash map
Returns:
key (int) - The hashed output
"""
def nested_loop_join(left: List[dict], right: List[dict], on: str, how: str) -> List[dict]:
""" Nested loop JOIN strategy: For every row in outer, find matching one in inner
If left/right JOIN and there's no match on inner, return outer. Else, skip
Params:
left (list<dict>) - List of dicts representing one relation
right (list<dict>) - List of dicts representing the other relation
on (string) - The key in the relations to JOIN on
how (string) - The JOIN (left, right, inner)