Kelvin Gakuo kelvingakuo

Data nerd, making the world a better place, a step at a time.

kelvingakuo / lookahead.py

Created November 19, 2021 11:34

	class Parser(self):
	def advance(self):
	""" Advance the tokens in use
	"""
	self.current_token, self.next_token = self.next_token, next(self.tokens, None)


	def accept(self, expected, raise_err = True):
	""" Helper function to check if the next token is what we expect

kelvingakuo / level_order_traversal.py

Last active November 19, 2021 11:30

	def show_tree(tree):
	""" (Level order) Traverse and print parse tree
	"""
	queue = []
	queue.append(tree)

	while(len(queue) != 0):
	items = len(queue)

	while(items > 0):

kelvingakuo / tree.py

Created November 19, 2021 11:07

	class Node(object):
	def __init__(self, val):
	self.value = val # Value of node
	self.children = [] # Can have multiple children

	def add_child(self, node):
	self.children.append(node)
	return node

kelvingakuo / custom_exceptions.py

Created November 19, 2021 11:01

	class ParsingErrorException(Exception):
	__module__ = 'builtins'


	class LexingErrorException(Exception):
	__module__ = 'builtins'


	# Usage
	raise LexingErrorException(f"Syntax error occured near: {}")

kelvingakuo / sql_grammar.bnf

Last active November 18, 2021 18:56

	<query> ::= "SELECT " <columns> " FROM " <name> <terminal> \| "SELECT " <columns> " FROM " <name> " WHERE " <conditionList> <terminal>
	<columns> ::= (<name> ", ")+ \| "*"
	<name> ::= <letter>+ \| <letter>+ "_" \| <letter>+ "_" <digit>+
	<conditionList> ::= <condition> <comparator> <condition>
	<comparator> ::= " AND " \| " OR "
	<condition> ::= <name> <operator> <term>
	<operator> ::= " = " \| " > " \| " >= " \| " < " \| " <= "
	<term> ::= <digit> \| <digit> "." <digit> \| <name>
	<letter> ::= [a-z]+ \| [A-Z]+
	<digit> ::= [1-9]+

kelvingakuo / lexer.py

Last active November 18, 2021 18:53

	import re

	acceptable = re.Scanner(patterns)

	query = "SELECT * FROM table WHERE col_a = 1.5 AND col_b = 250;"

	matched_tokens, uknown_tokens = acceptable.scan(query)

kelvingakuo / valid_sql_tokens.py

Last active March 27, 2022 08:57

	import re

	keywords = r"(SELECT\|WHERE\|FROM\|AND\|OR\|NOT)"
	patterns = [
	(keywords, lambda scanner, token: {"token_type": "keyword", "token": token}),
	(r"[a-zA-Z_][a-zA-Z_0-9]*", lambda s, t: {"token_type": "name", "token": t}),
	(r"\*", lambda s, t: {"token_type": "all_cols","token": t}),
	(r">=\|>\|<=\|<\|=", lambda s, t: {"token_type": "operator","token": t}),
	(r"[-+]?\d*\.\d+", lambda s, t: {"token_type": "float","token": t}),
	(r"\d+", lambda s, t: {"token_type": "integer","token": t}),

kelvingakuo / inner_sort_merge_join.py

Created November 12, 2021 09:48

	def inner_merge_join(left: List[dict], right: List[dict], on: str) -> List[dict]:
	""" (Sort) Merge JOIN: Sort both by JOIN key. Loop through both simulateneously.

	If matching, advance inner till no match. If no match, advance the smaller attr

	INNER JOIN only implemented
	"""
	output = []

	sorted_left = sorted(left, key = lambda dic: dic[on])

kelvingakuo / inner_hash_join.py

Created November 11, 2021 11:48

	def hash_func(ind: int, mp_size: int):
	""" A simple hash function (uses a version of the mid-square method and modulus)

	Params:
	ind (int) - The index to hash
	mp_size (int) - The size of the hash map

	Returns:
	key (int) - The hashed output
	"""

kelvingakuo / nested_loop_join.py

Last active March 27, 2022 09:08

	def nested_loop_join(left: List[dict], right: List[dict], on: str, how: str) -> List[dict]:
	""" Nested loop JOIN strategy: For every row in outer, find matching one in inner

	If left/right JOIN and there's no match on inner, return outer. Else, skip

	Params:
	left (list<dict>) - List of dicts representing one relation
	right (list<dict>) - List of dicts representing the other relation
	on (string) - The key in the relations to JOIN on
	how (string) - The JOIN (left, right, inner)

	<query> ::= "SELECT " <columns> " FROM " <name> <terminal> \| "SELECT " <columns> " FROM " <name> " WHERE " <conditionList> <terminal>
	<columns> ::= (<name> ", ")+ \| "*"
	<name> ::= <letter>+ \| <letter>+ "_" \| <letter>+ "_" <digit>+
	<conditionList> ::= <condition> <comparator> <condition>
	<comparator> ::= " AND " \| " OR "
	<condition> ::= <name> <operator> <term>
	<operator> ::= " = " \| " > " \| " >= " \| " < " \| " <= "
	<term> ::= <digit> \| <digit> "." <digit> \| <name>
	<letter> ::= [a-z]+ \| [A-Z]+
	<digit> ::= [1-9]+