Last active
March 28, 2021 10:42
-
-
Save burakemir/541c709e570ea770bf21f855034c1dba to your computer and use it in GitHub Desktop.
Typed Racket adaptation of Push-model SQL interpreter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
YEAR | DESCRIPTION | |
---|---|---|
1968 | Elefants Druk and Chukha arrive in Zurich Zoo | |
1976 | Snow leopards move to a new area | |
1979 | Zoo Zurich has 21 millionth visitor | |
1979 | on occasion of 50th anniversary entry fee reduced to 1 CHF | |
1985 | first PC installed |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang typed/racket | |
;; This code is the query-plan language and definitional interpreter from | |
;; Section 2 of Tiark Rompf, Nada Amin (2019). | |
;; A SQL to C compiler in 500 lines of code. | |
;; Journal of Functional Programming, 29, E9. doi:10.1017/S0956796819000054 | |
(require/typed csv-reading [make-csv-reader-maker (-> Any (-> Input-Port (-> (Listof String))))]) | |
(define-type Fields (Listof String)) | |
(define-type Schema (Listof String)) | |
(struct Record ([fields : Fields] [schema : Schema]) #:transparent) | |
(define-type Ref (U Field Value)) | |
(struct Field ([name : String]) #:transparent) | |
(struct Value ([val : String]) #:transparent) | |
(define-type Predicate (U Eq Ne)) | |
(struct Eq ([a : Ref] [b : Ref]) #:transparent) | |
(struct Ne ([a : Ref] [b : Ref]) #:transparent) | |
(define-type Operator (U Scan Print Project Filter Join HashJoin Group)) | |
(struct Scan ([filename : String])) | |
(struct Print ([parent : Operator])) | |
(struct Project ([out : Schema] [in : Schema] [parent : Operator])) | |
(struct Filter ([p : Predicate] [parent : Operator])) | |
(struct Join ([parent1 : Operator] [parent2 : Operator])) | |
(struct SemiJoin ([parent1 : Operator] [parent2 : Operator])) | |
(struct AntiJoin ([parent1 : Operator] [parent2 : Operator])) | |
;; Group and HashJoin from Section 4 are left as an exercise. | |
(struct HashJoin ([parent1 : Operator] [parent2 : Operator])) | |
(struct Group ([keys : Schema] [agg : Schema] [parent : Operator])) | |
(define csv-reader | |
(make-csv-reader-maker | |
'((separator-chars #\,) | |
(comment-chars #\#) | |
(strip-leading-whitespace? . #t) | |
(strip-trailing-whitespace? . #t)))) | |
(: process-csv (-> String (-> Record Void) Void)) | |
(define (process-csv filename yld) | |
(define next-row (csv-reader (open-input-file filename))) | |
(let ([schema (next-row)]) | |
(let loop ([row (next-row)]) | |
(cond | |
[(not (empty? row)) (yld (Record row schema)) (loop (next-row))])))) | |
(: lookup (-> Schema String Index)) | |
(define (lookup in fname) | |
(let ([res (index-of in fname)]) | |
(cond | |
[(false? res) (raise-arguments-error 'project-record | |
"field not found" | |
"field" fname | |
"schema" in)] | |
[(index? res) res]))) | |
(: project-record (-> Record Schema Record)) | |
(define (project-record r out) | |
(: mapschema (-> Schema Fields Fields)) | |
(define (mapschema in fields) | |
(map (λ: ([fname : String]) (list-ref fields (lookup in fname))) out)) | |
(let* ([in : Schema (Record-schema r)] | |
[fields : Fields (Record-fields r)] | |
[newfields : Fields (mapschema in fields)]) | |
(Record newfields out))) | |
(: eval-pred (-> Predicate Record Boolean)) | |
(define (eval-pred p r) | |
(: eval-ref (-> Ref String)) | |
(define (eval-ref ref) | |
(cond | |
[(Field? ref) (let ([index : Index (lookup (Record-schema r) (Field-name ref))]) | |
(list-ref (Record-fields r) index))] | |
[(Value? ref) (Value-val ref)])) | |
(cond | |
[(Eq? p) (equal? (eval-ref (Eq-a p)) (eval-ref (Eq-b p)))] | |
[(Ne? p) (not (equal? (eval-ref (Ne-a p)) (eval-ref (Ne-b p))))])) | |
(: join-info (-> Record Record (Values Schema Record Record))) | |
(define (join-info r1 r2) | |
(let* ([keys (set-intersect (Record-schema r1) (Record-schema r2))] | |
[vals1 (project-record r1 keys)] | |
[vals2 (project-record r2 keys)]) | |
(values keys vals1 vals2))) | |
(: exec (-> Operator (-> Record Void) Void)) | |
(define (exec op yld) | |
(cond | |
[(Scan? op) (process-csv (Scan-filename op) yld)] | |
[(Print? op) (exec (Print-parent op) | |
(λ (r) (writeln (Record-fields r))))] | |
[(Filter? op) (exec (Filter-parent op) | |
(λ (r) (cond [(eval-pred (Filter-p op) r) (yld r)])))] | |
[(Project? op) (exec (Project-parent op) | |
(λ (r) (yld (project-record r (Project-out op)))))] | |
[(Join? op) | |
(exec (Join-parent1 op) | |
(λ (r1) | |
(exec (Join-parent2 op) | |
(λ (r2) (let-values ([([keys : Schema] [vals1 : Record] [vals2 : Record]) | |
(join-info r1 r2)]) | |
(cond | |
[(equal? vals1 vals2) | |
(yld (Record (append (Record-fields r1) (Record-fields r2)) | |
(append (Record-schema r1) (Record-schema r2))))]) | |
)))))] | |
[(SemiJoin? op) | |
(exec (Join-parent1 op) | |
(λ (r1) | |
(exec (Join-parent2 op) | |
(λ (r2) | |
(let-values ([([keys : Schema] [vals1 : Record] [vals2 : Record]) | |
(join-info r1 r2)]) | |
(cond | |
[(equal? vals1 vals2) (yld r1)]))))))] | |
[(AntiJoin? op) | |
(exec (Join-parent1 op) | |
(λ (r1) | |
(exec (Join-parent2 op) | |
(λ (r2) | |
(let-values ([([keys : Schema] [vals1 : Record] [vals2 : Record]) | |
(join-info r1 r2)]) | |
(cond | |
[(not (equal? vals1 vals2)) | |
(yld r1)]))))))])) | |
(module+ test | |
(define q (Print (Scan "test.csv"))) | |
(define p (Print (Filter (Eq (Field "AUTHORS") (Value "Frederick P. Brooks")) (Scan "test.csv")))) | |
(define r (Print (Filter (Ne (Field "YEAR") (Value "1975")) (Scan "test.csv")))) | |
;; Source: https://www.zoo.ch/de/der-zoo-zuerich/historische-ereignisse | |
(define z (Print (Scan "events.csv"))) | |
(exec z void) | |
(define tmp '("AUTHORS" "YEAR" "TITLE" "YEAR" "DESCRIPTION")) | |
(define dst '("AUTHORS" "YEAR" "DESCRIPTION")) | |
(define example-join (Print (Project dst tmp (Join (Scan "test.csv") (Scan "events.csv"))))) | |
(exec example-join void) | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AUTHORS | YEAR | TITLE | |
---|---|---|---|
Herbert A. Simon | 1968 | The Sciences of the Artificial | |
Frederick P. Brooks | 1975 | The Mythical Man-Month | |
Joseph Weizenbaum | 1976 | Computer Power and Human Reason | |
Seymour Papert | 1980 | Mindstorms: Children, Computers and Powerful Ideas | |
Harold Abelson and Gerald Jay Sussman and Julie Sussman | 1979 | Structure and Interpretation of Computer Programs | |
Robin Milner and Robert Harper and David MacQueen and Mads Tofte | 1990 | The Definition of Standard ML (Revised Edition) | |
Matthias Felleisen and Robert Bruce Findler and Matthew Flatt and Shriram Krishnamurthi | 2001 | How to Design Programs (1st Edition) | |
Matthias Felleisen and Robert Bruce Findler and Matthew Flatt and Shriram Krishnamurthi | 2014 | How to Design Programs (2nd Edition) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment