Skip to content

Instantly share code, notes, and snippets.

@shhyou
Last active December 26, 2015 10:08
Show Gist options
  • Save shhyou/7134097 to your computer and use it in GitHub Desktop.
Save shhyou/7134097 to your computer and use it in GitHub Desktop.
#lang racket
(require parser-tools/lex
(prefix-in : parser-tools/lex-sre) ; prefix RE operators by `:`
racket/match
racket/format)
;;;;;;;;;;;;;;; regular expressions ;;;;;;;;;;;;;;;
(define-lex-abbrevs
(:letter (:or (char-range "A" "Z") (char-range "a" "z")))
(:digit10 (char-range "0" "9")))
; comments and whitespaces
(define-lex-abbrevs
(:comment-start (:: "/*" (complement (:: any-string "*/" any-string))))
(:comment (:: :comment-start "*/"))
(:whitespace (:or " " "\t" "\r" "\n" "\f" "\v")))
; identifiers and reserved words
(define-lex-abbrevs
(:identifier (:: :letter (:* (:or :letter :digit10 "_")))))
; numeric tokens
(define-lex-abbrevs
(:integer (:+ :digit10))
(:exponent-part (:: (:or "e" "E")
(:? "+" "-")
:integer))
(:float (:or (:: (:or (:: (:+ :digit10) "." (:* :digit10))
(:: (:* :digit10) "." (:+ :digit10)))
(:? :exponent-part))
(:: :integer :exponent-part))))
; string
(define-lex-abbrevs
(:string-characters (:& any-char (:~ #\" #\\ #\newline))) ; `:~` for character complement
(:escape-sequence (:or "\\t" "\\r" "\\n" "\\f" "\\v" "\\\\" "\\\""))
(:string (:: "\"" (:* (:or :string-characters :escape-sequence)) "\"")))
; symbols
(define-lex-abbrevs
(:sym-arith (:or "+" "-" "*" "/"))
(:sym-rel (:or "<" ">" ">=" "<=" "!=" "=="))
(:sym-logic (:or "||" "&&" "!"))
(:sym-set "=")
(:sym-sep (:or "{" "}" "[" "]" "(" ")" ";" "," ".")))
(define c---lexer
(lexer
(:comment `(comment ,lexeme))
(:comment-start (error 'c---lexer
(format "Non-terminating comment at ~a:~a : ~a\n"
(position-line start-pos) (position-col start-pos) lexeme)))
(:whitespace (c---lexer input-port)) ; recursion: strip whitespaces
(:integer `(int ,lexeme))
(:float `(float ,lexeme))
(:string `(string ,lexeme))
(:identifier `(identifier ,(string->symbol lexeme)))
(:sym-arith `(symbol arithmetic ,lexeme))
(:sym-rel `(symbol relational ,lexeme))
(:sym-logic `(symbol logical ,lexeme))
(:sym-set `(symbol assignment =))
(:sym-sep `(symbol separator ,lexeme))
((eof) 'eof)))
;;;;;;;;;;;;;;;;;;;;; main ;;;;;;;;;;;;;;;;;;;;;;;
(define reserved-words
'(return typedef if else int float for struct union void while))
(define do-lex
(lambda (str-in)
(letrec
[(sym-tbl (make-hash))
(loop!
(lambda ()
(let [(tok (c---lexer str-in))]
(cond
[(eq? tok 'eof)
#f]
[else
(match tok
[`(comment ,c)
(display c) (newline)]
[`(identifier ,vid)
(hash-set! sym-tbl vid (+ 1 (hash-ref sym-tbl vid 0)))]
[_ #f])
(loop!)]))))]
(loop!)
(sort (filter (lambda (itm) (not (memq (car itm) reserved-words))) (hash->list sym-tbl))
(lambda (a b) (string<? (symbol->string (car a)) (symbol->string (car b))))))))
(define main^
(lambda (argv)
(let* [(input (if (null? argv) (current-input-port) (open-input-file (car argv))))
(tt (port-count-lines! input))
(freq-lst (do-lex input))]
(define print-freq
(lambda (xs)
(display (~a (car xs) #:min-width 25))
(display (format "~a\n" (cdr xs)))))
(display "\nFrequency of identifiers:\n")
(for-each print-freq freq-lst))))
(define main
(main^ (vector->list (current-command-line-arguments))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment