Last active
December 26, 2015 10:08
-
-
Save shhyou/7134097 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(require parser-tools/lex | |
(prefix-in : parser-tools/lex-sre) ; prefix RE operators by `:` | |
racket/match | |
racket/format) | |
;;;;;;;;;;;;;;; regular expressions ;;;;;;;;;;;;;;; | |
(define-lex-abbrevs | |
(:letter (:or (char-range "A" "Z") (char-range "a" "z"))) | |
(:digit10 (char-range "0" "9"))) | |
; comments and whitespaces | |
(define-lex-abbrevs | |
(:comment-start (:: "/*" (complement (:: any-string "*/" any-string)))) | |
(:comment (:: :comment-start "*/")) | |
(:whitespace (:or " " "\t" "\r" "\n" "\f" "\v"))) | |
; identifiers and reserved words | |
(define-lex-abbrevs | |
(:identifier (:: :letter (:* (:or :letter :digit10 "_"))))) | |
; numeric tokens | |
(define-lex-abbrevs | |
(:integer (:+ :digit10)) | |
(:exponent-part (:: (:or "e" "E") | |
(:? "+" "-") | |
:integer)) | |
(:float (:or (:: (:or (:: (:+ :digit10) "." (:* :digit10)) | |
(:: (:* :digit10) "." (:+ :digit10))) | |
(:? :exponent-part)) | |
(:: :integer :exponent-part)))) | |
; string | |
(define-lex-abbrevs | |
(:string-characters (:& any-char (:~ #\" #\\ #\newline))) ; `:~` for character complement | |
(:escape-sequence (:or "\\t" "\\r" "\\n" "\\f" "\\v" "\\\\" "\\\"")) | |
(:string (:: "\"" (:* (:or :string-characters :escape-sequence)) "\""))) | |
; symbols | |
(define-lex-abbrevs | |
(:sym-arith (:or "+" "-" "*" "/")) | |
(:sym-rel (:or "<" ">" ">=" "<=" "!=" "==")) | |
(:sym-logic (:or "||" "&&" "!")) | |
(:sym-set "=") | |
(:sym-sep (:or "{" "}" "[" "]" "(" ")" ";" "," "."))) | |
(define c---lexer | |
(lexer | |
(:comment `(comment ,lexeme)) | |
(:comment-start (error 'c---lexer | |
(format "Non-terminating comment at ~a:~a : ~a\n" | |
(position-line start-pos) (position-col start-pos) lexeme))) | |
(:whitespace (c---lexer input-port)) ; recursion: strip whitespaces | |
(:integer `(int ,lexeme)) | |
(:float `(float ,lexeme)) | |
(:string `(string ,lexeme)) | |
(:identifier `(identifier ,(string->symbol lexeme))) | |
(:sym-arith `(symbol arithmetic ,lexeme)) | |
(:sym-rel `(symbol relational ,lexeme)) | |
(:sym-logic `(symbol logical ,lexeme)) | |
(:sym-set `(symbol assignment =)) | |
(:sym-sep `(symbol separator ,lexeme)) | |
((eof) 'eof))) | |
;;;;;;;;;;;;;;;;;;;;; main ;;;;;;;;;;;;;;;;;;;;;;; | |
(define reserved-words | |
'(return typedef if else int float for struct union void while)) | |
(define do-lex | |
(lambda (str-in) | |
(letrec | |
[(sym-tbl (make-hash)) | |
(loop! | |
(lambda () | |
(let [(tok (c---lexer str-in))] | |
(cond | |
[(eq? tok 'eof) | |
#f] | |
[else | |
(match tok | |
[`(comment ,c) | |
(display c) (newline)] | |
[`(identifier ,vid) | |
(hash-set! sym-tbl vid (+ 1 (hash-ref sym-tbl vid 0)))] | |
[_ #f]) | |
(loop!)]))))] | |
(loop!) | |
(sort (filter (lambda (itm) (not (memq (car itm) reserved-words))) (hash->list sym-tbl)) | |
(lambda (a b) (string<? (symbol->string (car a)) (symbol->string (car b)))))))) | |
(define main^ | |
(lambda (argv) | |
(let* [(input (if (null? argv) (current-input-port) (open-input-file (car argv)))) | |
(tt (port-count-lines! input)) | |
(freq-lst (do-lex input))] | |
(define print-freq | |
(lambda (xs) | |
(display (~a (car xs) #:min-width 25)) | |
(display (format "~a\n" (cdr xs))))) | |
(display "\nFrequency of identifiers:\n") | |
(for-each print-freq freq-lst)))) | |
(define main | |
(main^ (vector->list (current-command-line-arguments)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment