Created
September 6, 2012 22:50
-
-
Save ideamonk/3661015 to your computer and use it in GitHub Desktop.
Parsing English
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; Author: Abhishek Mishra <[email protected]> | |
; ; http://www.reddit.com/r/dailyprogrammer/comments/zfeb2/9062012_challenge_96_intermediate_parsing_english/ | |
; ; seven hundred and forty-four million
; (-> 7 (* 100) (+ 40 4) (* 1000000) ) | |
; ; ten-million and ninety-four | |
; (-> 10 (* 1000000) (+ 90 4) ) | |
; ; One-Thousand and Thirty-Four | |
; (-> 1 (* 1000) (+ 30 4) ) | |
; ; Two-Billion and One-Hundred-Forty-Five-Thousand
; (-> 2 (* 1000000000) (+ (-> 1 (* 100) (+ 40 5) (* 1000) ) ) ) | |
; (-> 2 (* 1000000000) (+ (-> 1 (* 100) (+ 40) (+ 5) (* 1000)))) | |
; ; eleven-thousand and eight-hundred | |
; (-> 11 (* 1000) (+ (-> 8 (* 100)) ) ) | |
; ; three-thousand and twenty | |
; (-> 3 (* 1000) (+ (-> 20))) | |
; Assumption: a hyphen binds a multiplier to the words it is joined with,
; whereas a multiplier standing on its own scales everything accumulated so far.
; e.g. "hundred and twenty-thousand" means 100 + 20,000
;      "hundred and twenty thousand" means (100 + 20) * 1,000
;; Lookup table from a number word (as a keyword) to the source fragment that
;; word contributes to a `->` threading form.  Plain numbers become an addition
;; step, multipliers become a multiplication step; every fragment is padded
;; with one space on each side.
(def mapping
  (let [add-step    (fn [n] (str " (+ " n ") "))
        scale-step  (fn [n] (str " (* " n ") "))
        ;; words for 0..19, in numeric order
        small-words [:zero :one :two :three :four :five :six :seven :eight :nine
                     :ten :eleven :twelve :thirteen :fourteen :fifteen :sixteen
                     :seventeen :eighteen :nineteen]
        ;; words for 20, 30, ..., 90, in numeric order
        tens-words  [:twenty :thirty :forty :fifty :sixty :seventy :eighty :ninety]
        ;; multipliers, paired with their factors
        multipliers [[:hundred  100]
                     [:thousand 1000]
                     [:million  1000000]
                     [:billion  1000000000]
                     [:trillion 1000000000000]]]
    (merge
      (zipmap small-words (map add-step (range 20)))
      (zipmap tens-words  (map add-step (range 20 100 10)))
      (into {} (for [[word factor] multipliers]
                 [word (scale-step factor)])))))
(defn parse
  "Turns a sequence of lower-case tokens (e.g. [\"two-million\" \"and\" \"five\"])
  into a lazy sequence of string fragments which, once joined, read as a
  nested `->` threading expression.

  * An ordinary token is split into its runs of word characters; every part
    found in `mapping` contributes its fragment, parts that are not found
    contribute an empty string.  The rest of the input is nested inside the
    same `(-> ...)` form.
  * \"and\" wraps the single token that follows it in `(+ ...)` and then
    continues with whatever comes after that token.  An \"and\" with nothing
    after it is ignored.

  Returns nil for an empty sequence."
  [l]
  (let [[f & more] l]
    (cond
      (= f "and")
      ;; Guard against a dangling \"and\": recursing on [nil] would hand nil to
      ;; re-seq and blow up with a NullPointerException once realized.
      (when (seq more)
        (concat ["(+ "] (parse [(first more)]) [") "] (parse (rest more))))

      (seq l)
      (concat ["(-> "]
              ;; str turns the nil of an unknown word into \"\"
              (map (comp str mapping keyword) (re-seq #"\w+" f))
              (parse more)
              [")"]))))
(defn gimme-number
  "Evaluates an English number phrase such as \"One-Thousand and Thirty-Four\"
  and returns the resulting number.

  The phrase is lower-cased, split into tokens made of letters and hyphens,
  translated by `parse` into the text of a threading expression, and that text
  is evaluated with `load-string`."
  [s]
  (let [;; Locale/ROOT keeps the lower-casing locale-independent: with a Turkish
        ;; or Azeri default locale \"I\" becomes dotless \"ı\", which the
        ;; [a-z-] token pattern below would not match.
        lowered (.toLowerCase ^String s java.util.Locale/ROOT)
        tokens  (re-seq #"[a-z-]+" lowered)]
    ;; NOTE: load-string evaluates generated code; the text comes only from
    ;; the fragments `parse` assembles, never from the raw input itself.
    (load-string (apply str (parse tokens)))))
; Example invocations. The comment below each call records the value it evaluates to.
; In the first example the unhyphenated "Thousand" scales everything accumulated
; so far (2000145), not just the 145 in front of it.
(gimme-number "Two-Million and One-Hundred-Forty-Five Thousand")
; 2000145000
(gimme-number "Forty-Five-Hundred")
; 4500
(gimme-number "Forty-Five-Hundred and ninety-eight")
; 4598
(gimme-number "three-thousand and twenty")
; 3020
(gimme-number "One-Thousand and Thirty-Four")
; 1034
(gimme-number "Ten-Million and Ninety-Four")
; 10000094
(gimme-number "Seven-Hundred and Forty-Four Million")
; 744000000
(gimme-number "Five-hundred and fifty-five million and four-hundred and forty thousand and twenty-five")
; 555000440025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment