Created
April 5, 2011 12:06
-
-
Save vishnuvyas/903480 to your computer and use it in GitHub Desktop.
A port of a simple grammar generator in PAIP from common lisp to clojure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns gramgen | |
{:doc | |
"GramGen generates sentences from a context-free grammar (CFG) and comes with a built-in grammar.
It is based on the program in chapter 2 of PAIP. It could be written much more simply, but it is
written the way it is because I wanted to try out some new features of Clojure.
To generate a random tree, use the function generate-tree, which generates a random parse tree from
the grammar provided. If you want to use a more advanced grammar, use the create-grammar
function to build it. Grammars are represented as sequences where the first element of each
sequence is the LHS of a rule and the rest are the RHS choices for that rule. If an RHS choice is
itself a sequence, it is treated as a sequence of non-terminals to expand; atomic items such as
strings are treated as terminals. " }
(:use [clojure.contrib.str-utils :only [str-join]])) | |
(defprotocol ProductionRuleProtocol
  "Contract for a single production rule: a left-hand side plus a way to
  obtain one of its right-hand sides."
  (lhs [rule]
    "Return the left-hand side of this production rule.")
  (some-rhs [rule]
    "Return one right-hand side picked at random from those available for this rule."))
;; Concrete production rule. `l` is the left-hand side; `r` is the collection
;; of right-hand-side alternatives that some-rhs draws from.
(defrecord ProductionRule [l r]
  ProductionRuleProtocol
  ;; The left-hand side is simply the stored field.
  (lhs [_] l)
  ;; Pick a uniformly random index into `r` and return the alternative there.
  (some-rhs [_]
    (let [pick (rand-int (count r))]
      (nth r pick))))
(defprotocol GrammarProtocol
  "Contract for a grammar, i.e. a collection of associated production rules."
  (add-rule [this rule]
    "Add a production rule to this grammar.")
  (generate-tree [this start]
    "Generate a parse tree, using start as the starting left-hand side.")
  (generate-sent [this start]
    "Like generate-tree, but produce only the sentence, without any other nodes."))
(defn- lookup-rule
  "Return the production rule stored under prodsym in the rules map.
  Throws IllegalArgumentException naming the symbol when no rule is stored,
  instead of letting a nil rule fail later inside protocol dispatch."
  [rules prodsym]
  (or (get rules prodsym)
      (throw (IllegalArgumentException.
              (str "No production rule for symbol: " (pr-str prodsym))))))

;; A grammar backed by `rules`, a map from a rule's left-hand side to the
;; production rule itself.
(defrecord SimpleEnglishGrammar [rules]
  GrammarProtocol

  ;; Return a new version of the grammar with `rule` stored under its
  ;; left-hand side. A rule already stored under that key is replaced.
  (add-rule [this rule]
    (assoc this :rules (assoc rules (lhs rule) rule)))

  ;; Pick a random right-hand side for `prodsym`.
  ;; - If the chosen rhs is a sequential collection (list, seq or vector), each
  ;;   element is expanded recursively and the sequence of subtrees is
  ;;   returned; the expanded symbol's own label is not included in the result.
  ;; - Otherwise the rhs is a terminal and the pair [lhs terminal] is returned.
  ;; Throws IllegalArgumentException when `prodsym` has no rule.
  (generate-tree [this prodsym]
    (let [rule (lookup-rule rules prodsym)
          rhs  (some-rhs rule)]
      (if (sequential? rhs)
        ;; doall: make the random choices (and surface errors) during this call
        ;; rather than whenever the lazy seq happens to be realized.
        (doall (map #(generate-tree this %) rhs))
        [(lhs rule) rhs])))

  ;; Same expansion as generate-tree, but only the terminals are kept: the
  ;; result is a flat sequence of the terminals in left-to-right order.
  ;; Throws IllegalArgumentException when `prodsym` has no rule.
  (generate-sent [this prodsym]
    (let [rhs (some-rhs (lookup-rule rules prodsym))]
      (if (sequential? rhs)
        (doall (mapcat #(generate-sent this %) rhs))
        [rhs]))))
(defn create-grammar
  "Build a SimpleEnglishGrammar from rulelist.

  rulelist is a sequence of rule specs. The first element of each spec is the
  keyword or symbol naming the rule's left-hand side; the remaining elements
  are its right-hand-side alternatives. Each alternative is either a sequence
  of symbols to expand further or an atomic terminal such as a string.

  Rules are added in order with add-rule, which keys them by left-hand side,
  so a later spec with the same left-hand side replaces an earlier one."
  [rulelist]
  (letfn [(to-rule [[head & alternatives]]
            ;; A vector gives indexed access for the random pick in some-rhs
            ;; instead of walking a list on every choice.
            (ProductionRule. head (vec alternatives)))]
    (reduce (fn [grammar spec] (add-rule grammar (to-rule spec)))
            (SimpleEnglishGrammar. {})
            rulelist)))
;; The earmuffed name signals a rebindable var, so it is marked :dynamic
;; explicitly; newer Clojure versions otherwise warn and refuse to rebind it.
;; The metadata-map form is used so older readers accept it as well.
(def ^{:dynamic true
       :doc "Built-in example grammar in the rule-list format accepted by
  create-grammar: each entry is a left-hand-side symbol followed by its
  right-hand-side alternatives. A list alternative names the symbols to expand
  in order; a string alternative is a terminal word."}
  *simple-grammar*
  '((sent (np vp))
    (np (dt nn))
    (vp (vb np))
    (dt "the" "a")
    (nn "man" "woman" "cat" "ball")
    (vb "likes" "calls" "plays" "throws")))
(defn rand-tree
  "Build a grammar from *simple-grammar* and return a randomly generated
  tree for it, starting from the production symbol lhs."
  [lhs]
  (-> *simple-grammar*
      create-grammar
      (generate-tree lhs)))
(defn rand-sent
  "Build a grammar from *simple-grammar*, generate a random sentence starting
  from the symbol sent, and return its words joined by single spaces."
  []
  (let [words (generate-sent (create-grammar *simple-grammar*) 'sent)]
    ;; Interpose the separator between the words and concatenate everything.
    (apply str (interpose \space words))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment