Created
May 16, 2012 10:33
-
-
Save naoyat/2709356 to your computer and use it in GitHub Desktop.
正規表現とオートマトン
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(use srfi-1) ;; cons* -> Gauche の組み込み list* でも可 | |
(define *ullman* #f) ;; Ullman先生方式なら#t | |
(define *verbose* #f) | |
;;; Finite Automata | |
(define (FA start finals states transitions) | |
(list 'FA start finals states transitions)) | |
(define (FA? f) (eq? 'FA (car f))) | |
(define (fa-start-state f) (second f)) | |
(define (fa-final-states f) (third f)) | |
(define (fa-states f) (fourth f)) | |
(define (fa-transitions f) (fifth f)) | |
(define (fa-prettyprint fa name) | |
(format #t "Finite Automaton ~a:\n" name) | |
(format #t " states: ~a\n" (fa-states fa)) | |
(format #t " start state: ~a\n" (fa-start-state fa)) | |
(format #t " final states: ~a\n" (fa-final-states fa)) | |
(format #t " transitions:\n") | |
(for-each (lambda (tr) | |
(format #t " ") | |
(trans-prettyprint tr)) | |
(fa-transitions fa))) | |
(define (fa->dot fa name) | |
(format #t "digraph \"~a\" {\n" name) | |
(format #t " graph [ rankdir = LR ];\n") | |
(format #t " node [ fontname = \"Courier\", fontsize = 14, shape = circle, width = 0.3, height = 0.3, margin = 0.01 ];\n") | |
(format #t " edge [ fontname = \"Courier\", color = black, weight = 1 ];\n") | |
(format #t " start [ shape = plaintext ];\n") | |
(format #t " start -> ~a;\n" (fa-start-state fa)) | |
(let1 finals (fa-final-states fa) | |
(dolist (st finals) | |
(format #t " ~a [ shape = circle, peripheries = 2 ];\n" st))) | |
(dolist (tr (fa-transitions fa)) | |
(let ([state1 (trans-state1 tr)] | |
[state2 (trans-state2 tr)] | |
[input (trans-input tr)]) | |
(when (eq? 'eps input) (set! input "ϵ")) | |
(format #t " ~a -> ~a [ label = \"~a\" ];\n" state1 state2 input))) | |
(format #t "}\n")) | |
(define (fa-draw fa label imgfile) | |
(let* ([match (rxmatch #/([^.]+)\.(gif|png|ps|svg)/ imgfile)] | |
[dotfile (format "~a.dot" (match 1))] | |
[suffix (match 2)]) | |
(with-output-to-file dotfile | |
(lambda () (fa->dot fa label))) | |
(sys-system (format "dot -T~a ~a -o ~a\n" suffix dotfile imgfile)) | |
(sys-system (format "open ~a" imgfile)))) | |
;;; States | |
(define (genstate) (gensym)) ;; uniqueなステート番号を振ってくれる何か | |
;;; Transitions | |
(define (Trans state1 input state2) | |
(cons state1 (cons input state2))) | |
(define (trans-state1 tr) (car tr)) | |
(define (trans-input tr) (cadr tr)) | |
(define (trans-state2 tr) (cddr tr)) | |
(define (trans-prettyprint tr) | |
(let ([state1 (trans-state1 tr)] | |
[state2 (trans-state2 tr)] | |
[input (trans-input tr)]) | |
(if (eq? 'eps input) | |
(format #t "~a + ϵ -> ~a\n" state1 state2) | |
(format #t "~a + '~a' -> ~a\n" state1 input state2)))) | |
;; single input | |
(define (Single input) | |
(let ([start (genstate)] | |
[final (genstate)]) | |
(let1 tr (Trans start input final) | |
(FA start (list final) (list start final) (list tr))))) | |
;; concatenation (AB..) | |
(define (Concat . faa) | |
(let* ([fa1 (car faa)] | |
[fa1-start (fa-start-state fa1)]) | |
(let loop ((last-finals (fa-final-states fa1)) | |
(states (fa-states fa1)) | |
(transitions (fa-transitions fa1)) | |
(rest (cdr faa))) | |
(if (null? rest) | |
(FA fa1-start last-finals states transitions) | |
(let* ([fa2 (car rest)] | |
[concat-trs (map (cut Trans <> 'eps (fa-start-state fa2)) last-finals)]) | |
(loop (fa-final-states fa2) | |
(append states (fa-states fa2)) | |
(append transitions concat-trs (fa-transitions fa2)) | |
(cdr rest))))))) | |
;; union (A + B + ..) | |
(define (Union . faa) | |
(let ([start (genstate)] | |
[final (genstate)]) | |
(let ([states+ | |
(append (list start) (append-map fa-states faa) (list final))] | |
[transitions+ | |
(append-map (lambda (fa) | |
(let ([s-> (Trans start 'eps (fa-start-state fa))] | |
[->f* (map (cut Trans <> 'eps final) (fa-final-states fa))]) | |
(append (list s->) (fa-transitions fa) ->f*))) | |
faa)]) | |
(FA start (list final) states+ transitions+)))) | |
(define (char-range from-char to-char) | |
(let ([from (char->integer from-char)] | |
[to (char->integer to-char)]) | |
(reverse! (map integer->char (iota (+ (- to from) 1) from))))) | |
;; Kleene star (A*) | |
(define (Kleene* a) | |
(let ([start (genstate)] | |
[final (genstate)] | |
[a-start (fa-start-state a)] | |
[a-finals (fa-final-states a)] | |
[states (fa-states a)] | |
[transitions (fa-transitions a)]) | |
(let ([start->final (Trans start 'eps final)] | |
[start->a-start (Trans start 'eps a-start)] | |
[a-finals->final (map (cut Trans <> 'eps final) a-finals)] | |
[a-finals->a-start (map (cut Trans <> 'eps (if *ullman* a-start start)) a-finals)]) | |
(let ([states+ (cons* start final states)] | |
[transitions+ (append (list start->final start->a-start) | |
a-finals->final a-finals->a-start transitions)]) | |
(FA start (list final) states+ transitions+))))) | |
;; Kleene plus (A+) | |
(define (Kleene+ a) | |
(let ([start (genstate)] | |
[final (genstate)] | |
[a-start (fa-start-state a)] | |
[a-finals (fa-final-states a)] | |
[states (fa-states a)] | |
[transitions (fa-transitions a)]) | |
(let ([start->a-start (Trans start 'eps a-start)] | |
[a-finals->final (map (cut Trans <> 'eps final) a-finals)] | |
[a-finals->a-start (map (cut Trans <> 'eps (if *ullman* a-start start)) a-finals)]) | |
(let ([states+ (cons* start final states)] | |
[transitions+ (append (list start->a-start) | |
a-finals->final a-finals->a-start transitions)]) | |
(FA start (list final) states+ transitions+))))) | |
;; A? | |
(define (Kleene? a) | |
(let ([start (genstate)] | |
[final (genstate)] | |
[a-start (fa-start-state a)] | |
[a-finals (fa-final-states a)] | |
[states (fa-states a)] | |
[transitions (fa-transitions a)]) | |
(let ([start->final (Trans start 'eps final)] | |
[start->a-start (Trans start 'eps a-start)] | |
[a-finals->final (map (cut Trans <> 'eps final) a-finals)]) | |
(let ([states+ (cons* start final states)] | |
[transitions+ (append (list start->final start->a-start) | |
a-finals->final transitions)]) | |
(FA start (list final) states+ transitions+))))) | |
;; あるstateで入力inputを受けた時に進む先 | |
(define (fa-transitions-from-a-state fa state input) | |
(filter-map (lambda (tr) | |
(if (and (eq? (trans-state1 tr) state) | |
(eq? (trans-input tr) input)) | |
(trans-state2 tr) | |
#f)) | |
(fa-transitions fa))) | |
(define (fa-transitions-from-states fa states input) | |
(uniq (append-map (cut fa-transitions-from-a-state fa <> input) states))) | |
;; あるstateで受け付けるepsilon以外の入力 | |
(define (fa-non-eps-inputs-from-a-state fa state) | |
(filter-map (lambda (tr) | |
(if (and (eq? (trans-state1 tr) state) | |
(not (eq? (trans-input tr) 'eps))) | |
(trans-input tr) | |
#f)) | |
(fa-transitions fa))) | |
(define (fa-non-eps-inputs-from-states fa states) | |
(sort (uniq (append-map (cut fa-non-eps-inputs-from-a-state fa <>) states)))) | |
;; closure | |
(define (CL fa state) | |
(define (eps-trans state) (fa-transitions-from-a-state fa state 'eps)) | |
(define ht (make-hash-table 'eq?)) | |
(hash-table-put! ht state #t) | |
(let loop ((states (list state))) | |
(if (null? states) | |
(hash-table-keys ht) | |
(let1 new-states (remove (cut hash-table-exists? ht <>) | |
(append-map eps-trans states)) | |
(for-each (cut hash-table-put! ht <> #t) new-states) | |
(loop new-states))))) | |
(define (uniq ls . args) | |
(let* ([ht-type (if (null? args) 'eq? (car args))] | |
[ht (make-hash-table ht-type)]) | |
(for-each (cut hash-table-put! ht <> #t) ls) | |
(hash-table-keys ht))) | |
(define (CLs fa states) | |
(uniq (append-map (cut CL fa <>) states))) | |
;; NFA -> DFA 変換 | |
(define (NFA->DFA nfa) | |
(let ([start (fa-start-state nfa)] | |
[finals (fa-final-states nfa)] | |
[states (fa-states nfa)] | |
[transitions (fa-transitions nfa)] | |
[S (make-hash-table 'equal?)] ; state groups | |
[T (make-hash-table 'equal?)] ; transitions | |
[F (make-hash-table 'equal?)] ; final states | |
[visited (make-hash-table 'eq?)]) | |
(define (has-final? states) | |
(any (cut memq <> states) finals)) | |
(define (sub from* from-state) | |
(unless (hash-table-get visited from-state #f) | |
(hash-table-put! visited from-state #t) | |
(when *verbose* | |
(format #t " From states ~a:\n" from*)) | |
(let1 inputs (fa-non-eps-inputs-from-states nfa from*) | |
(unless (null? inputs) | |
(when *verbose* | |
(format #t " transition inputs from this closure: ~a\n" inputs)) | |
(for-each (lambda (in) | |
(let* ([to (fa-transitions-from-states nfa from* in)] | |
[to* (CLs nfa to)]) | |
(when *verbose* | |
(format #t " - ~a + ~a -> ~a\n" from* in to*)) | |
(let1 to-state (or (hash-table-get S to* #f) (genstate)) | |
(hash-table-put! S to* to-state) | |
(hash-table-put! T (Trans from-state in to-state) #t) | |
(when (has-final? to*) | |
(hash-table-put! F to-state #t)) | |
(sub to* to-state)))) | |
inputs))))) | |
(when *verbose* | |
(format #t "[NFA->DFA]\n")) | |
(let* ([D-start* (CLs nfa (list start))] | |
[D-start-state (genstate)]) | |
(when *verbose* | |
(format #t " CL(~a) = ~a\n" start D-start*)) | |
(hash-table-put! S D-start* D-start-state) | |
(when (has-final? D-start*) | |
(hash-table-put! F D-start-state #t)) | |
(sub D-start* D-start-state) | |
(let* ([D-final-states (hash-table-keys F)] | |
[D-states (hash-table-values S)] | |
[D-transitions (hash-table-keys T)]) | |
(when *verbose* | |
(hash-table-for-each S (lambda (k v) | |
(format #t " - new state ~a corresponds ~a\n" v k)))) | |
(FA D-start-state D-final-states D-states D-transitions))))) | |
(define (string->NFA str) | |
(let loop ((cs (string->list str)) (concat '()) (union '())) | |
(define (concatted) | |
(apply Concat (reverse! concat))) | |
(define (render) | |
(let1 u (reverse! (cons (concatted) union)) | |
(if (= 1 (length u)) | |
(car u) | |
(apply Union u)))) | |
(if (null? cs) (render) | |
(let1 c (car cs) | |
(cond [(eq? #\( c) | |
(receive (obj rest) (loop (cdr cs) '() '()) | |
(loop rest (cons obj concat) union))] | |
[(eq? #\) c) | |
(values (render) (cdr cs))] | |
[(eq? #\* c) | |
(loop (cdr cs) (list (Kleene* (concatted))) union)] | |
[(eq? #\+ c) | |
(loop (cdr cs) '() (cons (concatted) union))] | |
[else | |
(loop (cdr cs) (cons (Single (car cs)) concat) union)] ))))) | |
(define (regexp->NFA str) | |
(define (read-bracket cs) | |
(let loop ((cs cs) (chars '())) | |
(let1 c (car cs) | |
(cond [(eq? #\] c) | |
(values (apply Union (map Single (reverse! chars))) (cdr cs))] | |
[(eq? #\- c) | |
(if (null? chars) | |
(loop (cdr cs) (cons #\- chars)) | |
(loop (cddr cs) (append (char-range (car chars) (cadr cs)) | |
(cdr chars))))] | |
[else | |
(loop (cdr cs) (cons c chars))])))) | |
(let loop ((cs (string->list str)) (concat '()) (union '())) | |
(define (concatted) | |
(apply Concat (reverse! concat))) | |
(define (render) | |
(let1 u (reverse! (cons (concatted) union)) | |
(if (= 1 (length u)) | |
(car u) | |
(apply Union u)))) | |
(if (null? cs) (render) | |
(let1 c (car cs) | |
(cond [(eq? #\( c) | |
(receive (obj rest) (loop (cdr cs) '() '()) | |
(loop rest (cons obj concat) union))] | |
[(eq? #\) c) | |
(values (render) (cdr cs))] | |
[(eq? #\[ c) | |
(receive (obj rest) (read-bracket (cdr cs)) | |
(loop rest (cons obj concat) union))] | |
[(eq? #\* c) | |
(loop (cdr cs) (list (Kleene* (concatted))) union)] | |
[(eq? #\+ c) | |
(loop (cdr cs) (list (Kleene+ (concatted))) union)] | |
[(eq? #\? c) | |
(loop (cdr cs) (list (Kleene? (concatted))) union)] | |
[(eq? #\| c) | |
(loop (cdr cs) '() (cons (concatted) union))] | |
[else | |
(loop (cdr cs) (cons (Single (car cs)) concat) union)] ))))) | |
;; | |
;; sandbox | |
;; | |
(define a (Single #\a)) | |
(define b (Single #\b)) | |
(define c (Single #\c)) | |
(fa-prettyprint a "a") | |
(fa-prettyprint b "b") | |
(fa-prettyprint c "c") | |
(define a* (Kleene* a)) | |
(define a+ (Kleene+ a)) | |
(fa-prettyprint a* "a*") | |
(fa-prettyprint a+ "a+" ) | |
(fa-prettyprint (NFA->DFA a*) "a*") | |
(fa-prettyprint (NFA->DFA a+) "a+") | |
(define ab (Concat a b)) | |
(define a+b (Union a b)) | |
(fa-prettyprint ab "ab") | |
(fa-prettyprint a+b "a+b") | |
(define abc (Concat a b c)) | |
(define a+b+c (Union a b c)) | |
(fa-prettyprint abc "abc") | |
(fa-prettyprint a+b+c "a+b+c") | |
(fa-prettyprint (NFA->DFA a+b+c) "a+b+c") | |
(fa-draw a* "a*" "a-star.gif") | |
(fa-draw a+ "a+" "a-plus.gif") | |
(fa-draw abc "abc" "abc.gif") | |
(fa-draw a+b+c "a+b+c" "a+b+c.gif") | |
(fa-draw (NFA->DFA a*) "DFA for a*" "a-star-dfa.gif") | |
(fa-draw (NFA->DFA a+) "DFA for a+" "a-plus-dfa.gif") | |
(fa-draw (NFA->DFA abc) "DFA for abc" "abc-dfa.gif") | |
(fa-draw (NFA->DFA a+b+c) "DFA for a+b+c" "a+b+c-dfa.gif") | |
(define _a+b+c_* (Kleene* a+b+c)) | |
(define _a+b+c_+ (Kleene+ a+b+c)) | |
(fa-draw _a+b+c_* "(a+b+c)*" "a+b+c-star.gif") | |
(fa-draw (NFA->DFA _a+b+c_*) "DFA for (a+b+c)*" "a+b+c-star-dfa.gif") | |
(fa-draw _a+b+c_+ "(a+b+c)+" "a+b+c-plus.gif") | |
(fa-draw (NFA->DFA _a+b+c_+) "DFA for (a+b+c)+" "a+b+c-plus-dfa.gif") | |
(define rx1 (Concat (Kleene* (Union (Single 1) (Single 0))) | |
(Single 1))) ;; "(1+0)*1" | |
(fa-draw rx1 "(1+0)*1" "rx1.gif") | |
(fa-draw (NFA->DFA rx1) "DFA for (1+0)*1" "rx1-dfa.gif") | |
(define rx2 (Concat (Kleene* (Union (Single 1) (Single 0))) | |
(Single 1) | |
(Kleene* (Union (Single 1) (Single 0))))) ;; "(1+0)*1(1+0)*" | |
(fa-draw rx2 "(1+0)*1(1+0)*" "rx2.gif") | |
(fa-draw (NFA->DFA rx2) "DFA for (1+0)*1(1+0)*" "rx2-dfa.gif") | |
(fa-draw (string->NFA "abc") "abc" "abc.gif") | |
(fa-draw (string->NFA "a*b") "a*b" "a-star-b.gif") | |
(fa-draw (string->NFA "(a+b)(c+d)") "(a+b)(c+d)" "abcd.gif") | |
(fa-draw (string->NFA "(a+b)(c+d)") "(a+b)(c+d)" "abcd.gif") | |
(fa-draw (NFA->DFA (string->NFA "(a+b)(c+d)")) "DFA for (a+b)(c+d)" "abcd-dfa.gif") | |
(fa-draw (string->NFA "(a+b)*c(d+e)") "(a+b)*c(d+e)" "abcde1.gif") | |
(fa-draw (NFA->DFA (string->NFA "(ab+cd)*ef(gh+ij)")) "(ab+cd)*ef(gh+ij)" "abcde2.gif") | |
(fa-draw (regexp->NFA "abc") "abc" "abc.gif") | |
(fa-draw (regexp->NFA "a*b") "a*b" "a-star-b.gif") | |
(fa-draw (regexp->NFA "[ab][cd]") "[ab][cd]" "abcd.gif") | |
(fa-draw (regexp->NFA "[a-z]?") "[a-z]?" "a-z.gif") | |
(fa-draw (regexp->NFA "[ab]*c[de]") "[ab]*c[de]" "abcde1.gif") | |
(fa-draw (NFA->DFA (regexp->NFA "(ab|cd)*ef(gh|ij)")) "(ab|cd)*ef(gh|ij)" "abcde2.gif") | |
(set! *ullman* #t) | |
(fa-draw (string->NFA "a*") "a*" "a-star-ullman.gif") | |
(set! *ullman* #f) | |
(fa-draw (string->NFA "a*") "a*" "a-star-aiken.gif") | |
(fa-draw (string->NFA "(a+b)*c") "(a+b)*c" "abc1.gif") | |
(fa-draw (regexp->NFA "[ab]*c") "[ab]*c" "abc2.gif") | |
(fa-draw (regexp->NFA "[ab][c-e]") "[ab][c-e]" "abc-e.gif") | |
(fa-draw (NFA->DFA (regexp->NFA "[ab][c-e]")) "DFA for [ab][c-e]" "abc-e-dfa.gif") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment