Created
September 1, 2011 08:14
-
-
Save llibra/1185691 to your computer and use it in GitHub Desktop.
Sunday Quick Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; char-code-limit - 1の大きさの配列を表として使うバージョン | |
;; 0x10ffffという巨大な表を作るため、空間効率が非常に悪い | |
;; 表を外部に出せば、同じパターンを繰り返し検索する場合には効果的と思われるが、 | |
;; 検索する回数が少ないと元を取れない | |
(defun quick-search/array (string-x string-y) | |
(declare (optimize speed (debug 0) (safety 0)) | |
(type simple-string string-x string-y)) | |
(let* ((length-x (length string-x)) | |
(length-y (length string-y)) | |
(boundary (- length-y length-x))) | |
(declare (type fixnum length-x length-y)) | |
(labels ((compare (start) | |
(dotimes (n length-x t) | |
(unless (eql (aref string-x n) (aref string-y (+ start n))) | |
(return nil)))) | |
(make-shift-table () | |
(do ((n 0 (1+ n)) | |
(table (make-array #.(1- char-code-limit) | |
:element-type 'fixnum | |
:initial-element (1+ length-x)))) | |
((= n length-x) table) | |
(setf (aref table (char-code (aref string-x n))) | |
(- length-x n)))) | |
(shift-length (char table) | |
(aref table (char-code char)))) | |
(do* ((table (make-shift-table)) | |
(n 0 (+ n (shift-length (aref string-y next) table))) | |
(next (+ n length-x) (+ n length-x))) | |
((> n boundary) nil) | |
(declare (type fixnum n)) | |
(when (compare n) (return n)))))) | |
;; ハッシュテーブルを表として使うバージョン | |
;; メモリ消費量は少ないが、実行時のハッシュテーブル参照にコストがかかる | |
(defun quick-search/hash (string-x string-y) | |
(declare (optimize speed (debug 0) (safety 0)) | |
(type simple-string string-x string-y)) | |
(let* ((length-x (length string-x)) | |
(length-y (length string-y)) | |
(boundary (- length-y length-x))) | |
(declare (type fixnum length-x length-y)) | |
(labels ((compare (start) | |
(dotimes (n length-x t) | |
(unless (eql (aref string-x n) (aref string-y (+ start n))) | |
(return nil)))) | |
(make-shift-table () | |
(do ((n 0 (1+ n)) | |
(table (make-hash-table))) | |
((= n length-x) table) | |
(setf (gethash (char-code (aref string-x n)) table) | |
(- length-x n)))) | |
(shift-length (char table) | |
(let* ((code (char-code char)) | |
(value (gethash code table))) | |
(the fixnum (if value value (1+ length-x)))))) | |
(do* ((table (make-shift-table)) | |
(n 0 (+ n (shift-length (aref string-y next) table))) | |
(next (+ n length-x) (+ n length-x))) | |
((> n boundary) nil) | |
(declare (type fixnum n)) | |
(when (compare n) (return n)))))) | |
;; 最下位オクテットだけに注目した表を作るバージョン | |
;; 配列を使うが、大きさは256なのでメモリ消費が少なく、参照のコストも低い | |
;; シフトするときに本来より移動量が少なくなる可能性がある折衷案 | |
(defun quick-search/fuzzy (string-x string-y) | |
(declare (optimize speed (debug 0) (safety 0)) | |
(type simple-string string-x string-y)) | |
(let* ((length-x (length string-x)) | |
(length-y (length string-y)) | |
(boundary (- length-y length-x))) | |
(declare (type fixnum length-x length-y)) | |
(labels ((compare (start) | |
(dotimes (n length-x t) | |
(unless (eql (aref string-x n) (aref string-y (+ start n))) | |
(return nil)))) | |
(lowest-octet (i) | |
(logand i #xff)) | |
(make-shift-table () | |
(do ((n 0 (1+ n)) | |
(table (make-array 256 :element-type 'fixnum | |
:initial-element (1+ length-x)))) | |
((= n length-x) table) | |
(let ((code (char-code (aref string-x n)))) | |
(setf (aref table (lowest-octet code)) | |
(- length-x n))))) | |
(shift-length (char table) | |
(aref table (lowest-octet (char-code char))))) | |
(do* ((table (make-shift-table)) | |
(n 0 (+ n (shift-length (aref string-y next) table))) | |
(next (+ n length-x) (+ n length-x))) | |
((> n boundary) nil) | |
(declare (type fixnum n)) | |
(when (compare n) (return n)))))) | |
;; 総当たり | |
(defun brute-force (string-x string-y) | |
(declare (optimize speed (debug 0) (safety 0)) | |
(type simple-string string-x string-y)) | |
(let* ((length-x (length string-x)) | |
(length-y (length string-y)) | |
(boundary (- length-y length-x))) | |
(declare (type fixnum length-x length-y)) | |
(labels ((compare (start) | |
(dotimes (n length-x t) | |
(unless (eql (aref string-x n) (aref string-y (+ start n))) | |
(return nil))))) | |
(dotimes (n (1+ boundary) nil) | |
(when (compare n) (return n)))))) | |
;; SBCL 1.0.50での計測結果 | |
;; 郵便番号データ(http://www.post.japanpost.jp/zipcode/download.html) | |
;; UTF-8/LFに変換したものをサンプルデータに使う | |
(defparameter *csv* | |
(with-open-file (s "13tokyo.csv" :external-format :utf-8) | |
(let ((buf (make-string (file-length s)))) | |
(read-sequence buf s) | |
buf))) | |
;; 短い文字列の検索 | |
(progn | |
(time (loop for n below 100 | |
maximize (quick-search/array "三宅島" *csv*))) | |
(time (loop for n below 100 | |
maximize (quick-search/hash "三宅島" *csv*))) | |
(time (loop for n below 100 | |
maximize (quick-search/fuzzy "三宅島" *csv*))) | |
(time (loop for n below 100 | |
maximize (brute-force "三宅島" *csv*))) | |
(time (loop for n below 100 | |
maximize (search "三宅島" *csv*)))) | |
#| | |
Evaluation took: | |
1.015 seconds of real time | |
0.984375 seconds of total run time (0.703125 user, 0.281250 system) | |
[ Run times consist of 0.310 seconds GC time, and 0.675 seconds non-GC time. ] | |
96.95% CPU | |
2,375,367,484 processor cycles | |
445,645,600 bytes consed | |
Evaluation took: | |
0.563 seconds of real time | |
0.546875 seconds of total run time (0.546875 user, 0.000000 system) | |
97.16% CPU | |
1,333,861,536 processor cycles | |
65,464 bytes consed | |
Evaluation took: | |
0.125 seconds of real time | |
0.125000 seconds of total run time (0.125000 user, 0.000000 system) | |
100.00% CPU | |
285,249,090 processor cycles | |
95,952 bytes consed | |
Evaluation took: | |
0.312 seconds of real time | |
0.312500 seconds of total run time (0.312500 user, 0.000000 system) | |
100.00% CPU | |
722,638,280 processor cycles | |
0 bytes consed | |
Evaluation took: | |
1.391 seconds of real time | |
1.390625 seconds of total run time (1.390625 user, 0.000000 system) | |
100.00% CPU | |
3,223,313,660 processor cycles | |
0 bytes consed | |
|# | |
;; 長い文字列の検索 | |
(progn | |
(time (loop for n below 100 | |
maximize (quick-search/array "江戸川(1〜3丁目、4丁目1〜14番)" | |
*csv*))) | |
(time (loop for n below 100 | |
maximize (quick-search/hash "江戸川(1〜3丁目、4丁目1〜14番)" | |
*csv*))) | |
(time (loop for n below 100 | |
maximize (quick-search/fuzzy "江戸川(1〜3丁目、4丁目1〜14番)" | |
*csv*))) | |
(time (loop for n below 100 | |
maximize (brute-force "江戸川(1〜3丁目、4丁目1〜14番)" | |
*csv*))) | |
(time (loop for n below 100 | |
maximize (search "江戸川(1〜3丁目、4丁目1〜14番)" *csv*)))) | |
#| | |
Evaluation took: | |
0.953 seconds of real time | |
0.953125 seconds of total run time (0.703125 user, 0.250000 system) | |
[ Run times consist of 0.340 seconds GC time, and 0.614 seconds non-GC time. ] | |
100.00% CPU | |
2,242,954,602 processor cycles | |
445,645,600 bytes consed | |
Evaluation took: | |
0.094 seconds of real time | |
0.093750 seconds of total run time (0.093750 user, 0.000000 system) | |
100.00% CPU | |
202,415,262 processor cycles | |
65,400 bytes consed | |
Evaluation took: | |
0.015 seconds of real time | |
0.015625 seconds of total run time (0.015625 user, 0.000000 system) | |
106.67% CPU | |
42,310,968 processor cycles | |
96,216 bytes consed | |
Evaluation took: | |
0.172 seconds of real time | |
0.171875 seconds of total run time (0.171875 user, 0.000000 system) | |
100.00% CPU | |
406,886,326 processor cycles | |
0 bytes consed | |
Evaluation took: | |
0.828 seconds of real time | |
0.812500 seconds of total run time (0.812500 user, 0.000000 system) | |
98.07% CPU | |
1,931,473,950 processor cycles | |
0 bytes consed | |
|# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment