Created
February 27, 2011 14:54
-
-
Save sonota88/846231 to your computer and use it in GitHub Desktop.
Emacs Lisp to count Japanese zenkaku/hankaku character mixed string.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defun asciip (char-code)
  "Return t when CHAR-CODE is a printable ASCII character, nil otherwise.
Printable ASCII is the inclusive range 32 (space) through 126 (tilde)."
  (when (>= char-code 32)
    (<= char-code 126)))
;; (asciip ?a) ; => t | |
;; (asciip ?あ) ; => nil | |
;; See also: japan-util.el
;; A list of character codes.
(defconst japanese-hankaku-chars
  ;; Built from code points rather than a string literal so that the
  ;; half-width characters cannot be silently converted to their
  ;; full-width equivalents by Unicode normalization of this file.
  (append (number-sequence #xFF61 #xFF9F)
          (list ?\`))
  "Character codes of Japanese half-width (hankaku) characters.
Contains every code point from U+FF61 through U+FF9F (half-width
katakana, the half-width sound marks, the prolonged sound mark and
half-width Japanese punctuation), plus the backtick character.
Only membership is meaningful; the order of the list is not.")
;; Put any other characters that should be treated as half-width here.
;; A list of character codes.
(defvar misc-hankaku-chars
  (string-to-list "")
  "Additional character codes to count as half-width.
Empty by default; add characters to the string literal to extend it.")
(defun japanese-hankaku-p (char-code)
  "Return non-nil when CHAR-CODE is listed in `japanese-hankaku-chars'.
The non-nil value is the tail of that list starting at CHAR-CODE, as
returned by `memq'; nil means CHAR-CODE is not in the list."
  (memq char-code japanese-hankaku-chars))
(defun misc-hankaku-p (char-code)
  "Return non-nil when CHAR-CODE is listed in `misc-hankaku-chars'.
The non-nil value is the tail of that list starting at CHAR-CODE, as
returned by `memq'; nil means CHAR-CODE is not in the list."
  (memq char-code misc-hankaku-chars))
;; (japanese-hankaku-p ?ョ) ; => nil | |
;; (japanese-hankaku-p ?」) ; => (65379) | |
(defun zenkakup (char-code)
  "Return t when CHAR-CODE counts as a full-width (zenkaku) character.
A character is full-width when it is none of: printable ASCII
\(`asciip'), a Japanese half-width character (`japanese-hankaku-p'),
or a user-registered half-width character (`misc-hankaku-p').
Returns nil otherwise."
  (not (or (asciip char-code)
           (japanese-hankaku-p char-code)
           (misc-hankaku-p char-code))))
;; (zenkakup ?a) ; => nil | |
;; (zenkakup ?あ) ; => t | |
(defun count-chars-as-zenkaku-in-string (str)
  "Return the length of STR measured in full-width character units.
Each character for which `zenkakup' is non-nil counts as 1 and every
other character counts as 0.5.  The total is rounded up to an integer."
  (let ((width 0))
    (dotimes (index (length str))
      (let ((unit (if (zenkakup (aref str index)) 1 0.5)))
        (setq width (+ width unit))))
    ;; Round up so that a trailing lone half-width character still
    ;; occupies a whole unit.
    (ceiling width)))
;; (count-chars-as-zenkaku-in-string "") ; => 0 (#o0, #x0) | |
;; (count-chars-as-zenkaku-in-string "a") ; => 1 (#o1, #x1) | |
;; (count-chars-as-zenkaku-in-string "ab") ; => 1 (#o1, #x1) | |
;; (count-chars-as-zenkaku-in-string "abc") ; => 2 (#o2, #x2) | |
;; (count-chars-as-zenkaku-in-string "123") ; => 2 (#o2, #x2) | |
;; (count-chars-as-zenkaku-in-string "a b c ") ; => 3 (#o3, #x3) | |
;; (count-chars-as-zenkaku-in-string "abあ") ; => 2 (#o2, #x2) | |
;; (count-chars-as-zenkaku-in-string "abcあ") ; => 3 (#o3, #x3) | |
;; (count-chars-as-zenkaku-in-string "ab(あ[い]う)") ; => 6 (#o6, #x6) | |
;; (count-chars-as-zenkaku-in-string "ジャンパー") ; => 4 (#o4, #x4) | |
(defun count-chars-as-zenkaku (begin end)
  "Return the size of the buffer text between BEGIN and END in full-width units.
Newlines count as 0, characters for which `zenkakup' is non-nil count
as 1, and every other character counts as 0.5.  The total is rounded
up to an integer.

BEGIN and END may be given in either order.  Positions outside the
accessible portion of the buffer are clamped to it, so the function
never inspects a position that has no character."
  (let* ((start (max (point-min) (min begin end)))
         (limit (min (point-max) (max begin end)))
         (pos start)
         (zenkaku-count 0)
         char-code)
    (while (< pos limit)
      ;; Read the character once; it is non-nil because POS is always
      ;; inside the accessible region and strictly before LIMIT.
      (setq char-code (char-after pos))
      (setq zenkaku-count
            (+ zenkaku-count
               (cond
                ((eq char-code ?\n) 0)
                ((zenkakup char-code) 1)
                (t 0.5))))
      (setq pos (1+ pos)))
    ;; Round up.
    (ceiling zenkaku-count)))
(defun count-japanese-chars (begin end)
  "Display the full-width character count of the region in the echo area.
When called interactively, BEGIN and END are the bounds of the region.
The count is computed by `count-chars-as-zenkaku'."
  (interactive "r")
  (let ((total (count-chars-as-zenkaku begin end)))
    (message "%d" total)))
;; (global-set-key (kbd "C-M-=") 'count-japanese-chars) | |
;; (defun count-lines-and-chars () | |
;; (if mark-active | |
;; (format "%d jchars" | |
;; (count-chars-as-zenkaku (region-beginning) (region-end))
;; ) | |
;; "")) | |
;; (add-to-list 'default-mode-line-format | |
;; '(:eval (count-lines-and-chars))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment