Skip to content

Instantly share code, notes, and snippets.

@sonota88
Created February 27, 2011 14:54
Show Gist options
  • Save sonota88/846231 to your computer and use it in GitHub Desktop.
Save sonota88/846231 to your computer and use it in GitHub Desktop.
Emacs Lisp for counting the characters in text that mixes Japanese zenkaku (full-width) and hankaku (half-width) characters.
(defun asciip (char-code)
  "Return t if CHAR-CODE is a printable ASCII character code (32 through 126).
Codes below 32 (control characters such as TAB and newline) and codes
above 126 yield nil."
  (not (or (< char-code 32)
           (> char-code 126))))
;; (asciip ?a) ; => t
;; (asciip ?あ) ; => nil
;; see also: japan-util.el
;; List of character codes.
(defconst japanese-hankaku-chars
  ;; Built from code points instead of a string literal: a literal of
  ;; half-width kana is silently turned into full-width kana by Unicode
  ;; compatibility normalization, after which full-width katakana would
  ;; be classified as half-width.
  ;;
  ;; U+FF61..U+FF9F is the half-width CJK punctuation and katakana range
  ;; (ideographic full stop, corner brackets, comma, middle dot, WO,
  ;; small kana, prolonged sound mark, A..N, voiced and semi-voiced
  ;; sound marks).  The backtick is retained from the original list.
  (append (number-sequence #xFF61 #xFF9F)
          (list ?\`))
  "Character codes of the Japanese characters counted as half-width (hankaku).
Covers the half-width katakana and punctuation range U+FF61..U+FF9F,
plus the backtick.  Use `memq' (or `japanese-hankaku-p') to test
membership; the order of the elements is not significant.")
;; Put any other characters that should be treated as half-width here.
;; List of character codes, e.g. the result of (string-to-list "...").
(defvar misc-hankaku-chars nil
  "Additional character codes to count as half-width (hankaku).
Empty by default.")
(defun japanese-hankaku-p (char-code)
  "Return non-nil if CHAR-CODE is an element of `japanese-hankaku-chars'.
The value is the result of `memq': the tail of the list starting at
CHAR-CODE, or nil when CHAR-CODE is not present."
  (memq char-code japanese-hankaku-chars))
(defun misc-hankaku-p (char-code)
  "Return non-nil if CHAR-CODE is an element of `misc-hankaku-chars'.
The value is the result of `memq': the tail of the list starting at
CHAR-CODE, or nil when CHAR-CODE is not present."
  (memq char-code misc-hankaku-chars))
;; (japanese-hankaku-p ?ョ) ; => nil
;; (japanese-hankaku-p ?」) ; => (65379)
(defun zenkakup (char-code)
  "Return t if CHAR-CODE is to be counted as a full-width (zenkaku) character.
A character is full-width when it is rejected by all of `asciip',
`japanese-hankaku-p' and `misc-hankaku-p'; otherwise return nil."
  (not (or (asciip char-code)
           (japanese-hankaku-p char-code)
           (misc-hankaku-p char-code))))
;; (zenkakup ?a) ; => nil
;; (zenkakup ?あ) ; => t
(defun count-chars-as-zenkaku-in-string (str)
  "Return the length of STR measured in full-width (zenkaku) units.
Each character accepted by `zenkakup' counts as 1 and every other
character counts as 0.5.  The total is rounded up to an integer."
  ;; Tally in half units so the arithmetic stays in integers: a
  ;; full-width character is worth 2 half units, anything else 1.
  (let ((half-units 0))
    (dotimes (idx (length str))
      (setq half-units
            (+ half-units
               (if (zenkakup (aref str idx)) 2 1))))
    ;; Convert half units to full units, rounding up.
    (/ (1+ half-units) 2)))
;; (count-chars-as-zenkaku-in-string "") ; => 0 (#o0, #x0)
;; (count-chars-as-zenkaku-in-string "a") ; => 1 (#o1, #x1)
;; (count-chars-as-zenkaku-in-string "ab") ; => 1 (#o1, #x1)
;; (count-chars-as-zenkaku-in-string "abc") ; => 2 (#o2, #x2)
;; (count-chars-as-zenkaku-in-string "123") ; => 2 (#o2, #x2)
;; (count-chars-as-zenkaku-in-string "a b c ") ; => 3 (#o3, #x3)
;; (count-chars-as-zenkaku-in-string "abあ") ; => 2 (#o2, #x2)
;; (count-chars-as-zenkaku-in-string "abcあ") ; => 3 (#o3, #x3)
;; (count-chars-as-zenkaku-in-string "ab(あ[い]う)") ; => 6 (#o6, #x6)
;; (count-chars-as-zenkaku-in-string "ジャンパー") ; => 4 (#o4, #x4)
(defun count-chars-as-zenkaku (begin end)
  "Return the size of the buffer text between BEGIN and END in full-width units.
Each character accepted by `zenkakup' counts as 1, every other
character counts as 0.5, and newlines are not counted.  The total is
rounded up to an integer.

BEGIN and END are buffer positions and may be given in either order.
Any part of the range outside the accessible portion of the buffer is
ignored."
  ;; Order and clamp the bounds.  Outside [point-min, point-max)
  ;; `char-after' returns nil, which the numeric comparison in
  ;; `asciip' would reject with a wrong-type-argument error.
  (let ((pos (max (min begin end) (point-min)))
        (limit (min (max begin end) (point-max)))
        ;; Tally in half units to keep the arithmetic in integers:
        ;; full-width = 2 half units, half-width = 1.
        (half-units 0)
        ch)
    (while (< pos limit)
      ;; Read the character once per position.
      (setq ch (char-after pos))
      (cond
       ;; Newlines contribute nothing.
       ((eq ch ?\n))
       ((zenkakup ch)
        (setq half-units (+ half-units 2)))
       (t
        (setq half-units (1+ half-units))))
      (setq pos (1+ pos)))
    ;; Convert half units to full units, rounding up.
    (/ (1+ half-units) 2)))
(defun count-japanese-chars (begin end)
  "Display in the echo area the size of the region in full-width units.
When called interactively, BEGIN and END are the bounds of the region.
The number shown is the value of `count-chars-as-zenkaku'."
  (interactive "r")
  (let ((width (count-chars-as-zenkaku begin end)))
    (message "%d" width)))
;; (global-set-key (kbd "C-M-=") 'count-japanese-chars)
;; (defun count-lines-and-chars ()
;; (if mark-active
;; (format "%d jchars"
;; (count-chars-as-zenkaku (region-beginning) (region-end))
;; )
;; ""))
;; (add-to-list 'default-mode-line-format
;; '(:eval (count-lines-and-chars)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment