Created
November 25, 2009 13:22
-
-
Save kosh04/242697 to your computer and use it in GitHub Desktop.
iconv library for newLISP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; -*- mode:newlisp; coding:utf-8 -*- | |
;; @module iconv.lsp | |
;; @description Yet Another Iconv Library | |
;; @version 0.5 初版 | |
;; @version 0.6 Windows(DLL)でも使えるように | |
;; @version 0.7 関数を増やした | |
;; @version 0.8 変換後のNULL文字に対応したつもり | |
;; @version 0.8b newlisp_sjisでのバッファあふれ修正 | |
;; @version 0.8c Rename iconv-handler -> call-with-iconv-descriptor | |
;; @version 0.8d a few modified. | |
;; @version 0.9 SunOS 5.10 sparc にて動作テスト。 | |
;; ロード時にIconv:initを呼び出すように変更 | |
;; @version 0.9b Tested FreeBSD 8.1 | |
;; @version 0.9c Tested CYGWIN_NT-5.1 | |
;; @author KOBAYASHI Shigeru <shigeru.kb[at]gmail.com>, 2009-2011 | |
;; @location https://raw.github.com/gist/242697 | |
;; @example | |
;; (load "iconv.lsp") | |
;; ;(Iconv:init) | |
;; | |
;; $ echo -n 'おはよう、朝だよ!' | iconv -t EUC-JP > euc.txt | |
;; (write-file "euc.txt" (Iconv:encode "おはよう、朝だよ!" "EUC-JP")) | |
;; | |
;; $ cat euc.txt | iconv -f EUC-JP | |
;; (Iconv:decode (read-file "euc.txt") "EUC-JP") | |
;; => "おはよう、朝だよ!" | |
;; | |
;; (let ((str "\xa3\xb1\xa1\xdc\xa3\xb1\xa1\xe1\xa3\xb2")) | |
;; (Iconv:decode str "EUC-JP")) | |
;; => "1+1=2" | |
;; | |
;; (define (my-unicode str) | |
;; (Iconv:convert str "UTF-8" "UTF-32LE")) | |
;; (my-unicode "new") => "n\000\000\000e\000\000\000w\000\000\000" | |
;; | |
;; (define (my-utf8 str) | |
;; (Iconv:convert str "UTF-32LE" "UTF-8")) | |
;; (my-utf8 (unicode "new")) => "new\000" | |
;; (my-utf8 (my-unicode "new")) => "new" | |
;; @KnownBugs | |
;; 端末以外から利用すると正しく表示されないかもしれない | |
;; @TODO | |
;; (! "iconv --list") list all known coded character sets | |
;; メモリ不足を避けるために分割して変換する関数も欲しい | |
;; 変換用に用意するバッファのサイズが適当過ぎる | |
;; ポインタ変数の分かりやすい表記方法があれば取り込みたい (p_str, *str) | |
;; iconv/libiconv を区別する方法 | |
;; エラーを投げるよりも無理矢理変換する方が良い? | |
;;; Code: | |
(context 'Iconv) | |
; See man 3 iconv. | |
; | |
; size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | |
; iconv_t iconv_open(const char *tocode, const char *fromcode); | |
; int iconv_close(iconv_t cd); | |
;; NOTE: | |
;; - KaoriYa.net provides "iconv.dll" | |
;; - GnuWin32 provides "libiconv2.dll" | |
(define libiconv-lib | |
(case ostype | |
("Win32" "iconv.dll") ; or "libiconv.dll" "libiconv2.dll" | |
("Cygwin" "cygiconv-2.dll") | |
("Linux" "libc.so.6") ; Ubuntu 9.04 | |
("SunOS" "libc.so.1") ; SunOS 5.10 | |
("BSD" "libiconv.so") ; FreeBSD 8.1 | |
(true "libc.so.6"))) | |
;; @syntax (Iconv:init [<library-name>]) | |
;; @return true (but not meaningful) | |
;; Loadup iconv library functions. | |
;; | |
;; @example | |
;; (Iconv:init) | |
;; (Iconv:init "C:/usr/lib/libiconv.dll") ; specifies library pathname | |
(define (init (libname nil)) | |
(when libname | |
(setq libiconv-lib libname)) | |
(cond | |
((member ostype '("Win32" "Cygwin")) | |
(define iconv (import libiconv-lib "libiconv")) | |
(define iconv_open (import libiconv-lib "libiconv_open")) | |
(define iconv_close (import libiconv-lib "libiconv_close"))) | |
(true | |
(define iconv (import libiconv-lib "iconv")) | |
(define iconv_open (import libiconv-lib "iconv_open")) | |
(define iconv_close (import libiconv-lib "iconv_close")))) | |
true) | |
(define newlisp-encoding | |
(if (primitive? unicode) "UTF-8" "Shift_JIS")) | |
(define (error) | |
(throw-error (apply format (args)))) | |
;; @syntax (unwind-protect <protected-form> <cleanup-form*>) | |
;; @return the value of <protected-form>. | |
;; @location http://www.lispworks.com/documentation/HyperSpec/Body/s_unwind.htm | |
;; evaluates protected-form and guarantees that cleanup-forms are executed | |
;; before unwind-protect exits, whether it terminates normally or is | |
;; aborted by a control transfer of some kind. | |
(letex ((result (sym (uuid)))) | |
(define-macro (unwind-protect ) | |
(local (result) | |
(if (catch (eval (args 0)) 'result) | |
(begin (map eval (1 (args))) result) | |
(begin (map eval (1 (args))) (throw-error (5 result)))))) | |
) | |
(define (call-with-iconv-descriptor proc fromcode tocode) | |
(let ((cd (iconv_open tocode fromcode))) | |
(if (= cd -1) | |
(error "iconv_open: %s" (last (sys-error)))) | |
(unwind-protect | |
(proc cd) | |
(if (= (iconv_close cd) -1) | |
(error "iconv_close: %s" (last (sys-error))))))) | |
(if (= (& (sys-info -1) 0x100) 0x100) ; 64-bit? | |
(define void* "Lu") | |
(define void* "lu")) | |
(define (convert-1 cd inbuf) | |
(iconv cd 0 0 0 0) | |
(letn (;; source buffer | |
(src inbuf) | |
(**src (pack void* (address src))) | |
(src_len (length src)) | |
(*src_len (pack void* src_len)) | |
;; distribute buffer | |
;; FIXME: もうちょっと使い勝手の良いメモリ領域の確保ができるはず | |
(dst (dup "\000\000\000\000" (+ (* 2 src_len) 4))) | |
(**dst (pack void* (address dst))) | |
(dst_len (- (length dst) 1)) | |
(*dst_len (pack void* dst_len)) | |
result) | |
;; Do iconv convert | |
(setf result (iconv cd **src *src_len **dst *dst_len)) | |
(if (= result -1) | |
(error "iconv: %s" (last (sys-error)))) | |
;; NOTE: The converted string may contain null characters. | |
(slice dst 0 (- dst_len (first (unpack void* *dst_len)))))) | |
;; @syntax (Iconv:convert <string> <fromcode> <tocode>) | |
;; @return Returns the converted string <fromcode> to <tocode>. | |
(define (convert str fromcode tocode) | |
"Convert string FROMCODE to TOCODE." | |
(call-with-iconv-descriptor (lambda (cd) | |
(convert-1 cd str)) | |
(or fromcode newlisp-encoding) | |
(or tocode newlisp-encoding))) | |
;; @syntax (Iconv:encode <string> <tocode>) | |
;; @return Returns the converted string internal to <tocode>. | |
(define (encode str tocode) | |
"Convert string internal to TOCODE." | |
(convert str newlisp-encoding tocode)) | |
;; @syntax (Iconv:decode <string> <fromcode>) | |
;; @return Returns the converted string <fromcode> to internal. | |
(define (decode str fromcode) | |
"Convert string FROMCODE to internal." | |
(convert str fromcode newlisp-encoding)) | |
;; Shift_JIS | |
;; EUC-JP | |
;; ISO-2022-JP | |
;; UTF-8 | |
;; ISO-8859-1 | |
;; ISO-8859-15 | |
;; WINDOWS-1252 | |
(or (catch (Iconv:init) 'init-result) | |
(write 2 "WARNING: iconv.lsp initialize error\n")) | |
(context MAIN) | |
;;; EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment