Skip to content

Instantly share code, notes, and snippets.

@kosh04
Created November 25, 2009 13:22
Show Gist options
  • Save kosh04/242697 to your computer and use it in GitHub Desktop.
Save kosh04/242697 to your computer and use it in GitHub Desktop.
iconv library for newLISP
;;; -*- mode:newlisp; coding:utf-8 -*-
;; @module iconv.lsp
;; @description Yet Another Iconv Library
;; @version 0.5 初版
;; @version 0.6 Windows(DLL)でも使えるように
;; @version 0.7 関数を増やした
;; @version 0.8 変換後のNULL文字に対応したつもり
;; @version 0.8b newlisp_sjisでのバッファあふれ修正
;; @version 0.8c Rename iconv-handler -> call-with-iconv-descriptor
;; @version 0.8d a few modified.
;; @version 0.9 SunOS 5.10 sparc にて動作テスト。
;; ロード時にIconv:initを呼び出すように変更
;; @version 0.9b Tested FreeBSD 8.1
;; @version 0.9c Tested CYGWIN_NT-5.1
;; @author KOBAYASHI Shigeru <shigeru.kb[at]gmail.com>, 2009-2011
;; @location https://raw.github.com/gist/242697
;; @example
;; (load "iconv.lsp")
;; ;(Iconv:init)
;;
;; $ echo -n 'おはよう、朝だよ!' | iconv -t EUC-JP > euc.txt
;; (write-file "euc.txt" (Iconv:encode "おはよう、朝だよ!" "EUC-JP"))
;;
;; $ cat euc.txt | iconv -f EUC-JP
;; (Iconv:decode (read-file "euc.txt") "EUC-JP")
;; => "おはよう、朝だよ!"
;;
;; (let ((str "\xa3\xb1\xa1\xdc\xa3\xb1\xa1\xe1\xa3\xb2"))
;; (Iconv:decode str "EUC-JP"))
;; => "1+1=2"
;;
;; (define (my-unicode str)
;; (Iconv:convert str "UTF-8" "UTF-32LE"))
;; (my-unicode "new") => "n\000\000\000e\000\000\000w\000\000\000"
;;
;; (define (my-utf8 str)
;; (Iconv:convert str "UTF-32LE" "UTF-8"))
;; (my-utf8 (unicode "new")) => "new\000"
;; (my-utf8 (my-unicode "new")) => "new"
;; @KnownBugs
;; 端末以外から利用すると正しく表示されないかもしれない
;; @TODO
;; (! "iconv --list") list all known coded character sets
;; メモリ不足を避けるために分割して変換する関数も欲しい
;; 変換用に用意するバッファのサイズが適当過ぎる
;; ポインタ変数の分かりやすい表記方法があれば取り込みたい (p_str, *str)
;; iconv/libiconv を区別する方法
;; エラーを投げるよりも無理矢理変換する方が良い?
;;; Code:
(context 'Iconv)
; See man 3 iconv.
;
; size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
; iconv_t iconv_open(const char *tocode, const char *fromcode);
; int iconv_close(iconv_t cd);
;; NOTE:
;; - KaoriYa.net provides "iconv.dll"
;; - GnuWin32 provides "libiconv2.dll"
(define libiconv-lib
(case ostype
("Win32" "iconv.dll") ; or "libiconv.dll" "libiconv2.dll"
("Cygwin" "cygiconv-2.dll")
("Linux" "libc.so.6") ; Ubuntu 9.04
("SunOS" "libc.so.1") ; SunOS 5.10
("BSD" "libiconv.so") ; FreeBSD 8.1
(true "libc.so.6")))
;; @syntax (Iconv:init [<library-name>])
;; @return true (but not meaningful)
;; Loadup iconv library functions.
;;
;; @example
;; (Iconv:init)
;; (Iconv:init "C:/usr/lib/libiconv.dll") ; specifies library pathname
(define (init (libname nil))
(when libname
(setq libiconv-lib libname))
(cond
((member ostype '("Win32" "Cygwin"))
(define iconv (import libiconv-lib "libiconv"))
(define iconv_open (import libiconv-lib "libiconv_open"))
(define iconv_close (import libiconv-lib "libiconv_close")))
(true
(define iconv (import libiconv-lib "iconv"))
(define iconv_open (import libiconv-lib "iconv_open"))
(define iconv_close (import libiconv-lib "iconv_close"))))
true)
(define newlisp-encoding
(if (primitive? unicode) "UTF-8" "Shift_JIS"))
(define (error)
(throw-error (apply format (args))))
;; @syntax (unwind-protect <protected-form> <cleanup-form*>)
;; @return the value of <protected-form>.
;; @location http://www.lispworks.com/documentation/HyperSpec/Body/s_unwind.htm
;; evaluates protected-form and guarantees that cleanup-forms are executed
;; before unwind-protect exits, whether it terminates normally or is
;; aborted by a control transfer of some kind.
(letex ((result (sym (uuid))))
(define-macro (unwind-protect )
(local (result)
(if (catch (eval (args 0)) 'result)
(begin (map eval (1 (args))) result)
(begin (map eval (1 (args))) (throw-error (5 result))))))
)
(define (call-with-iconv-descriptor proc fromcode tocode)
(let ((cd (iconv_open tocode fromcode)))
(if (= cd -1)
(error "iconv_open: %s" (last (sys-error))))
(unwind-protect
(proc cd)
(if (= (iconv_close cd) -1)
(error "iconv_close: %s" (last (sys-error)))))))
(if (= (& (sys-info -1) 0x100) 0x100) ; 64-bit?
(define void* "Lu")
(define void* "lu"))
(define (convert-1 cd inbuf)
(iconv cd 0 0 0 0)
(letn (;; source buffer
(src inbuf)
(**src (pack void* (address src)))
(src_len (length src))
(*src_len (pack void* src_len))
;; distribute buffer
;; FIXME: もうちょっと使い勝手の良いメモリ領域の確保ができるはず
(dst (dup "\000\000\000\000" (+ (* 2 src_len) 4)))
(**dst (pack void* (address dst)))
(dst_len (- (length dst) 1))
(*dst_len (pack void* dst_len))
result)
;; Do iconv convert
(setf result (iconv cd **src *src_len **dst *dst_len))
(if (= result -1)
(error "iconv: %s" (last (sys-error))))
;; NOTE: The converted string may contain null characters.
(slice dst 0 (- dst_len (first (unpack void* *dst_len))))))
;; @syntax (Iconv:convert <string> <fromcode> <tocode>)
;; @return Returns the converted string <fromcode> to <tocode>.
(define (convert str fromcode tocode)
"Convert string FROMCODE to TOCODE."
(call-with-iconv-descriptor (lambda (cd)
(convert-1 cd str))
(or fromcode newlisp-encoding)
(or tocode newlisp-encoding)))
;; @syntax (Iconv:encode <string> <tocode>)
;; @return Returns the converted string internal to <tocode>.
(define (encode str tocode)
"Convert string internal to TOCODE."
(convert str newlisp-encoding tocode))
;; @syntax (Iconv:decode <string> <fromcode>)
;; @return Returns the converted string <fromcode> to internal.
(define (decode str fromcode)
"Convert string FROMCODE to internal."
(convert str fromcode newlisp-encoding))
;; Shift_JIS
;; EUC-JP
;; ISO-2022-JP
;; UTF-8
;; ISO-8859-1
;; ISO-8859-15
;; WINDOWS-1252
(or (catch (Iconv:init) 'init-result)
(write 2 "WARNING: iconv.lsp initialize error\n"))
(context MAIN)
;;; EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment