Created
June 6, 2017 06:39
-
-
Save oflow/18d536024de7c5e16670398c9c7e041a to your computer and use it in GitHub Desktop.
tmux-2.5でEast Asian Ambiguous Characterを全角文字の幅で表示するやつと半角ボーダーをまとめたやつ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -uprN tmux-2.5/options-table.c tmux-2.5-patched/options-table.c | |
--- tmux-2.5/options-table.c 2017-05-29 16:08:04.000000000 +0900 | |
+++ tmux-2.5-patched/options-table.c 2017-06-06 13:30:20.390849587 +0900 | |
@@ -129,6 +129,14 @@ const struct options_table_entry options | |
.separator = "," | |
}, | |
+#ifndef NO_USE_UTF8CJK /* server flag: when set, utf8_width() takes character widths from mk_wcwidth_cjk() */ | |
+ { .name = "utf8-cjk", | |
+ .type = OPTIONS_TABLE_FLAG, | |
+ .scope = OPTIONS_TABLE_SERVER, | |
+ .default_num = 1 /* table default only; main() sets the option again at startup from the LC_CTYPE locale name */ | |
+ }, | |
+#endif /* !NO_USE_UTF8CJK */ | |
+ | |
{ .name = "assume-paste-time", | |
.type = OPTIONS_TABLE_NUMBER, | |
.scope = OPTIONS_TABLE_SESSION, | |
@@ -298,6 +306,14 @@ const struct options_table_entry options | |
.default_num = 0 | |
}, | |
+#ifndef NO_USE_PANE_BORDER_ASCII /* session flag: when set, tty_acs_get() looks characters up in tty_acs_table_putty */ | |
+ { .name = "pane-border-ascii", | |
+ .type = OPTIONS_TABLE_FLAG, | |
+ .scope = OPTIONS_TABLE_SESSION, | |
+ .default_num = 0 /* table default only; main() sets the option again at startup when utf8-cjk support is compiled in */ | |
+ }, | |
+#endif /* !NO_USE_PANE_BORDER_ASCII */ | |
+ | |
{ .name = "prefix", | |
.type = OPTIONS_TABLE_KEY, | |
.scope = OPTIONS_TABLE_SESSION, | |
diff -uprN tmux-2.5/tmux.c tmux-2.5-patched/tmux.c | |
--- tmux-2.5/tmux.c 2017-05-16 22:18:51.000000000 +0900 | |
+++ tmux-2.5-patched/tmux.c 2017-06-06 13:35:34.550881544 +0900 | |
@@ -189,17 +189,26 @@ main(int argc, char **argv) | |
{ | |
char *path, *label, tmp[PATH_MAX]; | |
char *shellcmd = NULL, **var; | |
+#ifndef NO_USE_UTF8CJK | |
+ char *ctype; | |
+#endif | |
const char *s, *shell; | |
int opt, flags, keys; | |
const struct options_table_entry *oe; | |
+#ifdef NO_USE_UTF8CJK /* without utf8-cjk support the unpatched locale setup is kept: try en_US.UTF-8 first */ | |
if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) { | |
if (setlocale(LC_CTYPE, "") == NULL) | |
+#else /* with utf8-cjk support the locale comes from the environment only, so main() can later test its name for ja/ko/zh */ | |
+ if ((ctype = setlocale(LC_CTYPE, "")) == NULL) | |
+#endif /* NO_USE_UTF8CJK */ | |
errx(1, "invalid LC_ALL, LC_CTYPE or LANG"); | |
s = nl_langinfo(CODESET); | |
if (strcasecmp(s, "UTF-8") != 0 && strcasecmp(s, "UTF8") != 0) | |
errx(1, "need UTF-8 locale (LC_CTYPE) but have %s", s); | |
- } | |
+#ifdef NO_USE_UTF8CJK | |
+ } | |
+#endif | |
setlocale(LC_TIME, ""); | |
tzset(); | |
@@ -327,6 +336,24 @@ main(int argc, char **argv) | |
options_set_number(global_w_options, "mode-keys", keys); | |
} | |
+#ifndef NO_USE_UTF8CJK | |
+ /* | |
+ * Turn "utf8-cjk" and "pane-border-ascii" on for Japanese, Korean and | |
+ * Chinese locales and off for every other locale. The LC_CTYPE name is | |
+ * queried again here because the string returned by the earlier | |
+ * setlocale() call may be overwritten by the setlocale(LC_TIME, "") call. | |
+ */ | |
+ ctype = setlocale(LC_CTYPE, NULL); | |
+ if (ctype != NULL && (!strncmp(ctype, "ja", 2) || !strncmp(ctype, "ko", 2) || | |
+ !strncmp(ctype, "zh", 2))) { | |
+ options_set_number(global_options, "utf8-cjk", 1); | |
+ options_set_number(global_s_options, "pane-border-ascii", 1); | |
+ } else { | |
+ options_set_number(global_options, "utf8-cjk", 0); | |
+ options_set_number(global_s_options, "pane-border-ascii", 0); | |
+ } | |
+#endif | |
+ | |
/* | |
* If socket is specified on the command-line with -S or -L, it is | |
* used. Otherwise, $TMUX is checked and if that fails "default" is | |
diff -uprN tmux-2.5/tmux.h tmux-2.5-patched/tmux.h | |
--- tmux-2.5/tmux.h 2017-05-29 16:12:06.000000000 +0900 | |
+++ tmux-2.5-patched/tmux.h 2017-06-06 13:36:33.795477435 +0900 | |
@@ -60,6 +60,13 @@ struct tmuxproc; | |
#define TMUX_CONF "/etc/tmux.conf" | |
#endif | |
+/* Compiling out "pane-border-ascii" also compiles out "utf8-cjk": the utf8-cjk code sets that option in main() and selects tty_acs_table_putty in tty_acs_get(). */ | |
+#ifdef NO_USE_PANE_BORDER_ASCII | |
+#ifndef NO_USE_UTF8CJK /* guard: do not redefine a macro that is already defined */ | |
+#define NO_USE_UTF8CJK | |
+#endif | |
+#endif /* NO_USE_PANE_BORDER_ASCII */ | |
+ | |
/* | |
* Minimum layout cell size, NOT including separator line. The scroll region | |
* cannot be one line in height so this must be at least two. | |
diff -uprN tmux-2.5/tty-acs.c tmux-2.5-patched/tty-acs.c | |
--- tmux-2.5/tty-acs.c 2017-05-29 16:12:06.000000000 +0900 | |
+++ tmux-2.5-patched/tty-acs.c 2017-06-06 13:41:41.002024650 +0900 | |
@@ -64,6 +64,43 @@ static const struct tty_acs_entry tty_ac | |
{ '~', "\302\267" } /* bullet */ | |
}; | |
+#ifndef NO_USE_PANE_BORDER_ASCII | |
+static const struct tty_acs_entry tty_acs_table_putty[] = { /* ACS table with ASCII pane borders; tty_acs_get() bsearch()es it with nitems(tty_acs_table), so keep it sorted by key and the same length as tty_acs_table */ | |
+ { '+', "\342\206\222" }, /* arrow pointing right */ | |
+ { ',', "\342\206\220" }, /* arrow pointing left */ | |
+ { '-', "\342\206\221" }, /* arrow pointing up */ | |
+ { '.', "\342\206\223" }, /* arrow pointing down */ | |
+ { '0', "\342\226\256" }, /* solid square block */ | |
+ { '`', "\342\227\206" }, /* diamond */ | |
+ { 'a', "\342\226\222" }, /* checker board (stipple) */ | |
+ { 'f', "\302\260" }, /* degree symbol */ | |
+ { 'g', "\302\261" }, /* plus/minus */ | |
+ { 'h', "\342\226\222" }, /* board of squares */ | |
+ { 'i', "\342\230\203" }, /* lantern symbol */ | |
+ { 'j', "+" }, /* lower right corner (ASCII) */ | |
+ { 'k', "+" }, /* upper right corner (ASCII) */ | |
+ { 'l', "+" }, /* upper left corner (ASCII) */ | |
+ { 'm', "+" }, /* lower left corner (ASCII) */ | |
+ { 'n', "+" }, /* large plus or crossover (ASCII) */ | |
+ { 'o', "\342\216\272" }, /* scan line 1 */ | |
+ { 'p', "\342\216\273" }, /* scan line 3 */ | |
+ { 'q', "-" }, /* horizontal line (ASCII) */ | |
+ { 'r', "\342\216\274" }, /* scan line 7 */ | |
+ { 's', "\342\216\275" }, /* scan line 9 */ | |
+ { 't', "+" }, /* tee pointing right (ASCII) */ | |
+ { 'u', "+" }, /* tee pointing left (ASCII) */ | |
+ { 'v', "+" }, /* tee pointing up (ASCII) */ | |
+ { 'w', "+" }, /* tee pointing down (ASCII) */ | |
+ { 'x', "|" }, /* vertical line (ASCII) */ | |
+ { 'y', "\342\211\244" }, /* less-than-or-equal-to */ | |
+ { 'z', "\342\211\245" }, /* greater-than-or-equal-to */ | |
+ { '{', "\317\200" }, /* greek pi */ | |
+ { '|', "\342\211\240" }, /* not-equal */ | |
+ { '}', "\302\243" }, /* UK pound sign */ | |
+ { '~', "*" } /* bullet (ASCII) */ | |
+}; | |
+#endif | |
+ | |
static int | |
tty_acs_cmp(const void *key, const void *value) | |
{ | |
@@ -89,7 +126,16 @@ tty_acs_get(struct tty *tty, u_char ch) | |
/* Otherwise look up the UTF-8 translation. */ | |
entry = bsearch(&ch, | |
- tty_acs_table, nitems(tty_acs_table), sizeof tty_acs_table[0], | |
+#ifdef NO_USE_PANE_BORDER_ASCII /* tty_acs_table_putty is not compiled in: always use the UTF-8 table */ | |
+ tty_acs_table, | |
+#else | |
+#ifdef NO_USE_UTF8CJK /* the "utf8-cjk" option is not compiled in, so only "pane-border-ascii" may be queried */ | |
+ options_get_number(global_s_options, "pane-border-ascii") ? tty_acs_table_putty : tty_acs_table, | |
+#else /* either option selects the table with ASCII borders */ | |
+ (options_get_number(global_options, "utf8-cjk") || options_get_number(global_s_options, "pane-border-ascii")) ? tty_acs_table_putty : tty_acs_table, | |
+#endif /* NO_USE_UTF8CJK */ | |
+#endif /* NO_USE_PANE_BORDER_ASCII */ | |
+ nitems(tty_acs_table), sizeof tty_acs_table[0], /* assumes tty_acs_table_putty has as many entries as tty_acs_table */ | |
tty_acs_cmp); | |
if (entry == NULL) | |
return (NULL); | |
diff -uprN tmux-2.5/utf8.c tmux-2.5-patched/utf8.c | |
--- tmux-2.5/utf8.c 2017-05-16 22:19:28.000000000 +0900 | |
+++ tmux-2.5-patched/utf8.c 2017-06-06 13:46:38.501713750 +0900 | |
@@ -25,6 +25,319 @@ | |
#include "tmux.h" | |
+#ifndef NO_USE_UTF8CJK | |
+/* | |
+ * This is an implementation of wcwidth() and wcswidth() (defined in | |
+ * IEEE Std 1003.1-2001) for Unicode. | |
+ * | |
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html | |
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html | |
+ * | |
+ * In fixed-width output devices, Latin characters all occupy a single | |
+ * "cell" position of equal width, whereas ideographic CJK characters | |
+ * occupy two such cells. Interoperability between terminal-line | |
+ * applications and (teletype-style) character terminals using the | |
+ * UTF-8 encoding requires agreement on which character should advance | |
+ * the cursor by how many cell positions. No established formal | |
+ * standards exist at present on which Unicode character shall occupy | |
+ * how many cell positions on character terminals. These routines are | |
+ * a first attempt of defining such behavior based on simple rules | |
+ * applied to data provided by the Unicode Consortium. | |
+ * | |
+ * For some graphical characters, the Unicode standard explicitly | |
+ * defines a character-cell width via the definition of the East Asian | |
+ * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. | |
+ * In all these cases, there is no ambiguity about which width a | |
+ * terminal shall use. For characters in the East Asian Ambiguous (A) | |
+ * class, the width choice depends purely on a preference of backward | |
+ * compatibility with either historic CJK or Western practice. | |
+ * Choosing single-width for these characters is easy to justify as | |
+ * the appropriate long-term solution, as the CJK practice of | |
+ * displaying these characters as double-width comes from historic | |
+ * implementation simplicity (8-bit encoded characters were displayed | |
+ * single-width and 16-bit ones double-width, even for Greek, | |
+ * Cyrillic, etc.) and not any typographic considerations. | |
+ * | |
+ * Much less clear is the choice of width for the Not East Asian | |
+ * (Neutral) class. Existing practice does not dictate a width for any | |
+ * of these characters. It would nevertheless make sense | |
+ * typographically to allocate two character cells to characters such | |
+ * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be | |
+ * represented adequately with a single-width glyph. The following | |
+ * routines at present merely assign a single-cell width to all | |
+ * neutral characters, in the interest of simplicity. This is not | |
+ * entirely satisfactory and should be reconsidered before | |
+ * establishing a formal standard in this area. At the moment, the | |
+ * decision which Not East Asian (Neutral) characters should be | |
+ * represented by double-width glyphs cannot yet be answered by | |
+ * applying a simple rule from the Unicode database content. Setting | |
+ * up a proper standard for the behavior of UTF-8 character terminals | |
+ * will require a careful analysis not only of each Unicode character, | |
+ * but also of each presentation form, something the author of these | |
+ * routines has avoided to do so far. | |
+ * | |
+ * http://www.unicode.org/unicode/reports/tr11/ | |
+ * | |
+ * Markus Kuhn -- 2007-05-26 (Unicode 5.0) | |
+ * | |
+ * Permission to use, copy, modify, and distribute this software | |
+ * for any purpose and without fee is hereby granted. The author | |
+ * disclaims all warranties with regard to this software. | |
+ * | |
+ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c | |
+ */ | |
+ | |
+// Delete duplicated '#include <wchar.h>' by Z.OOL. <[email protected]> | |
+//#include <wchar.h> | |
+ | |
+struct interval { /* one inclusive range of code points in a lookup table */ | |
+ int first; /* lowest code point in the range */ | |
+ int last; /* highest code point in the range */ | |
+}; | |
+ | |
+/* Binary search in an interval table: returns 1 if ucs lies inside one of table[0..max], otherwise 0. max is the index of the last entry, not the entry count. */ | |
+static int bisearch(wchar_t ucs, const struct interval *table, int max) { | |
+ int min = 0; | |
+ int mid; | |
+ | |
+ if (ucs < table[0].first || ucs > table[max].last) /* quick reject: outside the span of the whole table */ | |
+ return 0; | |
+ while (max >= min) { | |
+ mid = (min + max) / 2; | |
+ if (ucs > table[mid].last) | |
+ min = mid + 1; /* continue in the upper half */ | |
+ else if (ucs < table[mid].first) | |
+ max = mid - 1; /* continue in the lower half */ | |
+ else | |
+ return 1; /* first <= ucs <= last */ | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+ | |
+/* The following two functions define the column width of an ISO 10646 | |
+ * character as follows: | |
+ * | |
+ * - The null character (U+0000) has a column width of 0. | |
+ * | |
+ * - Other C0/C1 control characters and DEL will lead to a return | |
+ * value of -1. | |
+ * | |
+ * - Non-spacing and enclosing combining characters (general | |
+ * category code Mn or Me in the Unicode database) have a | |
+ * column width of 0. | |
+ * | |
+ * - SOFT HYPHEN (U+00AD) has a column width of 1. | |
+ * | |
+ * - Other format characters (general category code Cf in the Unicode | |
+ * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. | |
+ * | |
+ * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) | |
+ * have a column width of 0. | |
+ * | |
+ * - Spacing characters in the East Asian Wide (W) or East Asian | |
+ * Full-width (F) category as defined in Unicode Technical | |
+ * Report #11 have a column width of 2. | |
+ * | |
+ * - All remaining characters (including all printable | |
+ * ISO 8859-1 and WGL4 characters, Unicode control characters, | |
+ * etc.) have a column width of 1. | |
+ * | |
+ * This implementation assumes that wchar_t characters are encoded | |
+ * in ISO 10646. | |
+ */ | |
+ | |
+int mk_wcwidth(wchar_t ucs) /* returns the column width of ucs: -1, 0, 1 or 2 */ | |
+{ | |
+ /* sorted list of non-overlapping intervals of non-spacing characters */ | |
+ /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ | |
+ static const struct interval combining[] = { | |
+ { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, | |
+ { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, | |
+ { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, | |
+ { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, | |
+ { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, | |
+ { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, | |
+ { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, | |
+ { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, | |
+ { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, | |
+ { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, | |
+ { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, | |
+ { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, | |
+ { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, | |
+ { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, | |
+ { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, | |
+ { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, | |
+ { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, | |
+ { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, | |
+ { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, | |
+ { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, | |
+ { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, | |
+ { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, | |
+ { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, | |
+ { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, | |
+ { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, | |
+ { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, | |
+ { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, | |
+ { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, | |
+ { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, | |
+ { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, | |
+ { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, | |
+ { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, | |
+ { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, | |
+ { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, | |
+ { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, | |
+ { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, | |
+ { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, | |
+ { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, | |
+ { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, | |
+ { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, | |
+ { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, | |
+ { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, | |
+ { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, | |
+ { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, | |
+ { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 }, | |
+ { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, | |
+ { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, | |
+ { 0xE0100, 0xE01EF } | |
+ }; | |
+ | |
+ /* test for 8-bit control characters */ | |
+ if (ucs == 0) | |
+ return 0; | |
+ if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) | |
+ return -1; | |
+ | |
+ /* binary search in table of non-spacing characters */ | |
+ if (bisearch(ucs, combining, | |
+ sizeof(combining) / sizeof(struct interval) - 1)) /* bisearch() takes the index of the last entry */ | |
+ return 0; | |
+ | |
+ /* if we arrive here, ucs is not a combining or C0/C1 control character */ | |
+ | |
+ return 1 + /* the parenthesised test below yields 1 for wide characters, giving a width of 2 */ | |
+ (ucs >= 0x1100 && | |
+ (ucs <= 0x115f || /* Hangul Jamo init. consonants */ | |
+ ucs == 0x2329 || ucs == 0x232a || | |
+ (ucs >= 0x2e80 && ucs <= 0xa4cf && | |
+ ucs != 0x303f) || /* CJK ... Yi */ | |
+ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ | |
+ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ | |
+ (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */ | |
+ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ | |
+ (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ | |
+ (ucs >= 0xffe0 && ucs <= 0xffe6) || /* Fullwidth signs */ | |
+ (ucs >= 0x20000 && ucs <= 0x2fffd) || /* Supplementary Ideographic Plane */ | |
+ (ucs >= 0x30000 && ucs <= 0x3fffd))); /* Tertiary Ideographic Plane */ | |
+} | |
+ | |
+ | |
+int mk_wcswidth(const wchar_t *pwcs, size_t n) /* summed mk_wcwidth() of at most n characters of pwcs */ | |
+{ | |
+ int w, width = 0; | |
+ | |
+ for (;*pwcs && n-- > 0; pwcs++) /* stops at a null character or after n characters */ | |
+ if ((w = mk_wcwidth(*pwcs)) < 0) | |
+ return -1; /* one character of width -1 makes the whole result -1 */ | |
+ else | |
+ width += w; | |
+ | |
+ return width; | |
+} | |
+ | |
+ | |
+/* | |
+ * The following functions are the same as mk_wcwidth() and | |
+ * mk_wcswidth(), except that spacing characters in the East Asian | |
+ * Ambiguous (A) category as defined in Unicode Technical Report #11 | |
+ * have a column width of 2. This variant might be useful for users of | |
+ * CJK legacy encodings who want to migrate to UCS without changing | |
+ * the traditional terminal character-width behaviour. It is not | |
+ * otherwise recommended for general use. | |
+ */ | |
+int mk_wcwidth_cjk(wchar_t ucs) /* like mk_wcwidth(), but East Asian Ambiguous characters are 2 columns wide */ | |
+{ | |
+ /* sorted list of non-overlapping intervals of East Asian Ambiguous | |
+ * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ | |
+ static const struct interval ambiguous[] = { | |
+ { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, | |
+ { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 }, | |
+ { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, | |
+ { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, | |
+ { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED }, | |
+ { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA }, | |
+ { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 }, | |
+ { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B }, | |
+ { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 }, | |
+ { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 }, | |
+ { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 }, | |
+ { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE }, | |
+ { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 }, | |
+ { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA }, | |
+ { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 }, | |
+ { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB }, | |
+ { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB }, | |
+ { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 }, | |
+ { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 }, | |
+ { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 }, | |
+ { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 }, | |
+ { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 }, | |
+ { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 }, | |
+ { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 }, | |
+ { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC }, | |
+ { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 }, | |
+ { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 }, | |
+ { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 }, | |
+ { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 }, | |
+ { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 }, | |
+ { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 }, | |
+ { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B }, | |
+ { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 }, | |
+ { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 }, | |
+ { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E }, | |
+ { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 }, | |
+ { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 }, | |
+ { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F }, | |
+ { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 }, | |
+ { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF }, | |
+ { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B }, | |
+ { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 }, | |
+ { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 }, | |
+ { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 }, | |
+ { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 }, | |
+ { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 }, | |
+ { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 }, | |
+ { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 }, | |
+ { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 }, | |
+ { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F }, | |
+ { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, | |
+ { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } | |
+ }; | |
+ | |
+ /* binary search in table of East Asian Ambiguous characters */ | |
+ if (bisearch(ucs, ambiguous, | |
+ sizeof(ambiguous) / sizeof(struct interval) - 1)) /* bisearch() takes the index of the last entry */ | |
+ return 2; | |
+ | |
+ return mk_wcwidth(ucs); /* every other character keeps its mk_wcwidth() width */ | |
+} | |
+ | |
+ | |
+int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n) /* summed mk_wcwidth_cjk() of at most n characters of pwcs */ | |
+{ | |
+ int w, width = 0; | |
+ | |
+ for (;*pwcs && n-- > 0; pwcs++) /* stops at a null character or after n characters */ | |
+ if ((w = mk_wcwidth_cjk(*pwcs)) < 0) | |
+ return -1; /* one character of width -1 makes the whole result -1 */ | |
+ else | |
+ width += w; | |
+ | |
+ return width; | |
+} | |
+#endif | |
+ | |
static int utf8_width(wchar_t); | |
/* Set a single character. */ | |
@@ -109,11 +422,23 @@ utf8_width(wchar_t wc) | |
{ | |
int width; | |
+#ifndef NO_USE_UTF8CJK | |
+ if (options_get_number(global_options, "utf8-cjk")) { | |
+ width = mk_wcwidth_cjk(wc); | |
+ } else { | |
+#ifdef HAVE_UTF8PROC | |
+ width = utf8proc_wcwidth(wc); | |
+#else | |
+ width = wcwidth(wc); | |
+#endif | |
+ } | |
+#else | |
#ifdef HAVE_UTF8PROC | |
width = utf8proc_wcwidth(wc); | |
#else | |
width = wcwidth(wc); | |
#endif | |
+#endif | |
if (width < 0 || width > 0xff) { | |
log_debug("Unicode %04lx, wcwidth() %d", (long)wc, width); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment