deton · March 26, 2022 03:57
diff --git a/lynx-jajoinspaces.patch b/lynx-jajoinspaces.patch
 diff --git a/src/GridText.c b/src/GridText.c
 index 04e9a4a..d9a1665 100644
 --- a/src/GridText.c
 +++ b/src/GridText.c
 @@ -453,7 +453,11 @@ struct _HText {
     HTList *hidden_links;	/* Content-less links ... */
     int hiddenlinkflag;		/*  ... and how to treat them */
     BOOL no_cache;		/* Always refresh? */
 +#ifdef EXP_JAPANESE_SPACES
 +    char LastChars[7];		/* utf-8 buffer */
 +#else
     char LastChar;		/* For absorbing white space */
 +#endif
 
 /* For Internal use: */
     HTStyle *style;		/* Current style */
 @@ -1134,7 +1138,11 @@ HText *HText_new(HTParentAnchor *anchor)
 				 anchor->post_data)
 				? YES
 				: NO);
 +#ifdef EXP_JAPANESE_SPACES
 +    memset(self->LastChars, 0, sizeof(self->LastChars));
 +#else
     self->LastChar = '\0';
 +#endif
 
 #ifndef USE_PRETTYSRC
     if (HTOutputFormat == WWW_SOURCE)
 @@ -2867,7 +2875,7 @@ static void split_line(HText *text, unsigned split)
 #ifdef EXP_WCWIDTH_SUPPORT
     utfxtracells_on_this_line = 0;
 #endif
 -    text->LastChar = ' ';
 +    HText_setLastChar(text, ' ');
 
 #ifdef DEBUG_APPCH
     CTRACE((tfp, "GridText: split_line(%p,%d) called\n", text, split));
 @@ -4648,7 +4656,20 @@ void HText_setLastChar(HText *text, int ch)
     if (!text)
 	return;
 
 +#ifdef EXP_JAPANESE_SPACES
 +    if (IS_UTF_EXTRA(ch) && IS_UTF_FIRST(text->LastChars[0])) {
 +	int i = 1;	/* clamped below so the terminator at [i + 1] stays inside LastChars */
 +	while (i < (int) sizeof(text->LastChars) - 2 && text->LastChars[i] != '\0')
 +	    i++;
 +	text->LastChars[i] = (char) ch;	/* append (or, when full, replace) the trailing byte */
 +	text->LastChars[i + 1] = '\0';
 +	return;
 +    }
 +    memset(text->LastChars, 0, sizeof(text->LastChars));	/* not a continuation: restart */
 +    text->LastChars[0] = (char) ch;
 +#else
     text->LastChar = (char) ch;
 +#endif
 }
 
 /*	Get LastChar element in the text object.
 @@ -4659,8 +4680,37 @@ char HText_getLastChar(HText *text)
     if (!text)
 	return ('\0');
 
 +#ifdef EXP_JAPANESE_SPACES
 +    if (IS_UTF_FIRST(text->LastChars[0])) {
 +	int i;	/* bound is tested before the read so [i] never leaves the buffer */
 +	for (i = 1; i < (int) sizeof(text->LastChars) && text->LastChars[i] != '\0'; i++)
 +	    ;
 +	return ((char) text->LastChars[i - 1]);	/* last stored byte of the sequence */
 +    }
 +    return ((char) text->LastChars[0]);
 +#else
     return ((char) text->LastChar);
 +#endif
 +}
 +
 +#ifdef EXP_JAPANESE_SPACES	/* YES when a space should replace a newline after LastChars */
 +BOOL HText_checkLastChar_needSpaceOnJoinLines(HText *text)
 +{
 +    if (!text)
 +	return YES;
 +
 +    if (IS_UTF_FIRST(text->LastChars[0]) && isUTF8CJChar(text->LastChars))
 +	return NO;	/* buffered UTF-8 sequence is a CJ character: join without a space */
 +    if ((HTCJK == CHINESE || HTCJK == JAPANESE) && is8bits(text->LastChars[0])) {
 +	/* TODO: support 2nd byte of some SJIS kanji (!is8bits && IS_SJIS_LO) */
 +	return NO;
 +    }
 +    if (text->LastChars[0] != ' ')
 +	return YES;
 +    return NO;	/* last character is already a space */
 }
 +#endif
 +
 
 /*		Simple table handling - private
  *		-------------------------------
 @@ -5204,7 +5254,7 @@ static void add_link_number(HText *text, TextAnchor *a, int save_position)
 	&& (text->source ? !psrcview_no_anchor_numbering : 1)
 #endif
 	&& links_are_numbered()) {
 -	char saved_lastchar = text->LastChar;
 +	char saved_lastchar = HText_getLastChar(text);
 	int saved_linenum = text->Lines;
 	HTAnchor *link_dest;
 	char *link_text;
 @@ -5222,7 +5272,7 @@ static void add_link_number(HText *text, TextAnchor *a, int save_position)
 	    HText_appendText(text, marker);
 	}
 	if (saved_linenum && text->Lines && saved_lastchar != ' ')
 -	    text->LastChar = ']';	/* if marker not after space caused split */
 +	    HText_setLastChar(text, ']');	/* if marker not after space caused split */
 	if (save_position) {
 	    a->line_num = text->Lines;
 	    a->line_pos = (short) text->last_line->size;
 @@ -14973,6 +15023,14 @@ static void permit_split_after_CJchar(HText *text, const char *s, unsigned short
 {
     /* Can split after almost any CJ char (Korean uses space) */
     /* TODO: UAX#14 Unicode Line Breaking Algorithm (use ICU4C?) */
 +    if (isUTF8CJChar(s))
 +	text->permissible_split = pos;
 +}
 +#endif /* EXP_WCWIDTH_SUPPORT */
 +
 +#if defined(EXP_WCWIDTH_SUPPORT) || defined(EXP_JAPANESE_SPACES)
 +BOOL isUTF8CJChar(const char *s)
 +{
     UCode_t u = UCGetUniFromUtf8String(&s);
     if (u >= 0x4e00 && u <= 0x9fff || /* CJK Unified Ideographs */
 	u >= 0x3000 && u <= 0x30ff || /* CJK Symbols and Punctuation, Hiragana, Katakana */
 @@ -14981,6 +15039,7 @@ static void permit_split_after_CJchar(HText *text, const char *s, unsigned short
 	u >= 0x3400 && u <= 0x4dbf || /* CJK Unified Ideographs Extension A */
 	u >= 0xf900 && u <= 0xfaff || /* CJK Compatibility Ideographs */
 	u >= 0x20000 && u <= 0x3ffff) /* {Supplementary,Tertiary} Ideographic Plane */
 -	text->permissible_split = pos;
 +	return YES;
 +    return NO;
 }
 -#endif
 +#endif /* EXP_WCWIDTH_SUPPORT || EXP_JAPANESE_SPACES */
 diff --git a/src/GridText.h b/src/GridText.h
 index 911de26..40b17b1 100644
 --- a/src/GridText.h
 +++ b/src/GridText.h
 @@ -93,6 +93,9 @@ US-ASCII control characters <32 which are not defined in Unicode standard
 
     extern void HText_setLastChar(HText *text, int ch);
     extern char HText_getLastChar(HText *text);
 +#ifdef EXP_JAPANESE_SPACES
 +    extern BOOL HText_checkLastChar_needSpaceOnJoinLines(HText *text);
 +#endif
 
     extern int HText_sourceAnchors(HText *text);
     extern void HText_setStale(HText *text);
 @@ -289,6 +292,10 @@ US-ASCII control characters <32 which are not defined in Unicode standard
     extern HTkcode HText_getSpecifiedKcode(HText *text);
     extern void HText_updateSpecifiedKcode(HText *text, HTkcode kcode);
 
 +#if defined(EXP_WCWIDTH_SUPPORT) || defined(EXP_JAPANESE_SPACES)
 +    extern BOOL isUTF8CJChar(const char *s);
 +#endif
 +
 #ifdef __cplusplus
 }
 #endif
 diff --git a/src/HTML.c b/src/HTML.c
 index a012466..cf2e18b 100644
 --- a/src/HTML.c
 +++ b/src/HTML.c
 @@ -275,18 +275,6 @@ void LYShowBadHTML(const char *message)
  *			A C T I O N	R O U T I N E S
  */
 
 -/* FIXME:  this should be amended to do the substitution only when not in a
 - * multibyte stream.
 - */
 -#ifdef EXP_JAPANESE_SPACES
 -#define FIX_JAPANESE_SPACES \
 -	(HTCJK == CHINESE || HTCJK == JAPANESE || HTCJK == TAIPEI)
 -	/* don't replace '\n' with ' ' if Chinese or Japanese - HN
 -	 */
 -#else
 -#define FIX_JAPANESE_SPACES 0
 -#endif
 -
 /*	Character handling
  *	------------------
  */
 @@ -333,12 +321,25 @@ void HTML_put_character(HTStructured * me, int c)
 	    return;
 	if (c != '\n' && c != '\t' && c != '\r') {
 	    HTChunkPutc(&me->title, uc);
 -	} else if (FIX_JAPANESE_SPACES) {
 -	    if (c == '\t') {
 -		HTChunkPutc(&me->title, ' ');
 -	    } else {
 +#ifdef EXP_JAPANESE_SPACES
 +	} else if (c == '\t') {
 +	    HTChunkPutc(&me->title, ' ');
 +	/* don't replace '\n' with ' ' if Chinese or Japanese - HN
 +	 */
 +	} else if (me->title.size > 0 && is8bits(me->title.data[me->title.size - 1])) {
 +	    if (HTCJK == CHINESE || HTCJK == JAPANESE) {
 +		/* TODO: support 2nd byte of SJIS (!is8bits && IS_SJIS_LO) */
 		return;
 +	    } else if (IS_UTF8_TTY) {
 +		/* find start position of UTF-8 sequence */
 +		int i = me->title.size - 1;
 +		while (i > 0 && (me->title.data[i] & 0xc0) == 0x80) /* UTF_EXTRA */
 +		    i--;
 +		if (isUTF8CJChar(&(me->title.data[i])))
 +		    return;
 	    }
 +	    HTChunkPutc(&me->title, ' ');
 +#endif
 	} else {
 	    HTChunkPutc(&me->title, ' ');
 	}
 @@ -453,15 +454,17 @@ void HTML_put_character(HTStructured * me, int c)
 		UPDATE_STYLE;
 	    }
 	    if (c == '\n') {
 -		if (!FIX_JAPANESE_SPACES) {
 -		    if (me->in_word) {
 -			if (HText_getLastChar(me->text) != ' ') {
 -			    me->inP = TRUE;
 -			    me->inLABEL = FALSE;
 -			    HText_appendCharacter(me->text, ' ');
 -			}
 -			me->in_word = NO;
 +		if (me->in_word) {
 +#ifdef EXP_JAPANESE_SPACES
 +		    if (HText_checkLastChar_needSpaceOnJoinLines(me->text)) {
 +#else
 +		    if (HText_getLastChar(me->text) != ' ') {
 +#endif
 +			me->inP = TRUE;
 +			me->inLABEL = FALSE;
 +			HText_appendCharacter(me->text, ' ');
 		    }
 +		    me->in_word = NO;
 		}
 
 	    } else if (c == ' ' || c == '\t') {
 @@ -607,12 +610,14 @@ void HTML_put_string(HTStructured * me, const char *s)
 		    UPDATE_STYLE;
 		}
 		if (c == '\n') {
 -		    if (!FIX_JAPANESE_SPACES) {
 -			if (me->in_word) {
 -			    if (HText_getLastChar(me->text) != ' ')
 -				HText_appendCharacter(me->text, ' ');
 -			    me->in_word = NO;
 -			}
 +		    if (me->in_word) {
 +#ifdef EXP_JAPANESE_SPACES
 +			if (HText_checkLastChar_needSpaceOnJoinLines(me->text))
 +#else
 +			if (HText_getLastChar(me->text) != ' ')
 +#endif
 +			    HText_appendCharacter(me->text, ' ');
 +			me->in_word = NO;
 		    }
 
 		} else if (c == ' ' || c == '\t') {
diff --git a/testjajoinsp_eucjp.html b/testjajoinsp_eucjp.html
 <html>
 <head>
 <meta charset="EUC-JP">
 <title>
 lorem
 ipsum
 漢
 あ
 カ
 ！
 「
 dolor

 sit

 空

 行
 </title>
 </head>
 <body>
 <p>
 lorem
 ipsum
 漢
 あ
 カ
 ！
 「
 dolor

 sit

 空

 行
 </p>

 <p>
 Expected result:<br>
 lorem ipsum 漢あカ！「dolor sit 空行
 </p>

 <h2>span</h2>
 <p>
 <span>lorem</span>
 <span>ipsum</span>
 <span>漢</span>
 <span>あ</span>
 <span>カ</span>
 <span>！</span>
 <span>「</span>
 <span>dolor</span>
 <span></span>
 <span>sit</span>
 <span></span>
 <span>空</span>
 <span></span>
 <span>行</span>
 </p>

 <p>
 Expected result:<br>
 lorem ipsum 漢あカ！「dolor sit 空行
 </p>
 </body>
 </html>
diff --git a/testjajoinsp_iso2022jp.html b/testjajoinsp_iso2022jp.html
 <html>
 <head>
 <meta charset="iso-2022-jp">
 <title>
 lorem
 ipsum
 $B4A(B
 $B$"(B
 $B%+(B
 $B!*(B
 $B!V(B
 dolor

 sit

 $B6u(B

 $B9T(B
 </title>
 </head>
 <body>
 <p>
 lorem
 ipsum
 $B4A(B
 $B$"(B
 $B%+(B
 $B!*(B
 $B!V(B
 dolor

 sit

 $B6u(B

 $B9T(B
 </p>

 <p>
 Expected result:<br>
 lorem ipsum $B4A$"%+!*!V(Bdolor sit $B6u9T(B
 </p>

 <h2>span</h2>
 <p>
 <span>lorem</span>
 <span>ipsum</span>
 <span>$B4A(B</span>
 <span>$B$"(B</span>
 <span>$B%+(B</span>
 <span>$B!*(B</span>
 <span>$B!V(B</span>
 <span>dolor</span>
 <span></span>
 <span>sit</span>
 <span></span>
 <span>$B6u(B</span>
 <span></span>
 <span>$B9T(B</span>
 </p>

 <p>
 Expected result:<br>
 lorem ipsum $B4A$"%+!*!V(Bdolor sit $B6u9T(B
 </p>
 </body>
 </html>
diff --git a/testjajoinsp_shiftjis.html b/testjajoinsp_shiftjis.html
 <html>
 <head>
 <meta charset="Shift_JIS">
 <title>
 lorem
 ipsum
 漢
 あ
 カ
 ！
 「
 ｷ
 dolor

 sit

 空

 行
 </title>
 </head>
 <body>
 <p>
 lorem
 ipsum
 漢
 あ
 カ
 ！
 「
 ｷ
 dolor

 sit

 空

 行
 </p>

 <p>
 Expected result:<br>
 lorem ipsum 漢あカ！「ｷdolor sit 空行
 </p>

 <h2>span</h2>
 <p>
 <span>lorem</span>
 <span>ipsum</span>
 <span>漢</span>
 <span>あ</span>
 <span>カ</span>
 <span>！</span>
 <span>「</span>
 <span>ｷ</span>
 <span>dolor</span>
 <span></span>
 <span>sit</span>
 <span></span>
 <span>空</span>
 <span></span>
 <span>行</span>
 </p>

 <p>
 Expected result:<br>
 lorem ipsum 漢あカ！「ｷdolor sit 空行
 </p>
 </body>
 </html>
diff --git a/testjajoinsp_utf8.html b/testjajoinsp_utf8.html
 <html>
 <head>
 <meta charset="utf-8">
 <title>
 lorem
 ipsum
 漢
 あ
 カ
 ！
 「
 㓅
 﨑
 𠀋
 한
 空

 ｷ
 dolor

 weiß
 sit
 </title>
 </head>
 <body>
 <p>
 lorem
 ipsum
 漢
 あ
 カ
 ！
 「
 㓅
 﨑
 𠀋
 한
 空

 ｷ
 dolor

 weiß
 sit
 </p>

 <p>
 Expected result:<br>
 lorem ipsum 漢あカ！「㓅﨑𠀋한 空ｷdolor weiß sit
 </p>

 <h2>span</h2>
 <p>
 <span>lorem</span>
 <span>ipsum</span>
 <span>漢</span>
 <span>あ</span>
 <span>カ</span>
 <span>！</span>
 <span>「</span>
 <span>㓅</span>
 <span>﨑</span>
 <span>𠀋</span>
 <span>한</span>
 <span>空</span>
 <span></span>
 <span>ｷ</span>
 <span>dolor</span>
 <span></span>
 <span>weiß</span>
 <span>sit</span>
 </p>

 <p>
 Expected result:<br>
 lorem ipsum 漢あカ！「㓅﨑𠀋한 空ｷdolor weiß sit
 </p>
 </body>
 </html>
	diff --git a/src/GridText.c b/src/GridText.c
	index 04e9a4a..d9a1665 100644
	--- a/src/GridText.c
	+++ b/src/GridText.c
	@@ -453,7 +453,11 @@ struct _HText {
	HTList *hidden_links; /* Content-less links ... */
	int hiddenlinkflag; /* ... and how to treat them */
	BOOL no_cache; /* Always refresh? */
	+#ifdef EXP_JAPANESE_SPACES
	+ char LastChars[7]; /* utf-8 buffer */
	+#else
	char LastChar; /* For absorbing white space */
	+#endif

	/* For Internal use: */
	HTStyle *style; /* Current style */
	@@ -1134,7 +1138,11 @@ HText *HText_new(HTParentAnchor *anchor)
	anchor->post_data)
	? YES
	: NO);
	+#ifdef EXP_JAPANESE_SPACES
	+ memset(self->LastChars, 0, sizeof(self->LastChars));
	+#else
	self->LastChar = '\0';
	+#endif

	#ifndef USE_PRETTYSRC
	if (HTOutputFormat == WWW_SOURCE)
	@@ -2867,7 +2875,7 @@ static void split_line(HText *text, unsigned split)
	#ifdef EXP_WCWIDTH_SUPPORT
	utfxtracells_on_this_line = 0;
	#endif
	- text->LastChar = ' ';
	+ HText_setLastChar(text, ' ');

	#ifdef DEBUG_APPCH
	CTRACE((tfp, "GridText: split_line(%p,%d) called\n", text, split));
	@@ -4648,7 +4656,20 @@ void HText_setLastChar(HText *text, int ch)
	if (!text)
	return;

	+#ifdef EXP_JAPANESE_SPACES
	+ if (IS_UTF_EXTRA(ch) && IS_UTF_FIRST(text->LastChars[0])) {
	+ int i = 1; /* clamped below so the terminator at [i + 1] stays inside LastChars */
	+ while (i < (int) sizeof(text->LastChars) - 2 && text->LastChars[i] != '\0')
	+ i++;
	+ text->LastChars[i] = (char) ch; /* append (or, when full, replace) the trailing byte */
	+ text->LastChars[i + 1] = '\0';
	+ return;
	+ }
	+ memset(text->LastChars, 0, sizeof(text->LastChars)); /* not a continuation: restart */
	+ text->LastChars[0] = (char) ch;
	+#else
	text->LastChar = (char) ch;
	+#endif
	}

	/* Get LastChar element in the text object.
	@@ -4659,8 +4680,37 @@ char HText_getLastChar(HText *text)
	if (!text)
	return ('\0');

	+#ifdef EXP_JAPANESE_SPACES
	+ if (IS_UTF_FIRST(text->LastChars[0])) {
	+ int i; /* bound is tested before the read so [i] never leaves the buffer */
	+ for (i = 1; i < (int) sizeof(text->LastChars) && text->LastChars[i] != '\0'; i++)
	+ ;
	+ return ((char) text->LastChars[i - 1]); /* last stored byte of the sequence */
	+ }
	+ return ((char) text->LastChars[0]);
	+#else
	return ((char) text->LastChar);
	+#endif
	+}
	+
	+#ifdef EXP_JAPANESE_SPACES /* YES when a space should replace a newline after LastChars */
	+BOOL HText_checkLastChar_needSpaceOnJoinLines(HText *text)
	+{
	+ if (!text)
	+ return YES;
	+
	+ if (IS_UTF_FIRST(text->LastChars[0]) && isUTF8CJChar(text->LastChars))
	+ return NO; /* buffered UTF-8 sequence is a CJ character: join without a space */
	+ if ((HTCJK == CHINESE || HTCJK == JAPANESE) && is8bits(text->LastChars[0])) {
	+ /* TODO: support 2nd byte of some SJIS kanji (!is8bits && IS_SJIS_LO) */
	+ return NO;
	+ }
	+ if (text->LastChars[0] != ' ')
	+ return YES;
	+ return NO; /* last character is already a space */
	}
	+#endif
	+

	/* Simple table handling - private
	* -------------------------------
	@@ -5204,7 +5254,7 @@ static void add_link_number(HText *text, TextAnchor *a, int save_position)
	&& (text->source ? !psrcview_no_anchor_numbering : 1)
	#endif
	&& links_are_numbered()) {
	- char saved_lastchar = text->LastChar;
	+ char saved_lastchar = HText_getLastChar(text);
	int saved_linenum = text->Lines;
	HTAnchor *link_dest;
	char *link_text;
	@@ -5222,7 +5272,7 @@ static void add_link_number(HText *text, TextAnchor *a, int save_position)
	HText_appendText(text, marker);
	}
	if (saved_linenum && text->Lines && saved_lastchar != ' ')
	- text->LastChar = ']'; /* if marker not after space caused split */
	+ HText_setLastChar(text, ']'); /* if marker not after space caused split */
	if (save_position) {
	a->line_num = text->Lines;
	a->line_pos = (short) text->last_line->size;
	@@ -14973,6 +15023,14 @@ static void permit_split_after_CJchar(HText *text, const char *s, unsigned short
	{
	/* Can split after almost any CJ char (Korean uses space) */
	/* TODO: UAX#14 Unicode Line Breaking Algorithm (use ICU4C?) */
	+ if (isUTF8CJChar(s))
	+ text->permissible_split = pos;
	+}
	+#endif /* EXP_WCWIDTH_SUPPORT */
	+
	+#if defined(EXP_WCWIDTH_SUPPORT) || defined(EXP_JAPANESE_SPACES)
	+BOOL isUTF8CJChar(const char *s)
	+{
	UCode_t u = UCGetUniFromUtf8String(&s);
	if (u >= 0x4e00 && u <= 0x9fff || /* CJK Unified Ideographs */
	u >= 0x3000 && u <= 0x30ff || /* CJK Symbols and Punctuation, Hiragana, Katakana */
	@@ -14981,6 +15039,7 @@ static void permit_split_after_CJchar(HText *text, const char *s, unsigned short
	u >= 0x3400 && u <= 0x4dbf || /* CJK Unified Ideographs Extension A */
	u >= 0xf900 && u <= 0xfaff || /* CJK Compatibility Ideographs */
	u >= 0x20000 && u <= 0x3ffff) /* {Supplementary,Tertiary} Ideographic Plane */
	- text->permissible_split = pos;
	+ return YES;
	+ return NO;
	}
	-#endif
	+#endif /* EXP_WCWIDTH_SUPPORT || EXP_JAPANESE_SPACES */
	diff --git a/src/GridText.h b/src/GridText.h
	index 911de26..40b17b1 100644
	--- a/src/GridText.h
	+++ b/src/GridText.h
	@@ -93,6 +93,9 @@ US-ASCII control characters <32 which are not defined in Unicode standard

	extern void HText_setLastChar(HText *text, int ch);
	extern char HText_getLastChar(HText *text);
	+#ifdef EXP_JAPANESE_SPACES
	+ extern BOOL HText_checkLastChar_needSpaceOnJoinLines(HText *text);
	+#endif

	extern int HText_sourceAnchors(HText *text);
	extern void HText_setStale(HText *text);
	@@ -289,6 +292,10 @@ US-ASCII control characters <32 which are not defined in Unicode standard
	extern HTkcode HText_getSpecifiedKcode(HText *text);
	extern void HText_updateSpecifiedKcode(HText *text, HTkcode kcode);

	+#if defined(EXP_WCWIDTH_SUPPORT) || defined(EXP_JAPANESE_SPACES)
	+ extern BOOL isUTF8CJChar(const char *s);
	+#endif
	+
	#ifdef __cplusplus
	}
	#endif
	diff --git a/src/HTML.c b/src/HTML.c
	index a012466..cf2e18b 100644
	--- a/src/HTML.c
	+++ b/src/HTML.c
	@@ -275,18 +275,6 @@ void LYShowBadHTML(const char *message)
	* A C T I O N R O U T I N E S
	*/

	-/* FIXME: this should be amended to do the substitution only when not in a
	- * multibyte stream.
	- */
	-#ifdef EXP_JAPANESE_SPACES
	-#define FIX_JAPANESE_SPACES \
	- (HTCJK == CHINESE || HTCJK == JAPANESE || HTCJK == TAIPEI)
	- /* don't replace '\n' with ' ' if Chinese or Japanese - HN
	- */
	-#else
	-#define FIX_JAPANESE_SPACES 0
	-#endif
	-
	/* Character handling
	* ------------------
	*/
	@@ -333,12 +321,25 @@ void HTML_put_character(HTStructured * me, int c)
	return;
	if (c != '\n' && c != '\t' && c != '\r') {
	HTChunkPutc(&me->title, uc);
	- } else if (FIX_JAPANESE_SPACES) {
	- if (c == '\t') {
	- HTChunkPutc(&me->title, ' ');
	- } else {
	+#ifdef EXP_JAPANESE_SPACES
	+ } else if (c == '\t') {
	+ HTChunkPutc(&me->title, ' ');
	+ /* don't replace '\n' with ' ' if Chinese or Japanese - HN
	+ */
	+ } else if (me->title.size > 0 && is8bits(me->title.data[me->title.size - 1])) {
	+ if (HTCJK == CHINESE || HTCJK == JAPANESE) {
	+ /* TODO: support 2nd byte of SJIS (!is8bits && IS_SJIS_LO) */
	return;
	+ } else if (IS_UTF8_TTY) {
	+ /* find start position of UTF-8 sequence */
	+ int i = me->title.size - 1;
	+ while (i > 0 && (me->title.data[i] & 0xc0) == 0x80) /* UTF_EXTRA */
	+ i--;
	+ if (isUTF8CJChar(&(me->title.data[i])))
	+ return;
	}
	+ HTChunkPutc(&me->title, ' ');
	+#endif
	} else {
	HTChunkPutc(&me->title, ' ');
	}
	@@ -453,15 +454,17 @@ void HTML_put_character(HTStructured * me, int c)
	UPDATE_STYLE;
	}
	if (c == '\n') {
	- if (!FIX_JAPANESE_SPACES) {
	- if (me->in_word) {
	- if (HText_getLastChar(me->text) != ' ') {
	- me->inP = TRUE;
	- me->inLABEL = FALSE;
	- HText_appendCharacter(me->text, ' ');
	- }
	- me->in_word = NO;
	+ if (me->in_word) {
	+#ifdef EXP_JAPANESE_SPACES
	+ if (HText_checkLastChar_needSpaceOnJoinLines(me->text)) {
	+#else
	+ if (HText_getLastChar(me->text) != ' ') {
	+#endif
	+ me->inP = TRUE;
	+ me->inLABEL = FALSE;
	+ HText_appendCharacter(me->text, ' ');
	}
	+ me->in_word = NO;
	}

	} else if (c == ' ' || c == '\t') {
	@@ -607,12 +610,14 @@ void HTML_put_string(HTStructured * me, const char *s)
	UPDATE_STYLE;
	}
	if (c == '\n') {
	- if (!FIX_JAPANESE_SPACES) {
	- if (me->in_word) {
	- if (HText_getLastChar(me->text) != ' ')
	- HText_appendCharacter(me->text, ' ');
	- me->in_word = NO;
	- }
	+ if (me->in_word) {
	+#ifdef EXP_JAPANESE_SPACES
	+ if (HText_checkLastChar_needSpaceOnJoinLines(me->text))
	+#else
	+ if (HText_getLastChar(me->text) != ' ')
	+#endif
	+ HText_appendCharacter(me->text, ' ');
	+ me->in_word = NO;
	}

	} else if (c == ' ' || c == '\t') {
	<html>
	<head>
	<meta charset="EUC-JP">
	<title>
	lorem
	ipsum
	漢
	あ
	カ
	！
	「
	dolor

	sit

	空

	行
	</title>
	</head>
	<body>
	<p>
	lorem
	ipsum
	漢
	あ
	カ
	！
	「
	dolor

	sit

	空

	行
	</p>

	<p>
	Expected result:<br>
	lorem ipsum 漢あカ！「dolor sit 空行
	</p>

	<h2>span</h2>
	<p>
	<span>lorem</span>
	<span>ipsum</span>
	<span>漢</span>
	<span>あ</span>
	<span>カ</span>
	<span>！</span>
	<span>「</span>
	<span>dolor</span>
	<span></span>
	<span>sit</span>
	<span></span>
	<span>空</span>
	<span></span>
	<span>行</span>
	</p>

	<p>
	Expected result:<br>
	lorem ipsum 漢あカ！「dolor sit 空行
	</p>
	</body>
	</html>
	<html>
	<head>
	<meta charset="iso-2022-jp">
	<title>
	lorem
	ipsum
	$B4A(B
	$B$"(B
	$B%+(B
	$B!*(B
	$B!V(B
	dolor

	sit

	$B6u(B

	$B9T(B
	</title>
	</head>
	<body>
	<p>
	lorem
	ipsum
	$B4A(B
	$B$"(B
	$B%+(B
	$B!*(B
	$B!V(B
	dolor

	sit

	$B6u(B

	$B9T(B
	</p>

	<p>
	Expected result:<br>
	lorem ipsum $B4A$"%+!*!V(Bdolor sit $B6u9T(B
	</p>

	<h2>span</h2>
	<p>
	<span>lorem</span>
	<span>ipsum</span>
	<span>$B4A(B</span>
	<span>$B$"(B</span>
	<span>$B%+(B</span>
	<span>$B!*(B</span>
	<span>$B!V(B</span>
	<span>dolor</span>
	<span></span>
	<span>sit</span>
	<span></span>
	<span>$B6u(B</span>
	<span></span>
	<span>$B9T(B</span>
	</p>

	<p>
	Expected result:<br>
	lorem ipsum $B4A$"%+!*!V(Bdolor sit $B6u9T(B
	</p>
	</body>
	</html>
	<html>
	<head>
	<meta charset="Shift_JIS">
	<title>
	lorem
	ipsum
	漢
	あ
	カ
	！
	「
	ｷ
	dolor

	sit

	空

	行
	</title>
	</head>
	<body>
	<p>
	lorem
	ipsum
	漢
	あ
	カ
	！
	「
	ｷ
	dolor

	sit

	空

	行
	</p>

	<p>
	Expected result:<br>
	lorem ipsum 漢あカ！「ｷdolor sit 空行
	</p>

	<h2>span</h2>
	<p>
	<span>lorem</span>
	<span>ipsum</span>
	<span>漢</span>
	<span>あ</span>
	<span>カ</span>
	<span>！</span>
	<span>「</span>
	<span>ｷ</span>
	<span>dolor</span>
	<span></span>
	<span>sit</span>
	<span></span>
	<span>空</span>
	<span></span>
	<span>行</span>
	</p>

	<p>
	Expected result:<br>
	lorem ipsum 漢あカ！「ｷdolor sit 空行
	</p>
	</body>
	</html>
	<html>
	<head>
	<meta charset="utf-8">
	<title>
	lorem
	ipsum
	漢
	あ
	カ
	！
	「
	㓅
	﨑
	𠀋
	한
	空

	ｷ
	dolor

	weiß
	sit
	</title>
	</head>
	<body>
	<p>
	lorem
	ipsum
	漢
	あ
	カ
	！
	「
	㓅
	﨑
	𠀋
	한
	空

	ｷ
	dolor

	weiß
	sit
	</p>

	<p>
	Expected result:<br>
	lorem ipsum 漢あカ！「㓅﨑𠀋한 空ｷdolor weiß sit
	</p>

	<h2>span</h2>
	<p>
	<span>lorem</span>
	<span>ipsum</span>
	<span>漢</span>
	<span>あ</span>
	<span>カ</span>
	<span>！</span>
	<span>「</span>
	<span>㓅</span>
	<span>﨑</span>
	<span>𠀋</span>
	<span>한</span>
	<span>空</span>
	<span></span>
	<span>ｷ</span>
	<span>dolor</span>
	<span></span>
	<span>weiß</span>
	<span>sit</span>
	</p>

	<p>
	Expected result:<br>
	lorem ipsum 漢あカ！「㓅﨑𠀋한 空ｷdolor weiß sit
	</p>
	</body>
	</html>