ma8ma · December 11, 2018 13:00
diff --git a/jd-thread-search-title.patch b/jd-thread-search-title.patch
 ２ちゃんねるブラウザ JD の
 スレタイ検索結果のタイトルに含まれるタグを取り除くパッチです。

 注意：繰り返しテストしていないので不具合が起こる可能性があります
 ライセンス：GPL v2

 使い方
 https://github.com/yama-natuki/JD/tree/test にパッチを適用してビルド

 変更の要約
 * タイトルのエスケープ処理をやめる
 * 終了タグ</a>の判定を加えて入れ子になったタグを無視しつつ中身をリンクのテキストに入れる
  既存のコードはAタグの中にタグがある場合を想定していないので単純に無視しても大丈夫だと推定している

 diff --git a/src/article/articleviewsearch.cpp b/src/article/articleviewsearch.cpp
 index 889b1d2..a915b18 100644
 --- a/src/article/articleviewsearch.cpp
 +++ b/src/article/articleviewsearch.cpp
 @@ -290,7 +290,7 @@ void ArticleViewSearch::relayout()
                 if( m_searchmode == CORE::SEARCHMODE_ALLLOG || m_searchmode == CORE::SEARCHMODE_TITLE  )
                     comment << "[ <a href=\"" << DBTREE::url_subject( (*it).url_readcgi ) << "\">" << (*it).boardname << "</a> ] ";
 
 -                comment << "<a href=\"" << (*it).url_readcgi << "\">" << MISC::html_escape( (*it).subject ) << "</a>";
 +                comment << "<a href=\"" << (*it).url_readcgi << "\">" << (*it).subject << "</a>";
 
                 if( (*it).num ) comment << " ( " << (*it).num << " )";
 
 diff --git a/src/dbtree/nodetreebase.cpp b/src/dbtree/nodetreebase.cpp
 index da18604..e5c1277 100644
 --- a/src/dbtree/nodetreebase.cpp
 +++ b/src/dbtree/nodetreebase.cpp
 @@ -2019,53 +2019,55 @@ void NodeTreeBase::parse_html( const char* str, const int lng, const int color_t
                 if( pos >= pos_end ) continue;
                 ++pos;
 
 -                const char* pos_str_start = pos;
 -                int lng_str = 0;
 -
 -                bool exec_decode = false;
 -                while( pos < pos_end && *pos != '<' ){
 -                    if( *pos == '&' ) exec_decode = true;
 -                    ++pos;
 -                    ++lng_str;
 +                // A要素の終了タグまでの処理
 +                // デコード処理したテキストはm_parsed_textに、長さはlng_textに保存する
 +                for( ; pos < pos_end; ++pos ) {
 +                    if( *pos == '<' ) {
 +                        if( std::strncmp( pos + 1, "/a>", 3 ) == 0 || std::strncmp( pos + 1, "/A>", 3 ) == 0 ) {
 +                            break;
 +                        }
 +                        // 入れ子になったタグは無視して中身だけテキストに追加する
 +                        ++pos;
 +                        while( pos < pos_end && *pos != '>' ) {
 +                            ++pos;
 +                        }
 +                    }
 +                    else if( *pos == '&' ) {
 +                        // 特殊文字デコード
 +                        int n_in = 0;
 +                        int n_out = 0;
 +                        const int ret_decode = DBTREE::decode_char( pos, n_in,
 +                                                                    m_parsed_text + lng_text, n_out, false );
 +                        if( ret_decode != NODE_NONE ){
 +                            lng_text += n_out;
 +                            pos += n_in;
 +                            pos--;
 +                        }
 +                        else {
 +                            m_parsed_text[ lng_text++ ] = *pos;
 +                        }
 +                    }
 +                    else {
 +                        m_parsed_text[ lng_text++ ] = *pos;
 +                    }
                 }
                 if( pos >= pos_end ) continue;
 
 +                // </a>を読み飛ばす
                 while( pos < pos_end && *pos != '>' ) ++pos;
                 if( pos >= pos_end ) continue;
                 ++pos;
 
 -                if( lng_link && lng_str ){
 -
 -                    // 特殊文字デコード
 -                    if( exec_decode ){
 -
 -                        for( int pos_tmp = 0; pos_tmp < lng_str; ++ pos_tmp ){
 -                            int n_in = 0;
 -                            int n_out = 0;
 -                            const int ret_decode = DBTREE::decode_char( pos_str_start + pos_tmp, n_in, m_parsed_text + lng_text, n_out, false );
 -                            if( ret_decode != NODE_NONE ){
 -                                lng_text += n_out;
 -                                pos_tmp += n_in;
 -                                pos_tmp--;
 -                            }
 -                            else m_parsed_text[ lng_text++ ] = *( pos_str_start + pos_tmp );
 -                        }
 +                if( lng_link && lng_text ) {
 #ifdef _DEBUG
 -                        m_parsed_text[ lng_text ] = '\0';
 -                        std::cout << m_parsed_text << std::endl;
 +                    m_parsed_text[ lng_text ] = '\0';
 +                    std::cout << m_parsed_text << std::endl;
 #endif
 -                        pos_str_start = m_parsed_text;
 -                        lng_str = lng_text;
 -                        lng_text = 0;
 -                    }
 -
 -                    create_node_link( pos_str_start, lng_str , pos_link_start, lng_link, COLOR_CHAR_LINK, false );
 +                    create_node_link( m_parsed_text, lng_text, pos_link_start, lng_link, COLOR_CHAR_LINK, false );
 +                    lng_text = 0;
                 }
             }
 
 -            // </a>
 -            else if( *( pos + 1 ) == '/' && ( *( pos + 2 ) == 'a' || *( pos + 2 ) == 'A' ) && *( pos + 3 ) == '>' ) pos += 4;
 -
             // 改行にするタグ
             else if(
                 // <p>
	２ちゃんねるブラウザ JD の
	スレタイ検索結果のタイトルに含まれるタグを取り除くパッチです。

	注意：繰り返しテストしていないので不具合が起こる可能性があります
	ライセンス：GPL v2

	使い方
	https://github.com/yama-natuki/JD/tree/test にパッチを適用してビルド

	変更の要約
	* タイトルのエスケープ処理をやめる
	* 終了タグ</a>の判定を加えて入れ子になったタグを無視しつつ中身をリンクのテキストに入れる
	既存のコードはAタグの中にタグがある場合を想定していないので単純に無視しても大丈夫だと推定している

	diff --git a/src/article/articleviewsearch.cpp b/src/article/articleviewsearch.cpp
	index 889b1d2..a915b18 100644
	--- a/src/article/articleviewsearch.cpp
	+++ b/src/article/articleviewsearch.cpp
	@@ -290,7 +290,7 @@ void ArticleViewSearch::relayout()
	if( m_searchmode == CORE::SEARCHMODE_ALLLOG || m_searchmode == CORE::SEARCHMODE_TITLE )
	comment << "[ <a href=\"" << DBTREE::url_subject( (*it).url_readcgi ) << "\">" << (*it).boardname << "</a> ] ";

	- comment << "<a href=\"" << (*it).url_readcgi << "\">" << MISC::html_escape( (*it).subject ) << "</a>";
	+ comment << "<a href=\"" << (*it).url_readcgi << "\">" << (*it).subject << "</a>";

	if( (*it).num ) comment << " ( " << (*it).num << " )";

	diff --git a/src/dbtree/nodetreebase.cpp b/src/dbtree/nodetreebase.cpp
	index da18604..e5c1277 100644
	--- a/src/dbtree/nodetreebase.cpp
	+++ b/src/dbtree/nodetreebase.cpp
	@@ -2019,53 +2019,55 @@ void NodeTreeBase::parse_html( const char* str, const int lng, const int color_t
	if( pos >= pos_end ) continue;
	++pos;

	- const char* pos_str_start = pos;
	- int lng_str = 0;
	-
	- bool exec_decode = false;
	- while( pos < pos_end && *pos != '<' ){
	- if( *pos == '&' ) exec_decode = true;
	- ++pos;
	- ++lng_str;
	+ // A要素の終了タグまでの処理
	+ // デコード処理したテキストはm_parsed_textに、長さはlng_textに保存する
	+ for( ; pos < pos_end; ++pos ) {
	+ if( *pos == '<' ) {
	+ if( std::strncmp( pos + 1, "/a>", 3 ) == 0 || std::strncmp( pos + 1, "/A>", 3 ) == 0 ) {
	+ break;
	+ }
	+ // 入れ子になったタグは無視して中身だけテキストに追加する
	+ ++pos;
	+ while( pos < pos_end && *pos != '>' ) {
	+ ++pos;
	+ }
	+ }
	+ else if( *pos == '&' ) {
	+ // 特殊文字デコード
	+ int n_in = 0;
	+ int n_out = 0;
	+ const int ret_decode = DBTREE::decode_char( pos, n_in,
	+ m_parsed_text + lng_text, n_out, false );
	+ if( ret_decode != NODE_NONE ){
	+ lng_text += n_out;
	+ pos += n_in;
	+ pos--;
	+ }
	+ else {
	+ m_parsed_text[ lng_text++ ] = *pos;
	+ }
	+ }
	+ else {
	+ m_parsed_text[ lng_text++ ] = *pos;
	+ }
	}
	if( pos >= pos_end ) continue;

	+ // </a>を読み飛ばす
	while( pos < pos_end && *pos != '>' ) ++pos;
	if( pos >= pos_end ) continue;
	++pos;

	- if( lng_link && lng_str ){
	-
	- // 特殊文字デコード
	- if( exec_decode ){
	-
	- for( int pos_tmp = 0; pos_tmp < lng_str; ++ pos_tmp ){
	- int n_in = 0;
	- int n_out = 0;
	- const int ret_decode = DBTREE::decode_char( pos_str_start + pos_tmp, n_in, m_parsed_text + lng_text, n_out, false );
	- if( ret_decode != NODE_NONE ){
	- lng_text += n_out;
	- pos_tmp += n_in;
	- pos_tmp--;
	- }
	- else m_parsed_text[ lng_text++ ] = *( pos_str_start + pos_tmp );
	- }
	+ if( lng_link && lng_text ) {
	#ifdef _DEBUG
	- m_parsed_text[ lng_text ] = '\0';
	- std::cout << m_parsed_text << std::endl;
	+ m_parsed_text[ lng_text ] = '\0';
	+ std::cout << m_parsed_text << std::endl;
	#endif
	- pos_str_start = m_parsed_text;
	- lng_str = lng_text;
	- lng_text = 0;
	- }
	-
	- create_node_link( pos_str_start, lng_str , pos_link_start, lng_link, COLOR_CHAR_LINK, false );
	+ create_node_link( m_parsed_text, lng_text, pos_link_start, lng_link, COLOR_CHAR_LINK, false );
	+ lng_text = 0;
	}
	}

	- // </a>
	- else if( *( pos + 1 ) == '/' && ( *( pos + 2 ) == 'a' || *( pos + 2 ) == 'A' ) && *( pos + 3 ) == '>' ) pos += 4;
	-
	// 改行にするタグ
	else if(
	// <p>