Created
August 21, 2013 03:42
-
-
Save WenLiangTseng/6290123 to your computer and use it in GitHub Desktop.
中文的Wordpress摘要,含有HTML的Tag時,可保留HTML標籤且避免砍到HTML標籤的完整寫法
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php // Reference: http://stackoverflow.com/questions/1193500/php-truncate-html-ignoring-tags
/**
 * Build an excerpt of an HTML string without cutting through tags or entities.
 *
 * The excerpt keeps the first $len visible characters. Every HTML tag that
 * starts inside that window is kept whole and does not count towards the
 * limit; every character entity (e.g. "&amp;") is kept whole and counts as a
 * single character. All positions are measured in UTF-8 characters, so
 * multi-byte text such as Chinese is handled the same way as ASCII.
 *
 * Tag names are recognised with \w only, so names containing other characters
 * (for example a hyphen) are tracked by their leading \w part.
 *
 * @param string $str           HTML fragment to shorten (treated as UTF-8).
 * @param int    $len           Maximum number of visible characters. Defaults to 100.
 * @param bool   $closeOpenTags When true, closing tags are appended for every
 *                              element still open at the cut point. Defaults to
 *                              false, which returns the truncated markup as-is.
 * @return string The original string when it already fits, otherwise the
 *                truncated markup.
 */
function memo_desc_excerpt($str, $len = 100, $closeOpenTags = false) {
    $str = (string) $str;
    $len = max(0, (int) $len);

    // Matches HTML tags (captures: '<' or '</', tag name, rest of the tag)
    // and character entities (no capture groups).
    $tagPattern = '/(<\/?)([\w]*)(\s*[^>]*)>?|&[\w#]+;/i';
    $matches = array();
    if (preg_match_all($tagPattern, $str, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER) === false) {
        // Regex engine failure: fall back to plain character truncation.
        $matches = array();
    }

    // Elements that never have a closing tag; ignored when tracking open tags.
    $voidTags = array('br', 'img', 'hr', 'input', 'param', 'link');
    $openTags = array();

    // PREG_OFFSET_CAPTURE reports byte offsets, while the limit is expressed
    // in characters. Walk both cursors forward together so each offset is
    // converted incrementally instead of rescanning from the start.
    $byteCursor = 0;
    $charCursor = 0;

    foreach ($matches as $match) {
        $token = $match[0][0];
        $byteOffset = $match[0][1];

        $charCursor += mb_strlen(substr($str, $byteCursor, $byteOffset - $byteCursor), 'UTF-8');
        $byteCursor = $byteOffset;

        // Matches are ordered by offset, so the first one that starts at or
        // beyond the limit ends the scan.
        if ($charCursor >= $len) {
            break;
        }

        if (substr($token, 0, 1) === '&') {
            // An entity renders as one character: extend the limit by the
            // rest of its source length.
            $len += mb_strlen($token, 'UTF-8') - 1;
            continue;
        }

        // A tag renders as nothing: extend the limit by its full length so
        // the tag is always included whole.
        $len += mb_strlen($token, 'UTF-8');

        $tagName = isset($match[2][0]) ? strtolower($match[2][0]) : '';
        if ($tagName === '' || in_array($tagName, $voidTags, true)) {
            continue;
        }

        $isClosing = isset($match[1][0]) && substr($match[1][0], -1) === '/';
        $isSelfClosing = isset($match[3][0]) && substr($match[3][0], -1) === '/';

        if ($isClosing) {
            // Close the nearest matching open element, together with anything
            // left unclosed inside it; an unmatched closing tag is ignored.
            $position = array_search($tagName, array_reverse($openTags, true), true);
            if ($position !== false) {
                $openTags = array_slice($openTags, 0, $position);
            }
        } elseif (!$isSelfClosing) {
            $openTags[] = $tagName;
        }
    }

    if (mb_strlen($str, 'UTF-8') <= $len) {
        return $str;
    }

    $truncated_html = mb_substr($str, 0, $len, 'UTF-8');

    if ($closeOpenTags) {
        // Restore well-formedness: close the innermost element first.
        foreach (array_reverse($openTags) as $tagName) {
            $truncated_html .= '</' . $tagName . '>';
        }
    }

    return $truncated_html;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment