Created
October 20, 2016 04:27
-
-
Save EdisonChendi/5b3912ea3559b1c5cbd746096e51582d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
def trunc(s, limit, coding="UTF-8", postfix="..."):
    """Truncate *s* so its encoded form takes at most *limit* bytes.

    Works on both Python 2 and Python 3.

    The cut always falls on a character boundary, so a multi-byte
    character is never split in half.

    Args:
        s: Text to truncate. A byte string is first decoded with *coding*;
            anything else is used as-is as text.
        limit: Maximum number of bytes (measured in *coding*) that the kept
            part of the text may occupy. The postfix is NOT counted.
        coding: Codec used both to decode a byte-string input and to
            measure the encoded size of each character.
        postfix: Marker appended only when something was actually cut off.

    Returns:
        A text (unicode) string: the whole input if it already fits,
        otherwise the longest fitting prefix followed by *postfix*.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of bytes; on Python 2 `bytes` is an alias of `str`.
    text = s.decode(coding) if isinstance(s, bytes) else s

    used = 0  # bytes consumed so far by the characters kept
    for index, char in enumerate(text):
        used += len(char.encode(coding))
        if used > limit:
            # `char` is the first character that does not fit: keep
            # everything before it and flag the truncation.
            return text[:index] + postfix

    # Everything fits. Return the full text -- slicing by the last loop
    # index here would silently drop the final character.
    return text
# py2
# Demo inputs: a unicode literal, a plain string literal, and text that has
# been encoded to GB2312 bytes.
a = u"你好世界," * 100
b = "你好世界," * 100
c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")

# Truncate each input and print the result, in the order a, b, c.
trunc_a = trunc(a, limit=50)
print(trunc_a)

trunc_b = trunc(b, limit=50)
print(trunc_b)

# The byte budget for `c` is measured in GB2312, matching its encoding.
trunc_c = trunc(c, limit=20, coding="gb2312")
print(trunc_c)

# py3
# Equivalent demo for Python 3, kept for reference (not executed):
# a = "你好世界," * 100
# trunc_a = trunc(a, 50)
# print(trunc_a)
# b = bytes("你好世界," * 100, "UTF-8")
# trunc_b = trunc(b, 50)
# print(trunc_b)
# c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
# trunc_c = trunc(c, 20, coding="gb2312")
# print(trunc_c)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment