EdisonChendi · October 20, 2016 05:15
diff --git a/trunc.py b/trunc.py
 # coding=utf-8

 def trunc(s, limit, coding="UTF-8", postfix="..."):
     """Truncate *s* to at most *limit* encoded bytes without splitting a character.

     Works on str/bytes (py3) and str/unicode (py2) input.

     Args:
         s: Text to shorten. A ``bytes`` value (plain ``str`` on py2) is
             decoded with ``coding`` first; any other value is used as is.
         limit: Maximum number of bytes, measured in ``coding``, that the
             kept characters may occupy. ``postfix`` is not counted.
         coding: Codec name used both to decode byte input and to measure
             the encoded size of each character.
         postfix: Marker appended only when something was actually cut off.

     Returns:
         The decoded text unchanged when it fits within ``limit``;
         otherwise the longest prefix that fits, followed by ``postfix``.

     Note:
         Sizes are measured one character at a time, so codecs that emit a
         prefix on every ``encode`` call (e.g. the BOM of ``"utf-16"``) are
         over-counted.
     """
     text = s.decode(coding) if isinstance(s, bytes) else s
     used = 0
     for index, char in enumerate(text):
         used += len(char.encode(coding))
         if used > limit:
             # ``char`` is the first character that does not fit: keep
             # everything before it and flag the cut with the postfix.
             return text[:index] + postfix
     # No character overflowed the limit, so the whole text fits. Returning it
     # directly (rather than slicing by the last loop index) keeps the final
     # character.
     return text

 # py2 demo, case 1: unicode text measured with the default UTF-8 coding
 a = 100 * u"你好世界，"
 trunc_a = trunc(a, limit=50)
 print(trunc_a)
 # case 2: a plain str literal (a byte string on py2)
 b = 100 * "你好世界，"
 trunc_b = trunc(b, limit=50)
 print(trunc_b)
 # case 3: gb2312-encoded bytes, measured in gb2312
 c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
 trunc_c = trunc(
     c,
     limit=20,
     coding="gb2312",
     postfix=u"呃呃呃",  # on py2 the postfix must be given as unicode
 )
 print(trunc_c)

 # py3
 # a = "你好世界，" * 100
 # trunc_a = trunc(a, 50)
 # print(trunc_a)
 # b = bytes("你好世界，" * 100, "UTF-8")
 # trunc_b = trunc(b, 50)
 # print(trunc_b)
 # c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
 # trunc_c = trunc(c, 20, coding="gb2312")
 # print(trunc_c)
	# coding=utf-8

	def trunc(s, limit, coding="UTF-8", postfix="..."):
	    """Truncate *s* to at most *limit* encoded bytes without splitting a character.

	    Works on str/bytes (py3) and str/unicode (py2) input.

	    Args:
	        s: Text to shorten. A ``bytes`` value (plain ``str`` on py2) is
	            decoded with ``coding`` first; any other value is used as is.
	        limit: Maximum number of bytes, measured in ``coding``, that the
	            kept characters may occupy. ``postfix`` is not counted.
	        coding: Codec name used both to decode byte input and to measure
	            the encoded size of each character.
	        postfix: Marker appended only when something was actually cut off.

	    Returns:
	        The decoded text unchanged when it fits within ``limit``;
	        otherwise the longest prefix that fits, followed by ``postfix``.

	    Note:
	        Sizes are measured one character at a time, so codecs that emit a
	        prefix on every ``encode`` call (e.g. the BOM of ``"utf-16"``) are
	        over-counted.
	    """
	    text = s.decode(coding) if isinstance(s, bytes) else s
	    used = 0
	    for index, char in enumerate(text):
	        used += len(char.encode(coding))
	        if used > limit:
	            # ``char`` is the first character that does not fit: keep
	            # everything before it and flag the cut with the postfix.
	            return text[:index] + postfix
	    # No character overflowed the limit, so the whole text fits. Returning it
	    # directly (rather than slicing by the last loop index) keeps the final
	    # character.
	    return text

	# py2 demo, case 1: unicode text measured with the default UTF-8 coding
	a = 100 * u"你好世界，"
	trunc_a = trunc(a, limit=50)
	print(trunc_a)
	# case 2: a plain str literal (a byte string on py2)
	b = 100 * "你好世界，"
	trunc_b = trunc(b, limit=50)
	print(trunc_b)
	# case 3: gb2312-encoded bytes, measured in gb2312
	c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
	trunc_c = trunc(
	    c,
	    limit=20,
	    coding="gb2312",
	    postfix=u"呃呃呃",  # on py2 the postfix must be given as unicode
	)
	print(trunc_c)

	# py3
	# a = "你好世界，" * 100
	# trunc_a = trunc(a, 50)
	# print(trunc_a)
	# b = bytes("你好世界，" * 100, "UTF-8")
	# trunc_b = trunc(b, 50)
	# print(trunc_b)
	# c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
	# trunc_c = trunc(c, 20, coding="gb2312")
	# print(trunc_c)