Last active
December 3, 2022 06:37
-
-
Save gullevek/4d2a5175c23a37d4b0e7fa2db26437a4 to your computer and use it in GitHub Desktop.
Python functions to shorten strings containing double-width (CJK) characters and to compute the adjusted format width for aligned printed output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
formatting with double width characters | |
""" | |
import unicodedata | |
def shorten_string_cjk(intput_string, width, placeholder='..'):
    """
    Shorten a string to a maximum display width, CJK aware.

    Characters whose East Asian Width is wide (W) or fullwidth (F) occupy
    two columns, every other character one column. If the input fits into
    ``width`` columns it is returned unchanged; otherwise it is cut so that
    the kept characters plus the placeholder fit into ``width`` columns.

    Args:
        intput_string (any): value to shorten, converted with ``str()``
        width (int): maximum display width in columns
        placeholder (str, optional): marker appended to a cut string.
            Its display width (not its character count) is reserved.
            Defaults to '..'.

    Returns:
        str: the original text if it fits, else the shortened text with
        the placeholder appended. If the placeholder alone is wider than
        ``width``, the text is cut without a placeholder so the result
        never exceeds ``width`` columns.
    """
    text = str(intput_string)
    # reserve room by display width: a wide placeholder needs two columns
    # per character, len() would under-count it
    placeholder_width = sum(_cjk_char_width(char) for char in placeholder)
    if placeholder_width > width:
        # the placeholder itself cannot fit, so cut hard without it
        placeholder = ''
        placeholder_width = 0
    keep_width = width - placeholder_width
    total_width = 0
    kept_chars = 0
    for char in text:
        total_width += _cjk_char_width(char)
        if total_width > width:
            # too wide: stop scanning, the kept prefix is already final
            return f"{text[:kept_chars]}{placeholder}"
        # the running width only grows, so the kept characters always
        # form a prefix of the text
        if total_width <= keep_width:
            kept_chars += 1
    # the whole text fits into the requested width
    return text


def _cjk_char_width(char):
    """Return the display width of one character: 2 for W/F, else 1."""
    return 2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
def string_length_cjk(input_string):
    """
    Display width of a string that may contain CJK characters.

    Args:
        input_string (str): string to measure

    Returns:
        int: number of columns, where each wide (W) or fullwidth (F)
        character counts as two and every other character as one
    """
    columns = 0
    for char in input_string:
        # wide and fullwidth characters take two columns
        if unicodedata.east_asian_width(char) in "WF":
            columns += 2
        else:
            columns += 1
    return columns
def format_string_length(input_string, length):
    """
    Adjust a format width for the double width characters in a string.

    The difference between the CJK display width and the plain character
    count of the string is subtracted from the requested length.

    Args:
        input_string (str): string that will be formatted
        length (int): wanted width in display columns

    Returns:
        int: length reduced by the extra columns the double width
        characters in input_string occupy
    """
    display_width = string_length_cjk(input_string)
    # every double width character adds one column beyond its len() count
    extra_columns = display_width - len(input_string)
    return length - extra_columns
def main():
    """
    Demo run with various sample strings.

    Prints each sample with its CJK width and character count, then the
    calculated values for the shortened samples, and finally each
    shortened sample left aligned inside a fixed width column.
    """
    samples = [
        "Some string 123 other text",
        "Some string 日本語 other text",
        "日本語は string 123 other text",
        "あいうえおかきくけこさしすせそなにぬねのまみむめも〜",
        "あいうえおかきくけこさしす 1 other text",
        "Some string すせそなにぬねのまみむめも〜",
        "SOME OTHER STRING THAT IS LONGER THAN TWENTYSIX CHARACTERS",
    ]
    format_len = 26
    string_len = 26
    print("Original string")
    for sample in samples:
        print(
            "Normal (CJK len "
            f"{string_length_cjk(sample):>2}/len {len(sample):>2}): |{sample}|"
        )
    # shorten each sample once and reuse the result in both loops below;
    # shortening works on the double width aware character count
    shortened = [
        shorten_string_cjk(sample, width=string_len) for sample in samples
    ]
    print("Shorten string")
    for short in shortened:
        print(
            f"Calculate> format_len: {format_len}, string_len: {string_len}, "
            "stringLenCJK(short) "
            f"{string_length_cjk(short)}, "
            f"len(short) {len(short)}, "
            "new format_len: "
            f"{format_string_length(short, format_len)}"
        )
    for short in shortened:
        # reduce the format width by the number of double width characters
        # in the shortened string, as str.format pads by character count
        pad_width = format_string_length(short, format_len)
        print(
            "Normal: "
            f"|{short:<{pad_width}}|"
        )
# run the demo only when executed as a script, not when imported
if __name__ == "__main__":
    main()
# __END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Updated with Python 3.9/10
pylance linting pass