Created
May 22, 2021 22:25
-
-
Save Yoxem/d7150a50f45334a6846b58fe3a6551ad to your computer and use it in GitHub Desktop.
Knuth Line-breaking implementation in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
#-*-coding:utf-8-*- | |
# License: GPLv3 | |
import functools | |
import math | |
import re | |
line_width = 40 | |
#input_text = "aaa bb cc ddddd" | |
input_text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum gravida semper elit, ac porttitor arcu convallis in. Vivamus iaculis neque pretium, porttitor purus quis, elementum sem. Pellentesque mollis ultricies hendrerit. Nunc ac dictum elit, ut malesuada mi. Donec fringilla eu nunc pellentesque aliquam. Nunc neque sem, interdum in ligula vitae, lobortis vestibulum neque. Praesent placerat gravida volutpat. " | |
input_splitted = re.split('(\s+)',input_text); | |
input_splitted_len = len(input_splitted) | |
punish_2d_array = [[None for _ in range(input_splitted_len)] for _ in range(input_splitted_len)] | |
print(input_splitted) | |
def punish(i,j, line_width, input_splitted): | |
if punish_2d_array[i][j] != None: | |
return punish_2d_array[i][j] | |
else: | |
input_splitted_fragment = input_splitted[i:j+1] | |
input_splitted_fragment_length = map(lambda x: len(x), \ | |
input_splitted_fragment) | |
total_fragment_length = sum(input_splitted_fragment_length) | |
if line_width < total_fragment_length: | |
res = math.inf | |
else: | |
res = (line_width - total_fragment_length) ** 2 | |
punish_2d_array[i][j] = res | |
return res | |
def line_break(input_splitted, line_width): | |
line_break_table = [] | |
for i in range(len(input_splitted)): | |
# prev 指上一個斷句的詞 array index。 | |
line_break_table.append({"prev": "", "value": math.inf}) | |
def line_break_aux(n): | |
nonlocal line_break_table | |
if line_break_table[n]["value"] != math.inf: | |
return line_break_table[n]["value"] | |
i = 0 # 起始索引 | |
if re.match('\s+', input_splitted[0]): | |
i = 1 | |
total_punish_val = punish(0,n, line_width, input_splitted) | |
if total_punish_val == math.inf: | |
min_cost_array = [math.inf] * n | |
for j in range(i,n, 2): | |
rhs = punish(j+2,n, line_width, input_splitted) | |
if rhs == math.inf: | |
min_cost_array[j] = math.inf | |
else: | |
min_cost_array[j] = line_break_aux(j) + rhs | |
print("min_cost_array[%d] = %f" % (j , min_cost_array[j] )) | |
print(min_cost_array) | |
res = min(min_cost_array) | |
res_index = min_cost_array.index(res) | |
if line_break_table[n]["value"] > res: | |
line_break_table[n]["prev"] = res_index | |
line_break_table[n]["value"] = res | |
return res | |
else: | |
line_break_table[n]["value"] = total_punish_val | |
return total_punish_val | |
res = line_break_aux(len(input_splitted)-1) | |
print(line_break_table) | |
return [res,line_break_table] | |
def show_breaking_result(line_break_table, input_splitted): | |
breaking_chain_list = [] | |
traveller = len(line_break_table) - 1 | |
while(line_break_table[traveller]['prev'] != ''): | |
breaking_chain_list.append(line_break_table[traveller]['prev']) | |
traveller = line_break_table[traveller]['prev'] | |
breaking_chain_list = [x for x in reversed(breaking_chain_list)] # 倒排 | |
result_string = "" | |
head = -2 | |
tail = 0 | |
for i in breaking_chain_list: | |
tail = i | |
# 取 head 的下一個元素,以及 tail 後面的空白 | |
fragment = input_splitted[head+2:tail+2] | |
fragment_combined_str = functools.reduce(lambda x, y: x + y,fragment,"") | |
fragment_combined_str += "\n" | |
result_string += fragment_combined_str | |
head = tail | |
result_string += functools.reduce(lambda x, y: x + y,input_splitted[head+2:],"") | |
print(result_string) | |
result = line_break(input_splitted, line_width) | |
show_breaking_result(result[1], input_splitted) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment