Created
April 8, 2018 05:00
-
-
Save squidarth/e57834c89feb8e8b4596e11d5e8812eb to your computer and use it in GitHub Desktop.
A program that breaks a string and prints words consistently
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: python consistent_printer.py max_line_length file_path | |
# Summary: Prints out the contents of a file given a max line length, | |
# ensuring that the space at the end of each line is minimal. | |
# Example: `python consistent_printer.py 100 words.txt` | |
import sys | |
def get_consistent_lines(max_line_length, string): | |
# This function takes a string and a maximum line length, | |
# and returns a string with newlines inserted such that | |
# the space at the end of each line is consistent. | |
# | |
# This is defined as the set of newlines that result in the minimum | |
# "cost", where the cost is the sum of the square of the number | |
# of spaces left over at the end of each line. | |
stripped_string = string.replace('\n', ' ') | |
words = stripped_string.strip().split(" ") | |
# cost_array[i] is the cost of the optimal | |
# solution for considering words 0 - i. | |
cost_array = [None] * len(words) | |
# previous_break_indexes[i] is the index in words | |
# in which a newline should be inserted, if | |
# i were the last index in the array. | |
previous_break_indexes = [None] * len(words) | |
for i in xrange(len(words)): | |
# On each iteration of this loop i, | |
# we compute the optimal solution | |
# for the array of words 0 - i. | |
# | |
# Once the loop has completed, we will | |
# have computed the the optimal solution | |
# for the whole array words. | |
minimum_cost = sys.maxsize | |
minimum_index = 0 | |
for j in xrange(i + 1): | |
line_length = len(" ".join(words[j:i+1])) | |
if not line_length > max_line_length: | |
extra_space = max_line_length - line_length | |
# Need to do this check, because j starts at 0, and at | |
# that point, since there are no previous words, the previous cost is 0 | |
previous_cost = 0 if j == 0 else cost_array[j - 1] | |
total_cost = (previous_cost + (extra_space ** 2)) | |
if total_cost < minimum_cost: | |
minimum_cost = total_cost | |
minimum_index = j | |
previous_break_indexes[i] = minimum_index | |
cost_array[i] = minimum_cost | |
final_break_indexes = get_break_indexes(previous_break_indexes, []) | |
return get_final_string(words, final_break_indexes) | |
def get_file_contents(file_path): | |
with open(file_path) as f: | |
return f.read() | |
def get_final_string(words, final_break_indexes): | |
final_string = "" | |
for i in xrange(len(words)): | |
if i + 1 in final_break_indexes: | |
extra_char = "\n" | |
else: | |
extra_char = " " | |
final_string += words[i] + extra_char | |
return final_string.strip() | |
def get_break_indexes(previous_break_indexes, final_break_indexes): | |
# Recursive function, that given the previous_break_index array, | |
# works backwards from the last element, and builds up a final | |
# list of indexes before which there should be line breaks. | |
if previous_break_indexes == []: | |
return final_break_indexes | |
last_break_index = previous_break_indexes[-1] | |
final_break_indexes.append(last_break_index) | |
return get_break_indexes(previous_break_indexes[0:last_break_index], final_break_indexes) | |
def print_consistent_lines(max_line_length, string): | |
print get_consistent_lines(max_line_length, string) | |
if __name__ == "__main__": | |
max_line_length = int(sys.argv[1]) | |
file_path = sys.argv[2] | |
print_consistent_lines(max_line_length, get_file_contents(file_path)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment