Skip to content

Instantly share code, notes, and snippets.

@vindard
Last active March 9, 2021 05:35
Show Gist options
  • Save vindard/b2eb2af5688f40bb8ab8bbdcd3fb229b to your computer and use it in GitHub Desktop.
Save vindard/b2eb2af5688f40bb8ab8bbdcd3fb229b to your computer and use it in GitHub Desktop.
Splits a paragraph string into smaller paragraph strings based on the 'max_lines' number passed in
from typing import List
def split_para(para_string: str, max_lines: int = 3) -> List[str]:
    """Split a paragraph into chunks of at most ``max_lines`` lines each.

    Lines are delimited by ``'\\n'``. Consecutive groups of ``max_lines``
    lines are re-joined with ``'\\n'`` and returned in order; the final
    chunk may hold fewer lines.

    Args:
        para_string: The text to split.
        max_lines: Maximum number of lines per chunk; must be greater
            than 0.

    Returns:
        The list of chunks. If ``max_lines`` is not greater than 0, an
        error message is printed and ``[para_string]`` is returned
        unchanged. A single empty line left over after the last full chunk
        (i.e. the paragraph ended with a newline) is not emitted as a chunk.
    """
    if not max_lines > 0:
        print("Error: Please pass a 'max_lines' arg greater than 0")
        return [para_string]

    lines = para_string.split('\n')

    # The first chunk is always emitted, even when the paragraph is empty.
    chunks = ['\n'.join(lines[:max_lines])]

    # Walk the remaining lines iteratively: recursing once per chunk would
    # hit the interpreter's recursion limit on long paragraphs.
    for start in range(max_lines, len(lines), max_lines):
        # A lone empty line as the whole remainder is just the paragraph's
        # trailing newline, not a chunk of its own.
        if start == len(lines) - 1 and not lines[start]:
            break
        chunks.append('\n'.join(lines[start:start + max_lines]))

    return chunks
import re
from typing import List
def split_para_regex(para_string: str, max_lines: int = 3) -> List[str]:
    """Split a paragraph into chunks of at most ``max_lines`` lines via regex.

    The text is consumed left to right in groups of up to ``max_lines``
    lines (delimited by ``'\\n'``). Leading and trailing newline characters
    are stripped from every chunk, so a chunk that consisted only of blank
    lines is returned as ``''``.

    Args:
        para_string: The text to split.
        max_lines: Maximum number of lines per chunk; must be greater
            than 0.

    Returns:
        The list of chunks in order. If ``max_lines`` is not greater than
        0, an error message is printed and ``[para_string]`` is returned
        unchanged. If the text yields no chunks (empty input),
        ``[para_string]`` is returned as well.
    """
    if not max_lines > 0:
        print("Error: Please pass a 'max_lines' arg greater than 0")
        return [para_string]

    # Up to (max_lines - 1) newline-terminated lines, then one more line
    # with an optional terminating newline. Being greedy, each match takes
    # a full max_lines-sized chunk when available and otherwise whatever
    # shorter tail is left, so no separate "remainder" pass is needed.
    chunk_pattern = r"(?:.*\n){0,%d}.*\n?" % (max_lines - 1)

    chunks = [
        match.group().strip('\n')
        for match in re.finditer(chunk_pattern, para_string)
        # The pattern can match the empty string at the very end of the
        # text; that zero-length match is not a chunk.
        if match.group()
    ]

    # Return the original paragraph when nothing could be extracted.
    return chunks or [para_string]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment