Skip to content

Instantly share code, notes, and snippets.

@itsjohncs
Created July 13, 2015 03:12
Show Gist options
  • Select an option

  • Save itsjohncs/373d654cd6e2e82237d6 to your computer and use it in GitHub Desktop.

Select an option

Save itsjohncs/373d654cd6e2e82237d6 to your computer and use it in GitHub Desktop.
html_aware_wrap
def html_aware_wrap(string, width=72):
    """Wrap a string containing HTML so its visible text fits in ``width``.

    The input is consumed left to right as a sequence of chunks. A chunk is
    either a tag (anything of the form ``<...>`` on a single line) or a run
    of text that ends just before the next whitespace character, the next
    tag, or the end of the line. Tags are copied to the output but contribute
    nothing to the measured line length; text chunks do.

    Behaviour worth knowing about:

    * Newlines already present in ``string`` always start a new output line.
    * The whitespace that precedes a word belongs to that word's chunk, so
      it counts towards the width even when the chunk ends up at the start
      of a wrapped line (where it is later stripped).
    * A single chunk longer than ``width`` is never split; it is placed on a
      line of its own and that line exceeds ``width``.
    * Every returned line has leading and trailing whitespace stripped.

    Warning: tag matching is deliberately trivial. Any ``<`` followed later
    on the same line by ``>`` is treated as a tag, so buyer beware.

    Args:
        string: The HTML text to wrap.
        width: Maximum number of non-tag characters per line.

    Returns:
        A list of lines (without trailing newlines). An empty input yields
        ``[""]``.

    Raises:
        RuntimeError: If no chunk can be matched at the current position.
            This is a guard only; every character other than a newline is
            matched by the text pattern.
    """
    # Imported locally so the function is self-contained; nothing else in
    # the file brings ``re`` into scope.
    import re

    # Plain raw strings (not ``ur"..."``) keep this valid on both Python 2
    # and Python 3; the patterns themselves are pure ASCII.
    tag_re = re.compile(r"(<.+?>)", flags=re.MULTILINE | re.UNICODE)
    text_re = re.compile(
        r"(.+?)(\s|<.+?>|$)", flags=re.MULTILINE | re.UNICODE
    )

    lines = [""]
    line_width = 0  # visible (non-tag) characters on the current line
    pos = 0
    end = len(string)

    while pos < end:
        # "." never matches "\n", so hard line breaks are handled by hand.
        if string[pos] == "\n":
            lines.append("")
            line_width = 0
            pos += 1
            continue

        tag_match = tag_re.match(string, pos)
        if tag_match:
            # Tags are emitted verbatim and do not affect the width.
            chunk = tag_match.group(1)
        else:
            text_match = text_re.match(string, pos)
            if text_match is None:
                raise RuntimeError(
                    "unable to match input at position %d" % pos
                )
            # Only group 1 is consumed; the terminating whitespace or tag
            # is left in place to start the next chunk.
            chunk = text_match.group(1)
            if line_width + len(chunk) > width:
                lines.append("")
                line_width = 0
            line_width += len(chunk)

        lines[-1] += chunk
        pos += len(chunk)

    return [line.strip() for line in lines]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment