Last active
June 9, 2023 20:08
-
-
Save JJTech0130/b600c9f1e85a58558fe2c576fdac39b8 to your computer and use it in GitHub Desktop.
Parse Apple's ITML into proper HTML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import etree
import requests
# Stylesheet that convert_itml_document appends after the converted includes.
# It gives layout to the generic ITML container views, which are emitted as
# <div> elements whose class is the original view tag name.
OVERRIDE_CSS = """
/* Custom injected CSS */
.VBoxView {
display: flex;
flex-direction: column;
}
.HBoxView {
display: flex;
flex-direction: row;
}
.MatrixView {
display: grid;
}
a {
text-decoration: none;
}
"""
def convert_fontstyle_to_css(fontstyle: str):
    """Convert an ITML font-style fragment into CSS rules.

    ``fontstyle`` is a run of sibling ``FontStyle`` and ``FontStyleSet``
    elements with no common root. Every ``FontStyle`` becomes one CSS rule
    for the class ``.<styleName>``. Every ``FontStyleSet`` contributes extra
    selectors (``.<setName>``, ``.<setName> a``, ``.<setName> a:active``,
    ``.<setName> a:hover``) to the rule of the style it references for that
    role; roles the set does not declare are skipped.

    Returns:
        The generated CSS as one string; empty when the fragment contains no
        ``FontStyle`` element.

    Raises:
        KeyError: if a ``FontStyle`` has no ``styleName`` or a
            ``FontStyleSet`` has no ``setName``.
    """
    # The fragment has several top-level elements, so give it a single root.
    tree = etree.fromstring("<root>" + fontstyle + "</root>")

    # FontStyleSet attribute -> selector suffix appended to ".<setName>".
    set_roles = (
        ("normalStyle", ""),
        ("linkStyle", " a"),
        ("linkPressedStyle", " a:active"),
        ("linkRolloverStyle", " a:hover"),
    )

    # First pass: FontStyles can only be emitted once every FontStyleSet has
    # been seen, because a set may add selectors to any style.
    font_styles = []
    extra_selectors = {}  # styleName -> additional selectors for its rule
    for element in tree:
        if element.tag == "FontStyle":
            font_styles.append(element)
        elif element.tag == "FontStyleSet":
            set_name = element.attrib["setName"]
            for role, suffix in set_roles:
                style_name = element.attrib.get(role)
                if style_name is not None:
                    extra_selectors.setdefault(style_name, []).append(
                        f".{set_name}{suffix}"
                    )

    # Second pass: emit one rule per FontStyle.
    parts = []
    for element in font_styles:
        attrs = element.attrib
        style_name = attrs["styleName"]
        selectors = [f".{style_name}"] + extra_selectors.get(style_name, [])
        parts.append(f"{', '.join(selectors)} {{\n")

        if "font" in attrs:
            families = []
            for font in attrs["font"].split(","):
                # Strip first: "A, B" must not produce the family "' B'".
                font = font.strip()
                if not font:
                    continue
                # Family names containing spaces have to be quoted in CSS.
                families.append(f"'{font}'" if " " in font else font)
                if font == "SF UI":
                    # SF UI font is probably not available
                    families.append("-apple-system")
                    families.append("BlinkMacSystemFont")
            families.append("sans-serif")
            parts.append(f" font-family: {', '.join(families)};\n")
        if "size" in attrs:
            parts.append(f" font-size: {attrs['size']}px;\n")
        if "color" in attrs:
            parts.append(f" color: #{attrs['color']};\n")
        if "line-height" in attrs:
            parts.append(f" line-height: {attrs['line-height']}px;\n")
        if "face" in attrs:
            faces = [face.strip() for face in attrs["face"].split(",")]
            if "bold" in faces:
                parts.append(" font-weight: bold;\n")
            if "underline" in faces:
                parts.append(" text-decoration: underline;\n")
            if "underline" not in faces and "bold" not in faces:
                print(f"Warning: Unknown face {attrs['face']}")
        parts.append("}\n")
    return "".join(parts)
# TODO: Make this not a global variable
# URLs of the <Include> elements encountered by convert_itml_to_html.
global_includes = []

# (ITML attribute, CSS declaration template) pairs, applied in this order.
# Each generated declaration is prepended to the element's inline style.
_INLINE_STYLE_ATTRIBUTES = (
    ("bottomInset", "margin-bottom: {}px;"),
    ("topInset", "margin-top: {}px;"),
    ("leftInset", "margin-left: {}px;"),
    ("rightInset", "margin-right: {}px;"),
    ("width", "width: {}px;"),
    ("height", "height: {}px;"),
    ("minWidth", "min-width: {}px;"),
    ("stretchiness", "flex-grow: {};"),
    ("textJust", "text-align: {};"),
    ("borderColor", "border-color: #{};"),
    ("borderWidth", "border-width: {}px;"),
)

# A CSS border is only drawn when a border-style is set, so these attributes
# also get "border-style: solid;" unless the style already mentions one.
_BORDER_ATTRIBUTES = ("borderColor", "borderWidth")


def _append_class(element, name):
    """Add ``name`` to the element's space-separated ``class`` attribute."""
    existing = element.attrib.get("class")
    element.attrib["class"] = name if existing is None else existing + " " + name


# Convert ITML elements to HTML
def convert_itml_to_html(element: "etree._Element"):
    """Rewrite an ITML element, and recursively its children, into HTML.

    The element is modified in place: its tag is mapped to an HTML tag, the
    ``styleSet``/``normalStyle`` attributes become CSS classes, layout
    attributes become inline styles and ``viewName`` becomes ``id``. The URL
    of every ``Include`` element is appended to ``global_includes``.

    Returns:
        ``True`` when the element was converted (or is a non-element node
        that is left untouched), ``None`` when the caller should remove it
        from its parent (``Include`` and unrecognised elements).
    """
    tag = element.tag
    if not isinstance(tag, str):
        # Comments and processing instructions have a non-string tag; there
        # is nothing to convert, so keep them as they are.
        return True
    if "}" in tag:
        # Remove the namespace from the tag
        tag = tag.split("}")[1]
        element.tag = tag

    attrib = element.attrib
    needs_text = False  # True for tags that must not serialise self-closed

    if tag == "Document":
        element.tag = "body"
    elif tag == "Include":
        if "url" in attrib:
            global_includes.append(attrib["url"])
        return None
    elif tag == "TextView":
        element.tag = "p"
        needs_text = True
    elif tag == "PictureView":
        element.tag = "img"
        if "url" in attrib:
            attrib["src"] = attrib["url"]
            del attrib["url"]
    # Other "Views" are converted into divs with appropriate styling
    elif "View" in tag:
        attrib["class"] = tag
        element.tag = "div"
        needs_text = True
    elif tag == "b":
        pass
    elif tag == "SetFontStyle":
        element.tag = "span"
    elif tag == "OpenURL":
        element.tag = "a"
        if "url" in attrib:
            attrib["href"] = attrib["url"]
            del attrib["url"]
        if "target" in attrib:
            del attrib["target"]
    else:
        print(f"Warning: Ignoring element {tag}")
        return None

    # Fixup styling: both attributes name CSS classes.
    for class_attribute in ("styleSet", "normalStyle"):
        if class_attribute in attrib:
            _append_class(element, attrib[class_attribute])
            del attrib[class_attribute]

    # Fixup other attributes into inline styles
    for name, template in _INLINE_STYLE_ATTRIBUTES:
        if name not in attrib:
            continue
        style = template.format(attrib[name]) + attrib.get("style", "")
        if name in _BORDER_ATTRIBUTES and "border-style" not in style:
            style += "border-style: solid;"
        attrib["style"] = style
        del attrib[name]

    if "viewName" in attrib:
        attrib["id"] = attrib["viewName"]
        del attrib["viewName"]

    # Iterate over a snapshot because children may be removed on the way.
    previous = None  # last child that was kept
    for child in list(element):
        if convert_itml_to_html(child) is not None:
            previous = child
            continue
        tail = child.tail
        element.remove(child)
        # The tail is text of *this* element that merely follows the child;
        # re-attach it so dropping the child does not drop the text too.
        # Whitespace-only tails are formatting and are left to go.
        if tail and tail.strip():
            if previous is None:
                element.text = (element.text or "") + tail
            else:
                previous.tail = (previous.tail or "") + tail

    # Make sure it's not self-closing. Checked after the children were
    # processed, since removing them can leave the element empty.
    if needs_text and len(element) == 0 and element.text is None:
        element.text = ""
    return True
def fixup_includes(includes, timeout=30):
    """Download each included font-style document and convert it to CSS.

    Args:
        includes: iterable of URLs to fetch.
        timeout: seconds to wait for each HTTP request before giving up;
            without it a stalled server would block the conversion forever.

    Returns:
        A list with one CSS string per URL, in input order. Each string
        starts with a CSS comment naming the URL it was generated from.

    Raises:
        requests.RequestException: if a request fails, times out or returns
            an HTTP error status.
    """
    style_elems = []
    for include in includes:
        print(f"Fetching {include} to convert to CSS...")
        # Request the URL
        response = requests.get(include, timeout=timeout)
        # Fail here rather than feeding an error page to the XML parser.
        response.raise_for_status()
        # Feed to convert css
        css = convert_fontstyle_to_css(response.text)
        style_elems.append("\n/* " + include + " */\n" + css)
    return style_elems
# Hacky helper: re-indents multiline CSS so it lines up once lxml serializes
# the surrounding <style> element.
def indent(css, level=0, dent=" "):
    """Return ``css`` with every line prefixed by ``dent * level``.

    Surrounding whitespace is stripped first. The result starts with a
    newline, and ends with a newline followed by ``dent * (level - 1)`` so
    that the closing tag written after the text sits one level shallower.
    """
    prefix = dent * level
    lines = css.strip().split("\n")
    body = "".join("\n" + prefix + line for line in lines)
    closing_indent = dent * (level - 1)
    return body + "\n" + closing_indent
def convert_itml_document(tree) -> str:
    """Convert an ITML document to HTML.

    Args:
        tree: the document as an lxml ``ElementTree``, an lxml ``Element``,
            or XML source as ``str``/``bytes`` (parsed in recovery mode).
            A tree or element passed in is modified in place and its root
            ends up inside the returned document's ``<html>`` element.

    Returns:
        The serialized HTML document: a ``<head>`` holding one ``<style>``
        element per fetched include plus the override stylesheet, followed
        by the converted body.

    Raises:
        TypeError: if ``tree`` is none of the supported types.
        ValueError: if the XML source could not be parsed at all.
    """
    if isinstance(tree, etree._ElementTree):
        root = tree.getroot()
    elif isinstance(tree, etree._Element):
        root = tree
    elif isinstance(tree, (str, bytes)):
        root = etree.fromstring(tree, parser=etree.XMLParser(recover=True))
    else:
        raise TypeError(
            "tree must be an lxml ElementTree, an lxml Element, str or bytes, "
            f"not {type(tree).__name__}"
        )
    if root is None:
        # A recovering parser yields no root when nothing was salvageable.
        raise ValueError("Could not parse any XML element from the input")

    # The include list is module-global; empty it in place so includes from
    # a previously converted document do not leak into this one.
    global_includes.clear()

    # Convert ITML elements to HTML elements
    convert_itml_to_html(root)

    # Wrap the converted body in an HTML document
    html = etree.Element("html")
    head = etree.SubElement(html, "head")

    # For all the FontStyles included, convert them to CSS. Each distinct
    # URL is fetched once, in first-seen order.
    style_elems = fixup_includes(list(dict.fromkeys(global_includes)))
    # Append the View CSS stub
    style_elems.append(OVERRIDE_CSS)

    # Append the style elements to the head; level 3 = html + head + style.
    for css in style_elems:
        etree.SubElement(head, "style").text = indent(css, 3)

    # Append the converted body to the HTML document
    html.append(root)

    # Serialize the HTML tree to a string
    return etree.tostring(
        etree.ElementTree(html), encoding="unicode", pretty_print=True
    )
def extract_protocol(tree) -> str:
    """Return the serialized first child of the document's Protocol element.

    The Protocol element is the first direct child of the root whose tag
    ends with ``Protocol``; matching on the ending ignores any namespace.

    Args:
        tree: the document as an lxml ``ElementTree``, an lxml ``Element``,
            or XML source as ``str``/``bytes`` (parsed in recovery mode).

    Raises:
        TypeError: if ``tree`` is none of the supported types.
        ValueError: if the input could not be parsed, the root has no
            Protocol child, or that Protocol element is empty.
    """
    if isinstance(tree, etree._ElementTree):
        root = tree.getroot()
    elif isinstance(tree, etree._Element):
        root = tree
    elif isinstance(tree, (str, bytes)):
        root = etree.fromstring(
            tree, parser=etree.XMLParser(recover=True, ns_clean=True)
        )
    else:
        raise TypeError(
            "tree must be an lxml ElementTree, an lxml Element, str or bytes, "
            f"not {type(tree).__name__}"
        )
    if root is None:
        # A recovering parser yields no root when nothing was salvageable.
        raise ValueError("Could not parse any XML element from the input")

    # Find Protocol node, ignoring namespaces. Comments and processing
    # instructions have a non-string tag and are skipped.
    protocol = next(
        (
            elem
            for elem in root
            if isinstance(elem.tag, str) and elem.tag.endswith("Protocol")
        ),
        None,
    )
    if protocol is None:
        raise ValueError("Document has no Protocol element")
    if len(protocol) == 0:
        raise ValueError("Protocol element has no child to extract")
    return etree.tostring(protocol[0]).decode()
# with open("input.itml", "r") as f: | |
# itml_string = f.read() | |
# html_string = convert_itml_document(etree.fromstring(itml_string)) | |
# with open("output.html", "w") as f: | |
# f.write(html_string) | |
# print("Converted input.itml to output.html") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment