Skip to content

Instantly share code, notes, and snippets.

@brunifrancesco
Created January 12, 2016 16:07
Show Gist options
  • Save brunifrancesco/fa0f70917c21817f86c7 to your computer and use it in GitHub Desktop.
Save brunifrancesco/fa0f70917c21817f86c7 to your computer and use it in GitHub Desktop.
Process and parse HTML files to make them compliant with the Angular "translate" filter. The script also creates the final JSON file, which will contain all the needed translations.
#! /usr/bin/env python
"""
Process some HTML files to add translation features.
The script replaces simple strings (avoiding parsing special chars, comments, Angular based variables, HTML entities)
and any other Angular related stuff with the same string followed by the 'translate' filter as stated in
https://angular-translate.github.io/ .
Labels to be translated are stored in the final JSON file, where the translation values will be filled in.
File by file, the processed content is stored in a new file, leaving the original one intact.
Install required deps
pip install fn beautifulsoup4
"""
import json
import os
from bs4 import BeautifulSoup
from fn.monad import Option
__author__ = "Francesco Bruni"
# Substrings that mark a text node as code, markup or an already-bound
# Angular expression rather than a human-readable label.
_SKIP_MARKERS = ("{{", "./modal", "$(", "http://", "/*", "\n", "+++++")


def _is_translatable(label):
    """Return True if the stripped text *label* should get the translate filter."""
    if not label or label.startswith("<"):
        return False
    return not any(marker in label for marker in _SKIP_MARKERS)


def _translate_expression(raw_text):
    """Wrap the label found in *raw_text* in an Angular ``translate`` expression.

    Whitespace surrounding the label is kept outside the expression, so the
    key used in the markup is exactly the stripped label stored in the JSON
    file. Backslashes and single quotes are escaped because the label is
    emitted inside a single-quoted Angular string literal.
    """
    label = raw_text.strip()
    leading = raw_text[:len(raw_text) - len(raw_text.lstrip())]
    trailing = raw_text[len(raw_text.rstrip()):]
    escaped = label.replace("\\", "\\\\").replace("'", "\\'")
    return "%s{{ '%s' | translate }}%s" % (leading, escaped, trailing)


def _label_nodes(soup):
    """Return the text nodes of *soup* that hold translatable labels."""
    # Local import: only BeautifulSoup itself is imported at module level.
    from bs4 import Comment, Doctype

    nodes = []
    for node in soup.find_all(text=lambda text: text.strip()):
        # find_all also returns comments and doctypes as strings; they are
        # markup, not visible labels, so they must be left untouched.
        if isinstance(node, (Comment, Doctype)):
            continue
        if _is_translatable(node.strip()):
            nodes.append(node)
    return nodes


def process_htmls(main_path):
    """
    Add the Angular ``translate`` filter to the labels of the HTML files.

    Every regular file in ``main_path`` whose name does not contain
    "refactored" is parsed. Each qualifying text node is replaced by
    ``{{ '<label>' | translate }}`` and the result is written to
    ``<name>_refactored.html`` inside ``main_path``; the source file is not
    modified. All collected labels are finally written, with null values, to
    ``it.json`` in the current working directory.

    :param main_path: the path where all the HTML files have been placed to be processed.
    """
    labels = dict()
    for name in os.listdir(main_path):
        source_path = os.path.join(main_path, name)
        # Skip previously generated outputs and anything that is not a
        # regular file (e.g. sub-directories, which open() cannot read).
        if "refactored" in name or not os.path.isfile(source_path):
            continue
        print("Processing %s" % name)
        # Read raw bytes and let BeautifulSoup work out the encoding rather
        # than decoding with the platform's default codec first.
        with open(source_path, "rb") as source:
            soup = BeautifulSoup(source.read(), "html.parser")
        for node in _label_nodes(soup):
            labels[node.strip()] = None
            node.replace_with(_translate_expression(node))
        # Write next to the source file, not into a hard-coded directory.
        target_path = os.path.join(
            main_path, name.split(".html")[0] + "_refactored.html")
        with open(target_path, "wb") as output:
            output.write(soup.encode("utf8"))
    with open("it.json", "wb") as out:
        out.write(json.dumps(labels, indent=2).encode("utf8"))
    print("Done!")
# Script entry point: process the templates stored in the "views" directory.
if __name__ == "__main__":
    process_htmls("views")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment