Created
January 12, 2016 16:07
-
-
Save brunifrancesco/fa0f70917c21817f86c7 to your computer and use it in GitHub Desktop.
Process and parse HTML files to make them compliant with the Angular "translate" filter. The script also creates the final JSON file which will contain all the needed translations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
""" | |
Process some HTML files to add translation features. | |
The script replaces simple strings (avoiding parsing special chars, comments, Angular based variables, HTML entities) | |
and any other Angular related stuff with the same string followed by the 'translate' filter as stated in | |
https://angular-translate.github.io/ . | |
To be translated labels got memorized in the final json file, where translations values will be present. | |
File by file, processed content is stored in a new file letting the original one intact. | |
Install required deps | |
pip install fn beautifulsoup4 | |
""" | |
import json | |
import os | |
from bs4 import BeautifulSoup | |
from fn.monad import Option | |
__author__ = "Francesco Bruni" | |
def _is_translatable(text):
    """Return True if *text* is a plain label safe to wrap with |translate.

    Rejects empty strings and anything that looks like markup, an Angular
    expression/variable, a comment, a URL, a jQuery snippet, or other
    non-label content (same exclusion list as the original Option chain).

    :param text: a stripped text-node string from the parsed HTML.
    :return: bool
    """
    if not text:
        return False
    # Angular expressions and raw markup must stay untouched.
    if text.startswith("{{") or text.startswith("<"):
        return False
    # Substrings that mark non-translatable content anywhere in the node.
    banned = ("{{", "./modal", "$(", "http://", "/*", "\n", "+++++")
    return not any(marker in text for marker in banned)


def process_htmls(main_path):
    """
    Process HTML files so their text nodes use the Angular "translate" filter.

    Every ``*.html`` file in *main_path* (files whose name already contains
    "refactored" are skipped — they are output of a previous run) is parsed
    with BeautifulSoup.  Each plain text node that passes
    :func:`_is_translatable` is replaced with ``{{ 'text' | translate }}``
    and its text is collected as a key in the translation dictionary.  The
    rewritten markup is written alongside the source file as
    ``<name>_refactored.html``; the collected keys are dumped to ``it.json``
    with ``null`` values for translators to fill in.

    :param main_path: directory containing the HTML files to process.
    """
    data_json = {}
    for filename in filter(lambda item: "refactored" not in item, os.listdir(main_path)):
        print("Processing %s" % filename)
        # Read as text with an explicit encoding; BeautifulSoup accepts str
        # directly (the old read().encode("utf8") round-trip was redundant).
        with open(os.path.join(main_path, filename), "r", encoding="utf8") as infile:
            soup = BeautifulSoup(infile.read(), "html.parser")
        for node in soup.find_all(text=lambda x: x.strip()):
            stripped_node = node.strip()
            if _is_translatable(stripped_node):
                # BUG FIX: interpolate the *stripped* text so the template
                # expression matches the key stored in it.json (the original
                # wrapped the raw node, whitespace included).
                node.replace_with("{{ '%s' | translate }}" % stripped_node)
                data_json[stripped_node] = None
        # BUG FIX: output directory was hard-coded to "views"; write next to
        # the input files so any main_path argument works.
        out_name = filename.split(".html")[0] + "_refactored.html"
        with open(os.path.join(main_path, out_name), "wb") as output:
            output.write(soup.encode("utf8"))
    with open("it.json", "wb") as out:
        out.write(json.dumps(data_json, indent=2).encode("utf8"))
    print("Done!")
if __name__ == '__main__':
    # Directory holding the HTML templates to be processed.
    target_dir = "views"
    process_htmls(target_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment