Created
January 12, 2016 16:07
-
-
Save brunifrancesco/fa0f70917c21817f86c7 to your computer and use it in GitHub Desktop.
Process and parse HTML files to make them compliant with the Angular "translate" filter. The script also creates the final JSON file which will contain all the needed translations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
""" | |
Process some HTML files to add translation features. | |
The script replaces simple strings (avoiding parsing special chars, comments, Angular based variables, HTML entities) | |
and any other Angular related stuff with the same string followed by the 'translate' filter as stated in | |
https://angular-translate.github.io/ . | |
To be translated labels got memorized in the final json file, where translations values will be present. | |
File by file, processed content is stored in a new file letting the original one intact. | |
Install required deps | |
pip install fn beautifulsoup4 | |
""" | |
import json | |
import os | |
from bs4 import BeautifulSoup | |
from fn.monad import Option | |
__author__ = "Francesco Bruni" | |
def _is_translatable(text):
    """Return True if *text* is a plain label safe to wrap with |translate.

    Rejects empty strings and anything that looks like markup, an Angular
    expression/variable, a comment, a URL, a jQuery snippet, or other
    non-label content (same exclusion list as the original Option chain).

    :param text: a stripped text-node string from the parsed HTML.
    :return: bool
    """
    if not text:
        return False
    # Angular expressions and raw markup must stay untouched.
    if text.startswith("{{") or text.startswith("<"):
        return False
    # Substrings that mark non-translatable content anywhere in the node.
    banned = ("{{", "./modal", "$(", "http://", "/*", "\n", "+++++")
    return not any(marker in text for marker in banned)


def process_htmls(main_path):
    """
    Process HTML files so their text nodes use the Angular "translate" filter.

    Every ``*.html`` file in *main_path* (files whose name already contains
    "refactored" are skipped — they are output of a previous run) is parsed
    with BeautifulSoup.  Each plain text node that passes
    :func:`_is_translatable` is replaced with ``{{ 'text' | translate }}``
    and its text is collected as a key in the translation dictionary.  The
    rewritten markup is written alongside the source file as
    ``<name>_refactored.html``; the collected keys are dumped to ``it.json``
    with ``null`` values for translators to fill in.

    :param main_path: directory containing the HTML files to process.
    """
    data_json = {}
    for filename in filter(lambda item: "refactored" not in item, os.listdir(main_path)):
        print("Processing %s" % filename)
        # Read as text with an explicit encoding; BeautifulSoup accepts str
        # directly (the old read().encode("utf8") round-trip was redundant).
        with open(os.path.join(main_path, filename), "r", encoding="utf8") as infile:
            soup = BeautifulSoup(infile.read(), "html.parser")
        for node in soup.find_all(text=lambda x: x.strip()):
            stripped_node = node.strip()
            if _is_translatable(stripped_node):
                # BUG FIX: interpolate the *stripped* text so the template
                # expression matches the key stored in it.json (the original
                # wrapped the raw node, whitespace included).
                node.replace_with("{{ '%s' | translate }}" % stripped_node)
                data_json[stripped_node] = None
        # BUG FIX: output directory was hard-coded to "views"; write next to
        # the input files so any main_path argument works.
        out_name = filename.split(".html")[0] + "_refactored.html"
        with open(os.path.join(main_path, out_name), "wb") as output:
            output.write(soup.encode("utf8"))
    with open("it.json", "wb") as out:
        out.write(json.dumps(data_json, indent=2).encode("utf8"))
    print("Done!")
if __name__ == '__main__':
    # Directory holding the HTML templates to be processed.
    target_dir = "views"
    process_htmls(target_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment