Skip to content

Instantly share code, notes, and snippets.

@taicaile
Created September 24, 2021 08:57
Show Gist options
  • Save taicaile/fe39bf0bad57c4d464b172e0e35efc2e to your computer and use it in GitHub Desktop.
Save taicaile/fe39bf0bad57c4d464b172e0e35efc2e to your computer and use it in GitHub Desktop.
A script that removes the watermarks from the HTML file generated by MindMaster. It is for study only; please subscribe to the paid plan if you want to publish the file.
#!/usr/bin/python3
"""
A script that removes the watermarks from the HTML file generated by MindMaster. It is for study only; please subscribe to the paid plan if you want to publish the file.
"""
import os
import re
import sys
import argparse
# Command-line interface: one positional argument naming the HTML file.
# The module docstring doubles as the --help description.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "file",
    type=str,
    help="the HTML file path",
)
# Parsed eagerly at module level; the rest of the script reads args.file.
args = parser.parse_args(sys.argv[1:])
def read_file(file):
    """Return the UTF-8 decoded text content of ``file``.

    If ``file`` does not exist, or exists but is not a regular file, a
    diagnostic is written to stderr and the process is terminated with
    ``sys.exit(-1)`` (i.e. ``SystemExit`` is raised). Errors raised by
    ``open``/``read`` themselves propagate unchanged.
    """
    if not os.path.exists(file):
        # Diagnostics go to stderr so they never mix with normal output.
        print(f"{file} was not found, exit...", file=sys.stderr)
        sys.exit(-1)
    if not os.path.isfile(file):
        # The path exists but is e.g. a directory.
        print(f"{file} is not a file, exit...", file=sys.stderr)
        sys.exit(-1)
    with open(file, "r", encoding="utf-8") as r_f:
        return r_f.read()
# Load the exported HTML named on the command line.
html = read_file(args.file)

# Watermark markup to strip: every <svg xmlns=...>...</svg> element
# (non-greedy, so each element is matched on its own) and the
# <div id="copyright"> block.
# NOTE(review): the copyright pattern is greedy, so with DOTALL it extends to
# the LAST </div> in the document -- presumably the copyright block is the
# final div of the export; confirm against a real MindMaster file.
regexes = [r"<svg\sxmlns=.*?</svg>", r"<div\sid=\"copyright\">.*</div>"]
EMPTY = ""
for regex in regexes:
    # DOTALL lets "." span newlines. flags is passed by keyword because
    # passing count/flags positionally is deprecated since Python 3.13.
    html = re.sub(regex, EMPTY, html, flags=re.MULTILINE | re.DOTALL)

# Write the result next to the input as "<stem>.clean<ext>".
STEM, EXT = os.path.splitext(args.file)
NEW_FILE = STEM + ".clean" + EXT
# Plain "w" is enough: the file is only written, never read back.
with open(NEW_FILE, "w", encoding="utf-8") as wf:
    wf.write(html)
print(f"The watermarks were removed, please check the new file {NEW_FILE}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment