Last active
August 29, 2015 14:15
-
-
Save MitI-7/8d9ee33de12413a83f61 to your computer and use it in GitHub Desktop.
Greedが生成したhtmlから問題文を抜き出して,日本語に翻訳&挿入したhtmlを作成
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import subprocess | |
from html.parser import HTMLParser | |
from microsofttranslator import Translator | |
# Credentials handed to the Microsoft Translator client in main().
# Both are blank in the source; presumably real values must be filled in
# before a translation request can succeed -- confirm against the API.
client_secret = ""
client_id = ""
class TestHTMLParser(HTMLParser):
    """Accumulate the text that appears inside ``<intro>`` elements.

    After feeding a document, ``problem`` holds the concatenation of every
    non-blank text chunk seen while an ``intro`` element was open, and
    ``in_intro`` tells whether the parser is currently inside one.
    """

    def __init__(self):
        super().__init__()
        self.problem = ""
        self.in_intro = False

    def handle_starttag(self, tag, attrs):
        """Start collecting text when an ``intro`` element opens."""
        if tag != 'intro':
            return
        self.in_intro = True

    def handle_endtag(self, tag):
        """Stop collecting text when an ``intro`` element closes."""
        if tag != 'intro':
            return
        self.in_intro = False

    def handle_data(self, data):
        """Append *data* to ``problem`` if inside ``intro`` and not blank."""
        if not self.in_intro:
            return
        # Whitespace-only chunks are skipped entirely; kept chunks are
        # stored untrimmed.
        if data.strip():
            self.problem = self.problem + data
def _default_output_name(input_file_name):
    """Return the input name with a trailing ``.html`` swapped for ``_JP.html``."""
    suffix = ".html"
    # Strip only a trailing extension: str.replace would also remove any
    # ".html" that happens to occur earlier in the path.
    if input_file_name.endswith(suffix):
        input_file_name = input_file_name[:-len(suffix)]
    return input_file_name + "_JP.html"


def _translate_to_html(problem):
    """Translate *problem* to Japanese and return it as an HTML fragment.

    On failure the fragment contains an error notice plus the escaped
    traceback instead of the translation.
    """
    from html import escape
    import traceback

    try:
        translator = Translator(client_id=client_id, client_secret=client_secret)
        translated = translator.translate(problem, "ja")
    except Exception:
        # Escape the traceback: raw "<class ...>" style text would be
        # treated as an unknown tag by the browser and vanish.
        details = escape(traceback.format_exc(), quote=False)
        return "An unexpected error has occurred.<br/><pre>" + details + "</pre>"
    # Escape first so characters such as "<" in the statement cannot break
    # the page, then turn each Japanese full stop into a line break.
    return escape(translated, quote=False).replace("。", "<br/>")


def _insert_translation(page, block):
    """Insert *block* before the original "Problem Statement" heading.

    If the heading is missing, fall back to just before ``</body>``, or to
    the end of the document when that is missing too.
    """
    pos = page.find('<h2 class="section-title">Problem Statement</h2>')
    if pos == -1:
        # str.find returns -1 on a miss; slicing with -1 would splice the
        # block in front of the document's last character.
        pos = page.rfind("</body>")
    if pos == -1:
        pos = len(page)
    return page[:pos] + block + page[pos:]


def _open_in_browser(path):
    """Open *path* in Opera when installed, else in the default browser."""
    import os
    import pathlib
    import webbrowser

    opera = "C:/Program Files (x86)/Opera/launcher.exe"
    if os.path.isfile(opera):
        subprocess.check_call([opera, path])
    else:
        webbrowser.open(pathlib.Path(path).resolve().as_uri())


def main():
    """Add a Japanese translation of the problem statement to an HTML file.

    Command line: ``script INPUT.html [OUTPUT.html]``. When the output name
    is omitted it defaults to the input name with ``_JP.html`` in place of
    the trailing ``.html``. With any other argument count a short usage
    hint is printed and nothing else happens.

    The text inside the document's ``<intro>`` elements is translated,
    wrapped in a new section, inserted before the original "Problem
    Statement" heading, written to the output file, and opened in a browser.
    """
    argvs = sys.argv
    if len(argvs) == 2:
        input_file_name = argvs[1]
        output_file_name = _default_output_name(input_file_name)
    elif len(argvs) == 3:
        input_file_name, output_file_name = argvs[1], argvs[2]
    else:
        print("input html")
        return

    # Read with the same encoding the result is written in, rather than
    # the platform's locale default.
    with open(input_file_name, encoding="utf-8") as f:
        page = f.read()

    parser = TestHTMLParser()
    parser.feed(page)
    parser.close()

    # Build the translated section and splice it into the page.
    block = '<h2 class="section-title">Problem Statement(日本語)</h2>'
    block += "<div>" + _translate_to_html(parser.problem) + "</div>"
    page = _insert_translation(page, block)

    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(page)

    # Show the generated page.
    _open_in_browser(output_file_name)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment