Last active
February 27, 2022 07:38
-
-
Save justin3737/fa4abe77e5b15e0765ee57592e9fb8f1 to your computer and use it in GitHub Desktop.
利用python 的 enchant 做拼字檢查
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import enchant | |
from xml.dom.minidom import parse | |
import xml.dom.minidom | |
import js2py | |
import re | |
# ------------------- Define -------------------
# Regex matching runs of ASCII letters; every match is spell-checked as a word.
pat = '[a-zA-Z]+'
# Tag names of the XML elements whose content is collected for checking.
xmlTags = ['String', 'value', 'string']
# ------------------- Use lib -------------------
d = enchant.Dict("en_US")
# ------------------- Load whitelist -------------------
# Every non-blank line of exception.txt is registered with the dictionary so
# that it is accepted by d.check().  The line is stripped first: readlines()
# keeps the trailing newline, and a word stored as "foo\n" never matches the
# token "foo" produced by the regex above.
with open("exception.txt", "r") as whitelist_file:
    add_dict = whitelist_file.readlines()
for line in add_dict:
    word = line.strip()
    if word:  # skip blank lines; there is nothing to register
        d.add(word)
# ------------------- Load XML -------------------
#DOMTree = xml.dom.minidom.parse("StringTable.xml")
DOMTree = xml.dom.minidom.parse("MSReportTemplates.resx")
# Gather every element whose tag name is listed in xmlTags.
arrData = []
for tag_name in xmlTags:
    arrData += DOMTree.getElementsByTagName(tag_name)
for v in arrData:
    # Prefer the data of the element's first child node.  firstChild is None
    # for an empty element, and a child element has no `data` attribute, so
    # getattr() covers both cases without raising.
    text = getattr(v.firstChild, 'data', None)
    # Fall back to the DisplayName attribute when there is no text to read.
    if text is None and v.hasAttribute('DisplayName'):
        text = v.getAttribute('DisplayName')
    if text is None:
        # Neither source is available: report it and move on to the next node.
        print('------------------- Parse L10N XML file error.------------------- ')
        continue
    # Tokenise both sources the same way so that whole words, not single
    # characters, are handed to the spell checker.
    line = re.findall(pat, text)
    for word in line:
        if not d.check(word):
            print(word)
# ------------------- Load JS -------------------
# NOTE(review): js2py.eval_js executes the file content as JavaScript, so this
# must only be pointed at a trusted L10N.update.js.
with open('L10N.update.js') as dataFile:
    data = dataFile.read()
# The `with` block has already closed the file; no explicit close() is needed.
obj = js2py.eval_js(data)
for key in obj:
    value = obj[key]
    # Only string values can be tokenised; re.findall raises TypeError on
    # anything else (numbers, nested objects, None).
    if not isinstance(value, str):
        continue
    for word in re.findall(pat, value):
        if not d.check(word):
            print(word)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment