Created
February 25, 2024 07:23
-
-
Save littleblacklb/b620e005cfe16d70ce4188bf6526a944 to your computer and use it in GitHub Desktop.
Asynchronous OpenCC zh-TW to zh-CN converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import time | |
import opencc | |
# Create and register the event loop explicitly. Calling
# asyncio.get_event_loop() when no loop is current is deprecated and raises
# RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Shared OpenCC converter built from the "t2s" configuration; it is reused by
# every conversion helper in this file.
cc = opencc.OpenCC("t2s")
def timer(func):
    """Decorate *func* so that every call prints its wall-clock duration.

    The wrapper forwards all positional and keyword arguments to *func*,
    measures the call with ``time.perf_counter`` and returns whatever *func*
    returned. If *func* raises, the exception propagates and nothing is
    printed.
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    # Preserve __name__, __doc__ and friends of the wrapped function so that
    # decorated functions remain introspectable.
    @wraps(func)
    def _decorator(*args, **kwargs):
        t0 = time.perf_counter()
        rtn = func(*args, **kwargs)
        dt = time.perf_counter() - t0
        print(f"Function {func.__name__} Took {dt:.4f} seconds")
        return rtn

    return _decorator
async def convert_async(text):
    """Convert *text* with the shared converter without blocking the loop.

    ``cc.convert`` is submitted to the default executor of the loop that is
    currently running this coroutine, and the converted result is returned.
    """
    # Use the running loop rather than a module-level one, so the awaited
    # future always belongs to the loop that is driving this coroutine.
    running_loop = asyncio.get_running_loop()
    return await running_loop.run_in_executor(None, cc.convert, text)
def convert_sync(text):
    """Convert *text* with the shared converter and return the result."""
    converted = cc.convert(text)
    return converted
@timer
def sync_batch_convert(lines):
    """Convert every item of *lines* one after another.

    Returns a new list with the converted items in input order. The call
    is timed by the ``timer`` decorator.
    """
    return [convert_sync(entry) for entry in lines]
@timer
def async_batch_convert(lines):
    """Convert every item of *lines* concurrently via ``convert_async``.

    One ``convert_async`` coroutine is created per item, and the
    module-level ``loop`` runs until all of them have finished. Returns the
    list of converted items in input order. The call is timed by the
    ``timer`` decorator.
    """

    async def _convert_all():
        # Gather inside a coroutine so the tasks are created while the loop
        # is running. Calling asyncio.gather() on bare coroutines without a
        # running loop is deprecated.
        return await asyncio.gather(*(convert_async(line) for line in lines))

    return loop.run_until_complete(_convert_all())
def benchmark():
    """Run both batch converters over the module-level ``lines``.

    Each converter is wrapped by ``timer``, so its duration is printed.
    ``lines`` is only assigned in this file's ``__main__`` block.
    """
    # Previously recorded output:
    # Function async_batch_convert Took 47.1727 seconds
    # Function sync_batch_convert Took 25.3549 seconds
    for convert_batch in (async_batch_convert, sync_batch_convert):
        convert_batch(lines)
if __name__ == "__main__":
    # Script entry point: read the input file, convert every line, and write
    # the converted lines to a new file.
    path = "./chibigenc.xml"
    # path = "./test.txt"
    # Pin the encoding explicitly; otherwise the platform default is used,
    # which can mis-decode Chinese text on systems whose default is not UTF-8.
    # NOTE(review): assumes the input file is UTF-8 encoded -- confirm.
    with open(path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    print("start processing")
    results = async_batch_convert(lines)
    print("done processing")
    with open("./chibigenc-simplified.xml", "w", encoding="utf-8") as f:
        # The lines keep their original newline characters, so they can be
        # written back in one batch.
        f.writelines(results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment