Created
November 30, 2021 10:53
-
-
Save yuq-1s/3b8cdc4514c6039d487bf6bef875e0cc to your computer and use it in GitHub Desktop.
Utility function for caching intermediate results in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip | |
import logging | |
import pickle | |
import time | |
import yaml | |
from tqdm import tqdm | |
def cached(cache_path):
    """Return a decorator that caches a function's result in a pickle file.

    Each call to the decorated function first tries to unpickle
    ``cache_path``. If the file is missing or truncated, the original
    function is executed, its result is pickled to ``cache_path`` and then
    returned. Paths ending in ``.gz`` are read and written through ``gzip``.

    The cache is keyed on ``cache_path`` only: call arguments are forwarded
    to the original function on a cache miss but are ignored on a hit.

    NOTE: ``pickle.load`` can execute arbitrary code while loading, so only
    point this decorator at cache files you trust.

    Args:
        cache_path: Path of the pickle file used as the cache.

    Returns:
        A decorator that wraps a function with the load-or-generate logic.
    """
    # Imported locally so this block does not rely on additional
    # file-level imports.
    import functools
    import os

    def wrapper(func):
        open_fn = gzip.open if cache_path.endswith('.gz') else open

        @functools.wraps(func)  # keep the wrapped function's name/docstring
        def new_func(*args, **kwargs):
            logging.info(f"Loading {cache_path} ...")
            start = time.time()
            try:
                # Only the load itself is guarded, so errors raised while
                # generating the value are never mistaken for a cache miss.
                with open_fn(cache_path, 'rb') as f:
                    ret = pickle.load(f)
            except (FileNotFoundError, EOFError):
                # Announce the fallback before the (possibly slow) generation.
                logging.info(f"Loading {cache_path} failed, generating it now ...")
            else:
                end = time.time()
                logging.info(f"Loaded {cache_path} in {end - start:.4f} seconds")
                return ret

            # Create the cache directory up front so an unwritable location
            # fails before the expensive computation rather than after it.
            cache_dir = os.path.dirname(cache_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)

            ret = func(*args, **kwargs)
            with open_fn(cache_path, 'wb') as f:
                pickle.dump(ret, f)
            return ret

        return new_func

    return wrapper
@cached("cache/flashtext_kp.pkl")
def get_kp():
    """Build a flashtext ``KeywordProcessor`` from the lines of ``zh_list``.

    Every line of the file ``zh_list`` (relative to the working directory) is
    stripped of surrounding whitespace and registered as a keyword. The
    function is decorated with ``cached`` using ``cache/flashtext_kp.pkl``.

    Returns:
        The populated ``KeywordProcessor``.
    """
    logging.info("Generating flashtext.KeywordProcessor ...")
    # Imported lazily, after the log line, exactly where the build starts.
    from flashtext import KeywordProcessor

    processor = KeywordProcessor()
    with open('zh_list') as keyword_file:
        # NOTE(review): 18537072 is a hard-coded progress-bar total,
        # presumably the line count of zh_list -- confirm against the file.
        progress = tqdm(keyword_file, total=18537072)
        for keyword in map(str.strip, progress):
            processor.add_keyword(keyword)
    return processor
if __name__ == '__main__':
    # The root logger defaults to WARNING, which would silently drop every
    # logging.info progress message emitted while loading or generating the
    # cache; enable INFO output when this file is run as a script.
    logging.basicConfig(level=logging.INFO)
    kp = get_kp()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment