Last active
October 24, 2024 06:02
-
-
Save phagenlocher/c499f145ee791b22fbf0a9f54082e70d to your computer and use it in GitHub Desktop.
Vector Embedding eDSL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from vectorize import ( | |
| categorize_class, | |
| categorize_bool, | |
| categorize_num, | |
| make_vectorize, | |
| ) | |
@categorize_class(["FIN", "MISC"], weight=10)
def module_cat(d):
    """Label a record "FIN" when its TABLE_NAME starts with "FIN", else "MISC"."""
    table_name = d["TABLE_NAME"]
    return "FIN" if table_name.startswith("FIN") else "MISC"
@categorize_bool(weight=100)
def key_cat(d):
    """Pick the record's "KEY" entry as the boolean feature."""
    is_key = d["KEY"]
    return is_key
@categorize_num(weight=1)
def num_cat(d):
    """Pick the record's "SOME_NUMBER" entry as the numeric feature."""
    number = d["SOME_NUMBER"]
    return number
# Combine the three categorizers into one function; each categorizer
# contributes one component of the result, in the order listed here.
vectorize = make_vectorize(
    [
        module_cat,
        key_cat,
        num_cat,
    ]
)

# Example results:
#   vectorize({"TABLE_NAME": "FIN_FOO", "KEY": True, "SOME_NUMBER": 12})
#   ==> [0, 100, 12]
#   vectorize({"TABLE_NAME": "BAR_FOO", "KEY": False, "SOME_NUMBER": 110})
#   ==> [10, 0, 110]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import wraps | |
def categorize_num(weight=1):
    """Build a decorator that turns a numeric extractor into a weighted component.

    The decorated function is called with exactly the arguments the wrapper
    receives. Its result must be a real number and is returned multiplied by
    ``weight``.

    Args:
        weight: Factor the extracted number is multiplied by.

    Returns:
        A decorator. The function it produces returns ``value * weight`` and
        raises ``ValueError`` when the extracted value is not a real number.
    """
    # Imported here so the block does not rely on a new module-level import.
    from numbers import Real

    def _categorize_num(f):
        @wraps(f)
        def wrapper(*args, **kwds):
            value = f(*args, **kwds)
            # numbers.Real still covers int and float (and bool, which is an
            # int subclass), and additionally admits other real number types
            # such as fractions.Fraction. Non-numbers are rejected as before.
            if not isinstance(value, Real):
                raise ValueError(f"cannot vectorize {value} as a number!")
            return value * weight

        return wrapper

    return _categorize_num
def categorize_bool(weight=1):
    """Build a decorator that turns a boolean extractor into a weighted component.

    Args:
        weight: Value emitted when the extracted flag is ``True``.

    Returns:
        A decorator. The function it produces returns ``weight`` for ``True``
        and ``0`` for ``False``, and raises ``ValueError`` when the extracted
        value is not a ``bool``.
    """

    def _categorize_bool(f):
        @wraps(f)
        def wrapper(*args, **kwds):
            flag = f(*args, **kwds)
            # Only genuine bools pass; truthy/falsy stand-ins are rejected.
            if isinstance(flag, bool):
                return weight if flag else 0
            raise ValueError(f"cannot vectorize {flag} as a bool!")

        return wrapper

    return _categorize_bool
def categorize_class(classes, weight=1):
    """Build a decorator that encodes a class label as a weighted list position.

    Args:
        classes: List of the allowed class values. A value's position in this
            list determines its encoding.
        weight: Factor the position is multiplied by.

    Returns:
        A decorator. The function it produces returns the position of the
        extracted value within ``classes`` multiplied by ``weight``, and
        raises ``ValueError`` when the value is not one of ``classes``.

    Raises:
        ValueError: If ``classes`` is not a list.
    """
    if not isinstance(classes, list):
        raise ValueError(
            f"the classes must be given as a list of values, but got {classes}"
        )
    # Snapshot the list: if the caller mutates it after decoration, the
    # encoding of already-decorated functions must not change.
    known = list(classes)

    def _categorize_class(f):
        @wraps(f)
        def wrapper(*args, **kwds):
            value = f(*args, **kwds)
            # One lookup serves as both the membership test and the position.
            try:
                position = known.index(value)
            except ValueError:
                raise ValueError(
                    f"the value {value} is not any of the specified classes {known}"
                ) from None
            return position * weight

        return wrapper

    return _categorize_class
def make_vectorize(funs):
    """Combine component functions into a single vectorizing function.

    Args:
        funs: Iterable of callables, each taking the record and returning one
            vector component.

    Returns:
        A function that takes a record ``d`` and returns a list holding the
        result of every callable in ``funs`` applied to ``d``, in order.
    """
    # Materialize once: a one-shot iterable (generator, map, iter) would
    # otherwise be exhausted by the first call and later calls would return [].
    components = tuple(funs)

    def vectorize(d):
        return [component(d) for component in components]

    return vectorize
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment