Skip to content

Instantly share code, notes, and snippets.

@phagenlocher
Last active October 24, 2024 06:02
Show Gist options
  • Select an option

  • Save phagenlocher/c499f145ee791b22fbf0a9f54082e70d to your computer and use it in GitHub Desktop.

Select an option

Save phagenlocher/c499f145ee791b22fbf0a9f54082e70d to your computer and use it in GitHub Desktop.
Vector Embedding eDSL
from vectorize import (
categorize_class,
categorize_bool,
categorize_num,
make_vectorize,
)
@categorize_class(["FIN", "MISC"], weight=10)
def module_cat(d):
    """Classify a record as "FIN" or "MISC" from its table name.

    A record whose ``d["TABLE_NAME"]`` starts with ``"FIN"`` is labelled
    ``"FIN"``; every other record is labelled ``"MISC"``. The decorator
    then turns that label into its numeric vector component.
    """
    return "FIN" if d["TABLE_NAME"].startswith("FIN") else "MISC"
@categorize_bool(weight=100)
def key_cat(d):
    """Extract the ``"KEY"`` entry of the record for boolean encoding."""
    is_key = d["KEY"]
    return is_key
@categorize_num(weight=1)
def num_cat(d):
    """Extract the ``"SOME_NUMBER"`` entry of the record for numeric encoding."""
    number = d["SOME_NUMBER"]
    return number
# Combine the categorizers into one function; the resulting vector has one
# component per categorizer, in the order they are listed here.
vectorize = make_vectorize(
    [
        module_cat,
        key_cat,
        num_cat,
    ]
)
# Example results:
#   vectorize({"TABLE_NAME": "FIN_FOO", "KEY": True, "SOME_NUMBER": 12})
#   ==> [0, 100, 12]
#   vectorize({"TABLE_NAME": "BAR_FOO", "KEY": False, "SOME_NUMBER": 110})
#   ==> [10, 0, 110]
from functools import wraps
def categorize_num(weight=1):
    """Build a decorator that encodes a numeric feature as ``value * weight``.

    Args:
        weight: Factor the extracted number is multiplied by.

    Returns:
        A decorator. The decorated function is called with the original
        arguments and its result is returned multiplied by ``weight``.

    Raises:
        ValueError: Raised by the decorated function (at call time) when the
            extracted value is not an ``int`` or ``float``. Booleans are
            rejected as well; use ``categorize_bool`` for those.
    """

    def _categorize_num(f):
        @wraps(f)
        def wrapper(*args, **kwds):
            value = f(*args, **kwds)
            # bool is a subclass of int, so a plain isinstance(value, int)
            # check would silently accept True/False as the numbers 1/0.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError(f"cannot vectorize {value} as a number!")
            return value * weight

        return wrapper

    return _categorize_num
def categorize_bool(weight=1):
    """Build a decorator that encodes a boolean feature as ``weight`` or ``0``.

    Args:
        weight: Value produced when the extracted flag is ``True``.

    Returns:
        A decorator. The decorated function returns ``weight`` when the
        wrapped function yields ``True`` and ``0`` when it yields ``False``.

    Raises:
        ValueError: Raised by the decorated function (at call time) when the
            extracted value is not a ``bool``.
    """

    def decorate(f):
        @wraps(f)
        def encoded(*args, **kwds):
            flag = f(*args, **kwds)
            if isinstance(flag, bool):
                return weight if flag else 0
            raise ValueError(f"cannot vectorize {flag} as a bool!")

        return encoded

    return decorate
def categorize_class(classes, weight=1):
    """Build a decorator that encodes a categorical feature by its class index.

    The decorated function returns ``index * weight``, where ``index`` is the
    position of the extracted value in ``classes``. The first class therefore
    always encodes to ``0``.

    Args:
        classes: List of the allowed class values. A copy is taken when the
            decorator is created, so the encoding stays stable even if the
            caller mutates the list afterwards.
        weight: Factor the class index is multiplied by.

    Returns:
        A decorator that wraps a feature-extraction function.

    Raises:
        ValueError: Immediately if ``classes`` is not a list; by the decorated
            function (at call time) if the extracted value is not one of the
            classes.
    """
    if not isinstance(classes, list):
        raise ValueError(
            f"the classes must be given as a list of values, but got {classes}"
        )
    # Snapshot the list: holding the caller's list by reference would let a
    # later insert/remove silently shift every encoded index.
    known_classes = list(classes)

    def _categorize_class(f):
        @wraps(f)
        def wrapper(*args, **kwds):
            value = f(*args, **kwds)
            try:
                position = known_classes.index(value)
            except ValueError:
                raise ValueError(
                    f"the value {value} is not any of the specified classes {known_classes}"
                ) from None
            return position * weight

        return wrapper

    return _categorize_class
def make_vectorize(funs):
    """Combine feature functions into a single vectorizing function.

    Args:
        funs: Iterable of one-argument callables. It is copied once when the
            vectorizer is built, so a one-shot iterator or generator may be
            passed and the vectorizer can still be called repeatedly.

    Returns:
        A function ``vectorize(d)`` that returns a list holding the result of
        each callable applied to ``d``, in the order given.
    """
    # Materialize once: re-iterating an exhausted iterator on the second call
    # would otherwise produce an empty vector.
    components = tuple(funs)

    def vectorize(d):
        return [component(d) for component in components]

    return vectorize
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment