Last active
March 2, 2024 18:08
-
-
Save kohlmeier/5654241 to your computer and use it in GitHub Desktop.
Example of computing compressed features. NOTE: If you want to create such features consistently across processes, you will need to persist the random components. Easy enough, but I've written the code for that, too, here: https://github.com/Khan/analytics/blob/master/map_reduce/py/random_features.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import numpy as np | |
class CompressedFeatures:
    """Accumulate a fixed-length feature vector from arbitrary component keys.

    Every distinct component key is lazily assigned its own random
    unit-length column vector with ``num_features`` entries; the vector is
    drawn from ``np.random`` the first time the key is used and then reused
    for the lifetime of this object. The feature vector is the running sum
    of ``scale * component`` over all ``increment_component`` calls made
    since the last ``reset_features``.
    """

    def __init__(self, num_features=50):
        """Create an accumulator whose feature vector is zero.

        Args:
            num_features: Length of the compressed feature vector and of
                every random component vector.
        """
        # Keys may be any hashable value; a missing key triggers
        # _generate_component, so components are created on first use.
        self.random_components = collections.defaultdict(
            self._generate_component)
        self.num_features = num_features
        self.reset_features()

    def _generate_component(self):
        """Internal method to generate a random vector on demand.

        Returns:
            A ``(num_features, 1)`` array of standard-normal draws scaled
            to unit Euclidean length.
        """
        rv = np.random.randn(self.num_features, 1)
        rv /= np.sqrt(np.dot(rv.T, rv))  # normalize to unit length
        return rv

    def reset_features(self):
        """Set the feature vector back to zero, keeping all components."""
        self.feature_vector = np.zeros((self.num_features, 1))

    def get_features(self):
        """Return the current feature vector as a 1-D array.

        A copy is returned: ``increment_component`` updates the internal
        accumulator in place, so handing out a view would let later
        increments silently change a vector the caller already holds.
        """
        return self.feature_vector[:, 0].copy()

    def increment_component(self, component_key, scale=1.0):
        """Add ``scale`` times the key's random component to the features.

        Args:
            component_key: Hashable identifier of the component; its random
                vector is generated on first use.
            scale: Multiplier applied to the component before it is added.
        """
        self.feature_vector += scale * self.random_components[component_key]
if __name__ == '__main__':
    # Example usage.
    # Optionally, seed the RNG if you want experiments to be repeatable.
    np.random.seed(909090)

    cf = CompressedFeatures(5)  # compress to 5 dimensions

    # Compute compressed features for User 1, provided her history.
    cf.reset_features()
    cf.increment_component(("addition exercise", "correct"))
    cf.increment_component(("calculus exercise", "incorrect"))
    # A single parenthesized argument makes this valid both as a Python 2
    # print statement and as a Python 3 print() call, with the same output.
    print("User 1:  %s" % (cf.get_features(),))

    # Compute compressed features for User 2.
    cf.reset_features()
    cf.increment_component(("addition exercise", "correct"))
    cf.increment_component(("geometery exercise", "correct"))
    cf.increment_component(("calculus exercise", "correct"))
    print("User 2:  %s" % (cf.get_features(),))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment