import ml.dmlc.xgboost4j.LabeledPoint
import ml.dmlc.xgboost4j.scala.{Booster, DMatrix, XGBoost}

object Classifier {
  var model: Option[Booster] = None

  // Load the model from a binary file, keeping a single cached instance
  def Init(modelFile: String): Unit = {
    if (model.isEmpty) {
      model = Some(XGBoost.loadModel(modelFile))
    }
  }
}
def perm(orig_str, new_str=""):
    # recursively build and print every permutation of the characters in orig_str
    if len(new_str) == len(orig_str):
        print(new_str)
    else:
        for char in orig_str:
            if char not in new_str:
                perm(orig_str, new_str + char)
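# Example usage: for "abc" this prints abc, acb, bac, bca, cab, cba, one per line
# (the function assumes the characters in orig_str are distinct).
perm("abc")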
import numpy as np

# compute the reconstruction error on the training set and take its 99th percentile as the threshold
X_train_pred = vae_model.predict(X_train)
mae_vector = get_error_term(X_train_pred, X_train, _rmse=False)
error_thresh = np.quantile(mae_vector, 0.99)

# flag test samples whose reconstruction error exceeds the threshold
X_test_pred = vae_model.predict(X_test)
mae_vector_test = get_error_term(X_test_pred, X_test, _rmse=False)
anomalies = (mae_vector_test > error_thresh)
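# get_error_term is not shown in the gist; a minimal sketch under the assumption that it
# returns one reconstruction-error value per sample (MAE by default, RMSE when _rmse=True):
def get_error_term(x_pred, x_true, _rmse=True):
    if _rmse:
        return np.sqrt(np.mean(np.square(x_pred - x_true), axis=1))
    return np.mean(np.abs(x_pred - x_true), axis=1)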
opt = optimizers.Adam(learning_rate=0.0001, clipvalue=0.5)
vae_model.compile(optimizer=opt, loss=vae_loss)
vae_model.summary()
# Finally, we train the model:
results = vae_model.fit(X_train, X_train,
                        shuffle=True,
                        epochs=32,
                        batch_size=256)
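# X_train and X_test are not defined in these snippets; since the decoder output uses a
# sigmoid activation, the features are presumably scaled into [0, 1]. A minimal,
# hypothetical preprocessing sketch (raw_train_features / raw_test_features are assumed names):
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train = scaler.fit_transform(raw_train_features)
X_test = scaler.transform(raw_test_features)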
# the VAE loss function: reconstruction error plus the KL divergence of the latent distribution
def vae_loss(x, x_decoded_mean):
    # reconstruction term: sum the squared errors over all axes
    reconstruction_loss = K.sum(K.square(x - x_decoded_mean))
    # KL divergence between the learned latent distribution and a standard normal
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    # return the mean of the combined loss
    total_loss = K.mean(reconstruction_loss + kl_loss)
    return total_loss
# full VAE model
outputs = decoder(encoder(inputs))
vae_model = Model(inputs, outputs, name='vae_mlp')
# decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(intermediate_dim, activation='relu')(latent_inputs)
outputs = Dense(original_dim, activation='sigmoid')(x)
# Instantiate the decoder model:
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
def sample(args):
    # reparameterization trick: z = mean + std * epsilon, with epsilon ~ N(0, I)
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
# encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(intermediate_dim, activation='relu')(inputs)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)
# use the reparameterization trick and get the output from the sample() function
z = Lambda(sample, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs, z, name='encoder')
encoder.summary()
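# The layers and backend functions used in these snippets (Input, Dense, Lambda, Model, K,
# optimizers) and the dimension settings are not shown in the gist. A minimal setup sketch,
# assuming tf.keras; the dimension values are placeholders, not the author's:
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers

original_dim = X_train.shape[1]   # number of input features
input_shape = (original_dim,)
intermediate_dim = 64             # placeholder hidden-layer width
latent_dim = 2                    # placeholder latent dimension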
# First cluster the item data and return the model
items_model, items_labels, items_cluster_centers = cluster_gmm(exp_model.wv.vectors, k=8)
# user_means is a list of vectors; each one is the mean of the item vectors a given user
# has listened to. The fitted model is then used to build a new vector for each user, based
# on the user's probability of belonging to each item cluster, starting from an empty array.
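# cluster_gmm is not defined in the gist; a hypothetical sketch using scikit-learn's
# GaussianMixture, followed by the user-vector construction the comments above describe
# (user_means and user_vecs are assumed names):
import numpy as np
from sklearn.mixture import GaussianMixture

def cluster_gmm(vectors, k):
    # fit a Gaussian mixture with k components and return the model, labels and centers
    gmm = GaussianMixture(n_components=k, random_state=42).fit(vectors)
    return gmm, gmm.predict(vectors), gmm.means_

# start from an empty (zero) array and fill one row per user with the probability of the
# user's mean item vector belonging to each of the k item clusters
user_vecs = np.zeros((len(user_means), 8))
for i, mean_vec in enumerate(user_means):
    user_vecs[i] = items_model.predict_proba(np.asarray(mean_vec).reshape(1, -1))[0]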