a-agmon’s gists

a-agmon / s_anomaly_score.py

Created June 30, 2020 19:28

	# Score one sequence by how badly the autoencoder reconstructs it.
	# NOTE(review): this preview is truncated and has lost its indentation; the
	# rank computation and the return statement are not visible here.
	def get_sequence_anomaly_rank(seq_a):

	# wrap seq_a in a one-element list and tokenize it with the `tokenizer`
	# object defined outside this snippet
	t_vec = tokenizer.texts_to_sequences([seq_a])

	# pad or cut at the end ('post') so the sequence length is MAX_FEAT_LEN
	pad_seq = pad_sequences(t_vec, maxlen=MAX_FEAT_LEN, padding='post', truncating='post')
	# presumably encodes the padded sequence as a vector of width VOCAB_SIZE --
	# TODO confirm against vectorize_sequences
	vec_seq = vectorize_sequences(pad_seq, VOCAB_SIZE)
	# reconstruct the encoded input with the autoencoder
	pred_seq = autoencoder.predict(vec_seq)
	# anomaly score: squared reconstruction error averaged over axis 1
	score = np.mean(np.power(vec_seq - pred_seq, 2), axis=1)
	# get anomaly rank (the code for this step is cut off in this preview)

a-agmon / s_predict.py

Created June 30, 2020 19:30

	# Reconstruct every vectorized sequence, then store the mean squared
	# reconstruction error of each row as that sequence's loss term
	predictions = autoencoder.predict(vec_seqs)
	reconstruction_error = vec_seqs - predictions
	mse = np.mean(reconstruction_error ** 2, axis=1)
	sequences['MSE'] = mse

a-agmon / lda_1.py

Created September 6, 2020 06:01

	# assign each vector the cluster with which it is most associated:
	# take the index of the largest value in each row of lda.transform's output
	df_avg['cluster'] = np.argmax(lda.transform(df_avg), axis=1)

	# chart the different clusters, one bar chart per cluster

	# three stacked axes that share both x and y scales so the charts are comparable
	fig, axs = plt.subplots(3, 1, sharex=True, sharey=True)

	# iloc[:, :-1] drops the last column (the `cluster` column added above, assuming
	# it did not already exist); plot the per-column mean of the rows in cluster 0
	df_avg.iloc[:,:-1].loc[df_avg.cluster == 0].mean().plot.bar(ax=axs[0])
	axs[0].set_title('Category - 0 (Evening/Night)')

a-agmon / lda_2.py

Created September 6, 2020 07:08

	# Show a pie chart of the groups

	# Count the rows in each cluster. value_counts() orders its result by
	# frequency, so sort by cluster id to keep the counts aligned with the
	# fixed, cluster-id order of `labels` below (cluster 0 is Evening / Night).
	summs = df_avg.cluster.value_counts().sort_index()
	# float array holding the same counts
	cats = np.asarray(summs, dtype=float)


	# labels[i] is the display name of cluster i
	labels = ['Evening / Night','Normal Day', 'Late Night / Early Morning']
	sizes = summs.astype(int)

a-agmon / learn_embed.py

Created September 21, 2020 06:09


	# Build a model that embeds users and items in a shared vector space.
	# NOTE(review): this preview is truncated and has lost its indentation; only
	# the two input layers are visible, and embedding_size is not used in the
	# visible part.
	def user_embedding_model(embedding_size = 50):

	# Embed items and users in vec space

	# Both inputs are 1-dimensional
	user = Input(name = 'user', shape = [1])
	item = Input(name = 'item', shape = [1])

	# (None, 1, 50) -- presumably the output shape of the layer that followed
	# here with the default embedding_size; that layer is cut off in this preview

a-agmon / w2v.py

Created March 29, 2021 13:57

	# text_parts maps each userID to the items that user engaged with; each
	# user's items are passed to Word2Vec as one "sentence".
	# vector_size is the size of the embedding vector (the latent factors dimension).
	# window should be set to the number of items of the user who has the most items.
	model = Word2Vec(
		sentences=text_parts.values(),
		vector_size=20,
		window=36,
		min_count=3,
		sg=1,
		ns_exponent=-0.5,
	)

a-agmon / w2v_scan.py

Last active March 31, 2021 06:22



	# a dictionary consisting of K:user and V:mean of the user's items
	# NOTE(review): this preview is truncated and has lost its indentation; the
	# body of the final `if` and the code that fills user_means are not visible.
	user_means = {}

	# iterate over a snapshot of the user ids in text_parts
	for user in list(text_parts.keys()):
	# get a list of artists the user listens to
	# (but only if the model has a vector for the artist)
	artists = [artist for artist in text_parts[user] if model.wv.has_index_for(artist)]
	# none of this user's artists is known to the model
	if len(artists) == 0:

a-agmon / cluster_gmm.py

Last active April 15, 2021 07:38


	from sklearn.mixture import GaussianMixture as GMM

	def cluster_gmm(matrix, k=4):
		"""Fit a Gaussian mixture with k components to matrix and label its samples.

		The mixture uses full covariance matrices, a fixed random_state of 0 so
		repeated runs give the same result, and 10 initializations.

		Args:
			matrix: the data to cluster, passed unchanged to fit() and predict().
			k: number of mixture components.

		Returns:
			A tuple (fitted model, component label predicted for each sample,
			component means).
		"""
		mixture = GMM(n_components=k, covariance_type='full', random_state=0, n_init=10)
		mixture.fit(matrix)
		return mixture, mixture.predict(matrix), mixture.means_

a-agmon / gmmm_calc.py

Last active April 15, 2021 08:08


	# First cluster the item vectors (exp_model.wv.vectors) into 8 groups and keep
	# the fitted model, the per-item labels and the cluster centers
	items_model, items_labels, items_cluster_centers = cluster_gmm(exp_model.wv.vectors, k=8)

	# [user_means] is a list of vectors; each one is the mean of the item vectors
	# of the items a user has listened to

	# Then use the model to create a new vector for each user,
	# based on the user's probability of belonging to each item cluster

	# this is the empty array
	# NOTE(review): the code these last comments describe is cut off in this preview

a-agmon / vae1.py

Created July 28, 2021 19:53


	# encoder model: input -> one hidden ReLU layer -> (z_mean, z_log_var) -> sampled z
	inputs = Input(shape=input_shape, name='encoder_input')
	x = Dense(intermediate_dim, activation='relu')(inputs)
	# two parallel linear layers on the same hidden activations, each of width latent_dim
	z_mean = Dense(latent_dim, name='z_mean')(x)
	z_log_var = Dense(latent_dim, name='z_log_var')(x)
	# use the reparameterization trick and get the output from the sample() function
	# (sample is defined outside this snippet; it receives [z_mean, z_log_var])
	z = Lambda(sample, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
	# the encoder's only output is the sampled z
	encoder = Model(inputs, z, name='encoder')
	encoder.summary()

Alon Agmon a-agmon