# Code snippets collected from a GitHub gist (web-page chrome removed).
# Fit the PU estimator (20% of positives held out) and score the training set.
pu_estimator, probs1y1 = fit_PU_estimator(x_train, y_train, 0.2, xgb.XGBClassifier())

# P(s=1|x) for each training sample; dividing by c = P(s=1|y=1) gives the
# Elkan–Noto estimate of P(y=1|x).
predicted_s = pu_estimator.predict_proba(x_train)[:, 1]
predicted_y = predicted_s / probs1y1
# Work on a copy so the original frame is untouched.
mod_data = data.copy()

# Indices of the positive samples (last column == 1), shuffled so the
# labeled subset below is a random draw.
pos_ind = np.where(mod_data.iloc[:, -1].values == 1)[0]
np.random.shuffle(pos_ind)

# Keep 150 positives as the labeled set; mark the whole data set as
# unlabeled (-1) in the new target column.
pos_sample = pos_ind[:150]
mod_data['class_test'] = -1
from lifelines import KaplanMeierFitter

# Set the figure size BEFORE plotting: rcParams only affects figures created
# after it is set (the original set it after kmf_breq.plot(), a no-op for
# the figure already on screen).
plt.rcParams['figure.figsize'] = [6, 5]

# Fit the Kaplan-Meier survival curve: duration = days_with_bad_req,
# event = whether a bad request was observed.
kmf_breq = KaplanMeierFitter()
kmf_breq.fit(df['days_with_bad_req'], event_observed=df['event'])
ax = kmf_breq.plot()

# percentile(p) returns the time t at which the survival function S(t) == p,
# i.e. the day by which a (1 - p) fraction of the population has made a bad request.
print(f'50% of the population will not make bad req after {kmf_breq.percentile(0.50)} days')
print(f'75% of the population will not make bad req after {kmf_breq.percentile(0.25)} days')
print(f'99% of the population will not make bad req after {kmf_breq.percentile(0.01)} days')
print(f'no one will make bad req after {kmf_breq.percentile(0.00)} days')

# predict(t) returns the survival probability S(t) — the chance a user has
# NOT made a bad request by day t. The original messages stated the opposite
# (and read "will made").
print(f'Probability that user has NOT made BAD requests by day 3 is: {kmf_breq.predict(3)}')
print(f'Probability that user has NOT made BAD requests by day 14 is: {kmf_breq.predict(14)}')
from lifelines import CoxPHFitter

# Regression frame: event indicator, duration, and the two covariates.
covariate_cols = ['event', 'days_with_bad_req', 'req_day', 'bad_req_days_rate']
df_reg = df.loc[:, covariate_cols]

# Fit a Cox proportional-hazards model and show coefficients / p-values.
cp = CoxPHFitter()
cp.fit(df_reg, duration_col='days_with_bad_req', event_col='event')
cp.print_summary()
# Cap the vocabulary at the 750 most frequent tokens.
VOCAB_SIZE = 750

# Keep only the target feature column.
clean_sequences = sequences.loc[:, FEAT_FIELD]

# The tokenizer assigns an integer id to each of the top VOCAB_SIZE words.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(clean_sequences)
def vectorize_sequences(sequences, dimension):
    """Multi-hot encode integer sequences into a (len(sequences), dimension) matrix.

    Each row has 1.0 at every index that appears in the corresponding
    sequence and 0.0 elsewhere.
    """
    encoded = np.zeros((len(sequences), dimension))
    for row_idx, token_ids in enumerate(sequences):
        for token in token_ids:
            encoded[row_idx, token] = 1.0
    return encoded
vec_seqs = vectorize_sequences(pad_seqs, VOCAB_SIZE)

# Hold out the last 25% of samples as the test set.
TRAIN_RATIO = 0.75
train_size = int(len(vec_seqs) * TRAIN_RATIO)
X_train, X_test = vec_seqs[:train_size], vec_seqs[train_size:]
#define the encoder
optimizer = optimizers.Adam(lr=1e-2)
autoencoder.compile(optimizer=optimizer,
loss='mean_squared_error',
metrics=['accuracy'])
checkpointer = ModelCheckpoint(filepath="model_bin.h5",
verbose=0,
save_best_only=True)