Skip to content

Instantly share code, notes, and snippets.

@erap129
Last active May 6, 2022 07:26
Show Gist options
  • Save erap129/d80f30af38fa303ae79cfaa6ee3ce6fe to your computer and use it in GitHub Desktop.
Save erap129/d80f30af38fa303ae79cfaa6ee3ce6fe to your computer and use it in GitHub Desktop.
# Strip person names out of the TF-IDF vocabulary, rebuild the item-item cosine
# similarity matrices from the reduced feature set, and run the KNN report on
# both the regular and the leave-one-out train/test splits.

# Tokenizer models required by nltk.sent_tokenize / nltk.tokenize.word_tokenize.
nltk.download('punkt')

# Materialise the vocabulary once; it is needed for the text and for filtering.
vocabulary = list(feature_list)
text = ' '.join(vocabulary)
st = StanfordNERTagger(
    f'{BASE_FOLDER}/MovieLens-1M/english.all.3class.distsim.crf.ser.gz',
    f'{BASE_FOLDER}/MovieLens-1M/stanford-ner.jar',
)

# Tag every sentence in a single batched call instead of one st.tag() call per
# sentence (the NLTK Stanford wrapper runs the external Java tagger per call).
tokenized_sents = [
    nltk.tokenize.word_tokenize(sent) for sent in nltk.sent_tokenize(text)
]
tagged_sents = st.tag_sents(tokenized_sents) if tokenized_sents else []

# Tokens the tagger labelled PERSON, in order of appearance (may repeat).
people = [
    token
    for sent_tags in tagged_sents
    for token, label in sent_tags
    if label == "PERSON"
]

# Set lookup keeps the column filter linear in the vocabulary size.
# Matching is by exact token, so only features equal to a tagged token drop out.
person_tokens = set(people)
tfidf_df_min = tfidf_df[[x for x in vocabulary if x not in person_tokens]]

# Leave-one-out split: item matrix and item-item cosine similarities.
# NOTE(review): presumably get_item_matrix_with_inner_ids aligns rows with the
# trainset's inner item ids -- confirm against its definition.
item_matrix_filtered_words_no_names_trainset_loocv = get_item_matrix_with_inner_ids(
    tfidf_df_min.values, movies_df, train_loocv
)
cosine_sim_filtered_words_no_names_trainset_loocv = cosine_similarity(
    item_matrix_filtered_words_no_names_trainset_loocv,
    item_matrix_filtered_words_no_names_trainset_loocv,
)

# Regular split: same computation against `trainset`.
item_matrix_filtered_words_no_names_trainset = get_item_matrix_with_inner_ids(
    tfidf_df_min.values, movies_df, trainset
)
cosine_sim_filtered_words_no_names_trainset = cosine_similarity(
    item_matrix_filtered_words_no_names_trainset,
    item_matrix_filtered_words_no_names_trainset,
)

# Evaluate the custom-similarity KNN with the precomputed item-based matrices.
get_algorithm_report(
    CustomSimKNNAlgorithm, trainset, testset, train_loocv, test_loocv, movies_df,
    target_movie_id='movie_1', target_user_id='user_1', top_k=10,
    algo_kwargs_trainset=dict(
        similarities=cosine_sim_filtered_words_no_names_trainset,
        sim_options={'user_based': False},
    ),
    algo_kwargs_trainset_loocv=dict(
        similarities=cosine_sim_filtered_words_no_names_trainset_loocv,
        sim_options={'user_based': False},
    ),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment