This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Merge/combine courses in the OpenedX OLX format.
'''
import sys
import os
import json
# NOTE(review): distutils was deprecated (PEP 632) and removed in Python 3.12.
# shutil.copytree(src, dst, dirs_exist_ok=True) is the stdlib replacement for
# copy_tree — confirm the call site below before swapping, since copy_tree
# returns the list of copied files while copytree returns the destination path.
from distutils.dir_util import copy_tree
# Example:
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wrap an on-disk HDF5 dataset in a chunked dask array so it can be
# processed out of core (nothing is loaded until the array is computed).
import h5py
# Explicit read-only mode: older h5py versions defaulted to append mode,
# which could create/modify the file; this snippet only reads it.
f = h5py.File('myhdf5file.hdf5', 'r')
dset = f['/data/path']
import dask.array as da
# chunks=(5000, 5000) sets the dask block size; assumes the dataset is 2-D
# and chunking is tuned to the workload — TODO confirm against the data.
x = da.from_array(dset, chunks=(5000, 5000))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stream newline-delimited JSON records with dask.bag and count how often
# each 'id' occurs among records whose username is 'Aneesha'.
import dask.bag as db
import json
records = db.read_text('data/2018-*-*.json').map(json.loads)
# .compute() added: dask collections are lazy, so without it the pipeline
# builds a task graph but never executes (cf. the dataframe example, which
# does call .compute()).
records.filter(lambda d: d['username'] == 'Aneesha').pluck('id').frequencies().compute()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load a glob of CSV logs as a dask dataframe and compute the mean of the
# 'value' column grouped by hour of day.
import dask.dataframe as dd
df = dd.read_csv('logs/2018-*.*.csv', parse_dates=['timestamp'])
# .compute() materializes the lazy result as a pandas Series.
df.groupby(df.timestamp.dt.hour).value.mean().compute()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the original word vectors and the retrofitted word vectors as
# separate gensim models (both files are in word2vec text format).
original_glove_model = gensim.models.KeyedVectors.load_word2vec_format('glove.6B.50d.word2vec.txt', binary=False)
retrofitted_glove_model = gensim.models.KeyedVectors.load_word2vec_format('retrofittedglove.word2vec.txt', binary=False)
# Display the words closest to 'happy' using the original GLOVE vectors
# (50 = vector dimensionality, 10 = number of neighbours to plot).
display_closestwords_tsnescatterplot(original_glove_model, 'happy', 50, 10, "Original Glove Word Vectors - 'Happy'")
# Display the words closest to 'happy' using the GLOVE vectors retrofitted
# with the Paraphrase lexicons. (Typo fixed in the displayed plot title:
# "Retroffited" -> "Retrofitted".)
display_closestwords_tsnescatterplot(retrofitted_glove_model, 'happy', 50, 10, "Retrofitted Glove Word Vectors - 'Happy'")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# git clone https://github.com/mfaruqui/retrofitting.git
# Run retrofit.py with arguments to set the word vectors file, the lexicon
# file, the number of iterations and the output word vectors. The word
# vectors must be in text format.
# Eg:
# python retrofit.py -i word_vec_file -l lexicon_file -n num_iter -o out_vec_file
# python retrofit.py -i /data/glove.6B.50d.txt -l /retrofitting/lexicons/ppdb-xl.txt -n 10 -o retrofittedglove.txt

# Convert txt-based GLOVE word vectors to word2vec format.
# NOTE(review): glove2word2vec is deprecated in gensim 4.x —
# KeyedVectors.load_word2vec_format(..., no_header=True) is the modern
# equivalent; confirm the installed gensim version before migrating.
from gensim.scripts.glove2word2vec import glove2word2vec
glove2word2vec(glove_input_file="/data/glove.6B.50d.txt", word2vec_output_file="glove.6B.50d.word2vec.txt")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Method to plot the top no_similar_words in 2D using TSNE | |
def display_closestwords_tsnescatterplot(model, word, word_vector_dimension, no_similar_words, plot_title): | |
arr = np.empty((0,word_vector_dimension), dtype='f') | |
word_labels = [word] | |
# get close words | |
close_words = model.similar_by_word(word, topn=no_similar_words) | |
# add the vector for each of the closest words to the array |
NewerOlder