Venkat Venkatstatistics

Data scientist/Statistician with business acumen. Hoping to amass knowledge and share it throughout my life.

Venkatstatistics / Spacy Basic Tutorial

Created October 4, 2019 18:04

	###Spacy Tutorials###

	## References: https://course.spacy.io/chapter1 ##

	## References: https://spacy.io/usage/spacy-101 ##

	# Getting started with the `nlp` object: instantiate the English language
	# class directly, without loading any downloaded model package.
	from spacy.lang.en import English

	nlp = English()

Venkatstatistics / Spacy models to download

Created October 3, 2019 15:45

	import spacy
	import en_vectors_web_lg
	from spacy.lang.en import English

	# Small pipeline: English language class plus a 'sentencizer' component.
	nlpsm = English()
	sbd = nlpsm.create_pipe('sentencizer')
	nlpsm.add_pipe(sbd)

	# Large pipeline: loaded from the en_vectors_web_lg package; the very same
	# sentencizer instance created above is appended to it as well.
	nlplg = en_vectors_web_lg.load()
	nlplg.add_pipe(sbd)

Venkatstatistics / Text Pre Processing

Created September 20, 2019 14:57

	# -*- coding: utf-8 -*-

	# Lowercasing: normalise every entry of `texts` to lower case.
	texts = ["JOHN", "keLLY", "ArJUN", "SITA"]
	lower_words = list(map(str.lower, texts))
	lower_words  # bare expression; echoes the result when run in a notebook/REPL

	#Stemming
	import nltk
	import pandas as pd

Venkatstatistics / word2vec_demo

Created September 20, 2019 14:56

	# -*- coding: utf-8 -*-

	from gensim.models.word2vec import Word2Vec
	import gensim.downloader as api

	# Disabled alternatives, kept for reference.
	# NOTE(review): 'word2vec-google-news-300' and 'glove-wiki-gigaword-100' look
	# like pre-trained vector sets rather than raw text corpora, which is
	# presumably why the third line assigns to `model` -- confirm against the
	# gensim-data catalogue before re-enabling any of them.
	#corpus = api.load('word2vec-google-news-300')
	#corpus = api.load('glove-wiki-gigaword-100')
	#model = api.load('glove-wiki-gigaword-100')
	corpus = api.load('text8') # fetch the 'text8' dataset via gensim's downloader; used below as the training corpus
	model = Word2Vec(corpus) # train a Word2Vec model on that corpus; no hyperparameters are passed, so gensim's defaults apply

Venkatstatistics / Spacy_redis_final

Last active August 17, 2019 19:30


	import time
	# Record the start timestamp immediately after importing `time`, before the
	# remaining imports run.
	# NOTE(review): presumably so the measured runtime includes import cost --
	# confirm where `start` is read later in the script.
	start = time.time()
	import csv
	import sys
	import pandas as pd
	import numpy as np
	from operator import itemgetter
	import redis

Venkatstatistics / Final csv creation

Created August 10, 2019 16:38

Venkatstatistics / spacy word similarity

Last active August 10, 2019 16:33

	# NOTE(review): this preview has lost its indentation and stops mid-loop;
	# the lines below are the visible start of the body of process().
	def process():
	# Import and load the en_vectors_web_lg package inside the function.
	import en_vectors_web_lg
	nlp = en_vectors_web_lg.load()

	# Read small_Topics.csv (Latin-1 encoded) and take its 'Topic' column as a list.
	topicdf = pd.read_csv("small_Topics.csv", encoding='Latin-1')
	topics = topicdf.Topic.tolist()

	# Loop indefinitely, taking one keyword per iteration from the head of the
	# 'big_keywords' list.
	while True:
	# NOTE(review): assuming `r` is a redis-py client, lpop() returns None once
	# the list is empty, so .decode() would raise AttributeError before the
	# emptiness check on the next line is reached -- test the raw lpop() result
	# for None first, then decode.
	big_keyword = r.lpop('big_keywords').decode('utf-8')
	if not big_keyword:

Venkatstatistics / Spacy_redis_list

Last active August 10, 2019 11:05

	def read_biglist():
	    """Load the big keyword list from CSV into the Redis list "big_keywords".

	    Reads the file 'big_Keywords.csv' (about 200k words, per the original
	    note), takes the values of its 'keyword' column and pushes them one by
	    one onto the Redis list stored under the key "big_keywords", using the
	    module-level client `r`.

	    Returns:
	        None. The only effect is the data written to Redis.
	    """
	    biglist = pd.read_csv("big_Keywords.csv")
	    bigwords = biglist.keyword.tolist()
	    for token1 in bigwords:
	        # LPUSH inserts at the head of the list, so the list ends up in
	        # reverse CSV order: the last row of the file sits at the head.
	        r.lpush("big_keywords", token1)

Venkatstatistics / Spacy_Redis

Last active August 10, 2019 05:34

	import csv
	import sys
	import pandas as pd
	import numpy as np
	from operator import itemgetter
	import redis

	# By default Redis listens on port 6379; this URL selects database 0 on the
	# local host.
	REDIS_URL = "redis://localhost:6379/0"
	# Build the shared client `r` from REDIS_URL so the host, port and database
	# are defined in exactly one place and cannot drift apart.
	r = redis.Redis.from_url(REDIS_URL)

Venkatstatistics / Resume Phrase Matcher code

Created January 14, 2019 19:13

	#Resume Phrase Matcher code


	#importing all required libraries

	import PyPDF2
	import os
	from os import listdir
	from os.path import isfile, join
	from io import StringIO