This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### spaCy Tutorials ###
## Reference: https://course.spacy.io/chapter1 ##
## Reference: https://spacy.io/usage/spacy-101 ##
### Learning to work with the NLP object ###
from spacy.lang.en import English

# Instantiate the English language class; `nlp` is the NLP object this
# tutorial works with.
nlp = English()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy
from spacy.lang.en import English

# Pipeline built from the bare English language class, with a 'sentencizer'
# pipe created from it and added to it.
# NOTE(review): create_pipe() + add_pipe(component_object) looks like the
# spaCy 2.x calling convention - confirm the pinned spaCy version.
nlpsm = English()
sbd = nlpsm.create_pipe('sentencizer')
nlpsm.add_pipe(sbd)

import en_vectors_web_lg

# Second pipeline loaded from the en_vectors_web_lg package; the same `sbd`
# component object is added to it as well.
nlplg = en_vectors_web_lg.load()
nlplg.add_pipe(sbd)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
#lowercasing | |
# Sample names in mixed case, and the same names converted to lowercase.
texts = ["JOHN", "keLLY", "ArJUN", "SITA"]
lower_words = [word.lower() for word in texts]
lower_words  # bare expression: only displays a value in an interactive session
#Stemming | |
import nltk | |
import pandas as pd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api

# Alternative datasets/models that were tried and left disabled:
#corpus = api.load('word2vec-google-news-300')
#corpus = api.load('glove-wiki-gigaword-100')
#model = api.load('glove-wiki-gigaword-100')
corpus = api.load('text8')  # download the corpus and return it opened as an iterable
model = Word2Vec(corpus)  # train a model from the corpus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

# The start timestamp is taken before the remaining imports, as in the
# original, so any elapsed-time figure computed from it includes their load time.
start = time.time()

import csv
import sys
import pandas as pd
import numpy as np
from operator import itemgetter
import redis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dump():
    """Write every entry of the Redis list 'results' to results.csv.

    Reads the whole list through the client object ``r`` (defined outside
    this function), echoes each raw entry to stdout, and writes its UTF-8
    decoded form to the file followed by a newline. An existing
    results.csv is overwritten.
    """
    # Entries are decoded as UTF-8, so the output encoding is pinned to UTF-8
    # as well instead of relying on the platform default.
    with open('results.csv', 'w', encoding='utf-8') as f:
        for key in r.lrange('results', 0, -1):
            print(key)
            f.write(key.decode('utf-8'))
            f.write('\n')
if __name__== "__main__": |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def process(): | |
import en_vectors_web_lg | |
nlp = en_vectors_web_lg.load() | |
topicdf = pd.read_csv("small_Topics.csv", encoding='Latin-1') | |
topics = topicdf.Topic.tolist() | |
while True: | |
big_keyword = r.lpop('big_keywords').decode('utf-8') | |
if not big_keyword: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the big keyword list (about 200k words, per the original note). The
# words are stored in a column called 'keyword' in the CSV file
# 'big_Keywords.csv'. They are read one by one and stored in a Redis list
# under the key "big_keywords".
def read_biglist():
    """Push every value of the 'keyword' column of big_Keywords.csv to Redis.

    Uses the client object ``r`` defined outside this function; each value is
    pushed individually onto the list "big_keywords".
    """
    biglist = pd.read_csv("big_Keywords.csv")
    bigwords = biglist.keyword.tolist()
    for token1 in bigwords:
        # LPUSH puts the new value at the start of the list.
        r.lpush("big_keywords", token1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import sys
import pandas as pd
import numpy as np
from operator import itemgetter
import redis

# By default Redis runs on port 6379; this URL points at database 0 on localhost.
REDIS_URL = "redis://localhost:6379/0"
# Build the client object `r` from REDIS_URL so that host, port and database
# are defined in a single place instead of being repeated in the constructor.
r = redis.Redis.from_url(REDIS_URL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Resume Phrase Matcher code | |
#importing all required libraries | |
import PyPDF2 | |
import os | |
from os import listdir | |
from os.path import isfile, join | |
from io import StringIO |
NewerOlder