Skip to content

Instantly share code, notes, and snippets.

View Venkatstatistics's full-sized avatar

Venkat Venkatstatistics

  • Aryma Labs
  • Bangalore
View GitHub Profile
# -*- coding: utf-8 -*-
# Train a Word2Vec embedding model on the 'text8' dataset from gensim-data.
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api

# Other gensim-data names tried earlier, kept for reference:
#corpus = api.load('word2vec-google-news-300')
#corpus = api.load('glove-wiki-gigaword-100')
#model = api.load('glove-wiki-gigaword-100')

# Download 'text8' through the gensim downloader; the returned object is
# consumed as an iterable by Word2Vec below.
corpus = api.load('text8')

# Fit Word2Vec on that corpus; only the training data is passed, so every
# other constructor argument keeps its library default.
model = Word2Vec(sentences=corpus)
# -*- coding: utf-8 -*-
# Lowercasing: normalise a handful of mixed-case names to lower case.
texts = ["JOHN", "keLLY", "ArJUN", "SITA"]
lower_words = list(map(str.lower, texts))
# Bare expression: echoes the list in a notebook/REPL cell; a no-op in a script.
lower_words
# "Stemming" section heading from the original notes. The lines visible here
# only import libraries and build two spaCy pipelines; no stemming call
# appears in this part of the file.
import nltk  # not used in the visible lines
import pandas as pd  # not used in the visible lines
import spacy
from spacy.lang.en import English
# English language object; a 'sentencizer' component is created from it and
# appended to its pipeline.
nlpsm = English()
# NOTE(review): create_pipe(...) followed by add_pipe(<component object>)
# looks like the spaCy v2 calling convention; spaCy v3 expects the component
# name as a string in add_pipe -- confirm against the installed version.
sbd = nlpsm.create_pipe('sentencizer')
nlpsm.add_pipe(sbd)
# Second pipeline, loaded from the en_vectors_web_lg package.
import en_vectors_web_lg
nlplg = en_vectors_web_lg.load()
# The same sbd component object created above is added to this pipeline too.
nlplg.add_pipe(sbd)
### spaCy tutorials ###
## Reference: https://course.spacy.io/chapter1
## Reference: https://spacy.io/usage/spacy-101
### Learning to work with the nlp object ###
from spacy.lang.en import English

# Instantiate the English language class and bind the result to `nlp`.
nlp = English()