This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the MNIST training and test splits (download=True fetches them when
# they are not already present under the given root, here "") and convert
# every sample with the ToTensor transform.
train = datasets.MNIST(
    "",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test = datasets.MNIST(
    "",
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Wrap both splits in loaders that yield shuffled mini-batches of 15 samples.
trainset = torch.utils.data.DataLoader(train, batch_size=15, shuffle=True)
testset = torch.utils.data.DataLoader(test, batch_size=15, shuffle=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torchvision | |
import torch.nn.functional as F | |
import matplotlib.pyplot as plt | |
import torch.nn as nn | |
import torch.optim as optim | |
from torchvision import transforms, datasets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torchvision | |
import torch.nn.functional as F | |
import matplotlib.pyplot as plt | |
import torch.nn as nn | |
import torch.optim as optim | |
from torchvision import transforms, datasets | |
# Loading and transforming the dataset | |
train = datasets.MNIST("", train=True, download=True, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
import re | |
import string | |
from gensim.models import Word2Vec | |
from nltk.tokenize import sent_tokenize as nlkt_sent_tokenize | |
from nltk.tokenize import word_tokenize as nlkt_word_tokenize | |
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer | |
from nltk.corpus import stopwords | |
import numpy as np | |
from scipy.spatial.distance import cosine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Read the whitespace-delimited salary table from the remote URL.
# sep=r'\s+' is the documented equivalent of delim_whitespace=True, which
# pandas deprecated in version 2.2.
df = pd.read_table(
    'https://data.princeton.edu/wws509/datasets/salary.dat',
    sep=r'\s+',
)

# One-hot encode the 'sx' column and show the first rows of the result.
dummy = pd.get_dummies(df['sx'])
print(dummy.head())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

# Instantiate the Porter stemmer and the WordNet lemmatizer.
ps = PorterStemmer()
wordnet_lemmatizer = WordNetLemmatizer()

# Sample sentence and the punctuation characters defined alongside it.
sentence = "She was running and coding at the same and I thought this was the craziest things I had ever seen."
punctuations = "?:!.,;"

# Split the sentence into word-level tokens.
sentence_words = nltk.word_tokenize(sentence)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Multi-line sample text; the line breaks are part of the string.
sentence = """Clairson International Corp. said it expects to report a
net loss for its second quarter ended March 26 and doesn't expect to meet analysts' profit
estimates of $3.0 to $4 million, or
1,276 cents a share to 1,279 cents a share, for its year ending Sept. 24."""

# NLTK's English stop-word list, held as a set.
stop_words = set(stopwords.words('english'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import CountVectorizer | |
import pandas as pd | |
texts = [ | |
"""Imagine this: instead of sending a four-hundred-pound rover vehicle to Mars, we merely shoot over to the planet a single sphere, one that can fit on the end of a pin. Using energy from sources around it, the sphere divides itself into a diversified army of similar spheres. The spheres hang on to each other and sprout features: wheels, lenses, temperature sensors, and a full internal guidance system. You'd be gobsmacked to watch such a system discharge itself.""" , | |
'The countries of Haiti and the Dominican Republic share the Caribbean island of Hispaniola. Consider what would happen if a tsunami were to slam into the Dominican Republic and make it uninhabitable. One possibility is that the Dominicans would be erased from the map and Haiti would continue business as usual. But there’s a second possibility: What if the Haitians shifted their nation several hundred miles to the west, bigheartedly accommodating the Domini |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy | |
#load core english library | |
nlp = spacy.load("en_core_web_sm") | |
text_english = """Imagine this: instead of sending a four-hundred-pound rover vehicle to Mars, | |
we merely shoot over to the planet a single sphere, one that can fit on the end of a pin. | |
Using energy from sources around it, the sphere divides itself into a diversified army of | |
similar spheres. The spheres hang on to each other and sprout features: wheels, lenses, | |
temperature sensors, and a full internal guidance system. You'd be gobsmacked to watch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the spaCy library
import spacy

# Load spaCy's "en_core_web_sm" English pipeline
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over the sample text. Python 3 string literals are already
# Unicode, so the legacy u"" prefix is not needed.
doc = nlp("Clairson International Corp. said it expects to report a net loss for its second quarter ended March 26 and doesn't expect to meet analysts' profit estimates of $3.0 to $4 million, or 1,276 cents a share to 1,279 cents a share, for its year ending Sept. 24. (From the Wall Street Journal (1988))")
#to print sentences |