Skip to content

Instantly share code, notes, and snippets.

@vinimonteiro
vinimonteiro / nn_pytorch_data.py
Created December 17, 2021 14:54
nn_pytorch_data
# MNIST training and test splits. Both are stored under root="" (a path
# relative to the working directory), downloaded if needed, and every
# image is passed through ToTensor.
train = datasets.MNIST(
    root="",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test = datasets.MNIST(
    root="",
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Shuffled loaders that yield mini-batches of 15 samples from each split.
trainset = torch.utils.data.DataLoader(
    dataset=train,
    batch_size=15,
    shuffle=True,
)
testset = torch.utils.data.DataLoader(
    dataset=test,
    batch_size=15,
    shuffle=True,
)
@vinimonteiro
vinimonteiro / nn_pytorch_imports.py
Created December 17, 2021 14:53
nn_pytorch_imports
import torch
import torchvision
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
@vinimonteiro
vinimonteiro / nn_pytorch.py
Created December 17, 2021 14:48
Simple neural net with pytorch (MNIST)
import torch
import torchvision
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
# Loading and transforming the dataset
train = datasets.MNIST("", train=True, download=True,
@vinimonteiro
vinimonteiro / summarizer_word_embedding.py
Last active February 8, 2022 17:29
Summarizer using word embedding
import nltk
import re
import string
from gensim.models import Word2Vec
from nltk.tokenize import sent_tokenize as nlkt_sent_tokenize
from nltk.tokenize import word_tokenize as nlkt_word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from nltk.corpus import stopwords
import numpy as np
from scipy.spatial.distance import cosine
@vinimonteiro
vinimonteiro / get_dummies.py
Created October 26, 2021 16:18
One hot encoding with get_dummies
import pandas as pd
# Load the whitespace-delimited salary dataset into a DataFrame.
# sep=r'\s+' is the supported spelling of the `delim_whitespace=True`
# keyword, which pandas deprecated in 2.2 (it emits a FutureWarning).
df = pd.read_table(
    'https://data.princeton.edu/wws509/datasets/salary.dat',
    sep=r'\s+',
)
# One-hot encode the 'sx' column: one indicator column per distinct value.
dummy = pd.get_dummies(df['sx'])
# Show the first rows of the encoded frame.
print(dummy.head())
@vinimonteiro
vinimonteiro / stem_lemma.py
Created October 17, 2021 17:42
Stemming and lemmatization using NLTK
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
# Example input and the punctuation characters defined alongside it.
sentence = (
    "She was running and coding at the same and I thought "
    "this was the craziest things I had ever seen."
)
punctuations = "?:!.,;"

# One stemmer and one lemmatizer instance.
ps = PorterStemmer()
wordnet_lemmatizer = WordNetLemmatizer()

# Split the example sentence into word tokens with NLTK's tokenizer.
sentence_words = nltk.word_tokenize(sentence)
@vinimonteiro
vinimonteiro / stop_word_removal.py
Created October 17, 2021 11:45
Stop word removal using NLTK
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Sample text to process. The triple-quoted literal spans several lines,
# so the string contains the embedded newline characters.
sentence = """Clairson International Corp. said it expects to report a
net loss for its second quarter ended March 26 and doesn't expect to meet analysts' profit
estimates of $3.0 to $4 million, or
1,276 cents a share to 1,279 cents a share, for its year ending Sept. 24."""
# Collect NLTK's English stop-word list into a set (duplicates removed);
# presumably used for membership tests in the filtering step, which is
# not visible here.
stop_words = set(stopwords.words('english'))
@vinimonteiro
vinimonteiro / tokenization_count_vectorizer.py
Created October 17, 2021 11:19
Tokenization with sklearn and CountVectorizer
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
texts = [
"""Imagine this: instead of sending a four-hundred-pound rover vehicle to Mars, we merely shoot over to the planet a single sphere, one that can fit on the end of a pin. Using energy from sources around it, the sphere divides itself into a diversified army of similar spheres. The spheres hang on to each other and sprout features: wheels, lenses, temperature sensors, and a full internal guidance system. You'd be gobsmacked to watch such a system discharge itself.""" ,
'The countries of Haiti and the Dominican Republic share the Caribbean island of Hispaniola. Consider what would happen if a tsunami were to slam into the Dominican Republic and make it uninhabitable. One possibility is that the Dominicans would be erased from the map and Haiti would continue business as usual. But there’s a second possibility: What if the Haitians shifted their nation several hundred miles to the west, bigheartedly accommodating the Domini
@vinimonteiro
vinimonteiro / tokenization_spacy.py
Created October 17, 2021 11:15
Tokenization with spaCy
import spacy
# Load the "en_core_web_sm" English pipeline and bind it to `nlp`.
# NOTE(review): the model package must already be installed for
# spacy.load to find it — confirm in the target environment.
nlp = spacy.load("en_core_web_sm")
text_english = """Imagine this: instead of sending a four-hundred-pound rover vehicle to Mars,
we merely shoot over to the planet a single sphere, one that can fit on the end of a pin.
Using energy from sources around it, the sphere divides itself into a diversified army of
similar spheres. The spheres hang on to each other and sprout features: wheels, lenses,
temperature sensors, and a full internal guidance system. You'd be gobsmacked to watch
@vinimonteiro
vinimonteiro / sent_seg.py
Created October 16, 2021 17:21
sentence segmentation
#import spacy library
import spacy
# Load the "en_core_web_sm" English pipeline and bind it to `nlp`.
nlp = spacy.load("en_core_web_sm")
# Run the pipeline over a sample text and keep the result in `doc`.
# The u"" prefix marks a unicode string literal; on Python 3 every str is
# already unicode, so the prefix has no effect there.
doc = nlp(u"Clairson International Corp. said it expects to report a net loss for its second quarter ended March 26 and doesn't expect to meet analysts' profit estimates of $3.0 to $4 million, or 1,276 cents a share to 1,279 cents a share, for its year ending Sept. 24. (From the Wall Street Journal (1988))")
#to print sentences