vinimonteiro’s gists

vinimonteiro / nn_pytorch_data.py

Created December 17, 2021 14:54

nn_pytorch_data

	# Single preprocessing pipeline (image -> tensor) shared by both splits.
	to_tensor = transforms.Compose([transforms.ToTensor()])

	# MNIST training and test splits; download=True fetches the files into the
	# given root ("" here) when they are not already present.
	train = datasets.MNIST(root="", train=True, download=True, transform=to_tensor)
	test = datasets.MNIST(root="", train=False, download=True, transform=to_tensor)

	# Both loaders yield shuffled mini-batches of 15 samples.
	loader_options = dict(batch_size=15, shuffle=True)
	trainset = torch.utils.data.DataLoader(train, **loader_options)
	testset = torch.utils.data.DataLoader(test, **loader_options)

vinimonteiro / nn_pytorch_imports.py

Created December 17, 2021 14:53

nn_pytorch_imports

	import torch
	import torchvision
	import torch.nn.functional as F
	import matplotlib.pyplot as plt
	import torch.nn as nn
	import torch.optim as optim
	from torchvision import transforms, datasets

vinimonteiro / nn_pytorch.py

Created December 17, 2021 14:48

Simple neural net with pytorch (MNIST)

	import torch
	import torchvision
	import torch.nn.functional as F
	import matplotlib.pyplot as plt
	import torch.nn as nn
	import torch.optim as optim
	from torchvision import transforms, datasets

	# Loading and transforming the dataset
	train = datasets.MNIST("", train=True, download=True,

vinimonteiro / summarizer_word_embedding.py

Last active February 8, 2022 17:29

Summarizer using word embedding

	import nltk
	import re
	import string
	from gensim.models import Word2Vec
	from nltk.tokenize import sent_tokenize as nlkt_sent_tokenize
	from nltk.tokenize import word_tokenize as nlkt_word_tokenize
	from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
	from nltk.corpus import stopwords
	import numpy as np
	from scipy.spatial.distance import cosine

vinimonteiro / get_dummies.py

Created October 26, 2021 16:18

One hot encoding with get_dummies

	import pandas as pd

	# Read the whitespace-delimited salary data set directly from its URL.
	# sep=r'\s+' is the documented replacement for delim_whitespace=True, which
	# pandas deprecated in version 2.2; it parses the file the same way.
	df = pd.read_table('https://data.princeton.edu/wws509/datasets/salary.dat', sep=r'\s+')

	# One-hot encode the 'sx' column: one indicator column per distinct value.
	dummy = pd.get_dummies(df['sx'])
	print(dummy.head())

vinimonteiro / stem_lemma.py

Created October 17, 2021 17:42

Stemming and lemmatization using NLTK

	import nltk
	from nltk.stem import PorterStemmer
	from nltk.stem import WordNetLemmatizer

	# Instantiate NLTK's Porter stemmer and WordNet lemmatizer.
	ps = PorterStemmer()
	wordnet_lemmatizer = WordNetLemmatizer()

	# Sample sentence to process.
	sentence = "She was running and coding at the same and I thought this was the craziest things I had ever seen."
	# Punctuation characters; presumably used to filter tokens in the code that
	# follows this excerpt (not visible here).
	punctuations="?:!.,;"
	# Split the sentence into a list of word and punctuation tokens.
	sentence_words = nltk.word_tokenize(sentence)

vinimonteiro / stop_word_removal.py

Created October 17, 2021 11:45

Stop word removal using NLTK

	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize

	# Sample text as a multi-line literal; the line breaks (and any leading
	# whitespace on the continuation lines) are part of the string.
	sentence = """Clairson International Corp. said it expects to report a
	net loss for its second quarter ended March 26 and doesn't expect to meet analysts' profit
	estimates of $3.0 to $4 million, or
	1,276 cents a share to 1,279 cents a share, for its year ending Sept. 24."""

	# NLTK's English stop-word list collected into a set; presumably used for
	# membership tests when filtering tokens later (not visible in this excerpt).
	stop_words = set(stopwords.words('english'))

vinimonteiro / tokenization_count_vectorizer.py

Created October 17, 2021 11:19

Tokenization with sklearn and CountVectorizer

	from sklearn.feature_extraction.text import CountVectorizer
	import pandas as pd
	texts = [
	"""Imagine this: instead of sending a four-hundred-pound rover vehicle to Mars, we merely shoot over to the planet a single sphere, one that can fit on the end of a pin. Using energy from sources around it, the sphere divides itself into a diversified army of similar spheres. The spheres hang on to each other and sprout features: wheels, lenses, temperature sensors, and a full internal guidance system. You'd be gobsmacked to watch such a system discharge itself.""" ,

	'The countries of Haiti and the Dominican Republic share the Caribbean island of Hispaniola. Consider what would happen if a tsunami were to slam into the Dominican Republic and make it uninhabitable. One possibility is that the Dominicans would be erased from the map and Haiti would continue business as usual. But there’s a second possibility: What if the Haitians shifted their nation several hundred miles to the west, bigheartedly accommodating the Domini

vinimonteiro / tokenization_spacy.py

Created October 17, 2021 11:15

Tokenization with spaCy

	import spacy

	# Load spaCy's small English pipeline; the "en_core_web_sm" model package
	# must already be installed for this call to succeed.
	nlp = spacy.load("en_core_web_sm")

	text_english = """Imagine this: instead of sending a four-hundred-pound rover vehicle to Mars,
	we merely shoot over to the planet a single sphere, one that can fit on the end of a pin.
	Using energy from sources around it, the sphere divides itself into a diversified army of
	similar spheres. The spheres hang on to each other and sprout features: wheels, lenses,
	temperature sensors, and a full internal guidance system. You'd be gobsmacked to watch

vinimonteiro / sent_seg.py

Created October 16, 2021 17:21

sentence segmentation

	#import spacy library
	import spacy

	# Load spaCy's small English pipeline.
	nlp = spacy.load("en_core_web_sm")

	# Sample passage to run through the pipeline. The u prefix marks a unicode
	# literal; it is kept as written, although on Python 3 every str literal is
	# already unicode.
	wsj_excerpt = u"Clairson International Corp. said it expects to report a net loss for its second quarter ended March 26 and doesn't expect to meet analysts' profit estimates of $3.0 to $4 million, or 1,276 cents a share to 1,279 cents a share, for its year ending Sept. 24. (From the Wall Street Journal (1988))"
	doc = nlp(wsj_excerpt)
	#to print sentences

Vinicius Monteiro vinimonteiro