Steboss89’s gists

Steboss89 / naivebayes.py

Last active September 18, 2022 16:42

Run a naive Bayes classifier

	import pandas as pd
	# vectorize words
	from sklearn.feature_extraction.text import CountVectorizer
	# naive bayes
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.metrics import auc, roc_curve
	# train test split
	from sklearn.model_selection import train_test_split

	# MAIN

Steboss89 / clean_tweet2.py

Created September 18, 2022 12:12

Full process for cleaning tweets

	# Load the training split from the split-data directory: X_train.csv into
	# tweets_df and y_train.csv into target_df (presumably the tweets and their
	# labels -- confirm against the code that wrote these files).
	tweets_df = pd.read_csv("split-data/X_train.csv")
	target_df = pd.read_csv("split-data/y_train.csv")
	# PREPROCESS
	# Drop the columns we're not going to use: ids, date, flag.
	# inplace=True mutates tweets_df directly instead of returning a copy.
	tweets_df.drop(columns=['ids', 'date', 'flag'], inplace=True)
	# Start the cleaning process.
	# Lower-case the text, storing the result in a new 'lower_text' column so
	# the original 'text' column is left untouched.
	tweets_df.loc[:,'lower_text'] = tweets_df['text'].str.lower()
	# remove stopwords

Steboss89 / clean_tweet.py

Created September 18, 2022 12:07

Cleaning tweets

	import pandas as pd
	import nltk
	nltk.download("stopwords")
	from nltk.corpus import stopwords
	import string
	import re
	# vectorizer
	from sklearn.feature_extraction.text import CountVectorizer

	STOPWORDS = stopwords.words("english")

Steboss89 / train_conv.rs

Created August 14, 2022 17:28

Train convolutional neural network

	for epoch in 1..N_EPOCHS {
	// generate random idxs for batch size
	// run all the images divided in batches -> for loop
	for i in 1..n_it {
	let batch_idxs = generate_random_index(TRAIN_SIZE as i64, BATCH_SIZE);
	let batch_images = train_data.index_select(0, &batch_idxs).to_device(vs.device()).to_kind(Kind::Float);
	let batch_lbls = train_lbl.index_select(0, &batch_idxs).to_device(vs.device()).to_kind(Kind::Int64);
	// compute the loss
	let loss = net.forward_t(&batch_images, true).cross_entropy_for_logits(&batch_lbls);
	opt.backward_step(&loss);

Steboss89 / generate_random_batch.rs

Created August 14, 2022 17:27

Generate random batches

	/// Draws a batch of random indices for sampling rows from a dataset.
	///
	/// Forwards to `Tensor::randint` with `array_size` as the upper bound,
	/// `[batch_size]` as the shape and `kind::INT64_CPU` as the tensor options.
	/// Each index is drawn independently, so a batch may contain repeated
	/// indices (sampling with replacement).
	///
	/// * `array_size` - upper bound handed to `randint`; presumably the number
	///   of rows available to sample from (TODO confirm the bound is exclusive).
	/// * `batch_size` - number of indices to generate.
	///
	/// Returns the tensor produced by `randint`.
	pub fn generate_random_index(array_size: i64, batch_size: i64) -> Tensor {
		// Return the expression directly; no intermediate binding is needed.
		Tensor::randint(array_size, &[batch_size], kind::INT64_CPU)
	}

Steboss89 / conv_nnet.rs

Created August 14, 2022 17:06

Convolutional neural network

	use std::result::Result;
	use std::error::Error;
	use mnist::*;
	use tch::{kind, Kind, Tensor, nn, nn::ModuleT, nn::OptimizerConfig, Device};
	use ndarray::{Array3, Array2};


	const LABELS: i64 = 10; // number of distinct labels
	const HEIGHT: usize = 28;
	const WIDTH: usize = 28;

Steboss89 / train_sequential.rs

Created August 14, 2022 16:53

Train a sequential nnet

	for epoch in 1..N_EPOCHS {
	let loss = net.forward(&train_data).cross_entropy_for_logits(&train_lbl);
	// backward step
	opt.backward_step(&loss);
	//accuracy on test
	let val_accuracy = net.forward(&val_data).accuracy_for_logits(&val_lbl);
	println!(
	"epoch: {:4} train loss: {:8.5} val acc: {:5.2}%",
	epoch,
	f64::from(&loss),

Steboss89 / sequential.rs

Created August 12, 2022 16:38

Sequential neural network implementation

	use std::result::Result;
	use std::error::Error;
	use mnist::*;
	use tch::{kind, Kind, Tensor, nn, nn::Module, nn::OptimizerConfig, Device};
	use ndarray::{Array3, Array2};


	const LABELS: i64 = 10; // number of distinct labels
	const HEIGHT: usize = 28;
	const WIDTH: usize = 28;

Steboss89 / main.rs

Created August 12, 2022 16:27

Run n_epochs in a linear neural network with Rust tch

	use std::result::Result;
	use std::error::Error;
	use mnist::*;
	use tch::{kind, no_grad, Kind, Tensor};
	use ndarray::{Array3, Array2};


	const LABELS: i64 = 10; // number of distinct labels
	const HEIGHT: usize = 28;
	const WIDTH: usize = 28;

Steboss89 / image_to_tensor.rs

Created August 12, 2022 16:13

Convert an array to tch::Tensor

	use std::result::Result;
	use std::error::Error;
	use mnist::*;
	use tch::{kind, no_grad, Kind, Tensor};
	use ndarray::{Array3, Array2};


	pub fn image_to_tensor(data:Vec<u8>, dim1:usize, dim2:usize, dim3:usize)-> Tensor{
	// normalize the image as well
	let inp_data: Array3<f32> = Array3::from_shape_vec((dim1, dim2, dim3), data)

Stefano Bosisio Steboss89