Skip to content

Instantly share code, notes, and snippets.

View Steboss89's full-sized avatar

Stefano Bosisio Steboss89

View GitHub Profile
@Steboss89
Steboss89 / naivebayes.py
Last active September 18, 2022 16:42
Run a naive Bayes classifier
import pandas as pd
# vectorize words
from sklearn.feature_extraction.text import CountVectorizer
# naive bayes
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import auc, roc_curve
# train test split
from sklearn.model_selection import train_test_split
# MAIN
@Steboss89
Steboss89 / clean_tweet2.py
Created September 18, 2022 12:12
Full process for cleaning tweets
# Load the training split from CSV: the tweets (X_train) and, presumably,
# the matching targets (y_train) -- TODO confirm the two files are row-aligned
tweets_df = pd.read_csv("split-data/X_train.csv")
target_df = pd.read_csv("split-data/y_train.csv")
# PREPROCESS
# drop the columns we're not going to use:
# ids, date, flag (dropped in place, so tweets_df itself is modified)
tweets_df.drop(columns=['ids', 'date', 'flag'], inplace=True)
# start the cleaning process
# lowercase the text into a new 'lower_text' column; the original 'text'
# column is left untouched
tweets_df.loc[:,'lower_text'] = tweets_df['text'].str.lower()
# remove stopwords
@Steboss89
Steboss89 / clean_tweet.py
Created September 18, 2022 12:07
Cleaning tweets
import pandas as pd
import nltk
# fetch the NLTK "stopwords" corpus; this call runs every time the module
# is imported/executed
nltk.download("stopwords")
from nltk.corpus import stopwords
import string
import re
# vectorizer
from sklearn.feature_extraction.text import CountVectorizer
# English stopword list from NLTK, loaded once at module level
STOPWORDS = stopwords.words("english")
@Steboss89
Steboss89 / train_conv.rs
Created August 14, 2022 17:28
Train convolutional neural network
for epoch in 1..N_EPOCHS {
// generate random idxs for batch size
// run all the images divided in batches -> for loop
for i in 1..n_it {
let batch_idxs = generate_random_index(TRAIN_SIZE as i64, BATCH_SIZE);
let batch_images = train_data.index_select(0, &batch_idxs).to_device(vs.device()).to_kind(Kind::Float);
let batch_lbls = train_lbl.index_select(0, &batch_idxs).to_device(vs.device()).to_kind(Kind::Int64);
// compute the loss
let loss = net.forward_t(&batch_images, true).cross_entropy_for_logits(&batch_lbls);
opt.backward_step(&loss);
@Steboss89
Steboss89 / generate_random_batch.rs
Created August 14, 2022 17:27
Generate random batches
/// Builds a 1-D tensor of random indices used to select one batch.
///
/// # Arguments
/// * `array_size` - upper bound handed to `Tensor::randint`; following the
///   torch `randint` convention the drawn values lie in `[0, array_size)`.
/// * `batch_size` - number of indices to draw, i.e. the length of the
///   returned tensor.
///
/// # Returns
/// A tensor of shape `[batch_size]` created with `kind::INT64_CPU`
/// (64-bit integers on the CPU).
///
/// Note: indices are drawn independently, so the same index can occur more
/// than once within a batch (sampling with replacement).
pub fn generate_random_index(array_size: i64, batch_size: i64) -> Tensor {
    Tensor::randint(array_size, &[batch_size], kind::INT64_CPU)
}
@Steboss89
Steboss89 / conv_nnet.rs
Created August 14, 2022 17:06
Convolutional neural network
use std::result::Result;
use std::error::Error;
use mnist::*;
use tch::{kind, Kind, Tensor, nn, nn::ModuleT, nn::OptimizerConfig, Device};
use ndarray::{Array3, Array2};
// Dataset constants for the convolutional network.
const LABELS: i64 = 10; // number of distinct labels
// Image dimensions -- presumably 28x28 pixels as in MNIST (the `mnist` crate
// is imported above); TODO confirm against the data loader.
const HEIGHT: usize = 28;
const WIDTH: usize = 28;
@Steboss89
Steboss89 / train_sequential.rs
Created August 14, 2022 16:53
Train a sequential nnet
for epoch in 1..N_EPOCHS {
let loss = net.forward(&train_data).cross_entropy_for_logits(&train_lbl);
// backward step
opt.backward_step(&loss);
// accuracy on the validation set
let val_accuracy = net.forward(&val_data).accuracy_for_logits(&val_lbl);
println!(
"epoch: {:4} train loss: {:8.5} val acc: {:5.2}%",
epoch,
f64::from(&loss),
@Steboss89
Steboss89 / sequential.rs
Created August 12, 2022 16:38
Sequential neural network implementation
use std::result::Result;
use std::error::Error;
use mnist::*;
use tch::{kind, Kind, Tensor, nn, nn::Module, nn::OptimizerConfig, Device};
use ndarray::{Array3, Array2};
// Dataset constants for the sequential network.
const LABELS: i64 = 10; // number of distinct labels
// Image dimensions -- presumably 28x28 pixels as in MNIST (the `mnist` crate
// is imported above); TODO confirm against the data loader.
const HEIGHT: usize = 28;
const WIDTH: usize = 28;
@Steboss89
Steboss89 / main.rs
Created August 12, 2022 16:27
Run n_epochs in a linear neural network with Rust tch
use std::result::Result;
use std::error::Error;
use mnist::*;
use tch::{kind, no_grad, Kind, Tensor};
use ndarray::{Array3, Array2};
// Dataset constants for the linear network.
const LABELS: i64 = 10; // number of distinct labels
// Image dimensions -- presumably 28x28 pixels as in MNIST (the `mnist` crate
// is imported above); TODO confirm against the data loader.
const HEIGHT: usize = 28;
const WIDTH: usize = 28;
@Steboss89
Steboss89 / image_to_tensor.rs
Created August 12, 2022 16:13
Convert an array to tch::Tensor
use std::result::Result;
use std::error::Error;
use mnist::*;
use tch::{kind, no_grad, Kind, Tensor};
use ndarray::{Array3, Array2};
pub fn image_to_tensor(data:Vec<u8>, dim1:usize, dim2:usize, dim3:usize)-> Tensor{
// normalize the image as well
let inp_data: Array3<f32> = Array3::from_shape_vec((dim1, dim2, dim3), data)