Skip to content

Instantly share code, notes, and snippets.

View ab3llini's full-sized avatar
🌎

Alberto Bellini ab3llini

🌎
  • DocuSign
  • Dublin
  • 19:29 (UTC +01:00)
  • X @ab3llini
View GitHub Profile
@ab3llini
ab3llini / TweetPreprocessor.py
Last active July 23, 2020 12:27
TweetPreprocessor
from multiprocessing import cpu_count
import pandas as pd
import numpy as np
from multiprocessing import Pool
from transformers import AutoTokenizer
import spacy
class TweetProcessor:
@ab3llini
ab3llini / TweetDataset.py
Last active July 23, 2020 10:58
TweetDataset
import torch
from torch.utils.data import Dataset
import json
class TweetDataset(Dataset):
def __init__(self, path, device):
self.device = device
# Load the JSON file containing our pre-processed data
@ab3llini
ab3llini / Inference.py
Last active July 23, 2020 12:53
Inference
import torch
from torch import nn
import os
from transformers import BertForSequenceClassification, AutoTokenizer
def extract_sentiment(model, tokenizer, text, device):
# Encode the text, create a tensor and move to device.
tensor = torch.tensor(tokenizer.encode(text, add_special_tokens=True)).unsqueeze(0).long().to(device)
@ab3llini
ab3llini / Training.py
Last active July 23, 2020 23:11
Training
import torch
from utils.data.paths import resources_path
from torch.utils.data import random_split
from transformers.optimization import AdamW
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy, Precision
from ignite.handlers import ModelCheckpoint, EarlyStopping
from torch.utils.data import DataLoader
from ignite.contrib.handlers import ProgressBar
from transformers import BertForSequenceClassification, AutoTokenizer