Instructions to run Label Studio with Bayesian active learning on Text Classification.
Environment:
export LABEL_STUDIO_HOSTNAME=http://localhost:8080
export LABEL_STUDIO_ML_BACKEND_V2=True
| from datetime import date | |
| from enum import Enum | |
| from typing import Optional | |
| import outlines | |
| from pydantic import BaseModel | |
| class Competitors(str, Enum): | |
| RoboCorp = "RoboCorp" |
| # Wav2Vec in Baal | |
| from datasets import load_dataset | |
| from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, TrainingArguments | |
| from baal.active.heuristics import BALD | |
| from baal.bayesian.dropout import patch_module | |
| from baal.transformers_trainer_wrapper import BaalTransformersTrainer | |
| # load model and tokenizer |
| from datasets import load_dataset | |
| from transformers import pipeline, DataCollatorForTokenClassification | |
| from baal.active.active_loop import ActiveLearningLoop | |
| from baal.active.dataset import ActiveLearningDataset | |
| from baal.active.heuristics import BALD | |
| from baal.bayesian.dropout import patch_module | |
| from baal.transformers_trainer_wrapper import BaalTransformersTrainer | |
| dataset = load_dataset("conll2003") |
| import os.path | |
| import shutil | |
| import tempfile | |
| from datasets import Dataset, load_from_disk | |
| PATH = '/tmp/b.arrow' | |
| def overwrite_dataset(ds: Dataset, path) -> Dataset: |
| import numpy as np | |
| import torch | |
| from torchvision.models import vgg16 | |
| from baal.bayesian.dropout import MCDropoutModule | |
| from baal.modelwrapper import ModelWrapper | |
| model = vgg16() | |
| wrapper = ModelWrapper(model, None) | |
| input = torch.randn([2, 3, 64, 64]) |
Instructions to run Label Studio with Bayesian active learning on Text Classification.
Environment:
export LABEL_STUDIO_HOSTNAME=http://localhost:8080
export LABEL_STUDIO_ML_BACKEND_V2=True
| from pprint import pprint | |
| import datasets | |
| import numpy as np | |
| import torch | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from tqdm import tqdm | |
| from transformers import AutoTokenizer, AutoModel | |
| """ |
| import argparse | |
| from datasets import load_dataset | |
| from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer | |
| from transformers import AutoTokenizer | |
| from transformers import DataCollatorWithPadding | |
| LABEL_COL = "label" | |
| TEXT_COL = "text" |
| import gensim | |
| import nltk | |
| from gensim import corpora, models | |
| from nltk.stem import WordNetLemmatizer, SnowballStemmer | |
| import pandas as pd | |
| nltk.download('wordnet') | |
| nltk.download('omw-1.4') | |
| nltk.download('stopwords') |
| from typing import List, Optional, Callable | |
| import torch | |
| from torch.optim import Adam | |
| from torchvision.datasets.voc import VOCDetection | |
| from torchvision.models.detection.ssd import ssd300_vgg16 | |
| from torchvision.transforms import Compose, Resize, ToTensor | |
| from baal import ModelWrapper |