import lineflow.datasets as lfds
# Load the train/test splits of the MSR Paraphrase corpus through lineflow.
# NOTE(review): presumably downloads and caches the data on first call -- confirm.
train = lfds.MsrParaphrase('train')
test = lfds.MsrParaphrase('test')
An item in this dataset looks as follows:
from typing import List, Dict, Callable
from collections import OrderedDict
from functools import partial
import lineflow as lf
import lineflow.datasets as lfds
import lineflow.cross_validation as lfcv
import torch
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
import lineflow.datasets as lfds
# Fetch both splits of the Microsoft Research Paraphrase dataset via lineflow.
train = lfds.MsrParaphrase('train')  # training split
test = lfds.MsrParaphrase('test')    # held-out test split
An item in this dataset looks as follows:
# NOTE(review): extraction-mangled listing -- indentation was stripped and
# "| |" column residue was appended to each line; the listing is also
# truncated (the no-decay group's weight_decay_rate and the `return` of the
# constructed optimizer are missing past this excerpt).
# Purpose: split the model's parameters into two optimizer groups so that
# bias and LayerNorm weights ("gamma"/"beta") are excluded from weight decay.
def configure_optimizers(self): | |
param_optimizer = list(self.model.named_parameters()) | |
no_decay = ["bias", "gamma", "beta"] | |
optimizer_grouped_parameters = [ | |
{ | |
# decay group: parameter name contains none of the no_decay substrings
"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], | |
"weight_decay_rate": 0.01 | |
}, | |
{ | |
# no-decay group: bias/gamma/beta parameters
"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], |
@pl.data_loader
def train_dataloader(self):
    """Expose the training DataLoader cached on the instance.

    Returns:
        The DataLoader stored in ``self._train_dataloader``.

    NOTE(review): assumes ``self._train_dataloader`` was built elsewhere
    (not visible in this excerpt) -- confirm against the class __init__.
    """
    # Fix: the original listing carried "| |" extraction residue and lost
    # indentation, which made it a syntax error; behavior is unchanged.
    return self._train_dataloader
# NOTE(review): extraction-mangled listing -- indentation was stripped,
# "| |" residue appended, and the listing is truncated mid-call: the
# self.model(...) call is never closed and the return of the loss dict
# is missing past this excerpt.
# Purpose: one Lightning training step -- unpack a collated batch and run
# the model to obtain the loss.
def training_step(self, batch, batch_idx): | |
labels = batch["label"] | |
input_ids = batch["input_ids"] | |
attention_mask = batch["attention_mask"] | |
token_type_ids = batch["token_type_ids"] | |
# Two values are unpacked, so the model presumably also receives `labels`
# in the missing argument lines (loss, logits) -- confirm.
loss, _ = self.model( | |
input_ids, | |
token_type_ids=token_type_ids, | |
attention_mask=attention_mask, |
from typing import Dict
from functools import partial
import lineflow as lf
import lineflow.datasets as lfds
import lineflow.cross_validation as lfcv
from transformers import BertTokenizer
# Fix: stripped the trailing "|" extraction artifact that made this line
# a syntax error.
# NOTE(review): presumably the maximum token sequence length passed to the
# BertTokenizer when encoding -- confirm against the preprocessing code.
MAX_LEN = 256
import sister
# Fix: removed the "| |" extraction residue that broke the syntax of the
# original listing; behavior is unchanged.
# Build a mean-pooled word-embedding sentence encoder for English and embed
# an example sentence.
# NOTE(review): presumably downloads pretrained vectors on first use -- confirm.
embedder = sister.MeanEmbedding(lang="en")
sentence = "I am a dog."
vector = embedder(sentence)  # 300-dim vector
from pathlib import Path
import tarfile
import wget
import pandas as pd
# Fix: removed the "| |" / "|" extraction residue that made these constant
# definitions syntax errors; values are unchanged.
# Where to fetch the livedoor news corpus archive and the local paths used.
URL = "https://www.rondhuit.com/download/ldcc-20140209.tar.gz"
SAVETO = Path("./livedoor-news-data.tar.gz")  # downloaded archive location
# NOTE(review): presumably the directory the tarball is extracted into --
# confirm against the code that consumes it.
DATASET_PATH = Path("dataset")
import numpy as np
from lineflow import datasets
from sklearn.svm import SVC
import sister
def main(): | |
train = datasets.Imdb("train") | |
test = datasets.Imdb("test") |