import lineflow.datasets as lfds
train = lfds.MsrParaphrase('train')
test = lfds.MsrParaphrase('test')

The items in this dataset are as follows:
| import asyncio | |
| from dataclasses import dataclass | |
| from openai import AsyncOpenAI | |
| from tqdm.asyncio import tqdm_asyncio | |
| @dataclass | |
| class AsyncEncoder: | |
| client: AsyncOpenAI | |
| model_name: str |
| import numpy as np | |
| from lineflow import datasets | |
| from sklearn.svm import SVC | |
| import sister | |
| def main(): | |
| train = datasets.Imdb("train") | |
| test = datasets.Imdb("test") |
| from pathlib import Path | |
| import tarfile | |
| import wget | |
| import pandas as pd | |
| URL = "https://www.rondhuit.com/download/ldcc-20140209.tar.gz" | |
| SAVETO = Path("./livedoor-news-data.tar.gz") | |
| DATASET_PATH = Path("dataset") |
| import sister | |
| embedder = sister.MeanEmbedding(lang="en") | |
| sentence = "I am a dog." | |
| vector = embedder(sentence) # 300-dim vector |
| from typing import Dict | |
| from functools import partial | |
| import lineflow as lf | |
| import lineflow.datasets as lfds | |
| import lineflow.cross_validation as lfcv | |
| from transformers import BertTokenizer | |
| MAX_LEN = 256 |
| def training_step(self, batch, batch_idx): | |
| labels = batch["label"] | |
| input_ids = batch["input_ids"] | |
| attention_mask = batch["attention_mask"] | |
| token_type_ids = batch["token_type_ids"] | |
| loss, _ = self.model( | |
| input_ids, | |
| token_type_ids=token_type_ids, | |
| attention_mask=attention_mask, |
| @pl.data_loader | |
| def train_dataloader(self): | |
| return self._train_dataloader |
| def configure_optimizers(self): | |
| param_optimizer = list(self.model.named_parameters()) | |
| no_decay = ["bias", "gamma", "beta"] | |
| optimizer_grouped_parameters = [ | |
| { | |
| "params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], | |
| "weight_decay_rate": 0.01 | |
| }, | |
| { | |
| "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], |
import lineflow.datasets as lfds
train = lfds.MsrParaphrase('train')
test = lfds.MsrParaphrase('test')

The items in this dataset are as follows:
| from typing import List, Dict, Callable | |
| from collections import OrderedDict | |
| from functools import partial | |
| import lineflow as lf | |
| import lineflow.datasets as lfds | |
| import lineflow.cross_validation as lfcv | |
| import torch | |
| from torch.utils.data import DataLoader, SequentialSampler, RandomSampler |