Sotaro Takeshita / 竹下 颯太郎 (sobamchan)

import numpy as np
from lineflow import datasets
from sklearn.svm import SVC
import sister


def main():
    # Load the IMDb sentiment classification dataset through lineflow.
    train = datasets.Imdb("train")
    test = datasets.Imdb("test")
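The gist is cut off here; a hedged sketch of how main() might continue, assuming each lineflow Imdb item is a (text, label) pair and that the sister embedder and SVC imported above are used through their standard APIs (the whole continuation is an assumption, not shown in the original):

    # Assumed continuation: embed each review with fastText mean embeddings
    # (sister) and fit a linear SVM on the resulting 300-dim vectors.
    embedder = sister.MeanEmbedding(lang="en")
    x_train = np.array([embedder(text) for text, _ in train])
    y_train = np.array([label for _, label in train])
    x_test = np.array([embedder(text) for text, _ in test])
    y_test = np.array([label for _, label in test])

    clf = SVC(kernel="linear")
    clf.fit(x_train, y_train)
    print(clf.score(x_test, y_test))


if __name__ == "__main__":
    main()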

from pathlib import Path
import tarfile

import wget
import pandas as pd

# Livedoor news corpus: where to download the archive and where to extract it.
URL = "https://www.rondhuit.com/download/ldcc-20140209.tar.gz"
SAVETO = Path("./livedoor-news-data.tar.gz")
DATASET_PATH = Path("dataset")
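The body of this script is not shown; a minimal sketch of the download-and-extract step that the imports and constants above point to (the function name and the skip-if-already-downloaded check are assumptions; pandas is presumably used further down to tabulate the extracted articles, which is not sketched here):

def download_and_extract() -> None:
    # Fetch the Livedoor news corpus archive once, then unpack it.
    if not SAVETO.exists():
        wget.download(URL, out=str(SAVETO))
    DATASET_PATH.mkdir(exist_ok=True)
    with tarfile.open(SAVETO, "r:gz") as tar:
        tar.extractall(DATASET_PATH)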

import sister

embedder = sister.MeanEmbedding(lang="en")
sentence = "I am a dog."
vector = embedder(sentence)  # 300-dim vector

from typing import Dict
from functools import partial

import lineflow as lf
import lineflow.datasets as lfds
import lineflow.cross_validation as lfcv
from transformers import BertTokenizer

MAX_LEN = 256
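The preprocessing function itself is not part of this excerpt; a hedged sketch of the kind of encoding these imports and MAX_LEN suggest, producing the input_ids / attention_mask / token_type_ids / label fields that training_step below reads from each batch (the checkpoint name, the "text" key, and the older encode_plus / pad_to_max_length API are assumptions):

TOKENIZER = BertTokenizer.from_pretrained("bert-base-uncased")  # assumed checkpoint


def preprocess(x: Dict) -> Dict:
    # Encode one raw example into fixed-length BERT inputs.
    encoded = TOKENIZER.encode_plus(
        x["text"],  # the "text" key is an assumption about the raw item layout
        max_length=MAX_LEN,
        pad_to_max_length=True,
    )
    return {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
        "token_type_ids": encoded["token_type_ids"],
        "label": x["label"],
    }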

def training_step(self, batch, batch_idx):
    labels = batch["label"]
    input_ids = batch["input_ids"]
    attention_mask = batch["attention_mask"]
    token_type_ids = batch["token_type_ids"]
    # With labels supplied, the Hugging Face BERT model returns (loss, logits).
    loss, _ = self.model(
        input_ids,
        token_type_ids=token_type_ids,
        attention_mask=attention_mask,
        labels=labels,
    )
    return {"loss": loss}

@pl.data_loader
def train_dataloader(self):
    return self._train_dataloader

def configure_optimizers(self):
    param_optimizer = list(self.model.named_parameters())
    no_decay = ["bias", "gamma", "beta"]
    # Apply weight decay to every parameter except biases and LayerNorm terms.
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            "weight_decay_rate": 0.01,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay_rate": 0.0,
        },
    ]
import lineflow.datasets as lfds

train = lfds.MsrParaphrase('train')
test = lfds.MsrParaphrase('test')

An item in this dataset looks like the following:
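From the lineflow MsrParaphrase loader, each item is a dict with quality, id1, id2, string1, and string2 fields; the values below are the first MRPC training pair, shown as a representative example (reconstructed here, not copied from the original gist):

{'quality': '1',
 'id1': '702876',
 'id2': '702977',
 'string1': 'Amrozi accused his brother, whom he called "the witness", of deliberately distorting his evidence.',
 'string2': 'Referring to him as only "the witness", Amrozi accused his brother of deliberately distorting his evidence.'}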

from typing import List, Dict, Callable
from collections import OrderedDict
from functools import partial

import lineflow as lf
import lineflow.datasets as lfds
import lineflow.cross_validation as lfcv

import torch
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
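The remainder of this gist is not shown; given these imports, the train/validation/test datasets are presumably wrapped in PyTorch DataLoaders roughly as follows (the helper name and batch size are illustrative assumptions):

def get_dataloader(dataset, batch_size: int = 32, shuffle: bool = True) -> DataLoader:
    # RandomSampler shuffles examples for training; SequentialSampler keeps
    # the original order for validation and test.
    sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
    return DataLoader(dataset, sampler=sampler, batch_size=batch_size)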