Skip to content

Instantly share code, notes, and snippets.

@RicherMans
Last active July 11, 2018 04:46
Show Gist options
  • Select an option

  • Save RicherMans/8b01e8d80e67e7ffdc6fa088b9ecc1eb to your computer and use it in GitHub Desktop.

Select an option

Save RicherMans/8b01e8d80e67e7ffdc6fa088b9ecc1eb to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import torch.nn.functional as F
import torch
import torch.nn as nn
import torchnet as tnt
import librosa
from tqdm import tqdm
import os
import argparse
import logging
def getmodel(inputdim, outputdim):
hidsizes = [inputdim] + [1024,1024,1024,1024] + [outputdim]
m = []
for h0,h1 in zip(hidsizes, hidsizes[1:]):
m.extend([nn.Linear(h0,h1),nn.BatchNorm1d(h1),nn.ReLU()])
# Remove last 2
m.pop()
m.pop()
return nn.Sequential(*m)
def enframe(fname, framesize=400, shift=200):
    """Load an audio file at its native sampling rate and slice it into
    overlapping frames.

    Returns an array of shape (framesize, nframes) — librosa.util.frame
    puts the frame axis last.
    """
    samples, _sr = librosa.load(fname, sr=None)
    return librosa.util.frame(samples, frame_length=framesize, hop_length=shift)
# Command line: -binary switches from the full multi-class task to the
# binary (genuine/spoof) task; everything downstream keys off args.binary.
parser = argparse.ArgumentParser()
parser.add_argument('-binary', default=False, action="store_true")
args = parser.parse_args()
outputdir = os.path.join("trainedmodels", 'binary' if args.binary else 'fullclass')
# exist_ok replaces the previous bare `except: pass`, which silently
# swallowed *every* error (permissions, bad path), not just "already exists".
os.makedirs(outputdir, exist_ok=True)
# Paths to "key label" files for the train / dev / eval splits.
trainlabelpath='trainlabels'
cvlabelpath='devlabels'
evallabelpath= 'evallabels'
# The binary task reads separate "_binary" label files.
if args.binary:
    trainlabelpath += '_binary'
    cvlabelpath += '_binary'
    evallabelpath += '_binary'
# Each label file has two space-separated columns: utterance key, label.
trainlabels = pd.read_csv(trainlabelpath, sep=" ",names = ["key","label"])
cvlabels = pd.read_csv(cvlabelpath, sep=" ",names = ["key","label"])
evallabels = pd.read_csv(evallabelpath, sep=" ",names = ["key","label"])
# Build a label -> integer encoder from the unique training labels.
uniquelabels = list(np.unique([st for row in trainlabels.label.str.split(',') for st in row]))
encoder = {label:idx for idx,label in enumerate(uniquelabels)}
# NOTE(review): the split(',') above suggests comma-separated multi-labels,
# but encoder[x] looks up the *whole* label string — a label containing a
# comma would raise KeyError here. Presumably labels are single-class; confirm.
trainlabels['onehot'] = trainlabels.label.map(lambda x: encoder[x])
cvlabels['onehot'] = cvlabels.label.map(lambda x: encoder[x])
# .scp files: "key filepath" per line for each split.
traindatacsv = pd.read_csv('train.scp',sep=' ',names=['key','filepath'])
cvdatacsv = pd.read_csv('dev.scp',sep=' ',names=['key','filepath'])
testdatacsv = pd.read_csv('eval.scp',sep=' ',names=['key','filepath'])
crossdatacsv = pd.read_csv('asvspoof2015_eval.scp',sep=' ', names=['key','filepath'])
# Join filepaths with labels on the utterance key.
traindata = traindatacsv.set_index('key').join(trainlabels.set_index('key'))
cvdata = cvdatacsv.set_index('key').join(cvlabels.set_index('key'))
evaldata = testdatacsv.set_index('key').join(evallabels.set_index('key'))
# Cross-corpus data is label-free here; it is only forwarded, never scored.
crossdatacsv = crossdatacsv.set_index('key')
# Raw-waveform framing parameters, in samples.
framesize = 400
shift = 200
def datagenerator(pandasdf, chunksize=800, shuffle=False):
    """Yield (frames, label) minibatches of size 128 from a joined dataframe.

    Utterances are processed in caches of `chunksize` rows: each cache is
    framed via `enframe`, its per-frame labels replicated from the
    utterance's `onehot` column, and the cache is then streamed through a
    DataLoader. When `shuffle` is True the dataframe is shuffled, the
    within-cache batches are shuffled, and incomplete batches are dropped.
    """
    if shuffle:
        pandasdf = pandasdf.sample(frac=1).reset_index(drop=True)
    cacheids = np.arange(len(pandasdf)) // chunksize
    for _, cache in pandasdf.groupby(cacheids):
        feats, labels = [], []
        for row in cache.itertuples():
            # enframe returns shape (framesize, nframes)
            frames = enframe(row.filepath, framesize, shift)
            feats.append(frames)
            labels.append(np.repeat(row.onehot, frames.shape[-1]))
        feats = torch.from_numpy(np.concatenate(feats, axis=1).transpose())
        labels = torch.from_numpy(np.concatenate(labels, axis=0))
        loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(feats, labels),
            batch_size=128, shuffle=shuffle, drop_last=shuffle)
        yield from loader
# Prefer GPU when available.
device = 'cpu' if not torch.cuda.is_available() else 'cuda'
# Network input is one raw-waveform frame of `framesize` samples.
model = getmodel(framesize, len(uniquelabels))
model.to(device)
optimizer = torch.optim.Adam(model.parameters())
def run_epoch(generator, train=True):
    """Run one full pass over `generator`'s batches.

    When `train` is True the global model is updated via the global
    optimizer; otherwise gradients are disabled and the model only scores.

    Returns a tuple (loss_meter_value, acc_meter_value, elapsed_seconds),
    where the first two are torchnet meter .value() results.
    """
    avgmeter = tnt.meter.AverageValueMeter()
    accmeter = tnt.meter.ClassErrorMeter(accuracy=True)
    timemeter = tnt.meter.TimeMeter(0)
    if train:
        model.train()
        desc = "Train "
    else:
        model.eval()
        desc = "CV "
    with torch.set_grad_enabled(train):
        # The enumerate() index previously bound here was never used;
        # iterate the generator directly.
        for data, target in tqdm(generator, leave=False, unit='batch', desc=desc):
            data, target = data.to(device), target.to(device)
            y = model(data)
            loss = F.cross_entropy(input=y, target=target)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            avgmeter.add(loss.item())
            accmeter.add(y.data, target.data)
    return avgmeter.value(), accmeter.value(), timemeter.value()
# Logging: DEBUG and above goes to the file, INFO and above to the console.
logger = logging.getLogger('btas')
logger.setLevel(logging.DEBUG)
# create file handler which logs even debug messages
fh = logging.FileHandler(os.path.join(outputdir,'train.log'))
fh.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handlers to logger
logger.addHandler(ch)
logger.addHandler(fh)
logger.debug(model)
# Best-so-far CV loss / epoch, used for checkpoint selection below.
bestloss = np.inf
bestepoch = np.inf
modelpath = os.path.join(outputdir,'model_best.th')
# Train for up to 19 epochs, checkpointing whenever the CV loss improves.
# Skipped entirely when a best checkpoint already exists on disk.
if not os.path.exists(modelpath):
    for epoch in range(1, 20):
        trainlosses, trainacc, traintime = run_epoch(datagenerator(traindata,shuffle=True), train=True)
        cvlosses, cvacc, cvtime = run_epoch(datagenerator(cvdata,shuffle=False),train=False)
        logger.info("Epoch {:>3} Trainloss: ({:>5.3f}/{:>5.3f}) CVLoss: ({:>5.3f}/{:>5.3f}) Trainacc {:>8.3f} CVAcc {:>8.3f} Times ({:>3.1f}m/{:>3.1f}m)".format(epoch,*trainlosses, *cvlosses, trainacc[0], cvacc[0],traintime/60, cvtime/60))
        if cvlosses[0] < bestloss:
            bestloss = cvlosses[0]
            bestepoch = epoch
            torch.save(model, modelpath)
    logger.info("Best model is at epoch {}".format(bestepoch))
# BUGFIX: always reload the best checkpoint before evaluation. Previously
# this only happened when training was skipped, so a fresh training run
# evaluated with the in-memory model from the *last* epoch, not the best one.
logger.info("Loading best model")
model = torch.load(modelpath,map_location=lambda storage, loc: storage)
model.to(device)
def forward(dataset, wp):
    """Score every utterance in `dataset` and write one line per utterance.

    Each line is "<key> <logprob_0> ... <logprob_{C-1}> \n": the utterance
    is framed, all frames are scored in one batch, and the per-frame
    log-softmax outputs are averaged over frames.
    """
    model.eval()
    with torch.no_grad():
        for row in tqdm(dataset.itertuples(), total=len(dataset), leave=False):
            # (nframes, framesize) after transposing the enframe output
            frames = enframe(row.filepath, framesize, shift).transpose()
            batch = torch.from_numpy(frames).to(device)
            logprobs = F.log_softmax(model(batch), dim=1)
            uttscore = logprobs.data.to('cpu').mean(0).squeeze().numpy()
            fields = [row.Index] + [str(v) for v in uttscore] + ['\n']
            wp.write(' '.join(fields))
# Evaluate: forward dev / eval / cross-corpus data, caching each result on
# disk so a rerun skips splits that already have an output file.
evaloutputf=os.path.join(outputdir,"evaluate.txt")
devoutputf=os.path.join(outputdir,"dev.txt")
crossoutputf=os.path.join(outputdir,'cross.txt')
if not os.path.exists(devoutputf):
    logger.info("Forwarding dev data")
    with open(devoutputf, 'w') as outputwp:
        forward(cvdata, outputwp)
if not os.path.exists(evaloutputf):
    logger.info("Forwarding eval data")
    with open(evaloutputf,'w') as outputwp:
        forward(evaldata, outputwp)
if not os.path.exists(crossoutputf):
    logger.info("Forwarding cross data")
    with open(crossoutputf,'w') as outputwp:
        forward(crossdatacsv, outputwp)
logger.info("Evaluation done! Eval file is {}, Dev file is {}".format(evaloutputf, devoutputf))
# -*- coding: utf-8 -*-
# @Author: richman
# @Date: 2018-01-18
# @Last Modified by: richman
# @Last Modified time: 2018-04-08
import argparse
import numpy as np
import models
import torch
import os
import torchnet as tnt
import logging
from tqdm import tqdm
from torch.autograd import Variable
from sklearn.preprocessing import RobustScaler, Normalizer, StandardScaler
from sklearn.pipeline import Pipeline
from dataset import KaldiDatasetLoader, KaldiDataset
import kaldi_io
def labeltype(s):
    """Parse a whitespace-separated "key label" file into a dict.

    Only the first two columns of each line are used; extra columns are
    ignored. Used as an argparse `type` for the label-file options.
    """
    pairs = []
    with open(s) as rp:
        for line in rp:
            # Keys are the left column, values the right one.
            pairs.append(line.rstrip().split()[:2])
    return dict(pairs)
def encode(org, encoder):
    """Map every label value in `org` through `encoder`, keeping the keys."""
    encoded = {}
    for key, label in org.items():
        encoded[key] = encoder[label]
    return encoded
def main():
    """Train a DNN classifier on Kaldi-format features.

    Parses labels and features, fits a StandardScaler over the training
    portion cache-by-cache, then trains with SGD + ReduceLROnPlateau via a
    torchnet engine, checkpointing whenever the CV loss improves.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data', type=str,
                        help="extracted feature ark file")
    parser.add_argument('-trlabel', type=labeltype,
                        help="Labels for each training utterance", required=True)
    parser.add_argument('-cvlabel', type=labeltype,
                        help="Cv labels. need at be as many as train labels", required=True)
    parser.add_argument('-trcounts', type=lambda x: {k: v[0] for k, v in kaldi_io.read_vec_int_ark(x)},
                        help="ark,t formatted counts (feat-to-len ark:- ark,t:-) for each utterance. Enables displaying the current performance.")
    parser.add_argument('-cvcounts', type=lambda x: {k: v[0] for k, v in kaldi_io.read_vec_int_ark(x)},
                        help="ark,t formatted counts (feat-to-len ark:- ark,t:-) for each utterance")
    parser.add_argument('-bs', '--batchsize', type=int, default=256,
                        help="Training batchsize. Defaults to %(default)s")
    parser.add_argument('-cs', '--cachesize', type=int, default=500,
                        help="Training cachesize. Defaults to %(default)s")
    parser.add_argument('-output', default="trainedmodels/", type=str,
                        help="Output directory. Defaults to %(default)s. Needs to be empty")
    parser.add_argument('-lr', default=1e-2, type=float,
                        help="Starting learning rate. Defaults to %(default)s.")
    parser.add_argument('-epochs', default=200, type=int)
    parser.add_argument('-epochsize', default=0, type=int,
                        help="Number of batches to be seen for each epoch. Default full dataset.")
    parser.add_argument('-drop', default=0.0, type=float)
    parser.add_argument('-net', help="Neural network to use",
                        type=str, default="DNN")
    parser.add_argument('-cuda', default=False, action="store_true")
    parser.add_argument('-crit', default="CrossEntropyLoss")
    parser.add_argument(
        '-norm', choices=['mean', 'var', 'none'], default=['mean'], nargs="+")
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and args.cuda
    # Handle trainlabels and traindata: build a label -> integer encoder
    # from the unique training label strings.
    uniquelabels = np.unique(list(args.trlabel.values()))
    encoder = {lab: i for i, lab in enumerate(uniquelabels)}
    # /*---------- Label parsing ----------*/
    trainlab = encode(args.trlabel, encoder)
    cvlab = encode(args.cvlabel, encoder)
    # Use partial fit from the standard scaler to only fit each cache
    scaler = StandardScaler(with_std='var' in args.norm,
                            with_mean='mean' in args.norm)
    inputdim = 0
    # Indicator how many iterations (nsamples//batches) we need to do.
    # Is a rough estimate though, since the caches might "chop" some batches
    ntrainiters = None
    ncviters = None
    if args.trcounts:
        nsamples = sum([v for k, v in args.trcounts.items() if k in trainlab])
        ntrainiters = nsamples // args.batchsize
    if args.cvcounts:
        # BUGFIX: this branch previously summed over args.trcounts, so the
        # CV progress-bar total came from the *training* counts (and the
        # script crashed when only -cvcounts was supplied).
        nsamples = sum([v for k, v in args.cvcounts.items() if k in cvlab])
        ncviters = nsamples // args.batchsize
    # One pass over the training data to fit the scaler and discover the
    # feature dimensionality.
    for feat, tar in KaldiDataset(args.data, trainlab, cachesize=100):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]

    # Generates CV/Train iterator
    def iteratorgen(labeldata, shuffle=False, cachesize=args.cachesize):
        yield from KaldiDatasetLoader(KaldiDataset(args.data, labeldata, cachesize=cachesize),
                                      batchsize=args.batchsize, transform=scaler.transform, shuffle=shuffle)
    # Normalize train and dev data
    # We summed all dimensions, actually useless but just for fun
    # Save the original number of feature dimensions
    try:
        os.makedirs(args.output)
    except OSError:
        pass
    model = getattr(models, args.net)(inputdim,
                                      len(uniquelabels),
                                      dropout=args.drop,
                                      )
    print("Creating new model {}".format(args.net))
    formatter = logging.Formatter(
        "[ %(levelname)s : %(asctime)s ] - %(message)s")
    logging.basicConfig(level=logging.DEBUG,
                        format="[ %(levelname)s : %(asctime)s ] - %(message)s")
    logger = logging.getLogger("Pytorch")
    # Dump log to file
    fh = logging.FileHandler(os.path.join(args.output, 'log'))
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.info(model)
    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=0.9, nesterov=True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=0, verbose=True, factor=0.1)
    if args.cuda:
        logger.info("Using GPU backend")
        model = model.cuda()
    crit = getattr(torch.nn, args.crit)()

    def reset_meters():
        # Clear loss/time/accuracy statistics between epochs and phases.
        meter_loss.reset()
        time_meter.reset()
        accmeter.reset()

    def on_forward(state):
        # Accumulate per-batch loss and accuracy (old-PyTorch .data[0] API,
        # consistent with the Variable(volatile=...) usage below).
        meter_loss.add(state['loss'].data[0])
        accmeter.add(*state['output'])

    # Function is not called during test! only training
    def on_start_epoch(state):
        # Put into training state
        model.train()
        reset_meters()
        # Reset iterator - We pass none at the beginning to avoid double
        # initialization
        state['iterator'] = tqdm(iteratorgen(
            trainlab, shuffle=True), leave=False, total=ntrainiters)
        # total=traindataloader.nsamples // traindataloader.batchsize)

    def trainf(sample):
        # Training closure: returns (loss, (outputs, targets)) for the engine.
        inputs, targets = Variable(sample[0]), Variable(sample[1])
        if args.cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        o = model(inputs)
        return crit(o, targets), (o.data, targets.data)

    def evalf(sample):
        # Evaluation closure: no gradients (volatile, old-PyTorch API).
        inputs, targets = Variable(sample[0], volatile=True), Variable(
            sample[1], volatile=True)
        if args.cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        o = model(inputs)
        # Return o.data and targets.data for accmeter (in on_forward)
        return crit(o, targets), (o.data, targets.data)

    def on_end_epoch(state):
        # Log training statistics, then run a CV pass, step the scheduler
        # on the CV loss and checkpoint/restore based on improvement.
        message = 'Training Epoch {}: Time: {:=.2f}s/{:=.2f}m LR: {:=3.1e} Acc: {:=.2f} Loss (mean): {:=.4f} Loss (std): {:=.4f}'.format(
            state['epoch'], time_meter.value(), time_meter.value() / 60,
            optimizer.param_groups[0]['lr'], accmeter.value()[0],
            *meter_loss.value())
        logger.info(message)
        model.eval()
        reset_meters()
        engine.test(evalf, tqdm(iteratorgen(cvlab),
                                leave=False, total=ncviters))
        logger.info("Epoch: {:=4} Acc: {:=.2f} % Bestacc: {:=.2f} % Time {:=3.2f}s LR: {:=3.1e} CVLoss (mean): {:=5.4f} CVLoss (std): {:=5.4f} ".format(
            state['epoch'], accmeter.value()[0], stats['bestacc'],
            time_meter.value(), optimizer.param_groups[0]['lr'],
            *meter_loss.value()))
        scheduler.step(meter_loss.value()[0])
        curacc = max(stats['bestacc'], accmeter.value()[0])
        curloss = min(stats['bestloss'], meter_loss.value()[0])
        if curacc > stats['bestacc']:
            stats['bestacc'] = curacc
        if curloss < stats['bestloss']:
            stats['bestloss'] = curloss
            torch.save({'model': model, 'encoder': encoder, 'scaler': scaler},
                       os.path.join(args.output, 'model.th'))
        else:
            # No improvement: roll back to the best checkpoint.
            dump = torch.load(os.path.join(args.output, 'model.th'))
            model.load_state_dict(dump['model'].state_dict())
        # NOTE(review): the original (indentation-stripped) source makes the
        # nesting of this check ambiguous; placed at function level it lowers
        # the LR by 10x after the first epoch — confirm against upstream.
        if state['epoch'] == 1:
            optimizer.param_groups[0]['lr'] /= 10
        # Stop training if lr < 1e-6
        if optimizer.param_groups[0]['lr'] < 1e-6:
            logger.info("Ending Training")
            state['epoch'] = 1e20
            return
        reset_meters()

    engine = tnt.engine.Engine()
    # Statistics
    time_meter = tnt.meter.TimeMeter(False)
    meter_loss = tnt.meter.AverageValueMeter()
    accmeter = tnt.meter.ClassErrorMeter(accuracy=True)
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    stats = {'bestacc': 0, 'bestloss': 1e20}
    engine.train(trainf, None,
                 args.epochs, optimizer)
# Script entry point.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment