// Copyright 2014 Hazy Research (http://i.stanford.edu/hazy)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define NCLASS 2
//#define STEPSIZE 0.0001
//#define STEPSIZE 0.1
float STEPSIZE = 0.0005; // This step size works well for most classes
float LAMBDA = 0.1;
float DECAY = 0.99;
//float STEPSIZE = 0.0005;
//float LAMBDA = 0.3;
//float DECAY = 0.999;
#define BATCHSIZE 1
#define BATCHSIZE_TEST 1
#include <stdio.h>
#include <stdlib.h>
#include <cmath> // exp, log1p
#include <cassert> // assert
#include <iostream>
#include <fstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "dimmwitted.h"
int small_class = -1;
int large_class = -1;
#define LOG_2 0.693147180559945
#define MINUS_LOG_THRESHOLD -18.42
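// LOG_2 is ln(2): when the two arguments to logadd are equal, the result is
// log(2 * exp(a)) = ln(2) + a. MINUS_LOG_THRESHOLD is roughly ln(1e-8): once the
// (negative) difference drops below it, log1p(exp(.)) is negligible in float precision.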
// Dense dot product of two length-N float vectors. The main loop is unrolled
// 8 ways into independent accumulators; the first (N % 8) elements are
// handled by a scalar tail loop.
float dot_dense(const float * const x,
const float * const y,
int N){
const int remainder = N % 8;
float rs[8] = {0,0,0,0,0,0,0,0};
for(int i=remainder;i<N;i+=8){
rs[0] += x[i] * y[i];
rs[1] += x[i+1] * y[i+1];
rs[2] += x[i+2] * y[i+2];
rs[3] += x[i+3] * y[i+3];
rs[4] += x[i+4] * y[i+4];
rs[5] += x[i+5] * y[i+5];
rs[6] += x[i+6] * y[i+6];
rs[7] += x[i+7] * y[i+7];
}
float toreturn = (rs[0]+rs[1]+rs[2]+rs[3]+rs[4]+rs[5]+rs[6]+rs[7]);
for(int i=0;i<remainder;i++){
toreturn += x[i] * y[i];
}
return toreturn;
}
inline bool fast_exact_is_equal(float a, float b){
return (a <= b && b <= a);
}
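// logadd returns log(exp(log_a) + exp(log_b)) without leaving log space,
// using the identity log(e^a + e^b) = a + log1p(e^(b - a)) with a >= b,
// so exp() is only ever evaluated on a non-positive argument.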
inline float logadd(float log_a, float log_b){
if (log_a < log_b)
{ // swap them
float tmp = log_a;
log_a = log_b;
log_b = tmp;
} else if (fast_exact_is_equal(log_a, log_b)) {
// Special case when log_a == log_b. In particular this works when both
// log_a and log_b are (+-) INFINITY: it will return (+-) INFINITY
// instead of NaN.
return LOG_2 + log_a;
}
float negative_absolute_difference = log_b - log_a;
if (negative_absolute_difference < MINUS_LOG_THRESHOLD)
return (log_a);
return (log_a + log1p(exp(negative_absolute_difference)));
}
/**
* \brief A model object. This model contains
* two elements: (1) p: the pointer to the
* parameters, and (2) n: the number of
* parameters that this model contains.
*
* Note that, to use the PerNode and PerCore
* strategies, this object needs to have a copy
* constructor.
*
*/
class GLMModelExample{
public:
float * const p;
int n;
GLMModelExample(int _n):
n(_n), p(new float[_n]){}
GLMModelExample( const GLMModelExample& other ) :
n(other.n), p(new float[other.n]){
for(int i=0;i<n;i++){
p[i] = other.p[i];
}
}
};
/**
* \brief The function that takes as input a set of models
* and updates one of them according to the others. You
* need to register this function if you want to use the
* PerNode or PerCore strategy. In this example it is left
* as a stub (assert(false)) because main() runs with the
* PerMachine replication strategy, which does not trigger
* model averaging.
*/
void f_lr_modelavg(GLMModelExample** const p_models, /**< set of models*/
int nreplicas, /**< number of models in the above set */
int ireplica /**< id of the model that needs updates*/
){
assert(false);
}
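/**
* \brief Top-1 accuracy on one test row: score each class with a dot product,
* normalize with a softmax in log space, and count a hit when the arg-max
* class (mapped back to small_class / large_class) equals the row's label.
*/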
double f_lr_top1acc(const DenseVector<float>* const ex, GLMModelExample* const p_model){
double rs = 0.0;
for(int ibatch=0;ibatch<BATCHSIZE_TEST;ibatch++){
float * model = p_model->p;
int nfeature = ex->n-1;
int label = ex->p[ibatch*(nfeature+1)];
//if(label != 0 && label != 1){
// continue;
//}
//assert(label == small_class || label == large_class);
float * features = &ex->p[ibatch*(nfeature+1) + 1];
float logprobs[NCLASS];
float dot = 0.0;
float logsum = -100000.0; // acts as log(0) for the running logadd
float max = -1000;
int imax = -1;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * mi = &model[iclass * nfeature];
dot = dot_dense(features, mi, nfeature);
//std::cout << dot << std::endl;
//for(int i=0;i<nfeature;i++){
// dot += features[i] * mi[i];
//}
logprobs[iclass] = dot;
logsum = logadd(logsum, logprobs[iclass]);
}
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float prob = exp(logprobs[iclass] - logsum);
if(prob >= max){
max = prob;
imax = iclass;
}
}
if(imax == 0) imax = small_class;
else if(imax == 1) imax = large_class;
std::cout << "RESULT " << label << " -> " << imax << std::endl;
if(imax == label){
rs += 1.0;
}else{
rs += 0.0;
}
}
return rs;
}
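/**
* \brief Top-5 accuracy on one test row. With NCLASS == 2 the five rounds of
* "pick the best remaining class" always reach the true label, so this metric
* is only informative once NCLASS is raised above 5.
*/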
double f_lr_top5acc(const DenseVector<float>* const ex, GLMModelExample* const p_model){
double rs = 0.0;
for(int ibatch=0;ibatch<BATCHSIZE_TEST;ibatch++){
float * model = p_model->p;
int nfeature = ex->n-1;
int label = ex->p[ibatch*(nfeature+1)];
if(label != 0 && label != 1){
continue;
}
float * features = &ex->p[ibatch*(nfeature+1) + 1];
float logprobs[NCLASS];
float dot = 0.0;
float logsum = -100000.0; // acts as log(0) for the running logadd
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * mi = &model[iclass * nfeature];
dot = dot_dense(features, mi, nfeature);
//std::cout << dot << std::endl;
//for(int i=0;i<nfeature;i++){
// dot += features[i] * mi[i];
//}
logprobs[iclass] = dot;
logsum = logadd(logsum, logprobs[iclass]);
}
for(int i=0;i<5;i++){
float max = -1000;
int imax = -1;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float prob = exp(logprobs[iclass] - logsum);
if(prob >= max){
max = prob;
imax = iclass;
}
}
//std::cout << imax << std::endl;
if(imax == label){
rs += 1.0;
break;
}
logprobs[imax] = -10000;
}
}
return rs;
}
/**
* \brief One example of a function that can be registered for
* row-wise access (DW_ROW). This function takes as input
* one row of the data (ex) and the current model, and
* returns the loss on that row.
*/
double f_lr_loss(const DenseVector<float>* const ex, GLMModelExample* const p_model){
float * model = p_model->p;
int label = ex->p[0];
float * features = &ex->p[1];
int nfeature = ex->n-1;
float logprobs[NCLASS];
//std::cout << label << std::endl;
float dot = 0.0;
float logsum = -100000.0; // acts as log(0) for the running logadd
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * mi = &model[iclass * nfeature];
dot = 0.0;
dot = dot_dense(features, mi, nfeature);
//std::cout << dot << std::endl;
//for(int i=0;i<nfeature;i++){
// dot += features[i] * mi[i];
//}
//std::cout << dot << std::endl;
logprobs[iclass] = dot;
logsum = logadd(logsum, logprobs[iclass]);
}
float logprob = logprobs[label] - logsum;
return -logprob;
}
/**
* \brief One example of a function that can be registered for
* row-wise access (DW_ROW). This function takes as input
* one row of the data (ex) and the current model (p_model),
* and updates the model with one SGD step on the gradient.
*
*/
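// For multinomial logistic regression with L2 regularization, the per-class
// gradient of the negative log-likelihood for one example (x, y) is
//   dL/dw_c = (softmax_c(w . x) - 1[c == y]) * x + LAMBDA * w_c,
// so the SGD step below applies
//   w_c += STEPSIZE * ((1[c == y] - softmax_c) * x - LAMBDA * w_c),
// with class indices 0/1 remapped to small_class / large_class when comparing to y.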
double f_lr_grad(const DenseVector<float>* const ex, GLMModelExample* const p_model){
float logprobs[BATCHSIZE][NCLASS];
float logsums[BATCHSIZE];
int nfeature = ex->n-1;
float oldloss = 0.0;
for(int ibatch=0;ibatch<BATCHSIZE;ibatch++){
float * model = p_model->p;
int label = ex->p[ibatch*(nfeature+1)];
//if(label != 0 && label != 1){
// continue;
//}
assert(label == small_class || label == large_class);
float * features = &ex->p[ibatch*(nfeature+1) + 1];
float dot = 0.0;
logsums[ibatch] = -100000.0;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * mi = &model[iclass * nfeature];
dot = 0.0;
//for(int i=0;i<nfeature;i++){
// dot += features[i] * mi[i];
//}
dot = dot_dense(features, mi, nfeature);
logprobs[ibatch][iclass] = dot;
logsums[ibatch] = logadd(logsums[ibatch], logprobs[ibatch][iclass]);
//std::cout << dot << " " << logsums[ibatch] << std::endl;
}
float max = -100000;
int imax = -1;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float prob = exp(logprobs[ibatch][iclass] - logsums[ibatch]);
if(prob >= max){
max = prob;
imax = iclass;
}
}
if(imax == 0) imax = small_class;
else if(imax == 1) imax = large_class;
//oldloss += -(logprobs[ibatch][label] - logsums[ibatch]);
oldloss += (imax != label);
}
for(int ibatch=0;ibatch<BATCHSIZE;ibatch++){
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * model = p_model->p;
float * mi = &model[iclass * nfeature];
int label = ex->p[ibatch*(nfeature+1)];
float isequal;// = (iclass == label);
if(iclass == 0){
isequal = (small_class == label);
}else{
isequal = (large_class == label);
}
int nfeature = ex->n-1;
float * features = &ex->p[ibatch*(nfeature+1) + 1];
float prob_over_sum = exp(logprobs[ibatch][iclass] - logsums[ibatch]);
//std::cout << label << " " << features[0] << " " << features[1] << " " << features[2] << std::endl;
for(int i=0;i<nfeature;i++){
mi[i] += STEPSIZE * (isequal * features[i] - features[i] * prob_over_sum - LAMBDA * mi[i]);
}
}
}
return oldloss;
}
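/**
* \brief Same SGD update as f_lr_grad, specialized to a single example per
* row (BATCHSIZE == 1), so all the batch indexing collapses to index 0.
*/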
double f_lr_grad_no_batch(const DenseVector<float>* const ex, GLMModelExample* const p_model){
float logprobs[BATCHSIZE][NCLASS];
float logsums[BATCHSIZE];
int nfeature = ex->n-1;
float oldloss = 0.0;
float dot;
float * model = p_model->p;
int label = ex->p[0];
//std::cout << label << std::endl;
assert(label == small_class || label == large_class);
float * features = &ex->p[1];
logsums[0] = -100000.0; // initialize the running logadd; this was read uninitialized before
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * mi = &model[iclass * nfeature];
dot = 0.0;
dot = dot_dense(features, mi, nfeature);
logprobs[0][iclass] = dot;
logsums[0] = logadd(logsums[0], logprobs[0][iclass]);
}
float max = -100000;
int imax = -1;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float prob = exp(logprobs[0][iclass] - logsums[0]);
if(prob >= max){
max = prob;
imax = iclass;
}
}
if(imax == 0) imax = small_class;
else if(imax == 1) imax = large_class;
oldloss += (imax != label);
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * model = p_model->p;
float * mi = &model[iclass * nfeature];
int label = ex->p[0];
float isequal;
if(iclass == 0){
isequal = (small_class == label);
}else{
isequal = (large_class == label);
}
int nfeature = ex->n-1;
float * features = &ex->p[1];
float prob_over_sum = exp(logprobs[0][iclass] - logsums[0]);
for(int i=0;i<nfeature;i++){
mi[i] += STEPSIZE * (isequal * features[i] - features[i] * prob_over_sum - LAMBDA * mi[i]);
}
}
return oldloss;
}
/**
* \brief Older variant of f_lr_grad that assumes labels are already encoded
* as 0/1 class indices (rows with other labels are skipped) rather than
* being remapped through small_class / large_class.
*
*/
double f_lr_grad_old(const DenseVector<float>* const ex, GLMModelExample* const p_model){
float logprobs[BATCHSIZE][NCLASS];
float logsums[BATCHSIZE];
int nfeature = ex->n-1;
float oldloss = 0.0;
for(int ibatch=0;ibatch<BATCHSIZE;ibatch++){
float * model = p_model->p;
int label = ex->p[ibatch*(nfeature+1)];
if(label != 0 && label != 1){
continue;
}
float * features = &ex->p[ibatch*(nfeature+1) + 1];
float dot = 0.0;
logsums[ibatch] = -100000.0;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * mi = &model[iclass * nfeature];
dot = 0.0;
//for(int i=0;i<nfeature;i++){
// dot += features[i] * mi[i];
//}
dot = dot_dense(features, mi, nfeature);
logprobs[ibatch][iclass] = dot;
logsums[ibatch] = logadd(logsums[ibatch], logprobs[ibatch][iclass]);
}
float max = -100000;
int imax = -1;
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float prob = exp(logprobs[ibatch][iclass] - logsums[ibatch]);
if(prob >= max){
max = prob;
imax = iclass;
}
}
//oldloss += -(logprobs[ibatch][label] - logsums[ibatch]);
oldloss += (imax != label);
}
for(int ibatch=0;ibatch<BATCHSIZE;ibatch++){
for(int iclass = 0; iclass < NCLASS ; iclass ++){
float * model = p_model->p;
float * mi = &model[iclass * nfeature];
int label = ex->p[ibatch*(nfeature+1)];
float isequal = (iclass == label);
int nfeature = ex->n-1;
float * features = &ex->p[ibatch*(nfeature+1) + 1];
float prob_over_sum = exp(logprobs[ibatch][iclass] - logsums[ibatch]);
for(int i=0;i<nfeature;i++){
mi[i] += STEPSIZE * (isequal * features[i] - features[i] * prob_over_sum - LAMBDA * mi[i]);
}
}
}
return oldloss;
}
/**
* \brief One example of a main entry point showing how to use DimmWitted.
* The application is Stochastic Gradient Descent (SGD)
* with row-wise access.
*
* \tparam MODELREPL Model replication strategy.
* \tparam DATAREPL Data replication strategy.
*/
template<ModelReplType MODELREPL, DataReplType DATAREPL>
float test_glm_dense_sgd(long nfeat, long nexp, float * content, float * content2, long nexp_test, float * content_test){
long nbatches = 2 * nexp / BATCHSIZE;
std::cout << "NBATCHES = " << nbatches << std::endl;
float ** examples = new float* [nbatches]; // pointers to each row
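// Interleave rows from the two per-class training files: even rows point into
// content (small_class examples), odd rows into content2 (large_class
// examples); each row packs BATCHSIZE consecutive (label, features) records.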
for(long i=0;i<nbatches;i+=2){
examples[i] = &content[i*(nfeat+1)*BATCHSIZE/2];
examples[i+1] = &content2[i*(nfeat+1)*BATCHSIZE/2];
}
float ** examples_test = new float* [nexp_test];
for(int i=0;i<nexp_test;i++){
examples_test[i] = &content_test[i*(nfeat+1)];
}
GLMModelExample model(nfeat * NCLASS); // for each class, there is a vector
for(int i=0;i<model.n;i++){
model.p[i] = 0.0;
}
DenseDimmWitted<float, GLMModelExample, MODELREPL, DATAREPL, DW_ACCESS_ROW>
dw(examples, nbatches, nfeat+1, &model);
DenseDimmWitted<float, GLMModelExample, MODELREPL, DATAREPL, DW_ACCESS_ROW>
dw_test(examples_test, nexp_test, nfeat+1, &model);
unsigned int f_handle_grad = dw.register_row(f_lr_grad);
unsigned int f_handle_loss = dw.register_row(f_lr_loss);
unsigned int f_handle_acc = dw.register_row(f_lr_top1acc);
dw.register_model_avg(f_handle_grad, f_lr_modelavg);
dw.register_model_avg(f_handle_loss, f_lr_modelavg);
dw.register_model_avg(f_handle_acc, f_lr_modelavg);
dw.set_n_thread_per_node(1);
dw.set_n_numa_node(1);
dw_test.set_n_thread_per_node(1);
dw_test.set_n_numa_node(1);
unsigned int f_handle_acc_test = dw_test.register_row(f_lr_top1acc);
dw_test.register_model_avg(f_handle_acc_test, f_lr_modelavg);
unsigned int f_handle_acc5_test = dw_test.register_row(f_lr_top5acc);
dw_test.register_model_avg(f_handle_acc5_test, f_lr_modelavg);
float sum = 0.0;
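// Training loop: each epoch runs one SGD pass over the training rows, reports
// elapsed time and scan throughput, evaluates top-1 test accuracy every 10
// epochs, decays the step size, and stops early once the number of training
// misclassifications (oldloss) drops below 50.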
for(int i_epoch=0;i_epoch<5000;i_epoch++){
/*
if(i_epoch % 5 == 0){
//float loss = dw.exec(f_handle_loss)/nexp;
//float acctop1 = dw.exec(f_handle_acc)/nexp;
float acctop1_test = dw_test.exec(f_handle_acc_test)/nexp_test;
float acctop5_test = dw_test.exec(f_handle_acc5_test)/nexp_test;
std::cout << "loss=" << loss << " acc(top1-test)=" << acctop1_test << " acc(top5-test)=" << acctop5_test << std::endl;
}else{
float acctop1_test = dw_test.exec(f_handle_acc_test)/nexp_test;
float acctop5_test = dw_test.exec(f_handle_acc5_test)/nexp_test;
std::cout << "------------- acc(top1-test)=" << acctop1_test << " acc(top5-test)=" << acctop5_test << std::endl;
}
*/
std::cout << "EPOCH: " << i_epoch << " STEPSIZE " << STEPSIZE << std::endl;
Timer t;
float oldloss = dw.exec(f_handle_grad);
float data_byte = 1.0 * sizeof(float) * 2*nexp * nfeat;
float te = t.elapsed();
float throughput_gb = data_byte / te / 1024 / 1024 / 1024;
//float acctop1 = dw.exec(f_handle_acc)/nexp/2;
float acctop1_test;
if(i_epoch % 10 == 0){
acctop1_test = dw_test.exec(f_handle_acc_test)/nexp_test;
}else{
acctop1_test = -1;
}
//float acctop5_test = dw_test.exec(f_handle_acc5_test)/nexp_test;
//std::cout << "old loss=" << oldloss << " acc(train)=" << acctop1 << " acc(top1-test)=" << acctop1_test << " acc(top5-test)=" << acctop5_test << std::endl;
std::cout << "old loss=" << oldloss << " acc(top1-test)=" << acctop1_test << std::endl;
std::cout << "TIME=" << te << " secs" << " THROUGHPUT=" << throughput_gb << " GB/sec." << std::endl;
//std::ofstream fout((std::string("snapshot_") + std::to_string(i_epoch)).c_str());
//for(long i=0;i<nfeat * NCLASS;i++){
// fout << model.p[i] << std::endl;
//}
//fout.close();
STEPSIZE = STEPSIZE * DECAY;
if(oldloss < 50){
break;
}
}
dw_test.exec(f_handle_acc_test);
// NOTE: sum is never accumulated in this variant, so the return value is
// always 0.0; the interesting output is the accuracy and throughput logged
// each epoch.
return sum;
}
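/**
* \brief Memory-maps a binary file of packed (label, features) float records
* and reports the number of records in nelem. Kept for reference: the current
* main() reads the files with fread instead (the mmap_open calls are commented out).
*/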
float * mmap_open(int nfeature, char * filename, long &nelem){
int fdin;
float * src;
struct stat statbuf;
fdin = open (filename, O_RDONLY);
fstat (fdin,&statbuf);
nelem = statbuf.st_size / sizeof(float) / (nfeature + 1);
src = (float *) mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0);
return src;
}
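// Expected invocation (the binary name glm_sgd is illustrative, not fixed):
//   ./glm_sgd <nfeature> <train_class0.bin> <train_class1.bin> <test.bin>
// Each .bin file holds consecutive records of (nfeature + 1) floats: the label
// followed by the dense feature vector. train_nelem and test_nelem below are
// hard-coded to 1250 and 100 records respectively.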
int main(int argc, char** argv){
int nfeature = atoi(argv[1]);
char * train_filename = argv[2];
long train_nelem = 1250;
float * train;
char * train_filename2 = argv[3];
float * train2;
char * test_filename = argv[4];
long test_nelem = 100;
float * test;
//train = mmap_open(nfeature, train_filename, train_nelem);
//train2 = mmap_open(nfeature, train_filename2, train_nelem);
/*
for(long i=0;i<train_nelem;i++){
assert(train[i*(nfeature+1)] == small_class || train[i*(nfeature+1)] == large_class);
assert(train2[i*(nfeature+1)] == small_class || train2[i*(nfeature+1)] == large_class);
}
*/
/*
for(long i=0;i<train_nelem;i++){
std::cout << i*(nfeature+1) << std::endl;
train[i*(nfeature+1)] = 0;
train2[i*(nfeature+1)] = 1;
}
*/
float * train_mm_1 = new float[(nfeature+1)*train_nelem];
float * train_mm_2 = new float[(nfeature+1)*train_nelem];
float * test_mm = new float[(nfeature+1)*test_nelem];
FILE * f1 = fopen(train_filename, "rb");
FILE * f2 = fopen(train_filenme2, "rb");
FILE * f3 = fopen(test_filename, "rb");
//std::ifstream fin1("db_NoPreprocess_train1250_class_10_4_features_plain.txt");
//std::ifstream fin2("db_NoPreprocess_val50_class_10_4_features_plain.txt");
std::cout << (nfeature+1)*train_nelem << std::endl;
std::cout << (nfeature+1)*test_nelem << std::endl;
/*
float tmpp;
for(long i=0;i<train_nelem;i++){
fin1 >> train_mm_1[i*(nfeature+1)] >> tmpp;
for(long j=0;j<nfeature;j++) fin1 >> train_mm_1[i*(nfeature+1)+j+1];
}
for(long i=0;i<train_nelem;i++){
fin1 >> train_mm_2[i*(nfeature+1)] >> tmpp;
for(long j=0;j<nfeature;j++) fin1 >> train_mm_2[i*(nfeature+1)+j+1];
}
for(long i=0;i<test_nelem;i++){
fin2 >> test_mm[i*(nfeature+1)] >> tmpp;
for(long j=0;j<nfeature;j++) fin2 >> test_mm[i*(nfeature+1)+j+1];
}
*/
long a = fread((char*) train_mm_1, sizeof(float), (nfeature+1)*train_nelem, f1);
long b = fread((char*) train_mm_2, sizeof(float), (nfeature+1)*train_nelem, f2);
long c = fread((char*) test_mm, sizeof(float), (nfeature+1)*test_nelem, f3);
std::cout << a << std::endl;
std::cout << c << std::endl;
assert(a == (nfeature+1)*train_nelem);
assert(b == (nfeature+1)*train_nelem);
assert(c == (nfeature+1)*test_nelem);
fclose(f1);
fclose(f2);
fclose(f3);
//small_class = train_mm_1[0] < train_mm_2[0] ? train_mm_1[0] : train_mm_2[0];
//large_class = train_mm_1[0] < train_mm_2[0] ? train_mm_2[0] : train_mm_1[0];
small_class = 0;
large_class = 1;
const long NN = train_nelem*(nfeature+1);
//munmap(train, train_nelem*(nfeature+1)*sizeof(float));
//munmap(train2, train_nelem*(nfeature+1)*sizeof(float));
/*
for(long i=0;i<train_nelem;i++){
train_mm_1[i*(nfeature+1)] = 0;
train_mm_2[i*(nfeature+1)] = 1;
}
*/
//test = mmap_open(nfeature, test_filename, test_nelem);
std::cout << "NEW" << std::endl;
//train_nelem = 2600;
std::cout << "# examples = " << train_nelem << std::endl;
std::cout << "# examples (test) = " << test_nelem << std::endl;
std::cout << "# features = " << nfeature << std::endl;
std::cout << "# nclass = " << NCLASS << std::endl;
std::cout << "# small_class = " << small_class << std::endl;
std::cout << "# large_class = " << large_class << std::endl;
float rs = test_glm_dense_sgd<DW_MODELREPL_PERMACHINE, DW_DATAREPL_SHARDING>(
nfeature, train_nelem, train_mm_1, train_mm_2, test_nelem, test_mm);
return 0;
}