MLE of parameters in 2-class logistic regression
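The program fits a two-class logistic regression by coordinate-wise gradient ascent on the log-likelihood. For each coordinate j, the update() routine below applies (notation is mine, matching what the code computes)

\theta_j \leftarrow \theta_j + \eta \sum_{i=1}^{N} x_{ij}\,\bigl(y_i - \sigma(x_i^\top \theta)\bigr), \qquad \sigma(z) = \frac{1}{1 + e^{-z}},

where the step size \eta is decayed after each full pass over the coordinates, and convergence is declared once a moving average (over a sliding window) of successive parameter changes falls below the tolerance.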
#include <vector>
#include <iostream>
#include <cstdlib>
#include <cmath>     // exp, pow, sqrt
#include <algorithm>
#include <fstream>
#include <numeric>
#include <string>
#include <Eigen/Dense>

using namespace Eigen;
using namespace std;
void update(VectorXd &theta, VectorXd &theta_new, MatrixXd &X, VectorXd &Y, double rate, int j);
int main(int argc, const char * argv[]) {
    ifstream myfile;
    string file;
    double diff_value;
    double rate;                // learning rate, or step size
    double tol = 1e-6;          // tolerance, or stopping criterion
    bool flag = true;
    int N, M;                   // input data dimensions: N rows * M columns
    size_t sliding_window = 10; // size of the sliding window
    int max_iter = 1000;        // maximum number of iterations
    int num = 0;                // number of iterations
    // arguments: data file, N, M, and an optional tolerance
    if (argc < 4) {
        cerr << "usage: " << argv[0] << " <file> <N> <M> [tol]" << endl;
        return 1;
    }
    file = argv[1];
    N = atoi(argv[2]);
    M = atoi(argv[3]);
    if (argc == 5) tol = atof(argv[4]);
    rate = 10.0 / N;
    myfile.open(file.c_str());
    if (!myfile.is_open()) {
        cerr << "Cannot open file: " << file << endl;
        return 1;
    }
    // Create variables; Eigen vectors are not zero-initialized by default,
    // so the coefficient vectors are zeroed explicitly
    MatrixXd X(N, M-1);
    VectorXd Y(N);
    VectorXd theta = VectorXd::Zero(M-1);     // estimated coefficients
    VectorXd theta_new = VectorXd::Zero(M-1);
    VectorXd P(N);        // probabilities
    VectorXd SE(M-1);     // standard errors of the estimated coefficients
    vector<double> diff;  // l2 norms of the change in theta per update
    // read data into X and Y: each row is one sample, the label first,
    // followed by the M-1 features
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < M; j++) {
            if (j == 0) myfile >> Y(i);
            else myfile >> X(i, j-1);
        }
    }
    // Coordinate Descent
    while (flag) {
        num++;
        if (num >= max_iter) {
            cerr << "Algorithm cannot converge within " << max_iter << " iterations" << endl;
            return -1;
        }
        for (int j = 0; j < theta.size(); j++) {
            theta = theta_new;
            update(theta, theta_new, X, Y, rate, j);
            // compute the l2 norm of the difference between updated and previous theta
            diff_value = 0;
            for (int k = 0; k < theta.size(); k++) {
                diff_value += pow(theta_new(k) - theta(k), 2);
            }
            diff_value = sqrt(diff_value);
            // store the l2 norm in the sliding window
            diff.push_back(diff_value);
            if (diff.size() > sliding_window) diff.erase(diff.begin(), diff.begin() + diff.size() - sliding_window);
            cout << "moving average of differences is: " << accumulate(diff.begin(), diff.end(), 0.0) / diff.size() << endl;
            // stop if the average of the l2 norms in the sliding window is smaller than the tolerance
            if (accumulate(diff.begin(), diff.end(), 0.0) / diff.size() < tol) {
                flag = false;
                break;
            }
            // print theta
            cout << "theta is: ";
            for (int k = 0; k < theta.size(); k++) {
                cout << theta_new(k) << " ";
            }
            cout << endl;
        }
        // decrease the learning rate after each full pass
        rate /= 1 + num / double(N);
    }
    cout << endl;
    // compute the standard errors of the coefficients as the square roots of the
    // diagonal of the inverse Fisher information X' W X, with W = diag(p_i (1 - p_i));
    // skipped for large N because W is a dense N x N matrix
    if (N <= 1000) {
        for (int i = 0; i < X.rows(); i++) {
            double sum = X.row(i) * theta;
            double p = 1 / (1 + exp(-sum));
            P(i) = p * (1 - p);
        }
        MatrixXd W(N, N); // weights matrix
        W = P.asDiagonal();
        SE = (X.transpose() * W * X).inverse().diagonal().array().sqrt();
    }
    // print the number of iterations
    cout << "number of iterations is: " << num << endl;
    // print theta
    cout << "theta is: ";
    for (int j = 0; j < theta.size(); j++) {
        cout << theta_new(j) << " ";
    }
    cout << endl;
    // print SE
    if (N <= 1000) {
        cout << "standard error is: ";
        for (int j = 0; j < theta.size(); j++) {
            cout << SE(j) << " ";
        }
        cout << endl;
    }
    return 0;
}
// update one coordinate/parameter at a time using the whole data set
void update(VectorXd &theta, VectorXd &theta_new, MatrixXd &X, VectorXd &Y, double rate, int j) {
    double gradient = 0;
    for (int i = 0; i < X.rows(); i++) {
        double sum = X.row(i) * theta;    // linear predictor x_i' theta
        double p = 1 / (1 + exp(-sum));   // logistic function
        double error = Y(i) - p;
        gradient += X(i,j) * error;       // partial derivative of the log-likelihood w.r.t. theta_j
    }
    theta_new(j) = theta(j) + rate * gradient;
}
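A hypothetical build and run, assuming the Eigen headers are installed under /usr/include/eigen3 (the include path, source file, and data file names below are placeholders, not from the gist). The data file is whitespace-separated with one sample per row: the 0/1 label first, then the M-1 features.

g++ -O2 -I/usr/include/eigen3 logistic.cpp -o logistic
./logistic data.txt 1000 4 1e-6

The fourth argument is the optional tolerance; it defaults to 1e-6 when omitted.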