% CODED BY GALINA STRELTSOVA
%
% REGRESSION ANALYSIS OF DATA USING TWO DIFFERENT IMPLEMENTATIONS OF LINEAR
% REGRESSION, PRINCIPAL COMPONENT ANALYSIS DIMENSIONALITY REDUCTION,
% PRINCIPAL COMPONENT ANALYSIS REGRESSION AND PARTIAL LEAST SQUARES
% REGRESSION. THE METHODS ARE COMPARED BY PLOTTING THE RESIDUALS OF EACH
% FIT TO DETERMINE WHICH TYPE OF REGRESSION WORKS BEST FOR THIS DATA.
% THE ANALYSIS USES THE MOST POPULAR AND COMMON REGRESSION ALGORITHMS.
%
% OBSERVED VARIABLES
% X1 Relative Compactness
% X2 Surface Area
% X3 Wall Area
% X4 Roof Area
% X5 Overall Height
% X6 Orientation
% X7 Glazing Area
% X8 Glazing Area Distribution
% y1 Heating Load
% y2 Cooling Load
sourceData = xlsread('ENB2012_data.xlsx');
origX = sourceData(:,1:8);   % features X1..X8
origY = sourceData(:,9:10);  % responses y1 (heating load), y2 (cooling load)
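% (ENB2012_data.xlsx is presumably the UCI Energy Efficiency dataset of
% Tsanas & Xifara: 768 building shapes, 8 features, 2 responses)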
%% ====== MANUAL LINEAR REGRESSION IMPLEMENTATION SECTION BEGIN ======
% PREDICTING COOLING LOAD FROM RELATIVE COMPACTNESS
% plot data
figure;
plot(origX(:,1), origY(:,2), 'rx', 'MarkerSize', 10);
xlabel('Relative Compactness');
ylabel('Cooling Load');
legend('Training Data');
title('Linear Regression Predicting Cooling Load');
% prepare variables we'll need
m = size(origX,1);
% make predictions based on model
% y = theta1 + theta2*x1 + theta3*(x1^2) + theta4*(x1^3)
X = [ones(m,1), origX(:,1), origX(:,1).^2, origX(:,1).^3];
y = origY(:,2);
theta = zeros(4,1);
% print initial cost of theta parameters
fprintf('Initial Cost\n');
[cost, grad] = computeCost(X, y, theta);
disp(cost);
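% The helper computeCost is not included in this gist; a minimal sketch of
% what it presumably implements (the standard squared-error cost and its
% gradient), to be saved in a separate file computeCost.m:
%
%   function [J, grad] = computeCost(X, y, theta)
%       m = length(y);                    % number of training examples
%       h = X * theta;                    % predictions
%       J = sum((h - y).^2) / (2*m);      % squared-error cost
%       grad = (X' * (h - y)) / m;        % gradient of J w.r.t. theta
%   end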
% prepare variables for gradient descent
alpha = 0.5;   % learning rate
iter = 5;      % number of gradient descent iterations
J_history = zeros(iter,1);   % cost at each iteration
% perform gradient descent
[theta, J_history] = gradientDescent(X, y, theta, alpha, iter);
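% gradientDescent is likewise not included; a minimal sketch, assuming
% batch gradient descent with a fixed learning rate (save as
% gradientDescent.m):
%
%   function [theta, J_history] = gradientDescent(X, y, theta, alpha, iter)
%       m = length(y);
%       J_history = zeros(iter, 1);
%       for i = 1:iter
%           theta = theta - (alpha/m) * (X' * (X*theta - y));  % update step
%           J_history(i) = computeCost(X, y, theta);           % track cost
%       end
%   end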
% plot fitted regression curve (sorted by x so the line draws cleanly)
hold on;
[xs, idx] = sort(X(:,2));
plot(xs, X(idx,:)*theta, 'b-');
% find errors between predicted response variable and real variable
yfitLin = X*theta;
residuals = y - yfitLin;
% plot residuals
figure;
stem(residuals);
xlabel('Observation');
ylabel('Residual');
title('Residuals of Observations for Manually Computed Linear Regression');
% display found data
fprintf('Theta found by gradient descent: ');
fprintf('%f %f %f %f \n', theta(1), theta(2), theta(3), theta(4));
%% ====== LIBRARY LINEAR REGRESSION IMPLEMENTATION SECTION BEGIN ======
% MAKING PREDICTIONS ABOUT COOLING LOAD BASED ON ALL VARIABLES
% fit linear regression model
mdl = fitlm(origX, y);
% make predictions based on the fitted model
yfitLin2 = predict(mdl, origX);
% find errors between predicted response variable and real variable
residuals2 = y - yfitLin2;
% plot residuals
figure;
stem(residuals2);
xlabel('Observation');
ylabel('Residual');
title('Residuals of Observations for Library Linear Regression');
%% ====== PRINCIPAL COMPONENT ANALYSIS SECTION BEGIN ======
% PROJECTING DATA ONTO 2-DIMENSIONAL SPACE
% plot data in 3D space
figure;
scatter3(origX(:,1), origX(:,2), origY(:,2), 'ro');
title('Data Represented in 3D Space');
X = origX;
% perform PCA; the columns of U are the eigenvectors (principal
% directions) and S holds the corresponding eigenvalues
[U, S] = pcaMan(X);
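% pcaMan is not part of this gist; a minimal sketch, assuming it computes
% the principal components via SVD of the covariance matrix (note: PCA
% normally expects the columns of X to be mean-normalized first):
%
%   function [U, S] = pcaMan(X)
%       m = size(X, 1);
%       Sigma = (X' * X) / m;     % covariance matrix (X assumed centered)
%       [U, S, ~] = svd(Sigma);   % columns of U are principal directions
%   end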
% project data onto the first 2 principal components
Z = projectData(X, U, 2);
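% projectData is not part of this gist either; a minimal sketch, assuming
% it projects X onto the top K principal directions:
%
%   function Z = projectData(X, U, K)
%       Z = X * U(:, 1:K);   % coordinates of X in the K-dim subspace
%   end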
figure;
scatter(Z(:,1), y);
xlabel('First Principal Component');
ylabel('Response Variable');
title('Data Projected onto 2D Space');
%% ====== PARTIAL LEAST SQUARES REGRESSION SECTION BEGIN ======
% MAKING PREDICTIONS BASED ON THE COMPONENTS WHICH EXPLAIN THE
% MAJORITY OF VARIANCE
% loadings are the coefficients of the linear combinations that predict
% the original variables from the (standardized) components;
% scores are the original data transformed into the lower-dimensional space
% perform PLS regression
[Xloadings,Yloadings,Xscores,Yscores,betaPLS,PLSPctVar] = plsregress(X, y);
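% PLSPctVar is 2-by-ncomp: row 1 holds the percentage of variance explained
% in X by each PLS component, row 2 the percentage explained in y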
% plot cumulative percentage of variance in y explained
figure;
plot(1:8, cumsum(100*PLSPctVar(2,:)), '-bo');
xlabel('Number of PLS Components');
ylabel('Percent Variance Explained in y');
title('Explained Variance');
% make predictions based on the fitted PLS coefficients
yfitPLSR = [ones(size(X,1),1) X]*betaPLS;
% find errors between predicted response variable and real variable
residuals3 = y - yfitPLSR;
% plot residuals
figure;
stem(residuals3);
xlabel('Observation');
ylabel('Residual');
title('Residuals of Observations for PLS Regression');
%% ====== PRINCIPAL COMPONENT ANALYSIS REGRESSION SECTION BEGIN ======
[PCALoadings,PCAScores,PCAVar] = pca(origX);
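% regress the centered response on the first two principal component
% scores, then rotate the coefficients back to the original predictor
% space and recover the intercept (the standard PCR recipe, as in the
% MathWorks PLSR-vs-PCR example)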
betaPCR = regress(y-mean(y), PCAScores(:,1:2));
betaPCR = PCALoadings(:,1:2)*betaPCR;
betaPCR = [mean(y) - mean(X)*betaPCR; betaPCR];
n = size(X,1);
yfitPCR = [ones(n,1) X]*betaPCR;
residuals4 = y - yfitPCR;
% plot residuals
figure;
stem(residuals4);
xlabel('Observation');
ylabel('Residual');
title('Residuals of Observations for PCA Regression');
% compare amount of variance in X explained by PCA regression and PLS regression
figure;
plot(1:8, 100*cumsum(PLSPctVar(1,:)), '-bo', ...
     1:8, 100*cumsum(PCAVar(1:8))/sum(PCAVar(1:8)), 'r-^');
xlabel('Number of Principal Components');
ylabel('Percent Variance Explained in X');
legend({'PLSR' 'PCR'}, 'location', 'SE');
title('Explained Variance');
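% PLSR constructs its components to be predictive of the response, so it
% generally explains y with fewer components, while PCR ranks components
% purely by the variance they capture in X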