"ops" feature generation
Created August 4, 2016 20:11
clear; close all; clc
% helpers used later ---
function [result] = measureError(X, y, theta)
  result = linearRegCostFunction(X, y, theta, 0);
endfunction
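% Note: linearRegCostFunction and trainLinearReg are external dependencies
% (they ship with exercise 5 of the Coursera Machine Learning course) and are
% not included in this gist. A minimal sketch of assumed implementations,
% using Octave's fminunc in place of the course's fmincg:
%
%   function [J, grad] = linearRegCostFunction(X, y, theta, lambda)
%     m = rows(y);
%     h = X * theta;                                  % predictions
%     J = sum((h - y) .^ 2) / (2 * m) ...             % mean squared error
%       + lambda * sum(theta(2:end) .^ 2) / (2 * m);  % L2 penalty (skips bias)
%     grad = (X' * (h - y)) / m;
%     grad(2:end) += lambda * theta(2:end) / m;
%   endfunction
%
%   function [theta] = trainLinearReg(X, y, lambda)
%     initial_theta = zeros(columns(X), 1);
%     options = optimset("GradObj", "on", "MaxIter", 200);
%     theta = fminunc(@(t) linearRegCostFunction(X, y, t, lambda), ...
%                     initial_theta, options);
%   endfunction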
% try different d and return errors on the cv set
function [error_val] = inferD(X_to_map, y_training, y_cv, ds, mapper)
  error_val = zeros(columns(ds), 1);
  for i = 1:columns(ds)
    d = ds(i);
    X = mapper(X_to_map, d);
    X_cv = [ones(200, 1), X(1:200, :)];
    X_test = [ones(200, 1), X(201:400, :)]; % built for symmetry; unused here
    X_training = [ones(600, 1), X(401:end, :)];
    theta = trainLinearReg(X_training, y_training, 0); % lambda = 0: good idea?
    error_val(i) = measureError(X_cv, y_cv, theta);
  endfor
endfunction
% try different lambdas and return errors on the training and cv sets
function [error_train, error_val] = inferLambda(X_training, y_training, X_cv, y_cv, lambdas)
  error_train = zeros(rows(lambdas), 12);
  error_val = zeros(rows(lambdas), 12);
  for l = 1:rows(lambdas)
    lambda = lambdas(l);
    for sample = 1:12
      i = 1 + (sample - 1) * 50; % training set sizes: 1, 51, 101, ..., 551
      X_training_i = X_training(1:i, :);
      y_i = y_training(1:i, :);
      theta = trainLinearReg(X_training_i, y_i, lambda);
      error_train(l, sample) = linearRegCostFunction(X_training_i, y_i, theta, 0);
      error_val(l, sample) = linearRegCostFunction(X_cv, y_cv, theta, 0);
    endfor
  endfor
endfunction
function plotLearningCurve(error_train, error_val)
  plot([1, 51:50:551], error_train, [1, 51:50:551], error_val);
  text([1, 51:50:551], error_val, num2str(error_val'), "rotation", 90);
  legend('Train', 'Cross Validation');
endfunction
% Poly map features. For features [x1, x2] and d=2 this will produce:
% [x1, x2, x1*x1, x1*x2, x2*x1, x2*x2]
function result = mapPoly(features, d)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      current = [current, prev .* features(:, i)];
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
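% Quick illustration (hypothetical input, not part of this experiment):
%   mapPoly([2 3], 2)   % => [2 3 4 6 6 9]
% i.e. the raw features followed by every degree-2 product
% prev(:, j) .* features(:, i), grouped by the second factor.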
% Map features with binary operations.
% For features [x1, x2], ops [op1, op2] and d=2 this will produce:
% [x1, x2, op1(x1,x1), op1(x2,x1), op2(x1,x1), op2(x2,x1), ...
%  op1(x1,x2), op1(x2,x2), op2(x1,x2), op2(x2,x2)]
% (columns are grouped by the second operand, then by op)
function result = mapOps(features, d, ops)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      for o = 1:rows(ops)
        current = [current, inline(ops(o, :))(prev, features(:, i))];
      endfor
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
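% Quick illustration (hypothetical input, not part of this experiment):
%   mapOps([2 3], 2, ["a .* b"; "a ./ b"])
%   % => [2 3 4 6 1 1.5 6 9 0.66667 1]
% i.e. the raw features, then each op applied between every previous
% column (as "a") and every raw feature (as "b").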
% initial data ---
m = 1000; % dataset size
rand("seed", 42);
randn("seed", 314); % to rule out nondeterminism between runs
v = rand(m, 1); % voltage (V)
r = rand(m, 1); % resistance (R)
y = v ./ r; % current (I = V/R)
y = y + (y .* randn(m, 1) ./ 10); % simulate ~10% measurement error
y_cv = y(1:200);
y_test = y(201:400);
y_training = y(401:end);
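% (split: first 200 rows -> cv, next 200 -> test, last 600 -> training;
% rows are i.i.d. draws, so contiguous slices work as well as a shuffle)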
% linear regression on raw features ---
X_raw = [v, r];
X_raw_cv = [ones(200, 1), X_raw(1:200, :)];
X_raw_test = [ones(200, 1), X_raw(201:400, :)];
X_raw_training = [ones(600, 1), X_raw(401:end, :)];
lambda_raw = 0; % high-bias case, no need for regularization
theta_raw = trainLinearReg(X_raw_training, y_training, lambda_raw);
error_raw = measureError(X_raw_test, y_test, theta_raw);
% linear regression on polynomial features ---
d_poly = 8; % inferred
X_poly = mapPoly([v, r], d_poly);
X_poly_cv = [ones(200, 1), X_poly(1:200, :)];
X_poly_test = [ones(200, 1), X_poly(201:400, :)];
X_poly_training = [ones(600, 1), X_poly(401:end, :)];
lambda_poly = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([v, r], y_training, y_cv, 1:9, ...
%                   @(X_to_map, d) mapPoly(X_to_map, d));
%bar(1:9, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_poly_training, y_training, X_poly_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_poly = trainLinearReg(X_poly_training, y_training, lambda_poly);
error_poly = measureError(X_poly_test, y_test, theta_poly);
% linear regression on ops-mapped features ---
d_ops = 2; % inferred
X_ops = mapOps([v, r], d_ops, ["a .* b"; "a ./ b"; "a .^ b"]);
X_ops_cv = [ones(200, 1), X_ops(1:200, :)];
X_ops_test = [ones(200, 1), X_ops(201:400, :)];
X_ops_training = [ones(600, 1), X_ops(401:end, :)];
lambda_ops = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([v, r], y_training, y_cv, 1:5, ...
%                   @(X_to_map, d) mapOps(X_to_map, d, ["a .* b"; "a ./ b"; "a .^ b"]));
%bar(1:5, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_ops_training, y_training, X_ops_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_ops = trainLinearReg(X_ops_training, y_training, lambda_ops);
error_ops = measureError(X_ops_test, y_test, theta_ops);
% test-set errors:
% 17.69321 raw
% 10.13354 poly (1.7460 times smaller than raw)
%  0.26357 ops (38.447 times smaller than poly)
bar([error_raw; error_poly; error_ops])
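% Why the ops mapping wins here: with d = 2 and the "a ./ b" op, mapOps
% emits the column v ./ r, which is exactly the noiseless target I = V/R,
% so the regression only has to pick out that single feature. Polynomials
% in v and r can only approximate the division.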
The second file applies the same pipeline to Newton's law of gravitation (F = G * m1 * m2 / r^2):
clear; close all; clc
% helpers used later ---
function [result] = measureError(X, y, theta)
  result = linearRegCostFunction(X, y, theta, 0);
endfunction
% try different d and return errors on the cv set
function [error_val] = inferD(X_to_map, y_training, y_cv, ds, mapper)
  error_val = zeros(columns(ds), 1);
  for i = 1:columns(ds)
    d = ds(i);
    X = mapper(X_to_map, d);
    X_cv = [ones(200, 1), X(1:200, :)];
    X_test = [ones(200, 1), X(201:400, :)]; % built for symmetry; unused here
    X_training = [ones(600, 1), X(401:end, :)];
    theta = trainLinearReg(X_training, y_training, 0); % lambda = 0: good idea?
    error_val(i) = measureError(X_cv, y_cv, theta);
  endfor
endfunction
% try different lambdas and return errors on the training and cv sets
function [error_train, error_val] = inferLambda(X_training, y_training, X_cv, y_cv, lambdas)
  error_train = zeros(rows(lambdas), 12);
  error_val = zeros(rows(lambdas), 12);
  for l = 1:rows(lambdas)
    lambda = lambdas(l);
    for sample = 1:12
      i = 1 + (sample - 1) * 50; % training set sizes: 1, 51, 101, ..., 551
      X_training_i = X_training(1:i, :);
      y_i = y_training(1:i, :);
      theta = trainLinearReg(X_training_i, y_i, lambda);
      error_train(l, sample) = linearRegCostFunction(X_training_i, y_i, theta, 0);
      error_val(l, sample) = linearRegCostFunction(X_cv, y_cv, theta, 0);
    endfor
  endfor
endfunction
function plotLearningCurve(error_train, error_val)
  plot([1, 51:50:551], error_train, [1, 51:50:551], error_val);
  text([1, 51:50:551], error_val, num2str(error_val'), "rotation", 90);
  legend('Train', 'Cross Validation');
endfunction
% Poly map features. For features [x1, x2] and d=2 this will produce:
% [x1, x2, x1*x1, x1*x2, x2*x1, x2*x2]
function result = mapPoly(features, d)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      current = [current, prev .* features(:, i)];
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
% Map features with binary operations.
% For features [x1, x2], ops [op1, op2] and d=2 this will produce:
% [x1, x2, op1(x1,x1), op1(x2,x1), op2(x1,x1), op2(x2,x1), ...
%  op1(x1,x2), op1(x2,x2), op2(x1,x2), op2(x2,x2)]
% (columns are grouped by the second operand, then by op)
function result = mapOps(features, d, ops)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      for o = 1:rows(ops)
        current = [current, inline(ops(o, :))(prev, features(:, i))];
      endfor
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
% initial data ---
m = 1000; % dataset size
rand("seed", 42);
randn("seed", 314); % to rule out nondeterminism between runs
m1 = rand(m, 1); % first mass (m1)
m2 = rand(m, 1); % second mass (m2)
r = rand(m, 1); % distance (r)
y = 6.674e-11 .* ((m1 .* m2) ./ (r .^ 2)); % force (F = G * (m1 * m2) / r^2)
y = y + (y .* randn(m, 1) ./ 10); % simulate ~10% measurement error
y_cv = y(1:200);
y_test = y(201:400);
y_training = y(401:end);
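% (note the scale: G = 6.674e-11 makes y tiny, which is why the squared-error
% values reported at the end of this script are on the order of 1e-16 rather
% than the ~1e+1 values seen in the Ohm's-law experiment above)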
% linear regression on raw features ---
X_raw = [m1, m2, r];
X_raw_cv = [ones(200, 1), X_raw(1:200, :)];
X_raw_test = [ones(200, 1), X_raw(201:400, :)];
X_raw_training = [ones(600, 1), X_raw(401:end, :)];
lambda_raw = 0; % high-bias case, no need for regularization
theta_raw = trainLinearReg(X_raw_training, y_training, lambda_raw);
error_raw = measureError(X_raw_test, y_test, theta_raw);
% linear regression on polynomial features ---
d_poly = 5; % inferred
X_poly = mapPoly([m1, m2, r], d_poly);
X_poly_cv = [ones(200, 1), X_poly(1:200, :)];
X_poly_test = [ones(200, 1), X_poly(201:400, :)];
X_poly_training = [ones(600, 1), X_poly(401:end, :)];
lambda_poly = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([m1, m2, r], y_training, y_cv, 1:9, ...
%                   @(X_to_map, d) mapPoly(X_to_map, d));
%bar(1:9, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_poly_training, y_training, X_poly_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_poly = trainLinearReg(X_poly_training, y_training, lambda_poly);
error_poly = measureError(X_poly_test, y_test, theta_poly);
% linear regression on ops-mapped features ---
d_ops = 3; % inferred
X_ops = mapOps([m1, m2, r], d_ops, ["a .* b"; "a ./ b"; "a .^ b"]);
X_ops_cv = [ones(200, 1), X_ops(1:200, :)];
X_ops_test = [ones(200, 1), X_ops(201:400, :)];
X_ops_training = [ones(600, 1), X_ops(401:end, :)];
lambda_ops = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([m1, m2, r], y_training, y_cv, 1:5, ...
%                   @(X_to_map, d) mapOps(X_to_map, d, ["a .* b"; "a ./ b"; "a .^ b"]));
%bar(1:5, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_ops_training, y_training, X_ops_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_ops = trainLinearReg(X_ops_training, y_training, lambda_ops);
error_ops = measureError(X_ops_test, y_test, theta_ops);
% test-set errors:
% 4.3083e-016 raw
% 4.1347e-016 poly
% 1.6780e-017 ops (24.641 times smaller than poly)
bar([error_raw; error_poly; error_ops])
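% Why ops beats poly here as well: at d = 3 the ops mapping contains terms
% such as (m1 .* m2) ./ r, structurally close to the target
% F = G * m1 * m2 / r^2, whereas polynomial features cannot express
% division by r at all.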