"ops" feature generation
Created August 4, 2016 20:11
clear; close all; clc
% helpers used later ---
function [result] = measureError(X, y, theta)
  result = linearRegCostFunction(X, y, theta, 0);
endfunction
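% Note: linearRegCostFunction and trainLinearReg are external dependencies
% (they ship with exercise 5 of the Coursera Machine Learning course) and are
% not included in this gist. A minimal sketch of assumed implementations,
% using Octave's fminunc in place of the course's fmincg:
%
%   function [J, grad] = linearRegCostFunction(X, y, theta, lambda)
%     m = rows(y);
%     h = X * theta;                                  % predictions
%     J = sum((h - y) .^ 2) / (2 * m) ...             % mean squared error
%       + lambda * sum(theta(2:end) .^ 2) / (2 * m);  % L2 penalty (skips bias)
%     grad = (X' * (h - y)) / m;
%     grad(2:end) += lambda * theta(2:end) / m;
%   endfunction
%
%   function [theta] = trainLinearReg(X, y, lambda)
%     initial_theta = zeros(columns(X), 1);
%     options = optimset("GradObj", "on", "MaxIter", 200);
%     theta = fminunc(@(t) linearRegCostFunction(X, y, t, lambda), ...
%                     initial_theta, options);
%   endfunction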
% try different d and return errors on the cv set
function [error_val] = inferD(X_to_map, y_training, y_cv, ds, mapper)
  error_val = zeros(columns(ds), 1);
  for i = 1:columns(ds)
    d = ds(i);
    X = mapper(X_to_map, d);
    X_cv = [ones(200, 1), X(1:200, :)];
    X_test = [ones(200, 1), X(201:400, :)]; % built for symmetry; unused here
    X_training = [ones(600, 1), X(401:end, :)];
    theta = trainLinearReg(X_training, y_training, 0); % lambda = 0: good idea?
    error_val(i) = measureError(X_cv, y_cv, theta);
  endfor
endfunction
% try different lambdas and return errors on the training and cv sets
function [error_train, error_val] = inferLambda(X_training, y_training, X_cv, y_cv, lambdas)
  error_train = zeros(rows(lambdas), 12);
  error_val = zeros(rows(lambdas), 12);
  for l = 1:rows(lambdas)
    lambda = lambdas(l);
    for sample = 1:12
      i = 1 + (sample - 1) * 50; % training set sizes: 1, 51, 101, ..., 551
      X_training_i = X_training(1:i, :);
      y_i = y_training(1:i, :);
      theta = trainLinearReg(X_training_i, y_i, lambda);
      error_train(l, sample) = linearRegCostFunction(X_training_i, y_i, theta, 0);
      error_val(l, sample) = linearRegCostFunction(X_cv, y_cv, theta, 0);
    endfor
  endfor
endfunction
function plotLearningCurve(error_train, error_val)
  plot([1, 51:50:551], error_train, [1, 51:50:551], error_val);
  text([1, 51:50:551], error_val, num2str(error_val'), "rotation", 90);
  legend('Train', 'Cross Validation');
endfunction
% Poly map features. For features [x1, x2] and d=2 this will produce:
% [x1, x2, x1*x1, x1*x2, x2*x1, x2*x2]
function result = mapPoly(features, d)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      current = [current, prev .* features(:, i)];
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
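% Quick illustration (hypothetical input, not part of this experiment):
%   mapPoly([2 3], 2)   % => [2 3 4 6 6 9]
% i.e. the raw features followed by every degree-2 product
% prev(:, j) .* features(:, i), grouped by the second factor.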
% Map features with binary operations.
% For features [x1, x2], ops [op1, op2] and d=2 this will produce:
% [x1, x2, op1(x1,x1), op1(x2,x1), op2(x1,x1), op2(x2,x1), ...
%  op1(x1,x2), op1(x2,x2), op2(x1,x2), op2(x2,x2)]
% (columns are grouped by the second operand, then by op)
function result = mapOps(features, d, ops)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      for o = 1:rows(ops)
        current = [current, inline(ops(o, :))(prev, features(:, i))];
      endfor
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
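% Quick illustration (hypothetical input, not part of this experiment):
%   mapOps([2 3], 2, ["a .* b"; "a ./ b"])
%   % => [2 3 4 6 1 1.5 6 9 0.66667 1]
% i.e. the raw features, then each op applied between every previous
% column (as "a") and every raw feature (as "b").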
% initial data ---
m = 1000; % dataset size
rand("seed", 42);
randn("seed", 314); % to rule out nondeterminism between runs
v = rand(m, 1); % voltage (V)
r = rand(m, 1); % resistance (R)
y = v ./ r; % current (I = V/R)
y = y + (y .* randn(m, 1) ./ 10); % simulate ~10% measurement error
y_cv = y(1:200);
y_test = y(201:400);
y_training = y(401:end);
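% (split: first 200 rows -> cv, next 200 -> test, last 600 -> training;
% rows are i.i.d. draws, so contiguous slices work as well as a shuffle)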
% linear regression on raw features ---
X_raw = [v, r];
X_raw_cv = [ones(200, 1), X_raw(1:200, :)];
X_raw_test = [ones(200, 1), X_raw(201:400, :)];
X_raw_training = [ones(600, 1), X_raw(401:end, :)];
lambda_raw = 0; % high-bias case, no need for regularization
theta_raw = trainLinearReg(X_raw_training, y_training, lambda_raw);
error_raw = measureError(X_raw_test, y_test, theta_raw);
% linear regression on polynomial features ---
d_poly = 8; % inferred
X_poly = mapPoly([v, r], d_poly);
X_poly_cv = [ones(200, 1), X_poly(1:200, :)];
X_poly_test = [ones(200, 1), X_poly(201:400, :)];
X_poly_training = [ones(600, 1), X_poly(401:end, :)];
lambda_poly = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([v, r], y_training, y_cv, 1:9, ...
%                   @(X_to_map, d) mapPoly(X_to_map, d));
%bar(1:9, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_poly_training, y_training, X_poly_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_poly = trainLinearReg(X_poly_training, y_training, lambda_poly);
error_poly = measureError(X_poly_test, y_test, theta_poly);
% linear regression on ops-mapped features ---
d_ops = 2; % inferred
X_ops = mapOps([v, r], d_ops, ["a .* b"; "a ./ b"; "a .^ b"]);
X_ops_cv = [ones(200, 1), X_ops(1:200, :)];
X_ops_test = [ones(200, 1), X_ops(201:400, :)];
X_ops_training = [ones(600, 1), X_ops(401:end, :)];
lambda_ops = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([v, r], y_training, y_cv, 1:5, ...
%                   @(X_to_map, d) mapOps(X_to_map, d, ["a .* b"; "a ./ b"; "a .^ b"]));
%bar(1:5, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_ops_training, y_training, X_ops_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_ops = trainLinearReg(X_ops_training, y_training, lambda_ops);
error_ops = measureError(X_ops_test, y_test, theta_ops);
% test-set errors:
% 17.69321 raw
% 10.13354 poly (1.7460 times smaller than raw)
%  0.26357 ops (38.447 times smaller than poly)
bar([error_raw; error_poly; error_ops])
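% Why the ops mapping wins here: with d = 2 and the "a ./ b" op, mapOps
% emits the column v ./ r, which is exactly the noiseless target I = V/R,
% so the regression only has to pick out that single feature. Polynomials
% in v and r can only approximate the division.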
The second file applies the same pipeline to Newton's law of gravitation (F = G * m1 * m2 / r^2):
clear; close all; clc
% helpers used later ---
function [result] = measureError(X, y, theta)
  result = linearRegCostFunction(X, y, theta, 0);
endfunction
% try different d and return errors on the cv set
function [error_val] = inferD(X_to_map, y_training, y_cv, ds, mapper)
  error_val = zeros(columns(ds), 1);
  for i = 1:columns(ds)
    d = ds(i);
    X = mapper(X_to_map, d);
    X_cv = [ones(200, 1), X(1:200, :)];
    X_test = [ones(200, 1), X(201:400, :)]; % built for symmetry; unused here
    X_training = [ones(600, 1), X(401:end, :)];
    theta = trainLinearReg(X_training, y_training, 0); % lambda = 0: good idea?
    error_val(i) = measureError(X_cv, y_cv, theta);
  endfor
endfunction
% try different lambdas and return errors on the training and cv sets
function [error_train, error_val] = inferLambda(X_training, y_training, X_cv, y_cv, lambdas)
  error_train = zeros(rows(lambdas), 12);
  error_val = zeros(rows(lambdas), 12);
  for l = 1:rows(lambdas)
    lambda = lambdas(l);
    for sample = 1:12
      i = 1 + (sample - 1) * 50; % training set sizes: 1, 51, 101, ..., 551
      X_training_i = X_training(1:i, :);
      y_i = y_training(1:i, :);
      theta = trainLinearReg(X_training_i, y_i, lambda);
      error_train(l, sample) = linearRegCostFunction(X_training_i, y_i, theta, 0);
      error_val(l, sample) = linearRegCostFunction(X_cv, y_cv, theta, 0);
    endfor
  endfor
endfunction
function plotLearningCurve(error_train, error_val)
  plot([1, 51:50:551], error_train, [1, 51:50:551], error_val);
  text([1, 51:50:551], error_val, num2str(error_val'), "rotation", 90);
  legend('Train', 'Cross Validation');
endfunction
% Poly map features. For features [x1, x2] and d=2 this will produce:
% [x1, x2, x1*x1, x1*x2, x2*x1, x2*x2]
function result = mapPoly(features, d)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      current = [current, prev .* features(:, i)];
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
% Map features with binary operations.
% For features [x1, x2], ops [op1, op2] and d=2 this will produce:
% [x1, x2, op1(x1,x1), op1(x2,x1), op2(x1,x1), op2(x2,x1), ...
%  op1(x1,x2), op1(x2,x2), op2(x1,x2), op2(x2,x2)]
% (columns are grouped by the second operand, then by op)
function result = mapOps(features, d, ops)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      for o = 1:rows(ops)
        current = [current, inline(ops(o, :))(prev, features(:, i))];
      endfor
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
% initial data ---
m = 1000; % dataset size
rand("seed", 42);
randn("seed", 314); % to rule out nondeterminism between runs
m1 = rand(m, 1); % first mass (m1)
m2 = rand(m, 1); % second mass (m2)
r = rand(m, 1); % distance (r)
y = 6.674e-11 .* ((m1 .* m2) ./ (r .^ 2)); % force (F = G * (m1 * m2) / r^2)
y = y + (y .* randn(m, 1) ./ 10); % simulate ~10% measurement error
y_cv = y(1:200);
y_test = y(201:400);
y_training = y(401:end);
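% (note the scale: G = 6.674e-11 makes y tiny, which is why the squared-error
% values reported at the end of this script are on the order of 1e-16 rather
% than the ~1e+1 values seen in the Ohm's-law experiment above)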
% linear regression on raw features ---
X_raw = [m1, m2, r];
X_raw_cv = [ones(200, 1), X_raw(1:200, :)];
X_raw_test = [ones(200, 1), X_raw(201:400, :)];
X_raw_training = [ones(600, 1), X_raw(401:end, :)];
lambda_raw = 0; % high-bias case, no need for regularization
theta_raw = trainLinearReg(X_raw_training, y_training, lambda_raw);
error_raw = measureError(X_raw_test, y_test, theta_raw);
% linear regression on polynomial features ---
d_poly = 5; % inferred
X_poly = mapPoly([m1, m2, r], d_poly);
X_poly_cv = [ones(200, 1), X_poly(1:200, :)];
X_poly_test = [ones(200, 1), X_poly(201:400, :)];
X_poly_training = [ones(600, 1), X_poly(401:end, :)];
lambda_poly = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([m1, m2, r], y_training, y_cv, 1:9, ...
%                   @(X_to_map, d) mapPoly(X_to_map, d));
%bar(1:9, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_poly_training, y_training, X_poly_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_poly = trainLinearReg(X_poly_training, y_training, lambda_poly);
error_poly = measureError(X_poly_test, y_test, theta_poly);
% linear regression on ops-mapped features ---
d_ops = 3; % inferred
X_ops = mapOps([m1, m2, r], d_ops, ["a .* b"; "a ./ b"; "a .^ b"]);
X_ops_cv = [ones(200, 1), X_ops(1:200, :)];
X_ops_test = [ones(200, 1), X_ops(201:400, :)];
X_ops_training = [ones(600, 1), X_ops(401:end, :)];
lambda_ops = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([m1, m2, r], y_training, y_cv, 1:5, ...
%                   @(X_to_map, d) mapOps(X_to_map, d, ["a .* b"; "a ./ b"; "a .^ b"]));
%bar(1:5, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_ops_training, y_training, X_ops_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;
theta_ops = trainLinearReg(X_ops_training, y_training, lambda_ops);
error_ops = measureError(X_ops_test, y_test, theta_ops);
% test-set errors:
% 4.3083e-016 raw
% 4.1347e-016 poly
% 1.6780e-017 ops (24.641 times smaller than poly)
bar([error_raw; error_poly; error_ops])
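% Why ops beats poly here as well: at d = 3 the ops mapping contains terms
% such as (m1 .* m2) ./ r, structurally close to the target
% F = G * m1 * m2 / r^2, whereas polynomial features cannot express
% division by r at all.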