jiayuzhou · September 18, 2017 01:28
diff --git a/check_grad.m b/check_grad.m
 function check_grad(f, x0, varargin)
 % CHECK_GRAD  Numerically check an analytic gradient along a random direction.
 %
 %   check_grad(f, x0) compares the gradient returned by f at x0 with central
 %   finite differences of f taken along one random unit direction, for the
 %   step sizes 1e-7, 1e-6, ..., 1e-1, and prints one line per step size.
 %   check_grad(f, x0, p1, p2, ...) forwards p1, p2, ... to every call of f.
 %
 % INPUT
 %   f        - function handle (or name) called as [fx, gx] = f(x, p1, ...);
 %              fx is the function value and gx the gradient at x.
 %   x0       - the location near which the gradient will be evaluated. It may
 %              be a scalar, a row or column vector, or an array; the gradient
 %              must have the same number of elements as x0.
 %   varargin - extra arguments passed through to f unchanged.
 %
 % OUTPUT
 %   Nothing is returned. Each printed line shows the step size and the ratio
 %   (finite-difference change) / (change predicted by the gradient); the
 %   ratio is labelled 'gradient' in the output. For a correct gradient the
 %   displayed ratio should be near 1.0.
 %
 % to check why the code works there is a useful link:
 %    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/
 %
 % Jiayu, Dec 2, 2015

 % random direction, scaled to unit Euclidean length over all of its entries
 direction = rand(size(x0));
 direction = direction ./ norm(direction(:));
 epsilon   = 10.^(-7:-1);

 % only the gradient at x0 is needed; the function value is discarded
 [~, df0] = feval(f, x0, varargin{:});
 if numel(df0) ~= numel(x0)
     error('check_grad:sizeMismatch', ...
           'The gradient has %d elements but x0 has %d.', ...
           numel(df0), numel(x0));
 end

 ys     = zeros(size(epsilon));
 ys_hat = zeros(size(epsilon));
 for i = 1:length(epsilon)
     step    = epsilon(i) * direction;
     f_left  = feval(f, x0 - step, varargin{:});
     f_right = feval(f, x0 + step, varargin{:});
     % central difference: (f(x0 + e*d) - f(x0 - e*d)) / 2  ~  e * <df0, d>
     ys(i) = (f_right - f_left) / 2;
     % flatten both operands so the inner product is a scalar for any shape
     ys_hat(i) = df0(:)' * epsilon(i) * direction(:);
     fprintf('epsilon: %d , gradient: %d \n', epsilon(i), ys(i) / ys_hat(i));
 end
diff --git a/check_grad_example_matrix.m b/check_grad_example_matrix.m
 function check_grad_example_matrix(feature_dim, dic_size, sample_size)
 % CHECK_GRAD_EXAMPLE_MATRIX  Example of check_grad on dictionary learning:
 %          min_{alpha, X} || R - alpha * D * X ||_F^2
 %
 % Random R (feature_dim x sample_size) and D (feature_dim x dic_size) are
 % generated, and the gradient returned by dic_obj is checked at a random
 % starting point.
 %
 % INPUT (all optional)
 %   feature_dim - number of rows of R and D        (default 50)
 %   dic_size    - number of columns of D           (default 20)
 %   sample_size - number of columns of R           (default 30)
 %
 % by Jiayu Zhou. July 9, 2015.

 if nargin < 1, feature_dim = 50; end
 if nargin < 2, dic_size    = 20; end
 if nargin < 3, sample_size = 30; end

 Rdata = randn(feature_dim, sample_size);
 Dic   = randn(feature_dim, dic_size);
 % stacked starting point [X(:); alpha]; the semicolon keeps this long
 % vector from being echoed to the console
 vect0 = rand(dic_size * sample_size + 1, 1);

 % closure on the constant variables.
 test_func = @(x) dic_obj(x, Dic, Rdata);

 % perform testing.
 check_grad(test_func, vect0);


 function [f, g] = dic_obj(variable_vect, D, R)
 % DIC_OBJ  Value and gradient of the objective
 %     f(X, alpha) = || R - alpha * D * X ||_F^2
 %
 % INPUT
 %   variable_vect - stacked variables [X(:); alpha]: the entries of X
 %                   (size(D, 2)-by-size(R, 2)) followed by the scalar alpha
 %   D, R          - the fixed matrices appearing in the objective
 % OUTPUT
 %   f - function value at variable_vect
 %   g - the vectorized gradient, stacked as [grad_X(:); grad_alpha]

 % the shape of X is fixed by the column counts of D and R
 n_atoms   = size(D, 2);
 n_samples = size(R, 2);

 % split the stacked vector into the scalar and the matrix variable
 alpha = variable_vect(end);
 X     = reshape(variable_vect(1:end-1), n_atoms, n_samples);

 % residual R - alpha * D * X
 resid = R - alpha * D * X;

 % objective: squared Frobenius norm of the residual
 f = sum(sum(resid .^ 2));

 % gradient with respect to X and with respect to alpha
 grad_X     = - (2 * alpha) * D' * resid;
 grad_alpha = - 2 * sum(sum((resid' * D)' .* X));

 % stack in the same order as the input
 g = [grad_X(:); grad_alpha];
diff --git a/check_grad_example_vector.m b/check_grad_example_vector.m
 function check_grad_example_vector(feature_dim, sample_size)
 % CHECK_GRAD_EXAMPLE_VECTOR  Example of check_grad on least squares:
 %          min_{x} 0.5 * || A * x - y ||_2^2
 %
 % A random A (sample_size x feature_dim) and y (sample_size x 1) are
 % generated, and the gradient returned by dic_obj is checked at a random
 % starting point.
 %
 % INPUT (all optional)
 %   feature_dim - number of columns of A / length of x   (default 500)
 %   sample_size - number of rows of A / length of y      (default 30)
 %
 % by Jiayu Zhou. Dec 2, 2015.

 if nargin < 1, feature_dim = 500; end
 % sample_size is the 2nd argument: only fall back to the default when it
 % was not supplied
 if nargin < 2, sample_size = 30; end

 A  = randn(sample_size, feature_dim);
 y  = randn(sample_size, 1);
 x0 = rand(feature_dim, 1);

 % closure on the constant variables.
 test_func = @(x) dic_obj(x, A, y);

 % perform testing.
 check_grad(test_func, x0);


 function [f, g] = dic_obj(x, A, y)
 % DIC_OBJ  Value and gradient of the least-squares objective
 %     f(x) = 0.5 * sum((A * x - y).^2)
 %
 % INPUT
 %   x - the search point
 %   A - matrix multiplying x
 %   y - target subtracted from A * x
 % OUTPUT
 %   f - function value at x
 %   g - gradient at x, A' * (A * x - y)

 % residual of the linear model
 resid = A * x - y;

 % half the sum of squared residuals
 f = 0.5 * sum(resid .^ 2);

 % gradient with respect to x
 g = A' * resid;
	function check_grad(f, x0, varargin)
	% CHECK_GRAD  Numerically check an analytic gradient along a random direction.
	%
	%   check_grad(f, x0) compares the gradient returned by f at x0 with central
	%   finite differences of f taken along one random unit direction, for the
	%   step sizes 1e-7, 1e-6, ..., 1e-1, and prints one line per step size.
	%   check_grad(f, x0, p1, p2, ...) forwards p1, p2, ... to every call of f.
	%
	% INPUT
	%   f        - function handle (or name) called as [fx, gx] = f(x, p1, ...);
	%              fx is the function value and gx the gradient at x.
	%   x0       - the location near which the gradient will be evaluated. It may
	%              be a scalar, a row or column vector, or an array; the gradient
	%              must have the same number of elements as x0.
	%   varargin - extra arguments passed through to f unchanged.
	%
	% OUTPUT
	%   Nothing is returned. Each printed line shows the step size and the ratio
	%   (finite-difference change) / (change predicted by the gradient); the
	%   ratio is labelled 'gradient' in the output. For a correct gradient the
	%   displayed ratio should be near 1.0.
	%
	% to check why the code works there is a useful link:
	%    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/
	%
	% Jiayu, Dec 2, 2015

	% random direction, scaled to unit Euclidean length over all of its entries
	direction = rand(size(x0));
	direction = direction ./ norm(direction(:));
	epsilon   = 10.^(-7:-1);

	% only the gradient at x0 is needed; the function value is discarded
	[~, df0] = feval(f, x0, varargin{:});
	if numel(df0) ~= numel(x0)
	    error('check_grad:sizeMismatch', ...
	          'The gradient has %d elements but x0 has %d.', ...
	          numel(df0), numel(x0));
	end

	ys     = zeros(size(epsilon));
	ys_hat = zeros(size(epsilon));
	for i = 1:length(epsilon)
	    step    = epsilon(i) * direction;
	    f_left  = feval(f, x0 - step, varargin{:});
	    f_right = feval(f, x0 + step, varargin{:});
	    % central difference: (f(x0 + e*d) - f(x0 - e*d)) / 2  ~  e * <df0, d>
	    ys(i) = (f_right - f_left) / 2;
	    % flatten both operands so the inner product is a scalar for any shape
	    ys_hat(i) = df0(:)' * epsilon(i) * direction(:);
	    fprintf('epsilon: %d , gradient: %d \n', epsilon(i), ys(i) / ys_hat(i));
	end
	function check_grad_example_matrix(feature_dim, dic_size, sample_size)
	% CHECK_GRAD_EXAMPLE_MATRIX  Example of check_grad on dictionary learning:
	%          min_{alpha, X} || R - alpha * D * X ||_F^2
	%
	% Random R (feature_dim x sample_size) and D (feature_dim x dic_size) are
	% generated, and the gradient returned by dic_obj is checked at a random
	% starting point.
	%
	% INPUT (all optional)
	%   feature_dim - number of rows of R and D        (default 50)
	%   dic_size    - number of columns of D           (default 20)
	%   sample_size - number of columns of R           (default 30)
	%
	% by Jiayu Zhou. July 9, 2015.

	if nargin < 1, feature_dim = 50; end
	if nargin < 2, dic_size = 20; end
	if nargin < 3, sample_size = 30; end

	Rdata = randn(feature_dim, sample_size);
	Dic = randn(feature_dim, dic_size);
	% stacked starting point [X(:); alpha]; the semicolon keeps this long
	% vector from being echoed to the console
	vect0 = rand(dic_size * sample_size + 1, 1);

	% closure on the constant variables.
	test_func = @(x) dic_obj(x, Dic, Rdata);

	% perform testing.
	check_grad(test_func, vect0);


	function [f, g] = dic_obj(variable_vect, D, R)
	% DIC_OBJ  Value and gradient of the objective
	%     f(X, alpha) = || R - alpha * D * X ||_F^2
	%
	% INPUT
	%   variable_vect - stacked variables [X(:); alpha]: the entries of X
	%                   (size(D, 2)-by-size(R, 2)) followed by the scalar alpha
	%   D, R          - the fixed matrices appearing in the objective
	% OUTPUT
	%   f - function value at variable_vect
	%   g - the vectorized gradient, stacked as [grad_X(:); grad_alpha]

	% the shape of X is fixed by the column counts of D and R
	n_atoms   = size(D, 2);
	n_samples = size(R, 2);

	% split the stacked vector into the scalar and the matrix variable
	alpha = variable_vect(end);
	X     = reshape(variable_vect(1:end-1), n_atoms, n_samples);

	% residual R - alpha * D * X
	resid = R - alpha * D * X;

	% objective: squared Frobenius norm of the residual
	f = sum(sum(resid .^ 2));

	% gradient with respect to X and with respect to alpha
	grad_X     = - (2 * alpha) * D' * resid;
	grad_alpha = - 2 * sum(sum((resid' * D)' .* X));

	% stack in the same order as the input
	g = [grad_X(:); grad_alpha];
	function check_grad_example_vector(feature_dim, sample_size)
	% CHECK_GRAD_EXAMPLE_VECTOR  Example of check_grad on least squares:
	%          min_{x} 0.5 * || A * x - y ||_2^2
	%
	% A random A (sample_size x feature_dim) and y (sample_size x 1) are
	% generated, and the gradient returned by dic_obj is checked at a random
	% starting point.
	%
	% INPUT (all optional)
	%   feature_dim - number of columns of A / length of x   (default 500)
	%   sample_size - number of rows of A / length of y      (default 30)
	%
	% by Jiayu Zhou. Dec 2, 2015.

	if nargin < 1, feature_dim = 500; end
	% sample_size is the 2nd argument: only fall back to the default when it
	% was not supplied
	if nargin < 2, sample_size = 30; end

	A = randn(sample_size, feature_dim);
	y = randn(sample_size, 1);
	x0 = rand(feature_dim, 1);

	% closure on the constant variables.
	test_func = @(x) dic_obj(x, A, y);

	% perform testing.
	check_grad(test_func, x0);


	function [f, g] = dic_obj(x, A, y)
	% DIC_OBJ  Value and gradient of the least-squares objective
	%     f(x) = 0.5 * sum((A * x - y).^2)
	%
	% INPUT
	%   x - the search point
	%   A - matrix multiplying x
	%   y - target subtracted from A * x
	% OUTPUT
	%   f - function value at x
	%   g - gradient at x, A' * (A * x - y)

	% residual of the linear model
	resid = A * x - y;

	% half the sum of squared residuals
	f = 0.5 * sum(resid .^ 2);

	% gradient with respect to x
	g = A' * resid;