|
# |
|
# check_costfun.py
#
# Gradient-checking utilities for the collaborative filtering cost function:
# compares the analytical gradient against a central-difference estimate.
#
# date: 11/8/2016
|
# |
|
|
|
import numpy as np |
|
|
|
## =============== cofiCostFunc.m part ================================== |
|
def cofi_costfunc(Y, R, shapes, lambda_c, params=None): |
|
    # J = cofi_costfunc(Y, R, shapes, lambda_c, params) returns the
    # regularized cost for the collaborative filtering problem, where
    # shapes = [num_users, num_movies, num_features]. The gradient is
    # computed separately by cofi_costfunc_grad below.

    # Unfold the X and Theta matrices from the flat params vector
|
num_users = shapes[0] |
|
num_movies = shapes[1] |
|
num_features = shapes[2] |
|
boundary = num_movies * num_features |
|
params_c1 = np.copy(params[:boundary]) |
|
params_c2 = np.copy(params[boundary:]) |
|
X = params_c1.reshape((num_movies, num_features)) |
|
Theta = params_c2.reshape((num_users, num_features)) |
|
|
|
    # Regularized squared-error cost over the rated entries (R == 1)
    Jcost = 0.5 * (((np.dot(X, Theta.T) - Y) * R) ** 2).sum() \
        + 0.5 * lambda_c * ((X ** 2).sum() + (Theta ** 2).sum())
|
|
|
return Jcost |
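
# A minimal sanity check of cofi_costfunc (illustrative values, not from any
# exercise data): with all parameters zero the predictions are zero, so the
# unregularized cost is just 0.5 * the sum of the squared observed ratings.
def _demo_cost():
    Y = np.array([[5., 0.],
                  [0., 3.]])            # 2 movies x 2 users
    R = (Y != 0.).astype(float)         # 1 where a rating exists
    shapes = [2, 2, 2]                  # [num_users, num_movies, num_features]
    params = np.zeros(2 * 2 + 2 * 2)    # X (2x2) and Theta (2x2), flattened
    print(cofi_costfunc(Y, R, shapes, 0.0, params))   # 0.5 * (25 + 9) = 17.0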
|
|
|
def cofi_costfunc_grad(Y, R, shapes, lambda_c, params=None): |
|
    # Jgrad = cofi_costfunc_grad(Y, R, shapes, lambda_c, params) returns the
    # gradient of the regularized collaborative filtering cost with respect
    # to every entry of X and Theta, flattened into a single vector with the
    # same layout as params.

    # Unfold the X and Theta matrices from the flat params vector
|
num_users = shapes[0] |
|
num_movies = shapes[1] |
|
num_features = shapes[2] |
|
boundary = num_movies * num_features |
|
params_c1 = np.copy(params[:boundary]) |
|
params_c2 = np.copy(params[boundary:]) |
|
X = params_c1.reshape((num_movies, num_features)) |
|
Theta = params_c2.reshape((num_users, num_features)) |
|
|
|
    # Prediction error on the rated entries; both gradients reuse it
    err = (np.dot(X, Theta.T) - Y) * R
    X_grad = np.dot(err, Theta) + lambda_c * X
    Theta_grad = np.dot(err.T, X) + lambda_c * Theta
|
Jgrad = np.concatenate((X_grad.ravel(), Theta_grad.ravel())) |
|
|
|
return Jgrad |
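
# Quick structural check (illustrative sizes only): the gradient vector must
# line up entry-for-entry with params so that it can be compared against the
# numerical estimate produced by compute_numerical_grad below.
def _demo_grad_shape():
    num_users, num_movies, num_features = 5, 4, 3
    Y = np.random.rand(num_movies, num_users)
    R = np.ones_like(Y)                   # pretend every entry is rated
    params = np.random.randn(num_movies * num_features
                             + num_users * num_features)
    g = cofi_costfunc_grad(Y, R, [num_users, num_movies, num_features],
                           0.0, params)
    assert g.shape == params.shape        # one partial derivative per parameter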
|
|
|
def compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta): |
|
    # compute_numerical_grad computes the gradient using "finite differences"
|
    # and gives us a numerical estimate of the gradient.
|
    # numgrad = compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta)
    # computes the numerical gradient of cost_fn around theta. Calling
    # y = cost_fn(Y, R, shapes, lambda_c, theta) should return the cost at theta.
|
|
|
    # Notes: the following code implements numerical gradient checking and
    # returns the numerical gradient. It sets numgrad[p] to a numerical
    # approximation of the partial derivative of the cost with respect to
    # the p-th parameter, evaluated at theta (i.e., numgrad[p] should be
    # approximately the partial derivative with respect to theta[p]).
|
|
|
numgrad = np.zeros_like(theta) |
|
perturb = np.zeros_like(theta) |
|
eps = 1.e-4 |
|
|
|
for p in range(len(theta)): |
|
# Set perturbation vector |
|
perturb[p] = eps |
|
params1 = theta - perturb |
|
params2 = theta + perturb |
|
loss1 = cost_fn(Y, R, shapes, lambda_c, params1) |
|
loss2 = cost_fn(Y, R, shapes, lambda_c, params2) |
|
# Compute Numerical Gradient |
|
numgrad[p] = (loss2 - loss1) / (2. * eps) |
|
perturb[p] = 0. |
|
|
|
return numgrad |
|
# |
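
# A toy check of the central-difference estimate (made-up cost function; the
# Y/R/shapes arguments are unused here and exist only to match the expected
# cost_fn signature): for J(theta) = sum(theta**2) the exact gradient is
# 2 * theta, and the estimate should agree to roughly 1e-8 with eps = 1e-4.
def _demo_numgrad():
    def toy_cost(Y, R, shapes, lam, params):
        return (params ** 2).sum()
    theta = np.array([1., -2., 3.])
    print(compute_numerical_grad(toy_cost, None, None, None, 0.0, theta))
    # expected: approximately [ 2. -4.  6.]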
|
|
|
def check_costfn(lambda_c=0.0): |
|
|
|
# Create small problem |
|
X_t = np.random.rand(4, 3) |
|
Theta_t = np.random.rand(5, 3) |
|
|
|
# Zap out most entries |
|
Y = np.dot(X_t, Theta_t.T) |
|
Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0. |
|
R = np.zeros_like(Y) |
|
R[Y != 0.] = 1. |
|
|
|
# Run Gradient Checking |
|
X = np.random.randn(4, 3) |
|
Theta = np.random.randn(5, 3) |
|
num_users = Y.shape[1] |
|
num_movies = Y.shape[0] |
|
num_features = Theta_t.shape[1] |
|
theta_arg = np.concatenate((X.ravel(), Theta.ravel())) |
|
shapes = [num_users, num_movies, num_features] |
|
|
|
numgrad = compute_numerical_grad( |
|
cofi_costfunc, Y, R, shapes, lambda_c, theta_arg |
|
) |
|
|
|
grad_ret = cofi_costfunc_grad(Y, R, shapes, lambda_c, theta_arg) |
|
grad = grad_ret.ravel() |
|
|
|
print(' [numgrad : grad] ') |
|
    for i in range(len(numgrad)):
        print(' {0:>10.4f} : {1:>10.4f} '.format(numgrad[i], grad[i]))
|
|
|
    print('The above two columns you get should be very similar.\n',
          '(Left: your numerical gradient, Right: analytical gradient)\n\n')
|
|
|
diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad) |
|
|
|
    print('If your cost function and gradient implementation is correct,\n'
          'then the relative difference will be small (less than 1e-9).\n'
          '\nRelative Difference: ', diff)
|
# |
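
# Typical invocation (the lambda value below is an example; checking both an
# unregularized and a regularized case is a sensible default):
if __name__ == '__main__':
    check_costfn()        # lambda = 0: gradient of the unregularized cost
    check_costfn(1.5)     # with regularization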