Ethen ethen8181

brew install mvnvm (this is just to install Maven on a Mac)
Make an Eclipse Maven project on your local machine (File -> New -> Project -> Maven Project). During the setup, just click Next until you reach the step that prompts you to set the group id = com.javamakeuse.hadoop.poc (it turns out you can name it whatever you want) and the artifact id = Homeworkx (the name is whatever you want, e.g. Homework1).
Copy the pom.xml from wolf and use it to replace the local pom.xml (you'll see it on the left in Eclipse).
Go to src/main/java and create a new class (e.g. Exercise1) to do your coding.
After you're done coding, navigate to the directory where the Maven project is stored (e.g. mine is stored under /Users/ethen/Documents/workspace/Homework1) and run mvn package to create the jar file.
After that copy the mr-app-1.0-SNAPSHOT.jar inside the target folder to wolf.
Then ssh to wolf and run the job there using hadoop jar, e.g. for the wordcount example I had a folder

	# MxN matrix A and N sized vector b. Ab =

	# [1, 0, 3]
	# [0, 0, 0]
	# [0, 2, 4]

	# [1, 2, 3]

	import numpy as np
	cols = np.array([0, 2, 1, 2])

	import numpy as np
	from lightfm import LightFM
	from sklearn.metrics import roc_auc_score
	from lightfm.datasets import fetch_movielens

	def auc_score(model, ratings):
	"""
	computes area under the ROC curve (AUC).
	The full name should probably be mean
	auc score as it is computing the auc

	import numpy as np
	from tqdm import trange
	from scipy.special import expit
	from sklearn.base import BaseEstimator
	from copy import deepcopy


	class NeuralNet(BaseEstimator):
	"""
	Neural Network for classification

	from hw1_nnet import NeuralNet
	from keras.datasets.mnist import load_data

	(X_train, y_train), (X_test, y_test) = load_data()
	X_train = X_train.reshape((X_train.shape[0], -1)) / 255.0
	X_test = X_test.reshape((X_test.shape[0], -1)) / 255.0


	nn_params = {
	'reg': 0.01,

	import os
	import numpy as np
	from tqdm import trange
	from scipy.special import expit
	import matplotlib.pyplot as plt
	from sklearn.base import BaseEstimator
	from sklearn.metrics import accuracy_score

	class NeuralNet(BaseEstimator):
	"""

	import os
	import math
	import numpy as np
	import pandas as pd
	from PIL import Image
	from collections import namedtuple

	def preprocessing(folder):
	'''read data and obtain point cloud and camera center.'''
	filename = os.path.join(folder, 'final_project_point_cloud.csv')