Ethen ethen8181

Run brew install mvnvm (this is just to install Maven on a Mac).
Make an Eclipse Maven project on your local machine (File -> New -> Project -> Maven Project). During the setup, just click Next until you reach the screen that prompts you to set the group id = com.javamakeuse.hadoop.poc (it turns out you can name it whatever you want) and the artifact id = Homeworkx (the name can be whatever you want, e.g. Homework1).
Copy the pom.xml from wolf and use it to replace the local pom.xml (you'll see it on the left in Eclipse).
Go to src/main/java and create a new class (e.g. Exercise1) to do your coding.
After you're done coding, navigate to where the Maven project is stored (e.g. mine is stored under /Users/ethen/Documents/workspace/Homework1) and run mvn package to create the jar file.
After that, copy the mr-app-1.0-SNAPSHOT.jar from inside the target folder to wolf.
Then ssh to wolf and run the job there using hadoop jar, e.g. for the wordcount example I had a folder

	library(dplyr)
	library(data.table)

	# Read the `model_ids` slot of `grid` into `ids`.
	# NOTE(review): `grid` is not defined in the visible code; presumably it is
	# the object returned by h2o.grid -- confirm.
	ids <- grid@model_ids

	BestGridSearch <- function( ids )
	{
	# ------------------------------------------------------------------
	# Description :
	# Pass in a list of h2o model id obtained by h2o.grid

	"""
	The set of stop words when you do this:
	from nltk.corpus import stopwords
	from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
	ENGLISH_STOP_WORDS = set( stopwords.words('english') ).union( set(ENGLISH_STOP_WORDS) )
	"""
	ENGLISH_STOP_WORDS = set([
	'a',
	'about',
	'above',

	library(MASS)
	library(ROCR)
	library(caret)
	library(glmnet)
	library(magrittr)
	library(data.table)
	library(doParallel)
	# Switch the working directory so later relative paths resolve against it.
	# NOTE(review): hard-coded absolute path; the directory must exist on the
	# machine running this script.
	setwd('/Users/ethen/Desktop/predictive-project')

	# python3.5
	import os
	import heapq
	import joblib
	import numpy as np
	import pandas as pd
	from shapely.geometry import LineString, Point
	from sklearn.neighbors import NearestNeighbors
	from collections import defaultdict, namedtuple

	import os
	import math
	import numpy as np
	import pandas as pd
	from PIL import Image
	from collections import namedtuple

	def preprocessing(folder):
	'''read data and obtain point cloud and camera center.'''
	filename = os.path.join(folder, 'final_project_point_cloud.csv')

	import os
	import numpy as np
	from tqdm import trange
	from scipy.special import expit
	import matplotlib.pyplot as plt
	from sklearn.base import BaseEstimator
	from sklearn.metrics import accuracy_score

	class NeuralNet(BaseEstimator):
	"""

	from hw1_nnet import NeuralNet
	from keras.datasets.mnist import load_data

	# Unpack the (train, test) pairs returned by load_data().
	(X_train, y_train), (X_test, y_test) = load_data()
	# Keep the first axis and collapse all remaining axes into one, then divide by 255.0.
	# NOTE(review): presumably this scales 8-bit pixel intensities into [0, 1] -- confirm.
	X_train = X_train.reshape((X_train.shape[0], -1)) / 255.0
	X_test = X_test.reshape((X_test.shape[0], -1)) / 255.0


	nn_params = {
	'reg': 0.01,