@cozek
Created January 19, 2019 04:02
Python 3 script for easily running Hadoop MapReduce programs
#!/usr/bin/env python3
# DISCLAIMER: Provided with no warranty; you are responsible for whatever may befall you
# or your property as a result of using this script.
# You implicitly agree to this by running this script.
# Feel free to improve, modify, and distribute.
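#
# Subcommands (pass exactly one as the first argument):
#   make       compile the .java file and package the classes into a jar
#   run        submit the jar to Hadoop with the input/output paths configured below
#   cleanDFS   delete the HDFS output directory so the job can be rerun
#   cleanJar   delete local .class and .jar build artifacts
#   getoutput  print the job's part-* output files from HDFS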
import os
import sys
import subprocess

input_directory = "/user/cosec/words.txt"  # input file; must already exist in HDFS
output_directory = "/user/cosec/op3"  # HDFS location where you want the job's output
program_name = "WordCount"  # name of the main class inside your .java file
jar_file = "cw.jar"  # the jar package you want to create
java_program = "WordCount.java"  # the source file to compile; must be compatible with your Hadoop version

clean_jar = "rm -f *.class *.jar"  # removes previously compiled classes and jars (including cw.jar)
compile_java = "hadoop com.sun.tools.javac.Main " + java_program
create_jar = "jar cf " + jar_file + " *.class"
hadoop = " ".join(["hadoop jar", jar_file, program_name, input_directory, output_directory])
# what the final command looks like:
# hadoop jar cw.jar WordCount /user/cosec/words.txt /user/cosec/op3

# JAVA_HOME differs between ready-made Hadoop distributions; set it accordingly.
# An easy way to find it: type /usr/java/ and press TAB to let the shell complete the path.
# The default for Cloudera CDH 5.3 is set below.
# Exporting in a child shell (e.g. os.system("export ...")) never reaches this process,
# so the variables are set through os.environ and inherited by every subprocess below.
os.environ["JAVA_HOME"] = "/usr/java/jdk1.7.0_67-cloudera/"
os.environ["HADOOP_CLASSPATH"] = os.path.join(os.environ["JAVA_HOME"], "lib/tools.jar")
clean_hdfs = "hdfs dfs -rm -r " + output_directory

if len(sys.argv) < 2:
    sys.exit("usage: {} make|run|cleanDFS|cleanJar|getoutput".format(sys.argv[0]))
if sys.argv[1] == 'make':
    os.system(compile_java)
    os.system(create_jar)
if sys.argv[1] == 'cleanDFS':
    subprocess.call(clean_hdfs, shell=True)
if sys.argv[1] == 'cleanJar':
    subprocess.call(clean_jar, shell=True)
if sys.argv[1] == 'run':
    subprocess.call(hadoop, shell=True)
if sys.argv[1] == 'getoutput':
    subprocess.call("hdfs dfs -cat " + output_directory + "/part*", shell=True)