Created
January 19, 2019 04:02
-
-
Save cozek/264d0be08b372704638455ce8402a707 to your computer and use it in GitHub Desktop.
Python 3 script for easily running Hadoop MapReduce programs.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Helper script for compiling, packaging and running a Hadoop MapReduce job.

Usage: script.py {setenv|make|cleanDFS|cleanJar|run|getoutput}

DISCLAIMER: provided with no warranty; you are responsible for whatever may
befall you or your property as a result of using this script. You implicitly
agree to this by running it. Feel free to improve, modify and distribute.
"""
import os
import subprocess
import sys

# --- Job configuration: edit these to match your environment ---------------
INPUT_DIRECTORY = "/user/cosec/words.txt"  # input file; must already exist in HDFS
OUTPUT_DIRECTORY = "/user/cosec/op3"       # HDFS location where the job output goes
PROGRAM_NAME = "WordCount"                 # main class name inside the jar
JAR_FILE = "cw.jar"                        # jar package this script creates
JAVA_PROGRAM = "WordCount.java"            # source file; must be compatible with your `hadoop version`

# JAVA_HOME differs between ready-made Hadoop development platforms — set it
# accordingly (easy way: type /usr/java/ and press TAB to let the shell fill
# it in). The default for Cloudera CDH 5.3 is configured below.
ENV_SETUP = """export JAVA_HOME=/usr/java/jdk1.7.0_67-cloudera/
export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar"""


def compile_command():
    """Shell command compiling the Java source with Hadoop's bundled javac."""
    return "hadoop com.sun.tools.javac.Main " + JAVA_PROGRAM


def jar_command():
    """Shell command packaging every compiled .class file into the jar."""
    return "jar cf " + JAR_FILE + " *.class"


def run_command():
    """Shell command submitting the job to Hadoop.

    Example: "hadoop jar cw.jar WordCount /user/cosec/words.txt /user/cosec/op3"
    """
    return "hadoop jar {} {} {} {}".format(
        JAR_FILE, PROGRAM_NAME, INPUT_DIRECTORY, OUTPUT_DIRECTORY
    )


def clean_jar_command():
    """Shell command removing the jar and all local build artifacts.

    Fix: the original glued two `rm` invocations together with no separator
    ("rm -f cw.jar rm -f *.class *.jar"), which also tried to delete a file
    literally named "rm" — a ';' now separates the two commands.
    """
    return "rm -f " + JAR_FILE + "; rm -f *.class *.jar"


def clean_hdfs_command():
    """Shell command recursively deleting the job's HDFS output directory."""
    return "hdfs dfs -rm -r " + OUTPUT_DIRECTORY


def cat_output_command():
    """Shell command printing every part-* file of the job's HDFS output."""
    return "hdfs dfs -cat " + OUTPUT_DIRECTORY + "/part*"


def main(argv):
    """Dispatch the requested action; return a process exit code."""
    if len(argv) < 2:
        # Original crashed with IndexError when run without an argument.
        print(__doc__)
        return 1
    action = argv[1]
    if action == 'setenv':
        # NOTE(review): os.system runs these exports in a *child* shell, so
        # they do NOT persist in the caller's environment. Run the exports in
        # your own shell instead; kept here for compatibility with the
        # original script's behavior.
        os.system(ENV_SETUP)
    if action == 'make':
        subprocess.call(compile_command(), shell=True)
        subprocess.call(jar_command(), shell=True)
    if action == 'cleanDFS':
        subprocess.call(clean_hdfs_command(), shell=True)
    if action == 'cleanJar':
        subprocess.call(clean_jar_command(), shell=True)
    if action == 'run':
        subprocess.call(run_command(), shell=True)
    if action == 'getoutput':
        subprocess.call(cat_output_command(), shell=True)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment