lc0 · December 16, 2015 07:09
diff --git a/exercise3.R b/exercise3.R
 # Training: Big Data Analyses with R
 # Exercise 3

 # Set the Hadoop executable and streaming-jar locations in one call.
 Sys.setenv(
   HADOOP_CMD = "/usr/bin/hadoop",
   HADOOP_STREAMING = "/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.2.0.jar"
 )
 library(rmr2)

 # Put the integers 1..1000 on the DFS, then run a job with only a map step
 # that binds each value to its square as two columns.
 input <- to.dfs(1:1000)
 out <- mapreduce(input = input, map = function(k, v) cbind(v, v^2))

 # Read the job output back into the R session.
 res <- from.dfs(out)


 #
 # Wordcount example
 #

 wc.map <-
  function(., lines) {
    # Map step of the word count. The key argument (`.`) is ignored.
    # Every line is cut at each single space and the pieces are flattened
    # into one character vector; consecutive spaces therefore yield "" tokens.
    words <- unlist(strsplit(lines, split = " "))
    # One key per token, each paired with the count 1.
    keyval(words, 1)
  }

 wc.reduce <-
  function(word, counts ) {
    # Reduce step of the word count: add up all counts received for `word`
    # and emit a single (word, total) pair.
    total <- sum(counts)
    keyval(word, total)
  }


 # Run the word count over /hadoop.txt, read with the "text" input format,
 # using wc.map / wc.reduce as the map and reduce steps.
 out <- mapreduce(
  input = "/hadoop.txt",
  input.format = "text",
  map = wc.map,
  reduce = wc.reduce,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  combine = TRUE)
 res <- from.dfs(out)

 # Position(s) in the result whose count is exactly 105.
 which(res$val == 105)
 # NOTE(review): 123 is hard-coded - presumably the index printed by which()
 # above for the training data set; confirm before reusing with other input.
 res$key[123]

 #
 # hdfs
 #

 # Set the Hadoop executable path again (same value as at the top of the
 # script) so this section can be run on its own.
 Sys.setenv(HADOOP_CMD="/usr/bin/hadoop")
 library(rhdfs)
 # Initialise rhdfs - presumably required before the other hdfs.* calls;
 # confirm against the rhdfs documentation.
 hdfs.init()
 # List the contents of /tmp.
 hdfs.ls("/tmp")
 # Write the integers 1..1000 to a file opened with mode "w".
 data <- 1:1000
 filename <- "my_smart_unique_name"
 # NOTE(review): the variable `file` shares its name with base R's file().
 file <- hdfs.file(filename, "w")
 hdfs.write(data, file)
 hdfs.close(file)
 # Where is the file? The name above has no directory part.
 # Exercise: each ?? below is a placeholder to be replaced with the right
 # path; the ?? lines will not parse as written.
 # List the files in your home directory.
 hdfs.ls(??)
 # Move the file to /tmp/test.
 hdfs.mv(??, "/tmp/test")
 # Remove the file.
 hdfs.del(??)


 #
 # Advanced Map Reduce
 #
 # In your exercise folder there is a file faust.txt.
 # This file holds "Faust: Der Tragödie erster Teil, by Johann Wolfgang von Goethe".
 # What is the most frequently used word?
 #
 # Can you implement matrix multiplication with Hadoop?
 #
	# Training: Big Data Analyses with R
	# Exercise 3

	# Set the Hadoop executable and streaming-jar locations in one call.
	Sys.setenv(
	  HADOOP_CMD = "/usr/bin/hadoop",
	  HADOOP_STREAMING = "/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.2.0.jar"
	)
	library(rmr2)

	# Put the integers 1..1000 on the DFS, then run a job with only a map step
	# that binds each value to its square as two columns.
	input <- to.dfs(1:1000)
	out <- mapreduce(input = input, map = function(k, v) cbind(v, v^2))

	# Read the job output back into the R session.
	res <- from.dfs(out)


	#
	# Wordcount example
	#

	wc.map <-
	  function(., lines) {
	    # Map step of the word count. The key argument (`.`) is ignored.
	    # Every line is cut at each single space and the pieces are flattened
	    # into one character vector; consecutive spaces therefore yield "" tokens.
	    words <- unlist(strsplit(lines, split = " "))
	    # One key per token, each paired with the count 1.
	    keyval(words, 1)
	  }

	wc.reduce <-
	  function(word, counts ) {
	    # Reduce step of the word count: add up all counts received for `word`
	    # and emit a single (word, total) pair.
	    total <- sum(counts)
	    keyval(word, total)
	  }


	# Run the word count over /hadoop.txt, read with the "text" input format,
	# using wc.map / wc.reduce as the map and reduce steps.
	out <- mapreduce(
	  input = "/hadoop.txt",
	  input.format = "text",
	  map = wc.map,
	  reduce = wc.reduce,
	  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
	  combine = TRUE)
	res <- from.dfs(out)

	# Position(s) in the result whose count is exactly 105.
	which(res$val == 105)
	# NOTE(review): 123 is hard-coded - presumably the index printed by which()
	# above for the training data set; confirm before reusing with other input.
	res$key[123]

	#
	# hdfs
	#

	# Set the Hadoop executable path again (same value as at the top of the
	# script) so this section can be run on its own.
	Sys.setenv(HADOOP_CMD="/usr/bin/hadoop")
	library(rhdfs)
	# Initialise rhdfs - presumably required before the other hdfs.* calls;
	# confirm against the rhdfs documentation.
	hdfs.init()
	# List the contents of /tmp.
	hdfs.ls("/tmp")
	# Write the integers 1..1000 to a file opened with mode "w".
	data <- 1:1000
	filename <- "my_smart_unique_name"
	# NOTE(review): the variable `file` shares its name with base R's file().
	file <- hdfs.file(filename, "w")
	hdfs.write(data, file)
	hdfs.close(file)
	# Where is the file? The name above has no directory part.
	# Exercise: each ?? below is a placeholder to be replaced with the right
	# path; the ?? lines will not parse as written.
	# List the files in your home directory.
	hdfs.ls(??)
	# Move the file to /tmp/test.
	hdfs.mv(??, "/tmp/test")
	# Remove the file.
	hdfs.del(??)


	#
	# Advanced Map Reduce
	#
	# In your exercise folder there is a file faust.txt.
	# This file holds "Faust: Der Tragödie erster Teil, by Johann Wolfgang von Goethe".
	# What is the most frequently used word?
	#
	# Can you implement matrix multiplication with Hadoop?
	#
No results found