Initial setup for running distributed TensorFlow on Spark with Amazon EMR

This script requires an Amazon EMR cluster with one master node and three worker nodes:

library(sparklyr)
# Connection settings for the EMR cluster.
#   - Dynamic allocation is switched off and exactly three executors are
#     requested, matching the three partitions used by the job below.
#   - Each executor asks for 8 cores.
#     NOTE(review): presumably sized so one executor fills one node; confirm
#     against the EMR instance type in use.
#   - WORKON_HOME is exported to the spark_apply() workers and points at a
#     directory under /tmp for Python virtualenvs.
emr_config <- list(
    spark.dynamicAllocation.enabled = FALSE,
    `sparklyr.shell.executor-cores` = 8,
    `sparklyr.shell.num-executors` = 3,
    sparklyr.apply.env.WORKON_HOME = "/tmp/.virtualenvs"
)

# Connect through YARN using the Spark installation at /usr/lib/spark/.
sc <- spark_connect(
    master = "yarn",
    spark_home = "/usr/lib/spark/",
    config = emr_config
)

# Worker closure executed once per partition by spark_apply().
#
# Installs TensorFlow, builds a cluster spec from the addresses supplied in
# `barrier$address`, starts a TensorFlow server and returns the string form of
# a constant tensor. If anything fails, the error message is returned instead
# so it shows up in the collected result rather than aborting the job.
#
# `df` is the partition's data; it is not used.
tensorflow_hello <- function(df, barrier) {
    tryCatch(
        {
            library(tensorflow)
            install_tensorflow()

            # Sort the addresses so the first entry can be picked as the
            # parameter server and the remaining entries as workers.
            addresses <- sort(as.character(barrier$address))
            layout <- list(
                ps = list(addresses[1]),
                worker = as.list(addresses[-1])
            )
            cluster <- tf$train$ClusterSpec(layout)

            # NOTE(review): every task starts its server with
            # job_name = "ps"; confirm whether tasks should instead take
            # distinct job names / task indices.
            server <- tf$distribute$Server(cluster, job_name = "ps")

            as.character(tf$constant("Hello World"))
        },
        error = function(e) e$message
    )
}

# Three rows in three partitions, processed with barrier = TRUE. The single
# output column is named `address` but carries the closure's return value
# (the tensor string or an error message).
sdf_len(sc, 3, repartition = 3) %>%
    spark_apply(
        tensorflow_hello,
        barrier = TRUE,
        columns = c(address = "character")
    ) %>%
    collect()

References:

https://github.com/tensorflow/examples/blob/master/community/en/docs/deploy/distributed.md
https://stackoverflow.com/questions/39666845/how-does-tf-train-replica-device-setter-work
https://stackoverflow.com/questions/41166681/what-does-global-step-mean-in-tensorflow
https://stackoverflow.com/questions/33919948/how-to-set-adaptive-learning-rate-for-gradientdescentoptimizer

javierluraschi/spark-tensorflow-barrier-emr-setup.md