Created
October 28, 2016 04:09
-
-
Save akhld/dfef4df88c9f37ec8abe8c0971270570 to your computer and use it in GitHub Desktop.
Reads multiple daily parquet folders, tags each with a date partition column, and appends them to a partitioned Hive table.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Locate per-day KMeans prediction parquet folders for the last 30 days,
// stamp each with its date, and write them into the partitioned Hive table.

// Base HDFS path; the date (yyyy-MM-dd) is appended to form each day's folder.
val storage = "hdfs://nameservice1/user/plutus/data/kmeans_prediction_par_"

// (date, path) pairs for the previous 30 days, keeping only folders that
// actually exist on HDFS — this may legitimately yield an empty sequence.
val penInputs = (1 to 30).map { daysBack =>
  val date = DateTime.now().minusDays(daysBack).toString("yyyy-MM-dd")
  (date, storage + date)
}.filter { case (_, path) =>
  HdfsTools.checkIfFolderExists(new Path(path))
}

penInputs.foreach(println)

// Load each day's parquet and add the partition column "datadate" so the
// rows carry the date of the folder they came from.
val dfs = penInputs.map { case (date, path) =>
  hiveContext.read.parquet(path).withColumn("datadate", lit(date))
}

// Sanity-check the first dataframe. Guarded with headOption: the original
// dfs(0) would throw IndexOutOfBoundsException when no input folders exist.
dfs.headOption.foreach { sample =>
  sample.take(10).foreach(println)
  sample.printSchema()
}

// Insert each day's data into its own "datadate" partition.
// NOTE(review): overwrite = true presumably replaces only the matching
// partition rather than the whole table — confirm insertIntoTable's
// semantics before relying on this for incremental loads.
dfs.foreach { df =>
  df.insertIntoTable(
    table = "plutus.pen_data",
    overwrite = true,
    partitionBy = Seq("datadate"),
    createTable = true,
    tableStore = ParquetStore(),
    distributeBy = NoDistributeBy
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment