Skip to content

Instantly share code, notes, and snippets.

View rickyngk's full-sized avatar

Ricky.ngk rickyngk

  • Vietnamworks
  • Vietnam
View GitHub Profile
/* SimpleApp.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
object SimpleApp {
def textSearch(textData: RDD[String]) {
val cases = textData.filter(line => line.contains("Spark"))
println(cases.count());
@rickyngk
rickyngk / makefile
Created November 17, 2015 17:25
Makefile for a simple Spark Scala demo
# Build and run targets for the simple Spark Scala demo.
#
# Every setting below can be overridden on the command line, e.g.:
#   make run SPARK_HOME=/opt/spark MASTER='local[2]'
# Note: because ?= is used, a SPARK_HOME already exported in the environment
# takes precedence over the default below.
SPARK_HOME ?= $(HOME)/spark-1.5.2
MASTER     ?= local[4]
APP_CLASS  ?= SimpleApp
APP_JAR    ?= target/scala-2.11/simple-project_2.11-1.0.jar

# Neither target produces a file named after itself, so mark both as phony;
# otherwise a stray file or directory called "build" or "run" would make the
# target appear up to date and silently skip the recipe.
.PHONY: build run

# Package the application jar with sbt.
build:
	sbt package

# Submit the packaged jar to Spark. The master URL is quoted so the shell
# cannot interpret the brackets in "local[4]" as a glob pattern.
run:
	$(SPARK_HOME)/bin/spark-submit --class "$(APP_CLASS)" --master "$(MASTER)" $(APP_JAR)
@rickyngk
rickyngk / core-site.xml
Created November 19, 2015 17:45
Hadoop core-site.xml sample
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
@rickyngk
rickyngk / yarn-site.xml
Created November 19, 2015 17:50
Hadoop yarn-site.xml sample
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@rickyngk
rickyngk / mapred-site.xml
Created November 19, 2015 17:53
Hadoop mapred-site.xml sample
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
@rickyngk
rickyngk / hdfs-site.xml
Created November 19, 2015 17:55
Hadoop hdfs-site.xml sample
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
@rickyngk
rickyngk / SimpleApp.scala
Last active November 29, 2016 06:35
SimpleApp.scala with hadoop
/* SimpleApp.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
object SimpleApp {
val hadoop_uri = "hdfs://172.18.2.108:9000"
def textSearch(textData: RDD[String]) {
@rickyngk
rickyngk / simple.sbt
Created November 21, 2015 08:17
Day 04 simple.sbt
// sbt build definition for the demo application.
name := "Simple Project"

version := "1.0"

scalaVersion := "2.11.7"

// Spark core is published per Scala version; %% appends the Scala binary
// version suffix derived from scalaVersion to the artifact name.
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.5.2"

// The Hadoop client is a plain Java artifact, hence the single %.
libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "2.6.0"
@rickyngk
rickyngk / simple.sbt
Created November 21, 2015 08:23
simple.sbt with mllib
// sbt build definition for the demo application with Spark MLlib.
name := "Simple Project"

version := "1.0"

scalaVersion := "2.11.7"

// Single source of truth so spark-core and spark-mllib cannot drift apart.
val sparkVersion = "1.5.2"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion,
  // Plain Java artifact, so no Scala version suffix (single %).
  "org.apache.hadoop" % "hadoop-client" % "2.6.0",
  // Use %% instead of hard-coding "_2.11" so the artifact suffix always
  // follows scalaVersion, matching how spark-core is declared above.
  "org.apache.spark" %% "spark-mllib" % sparkVersion
)
@rickyngk
rickyngk / SimpleApp.scala
Last active November 21, 2015 08:30
SimpleApp.scala for first Spark Mllib demo
/* SimpleApp.scala */
import org.apache.spark.mllib.linalg.{Vector, Vectors}
object SimpleApp {
def main(args: Array[String]) {
// Create a dense vector (1.0, 0.0, 3.0).
val dv: Vector = Vectors.dense(1.0, 0.0, 3.0)
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries.
val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries.