RadLikeWhoa · March 13, 2017 14:28
diff --git a/persons.notebook b/persons.notebook
 // Location of the raw persons export (CSV whose first row holds the column names).
 val inputFile = "/Users/Sacha/Google Drive/FHNW/2017 FS/ivis (6iEng)/02-data-exploration/data/persons-raw.csv"

 // Read the CSV with a header row and inferred column types. `.csv(path)` already
 // selects the CSV data source, so a separate `.format("csv")` call is not needed.
 val data = spark.read.option("header", "true").option("inferSchema", "true").csv(inputFile)

 // Expose the raw rows to SQL under the name `persons`.
 data.createOrReplaceTempView("persons")

 // Cleaning query over the `persons` view:
 //  - First Name:    first character kept as-is, remainder lower-cased.
 //  - Date Of Birth: values shaped like `dd.mm.yy` get '19' inserted before the
 //                   two-digit year; any other value passes through unchanged.
 //                   NOTE(review): assumes every two-digit year belongs to 19xx - confirm against the data.
 //  - Shirtsize:     the three 'X'-marker columns are folded into 'S' / 'M' / 'L',
 //                   or null when none of them is marked.
 //  - Rows without an `Index` are dropped.
 // SUBSTRING takes a 1-based start position and a length, so the positions are
 // written as (1, 1), (1, 6) and (7, 2) rather than relying on lenient handling of
 // position 0 and of over-long lengths.
 val cleanedPersonsSql =
   """SELECT `Index`,
     |  CONCAT(SUBSTRING(`First Name`, 1, 1), LOWER(SUBSTRING(`First Name`, 2))) AS `First Name`,
     |  `Last Name`,
     |  regexp_replace(
     |    `Date Of Birth`,
     |    '^.{2}[.].{2}[.].{2}$',
     |    CONCAT(SUBSTRING(`Date Of Birth`, 1, 6), '19', SUBSTRING(`Date Of Birth`, 7, 2))
     |  ) AS `Date Of Birth`,
     |  `Height`,
     |  `Weight`,
     |  (CASE
     |    WHEN `Small Shirt Size` = 'X' THEN 'S'
     |    WHEN `Medium Shirt Size` = 'X' THEN 'M'
     |    WHEN `Large Shirt Size` = 'X' THEN 'L'
     |    ELSE null
     |  END) AS Shirtsize,
     |  `Hair Color`
     |FROM persons
     |WHERE `Index` IS NOT NULL""".stripMargin

 // Run the query on the same session that registered the view and print up to 1000 rows.
 spark.sql(cleanedPersonsSql).show(1000)
	// Location of the raw persons export (CSV whose first row holds the column names).
	val inputFile = "/Users/Sacha/Google Drive/FHNW/2017 FS/ivis (6iEng)/02-data-exploration/data/persons-raw.csv"

	// Read the CSV with a header row and inferred column types. `.csv(path)` already
	// selects the CSV data source, so a separate `.format("csv")` call is not needed.
	val data = spark.read.option("header", "true").option("inferSchema", "true").csv(inputFile)

	// Expose the raw rows to SQL under the name `persons`.
	data.createOrReplaceTempView("persons")

	// Cleaning query over the `persons` view:
	//  - First Name:    first character kept as-is, remainder lower-cased.
	//  - Date Of Birth: values shaped like `dd.mm.yy` get '19' inserted before the
	//                   two-digit year; any other value passes through unchanged.
	//                   NOTE(review): assumes every two-digit year belongs to 19xx - confirm against the data.
	//  - Shirtsize:     the three 'X'-marker columns are folded into 'S' / 'M' / 'L',
	//                   or null when none of them is marked.
	//  - Rows without an `Index` are dropped.
	// SUBSTRING takes a 1-based start position and a length, so the positions are
	// written as (1, 1), (1, 6) and (7, 2) rather than relying on lenient handling of
	// position 0 and of over-long lengths.
	val cleanedPersonsSql =
		"""SELECT `Index`,
		  |  CONCAT(SUBSTRING(`First Name`, 1, 1), LOWER(SUBSTRING(`First Name`, 2))) AS `First Name`,
		  |  `Last Name`,
		  |  regexp_replace(
		  |    `Date Of Birth`,
		  |    '^.{2}[.].{2}[.].{2}$',
		  |    CONCAT(SUBSTRING(`Date Of Birth`, 1, 6), '19', SUBSTRING(`Date Of Birth`, 7, 2))
		  |  ) AS `Date Of Birth`,
		  |  `Height`,
		  |  `Weight`,
		  |  (CASE
		  |    WHEN `Small Shirt Size` = 'X' THEN 'S'
		  |    WHEN `Medium Shirt Size` = 'X' THEN 'M'
		  |    WHEN `Large Shirt Size` = 'X' THEN 'L'
		  |    ELSE null
		  |  END) AS Shirtsize,
		  |  `Hair Color`
		  |FROM persons
		  |WHERE `Index` IS NOT NULL""".stripMargin

	// Run the query on the same session that registered the view and print up to 1000 rows.
	spark.sql(cleanedPersonsSql).show(1000)