Jorge belenaj

# Kill the YARN application with the given application ID.
yarn application -kill application_1428487296152_25597

from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
import sys

# Obtain the session through the builder so that an already-running
# session/context (notebook, pyspark shell, re-executed cell) is reused.
# Constructing SparkContext('local') directly raises ValueError when a
# context is already active in the process.
spark = SparkSession.builder.master('local').getOrCreate()
# Keep `sc` available for code that uses the low-level RDD API.
sc = spark.sparkContext

Exercises

Select all "Harry Potter" books
Book with the most pages
Top 5 authors with the most books written (assume the author is in the first position of the array, "key" field, and that each row is a different book)
Top 5 genres with the most books

	// ...

	// Print each date from 30 days ago through today, formatted as yyyy-MM-dd.
	def endDate = new Date().clearTime() // today, with the time portion cleared
	def startDate = endDate - 30 // 30 days before endDate
	def newDateParsed

	// upto() invokes the closure once per day, from startDate to endDate inclusive;
	// `it` is the date of the current iteration.
	startDate.upto(endDate) {
	newDateParsed = it.format("yyyy-MM-dd")
	println(newDateParsed)

	// ...

	// Step through the most recent DAYS_BACK dates with a counted loop.
	def DAYS_BACK = 30
	def iterDate = new Date() - DAYS_BACK // starting point: DAYS_BACK days before now
	def newDateParse
	// NOTE(review): `i` is not declared with `def` here — confirm that is intended.
	for (i=0; i <DAYS_BACK; i++) {
	// The date is advanced before it is formatted, so the first date handled is
	// one day after the starting point and the last one is today.
	iterDate = iterDate + 1
	newDateParse = iterDate.format("yyyy-MM-dd")

	// presumably a Jenkins pipeline stage, one per date — confirm against the full pipeline
	stage("newDateParsed ${newDateParse}") {

	# Exasol docker-db image with the cloud-storage-etl-udfs jar placed in the
	# default BucketFS bucket directory.
	FROM exasol/docker-db:latest

	# Target directory inside the image and the UDF release to fetch.
	ENV EXA_BUCKET_PATH="/exa/data/bucketfs/bfsdefault/default" \
	    CLOUD_STORAGE_VERSION="0.6.0"
	# Kept in its own ENV instruction: it expands CLOUD_STORAGE_VERSION, which
	# must already be set by a previous instruction.
	ENV JAR_FILENAME="cloud-storage-etl-udfs-${CLOUD_STORAGE_VERSION}.jar"

	# Download the release jar from GitHub straight into the bucket directory.
	ADD https://github.com/exasol/cloud-storage-etl-udfs/releases/download/v${CLOUD_STORAGE_VERSION}/${JAR_FILENAME} ${EXA_BUCKET_PATH}/${JAR_FILENAME}
	# Open up the permissions on the downloaded jar.
	RUN chmod 775 "${EXA_BUCKET_PATH}/${JAR_FILENAME}"
	#RUN chown exadefusr:exausers ${EXA_BUCKET_PATH}/${JAR_FILENAME}