package retail

/**
 * Created by itversity on 20/03/17.
 */

/* build.sbt
name := "retail"
*/
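For reference, a plausible complete build.sbt for these snippets, given the Spark core, Spark SQL/Hive, and Typesafe Config imports used below; the version numbers are assumptions, not taken from the gist:

name := "retail"

version := "1.0"

scalaVersion := "2.10.6"

libraryDependencies += "org.apache.spark" % "spark-core_2.10" % "1.6.2"
libraryDependencies += "org.apache.spark" % "spark-sql_2.10" % "1.6.2"
libraryDependencies += "org.apache.spark" % "spark-hive_2.10" % "1.6.2"
libraryDependencies += "com.typesafe" % "config" % "1.3.1"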
dgadiraju / hive_create_tables.hql
-- Make sure you save oozie-fork-join-workflow.xml as workflow.xml in a directory (let's say daily_revenue)
use dgadiraju_oozie;
drop table orders;
drop table order_items;
create external table orders (
  order_id int,
  order_date string,
  order_customer_id int,
  order_status string
) row format delimited fields terminated by ','
location '/user/dgadiraju/daily_revenue/orders';
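-- The script drops order_items above, but its create statement is missing from
-- the capture. A sketch following the standard retail_db column layout; the
-- location path mirrors the orders table and is an assumption:
create external table order_items (
  order_item_id int,
  order_item_order_id int,
  order_item_product_id int,
  order_item_quantity int,
  order_item_subtotal float,
  order_item_product_price float
) row format delimited fields terminated by ','
location '/user/dgadiraju/daily_revenue/order_items';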
package retail
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 27/03/17.
 */
object DailyRevenuePerDayPerDepartment {
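  // The gist body is truncated here. A sketch of the computation the name
  // implies, assuming the standard retail_db CSV layout and hypothetical
  // input/output arguments; none of the paths or column positions below are
  // from the original:
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("DailyRevenuePerDayPerDepartment").setMaster("local")
    val sc = new SparkContext(conf)
    val inputPath = args(0)   // e.g. /public/retail_db
    val outputPath = args(1)

    val fs = FileSystem.get(sc.hadoopConfiguration)
    if (fs.exists(new Path(outputPath))) fs.delete(new Path(outputPath), true)

    // (order_id, order_date) for completed and closed orders only
    val orders = sc.textFile(inputPath + "/orders").map(_.split(",")).
      filter(o => o(3) == "COMPLETE" || o(3) == "CLOSED").
      map(o => (o(0).toInt, o(1)))
    // (order_id, (product_id, subtotal))
    val orderItems = sc.textFile(inputPath + "/order_items").map(_.split(",")).
      map(oi => (oi(1).toInt, (oi(2).toInt, oi(4).toFloat)))
    // (product_id, category_id), (category_id, department_id), (department_id, name)
    val products = sc.textFile(inputPath + "/products").map(_.split(",")).
      map(p => (p(0).toInt, p(1).toInt))
    val categories = sc.textFile(inputPath + "/categories").map(_.split(",")).
      map(c => (c(0).toInt, c(1).toInt))
    val departments = sc.textFile(inputPath + "/departments").map(_.split(",")).
      map(d => (d(0).toInt, d(1)))

    // Walk the join chain down to departments, then aggregate per (date, department)
    orders.join(orderItems).
      map { case (_, (date, (productId, subtotal))) => (productId, (date, subtotal)) }.
      join(products).
      map { case (_, ((date, subtotal), categoryId)) => (categoryId, (date, subtotal)) }.
      join(categories).
      map { case (_, ((date, subtotal), departmentId)) => (departmentId, (date, subtotal)) }.
      join(departments).
      map { case (_, ((date, subtotal), departmentName)) => ((date, departmentName), subtotal) }.
      reduceByKey(_ + _).
      map { case ((date, dept), revenue) => s"$date,$dept,$revenue" }.
      saveAsTextFile(outputPath)
    sc.stop()
  }
}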
package nyse
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 28/03/17.
 */
object TopNStocksByVolume {
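  // Truncated in the capture. A sketch of a likely implementation, assuming
  // end-of-day NYSE records of the form ticker,date,open,high,low,close,volume;
  // the argument layout is an assumption:
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TopNStocksByVolume").setMaster("local")
    val sc = new SparkContext(conf)
    val inputPath = args(0)
    val topN = args(1).toInt

    val fs = FileSystem.get(sc.hadoopConfiguration)
    require(fs.exists(new Path(inputPath)), s"$inputPath does not exist")

    // Total traded volume per ticker, highest first
    sc.textFile(inputPath).map(_.split(",")).
      map(r => (r(0), r(6).toLong)).
      reduceByKey(_ + _).
      sortBy(-_._2).
      take(topN).
      foreach { case (ticker, volume) => println(s"$ticker\t$volume") }
    sc.stop()
  }
}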
package nyse
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 28/03/17.
 */
object TopNStocksByVolumeWithName {
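  // Also truncated. Relative to TopNStocksByVolume above, the "WithName"
  // variant plausibly joins the aggregated volumes against a symbol-to-name
  // lookup file; the lookup format (symbol,company_name) is an assumption:
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TopNStocksByVolumeWithName").setMaster("local")
    val sc = new SparkContext(conf)

    // Total traded volume per ticker
    val totalVolumeByTicker = sc.textFile(args(0)).map(_.split(",")).
      map(r => (r(0), r(6).toLong)).
      reduceByKey(_ + _)
    // (symbol, company_name) lookup
    val companyNames = sc.textFile(args(1)).map(_.split(",")).
      map(r => (r(0), r(1)))

    totalVolumeByTicker.join(companyNames).
      map { case (ticker, (volume, name)) => (volume, (ticker, name)) }.
      sortByKey(ascending = false).
      take(args(2).toInt).
      foreach { case (volume, (ticker, name)) => println(s"$ticker\t$name\t$volume") }
    sc.stop()
  }
}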
-- First, create a database using this command:
-- create database <database_name>;
-- Then switch to that database:
-- use <database_name>;
-- Create the tables and load the data using the commands below.
-- Run show tables to list the tables.
create table departments (
department_id int,
department_name string
) row format delimited fields terminated by ',';
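-- The load step referred to above is missing from the capture. A typical load
-- for this table; the local path here is an assumption:
load data local inpath '/data/retail_db/departments' into table departments;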
package retail
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
/**
 * Created by itversity on 27/03/17.
 * build.sbt
 */
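// No object definition survives below the header. Given the HiveContext
// import, a minimal sketch of the usual pattern; the object name, database,
// and query are assumptions, not from the gist:
object DailyRevenueUsingHiveContext {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("DailyRevenueUsingHiveContext").setMaster("local")
    val sc = new SparkContext(conf)
    val hiveContext = new HiveContext(sc)

    // Query tables registered in the Hive metastore, e.g. the ones created
    // by hive_create_tables.hql above
    hiveContext.sql("use dgadiraju_oozie")
    hiveContext.sql(
      """select o.order_date, sum(oi.order_item_subtotal) daily_revenue
        |from orders o join order_items oi on o.order_id = oi.order_item_order_id
        |where o.order_status in ('COMPLETE', 'CLOSED')
        |group by o.order_date""".stripMargin).
      show()
    sc.stop()
  }
}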
package retail
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 01/04/17.
 * Problem statement:
 * Get the total revenue per day for all completed and closed orders.
 */
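// The object that solved this is truncated away. A sketch of one core-API
// solution to the stated problem; the object name, argument layout, and
// retail_db CSV column positions are assumptions:
object TotalRevenuePerDay {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TotalRevenuePerDay").setMaster("local")
    val sc = new SparkContext(conf)
    val inputPath = args(0)
    val outputPath = args(1)

    val fs = FileSystem.get(sc.hadoopConfiguration)
    if (fs.exists(new Path(outputPath))) fs.delete(new Path(outputPath), true)

    // (order_id, order_date) for COMPLETE/CLOSED orders only
    val orders = sc.textFile(inputPath + "/orders").map(_.split(",")).
      filter(o => o(3) == "COMPLETE" || o(3) == "CLOSED").
      map(o => (o(0).toInt, o(1)))
    // (order_id, order_item_subtotal)
    val orderItems = sc.textFile(inputPath + "/order_items").map(_.split(",")).
      map(oi => (oi(1).toInt, oi(4).toFloat))

    orders.join(orderItems).
      map { case (_, (date, subtotal)) => (date, subtotal) }.
      reduceByKey(_ + _).
      map { case (date, revenue) => s"$date,$revenue" }.
      saveAsTextFile(outputPath)
    sc.stop()
  }
}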
package nyse
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 30/03/17.
 */
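// Nothing below the header survives. Given the SQLContext import, a sketch of
// a DataFrame/SQL variant of the NYSE volume ranking; the object name, schema,
// and argument layout are assumptions:
object TopNStocksByVolumeSQL {
  case class Eod(ticker: String, tradeDate: String, volume: Long)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TopNStocksByVolumeSQL").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // ticker,date,open,high,low,close,volume -> DataFrame
    val eod = sc.textFile(args(0)).map(_.split(",")).
      map(r => Eod(r(0), r(1), r(6).toLong)).toDF()
    eod.registerTempTable("eod")

    sqlContext.sql(
      """select ticker, sum(volume) total_volume
        |from eod group by ticker
        |order by total_volume desc""".stripMargin).
      show(args(1).toInt)
    sc.stop()
  }
}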
# fmp.conf: a multiplexing agent that saves one copy of the data in HDFS and
# streams another copy to Kafka, so that the data can be processed by
# streaming technologies such as Spark Streaming
# Name the components on this agent
fmp.sources = logsource
fmp.sinks = kafkasink hdfssink
fmp.channels = kafkachannel hdfschannel
# Describe/configure the source
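# The remainder of the file is truncated in the capture. A sketch of how such
# an agent is typically completed; the command, broker, topic, and HDFS path
# below are assumptions, not taken from the original:
fmp.sources.logsource.type = exec
fmp.sources.logsource.command = tail -F /opt/gen_logs/logs/access.log

# Send a copy of every event to both channels
fmp.sources.logsource.selector.type = replicating
fmp.sources.logsource.channels = kafkachannel hdfschannel

# Kafka-bound path (Flume 1.6-style KafkaSink properties)
fmp.channels.kafkachannel.type = memory
fmp.sinks.kafkasink.type = org.apache.flume.sink.kafka.KafkaSink
fmp.sinks.kafkasink.brokerList = localhost:9092
fmp.sinks.kafkasink.topic = fmp
fmp.sinks.kafkasink.channel = kafkachannel

# HDFS-bound path
fmp.channels.hdfschannel.type = memory
fmp.sinks.hdfssink.type = hdfs
fmp.sinks.hdfssink.hdfs.path = /user/dgadiraju/flume_demo
fmp.sinks.hdfssink.hdfs.fileType = DataStream
fmp.sinks.hdfssink.channel = hdfschannel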