Alex oivoodoo
@oivoodoo
oivoodoo / d3.demo.js
Created December 4, 2018 07:16
D3 chart example
// D3 Chart Example
import "../styles/index.scss";
import * as d3 from "d3";

d3.csv("/public/data/File.csv", function(d) {
  // coerce the numeric column and keep the label column
  return { value: +d.NumericColumnB, name: d.StringColumnA };
}).then(function(data) {
  // sort data by value (descending comparator is an assumed completion;
  // the gist preview is truncated here)
  data.sort(function(a, b) {
    return b.value - a.value;
  });
});
// Graceful-stop helper for the Spark Streaming job (see the marker-file script below)
val restarter = new Restarter(ssc, CommandLineArgs.values.env)
restarter.setup()
restarter.await()
puts('[spark] removing marker file to trigger graceful stop of the spark-etl jobs')
`hadoop fs -rm -r /tmp/datalake.marker.*`

while true
  app_id = `yarn application -list | grep datalake.Main | awk -F " " '{print $1}'`.strip
  status = `yarn application -list | grep datalake.Main | awk -F " " '{print $6}'`.strip
  puts("[spark] application status: #{status}")
  if status == "ACCEPTED"
    puts("[spark] killing #{app_id}")
    # assumed completion: the gist preview ends at the line above
    `yarn application -kill #{app_id}`
  end
  sleep(10)
end
@oivoodoo
oivoodoo / log4j.properties
Created November 1, 2018 09:43
log4j.properties
# Spark Streaming Logging Configuration
# See also: http://spark.apache.org/docs/2.0.2/running-on-yarn.html#debugging-your-application
log4j.rootLogger=INFO, stdout, stderr
# Send ERROR-level logs to the standard Spark stderr container log
log4j.appender.stderr=org.apache.log4j.RollingFileAppender
log4j.appender.stderr.file=${spark.yarn.app.container.log.dir}/stderr
log4j.appender.stderr.threshold=ERROR
log4j.appender.stderr.RollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy
<hours> hours TASK-1
<minutes> minutes TASK-2
<days> days TASK-3
@oivoodoo
oivoodoo / export.py
Created August 5, 2018 04:40
Export from Wunderlist
import wunderpy2
from dateutil import parser
from datetime import date
from csv import writer
ACCESS_TOKEN = '<WUNDERLIST_ACCESS_TOKEN>'
CLIENT_ID = '<WUNDERLIST_CLIENT_ID>'
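The preview stops at the credentials. A rough sketch of how the export could continue, using wunderpy2's documented WunderApi/get_client interface; the output filename, CSV columns, and task field names are assumptions:

# Hypothetical continuation of export.py; reuses the imports and constants above.
api = wunderpy2.WunderApi()
client = api.get_client(ACCESS_TOKEN, CLIENT_ID)

with open('tasks.csv', 'w', newline='') as f:  # assumed output file
    out = writer(f)
    out.writerow(['list', 'task', 'due_date', 'created_at'])
    for wl_list in client.get_lists():
        for task in client.get_tasks(wl_list['id']):
            created = parser.parse(task['created_at']).date() if task.get('created_at') else ''
            out.writerow([wl_list['title'], task['title'], task.get('due_date', ''), created])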
@oivoodoo
oivoodoo / SparkCopyPostgres.scala
Created April 9, 2018 12:05 — forked from longcao/SparkCopyPostgres.scala
COPY Spark DataFrame rows to PostgreSQL (via JDBC)
import java.io.InputStream
import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils
import org.apache.spark.sql.{ DataFrame, Row }
import org.postgresql.copy.CopyManager
import org.postgresql.core.BaseConnection
val jdbcUrl = s"jdbc:postgresql://..." // db credentials elided
val connectionProperties = {
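The forked Scala gist is truncated after the imports. As a rough illustration of the same idea (bulk-loading DataFrame rows through PostgreSQL's COPY instead of row-by-row JDBC inserts), here is a short Python sketch using psycopg2; the DSN, table name, and per-partition wiring are assumptions, not the forked code:

# Hypothetical Python analogue of COPYing DataFrame rows into PostgreSQL.
import csv
import io
import psycopg2

def copy_partition(rows, dsn="dbname=example user=example", table="example_table"):  # assumed DSN/table
    buf = io.StringIO()
    w = csv.writer(buf)
    for row in rows:
        w.writerow(row)
    buf.seek(0)
    conn = psycopg2.connect(dsn)
    try:
        with conn, conn.cursor() as cur:
            # COPY ... FROM STDIN streams the whole CSV buffer in one round trip
            cur.copy_expert("COPY {} FROM STDIN WITH (FORMAT csv)".format(table), buf)
    finally:
        conn.close()

# In PySpark: df.foreachPartition(copy_partition) issues one COPY per partition.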
VERSION=0.1
DIR=`pwd`

build:
	docker run --rm -v ${DIR}:/src -w /src node:6-alpine ash -c "npm install" && \
	docker run --rm -v ${DIR}:/src -w /src node:6-alpine ash -c "NODE_ENV=prod npm run build" && \
	(docker rm scheduler-ui || true) && \
	(docker build -t scheduler-ui:${VERSION} .)
.PHONY: build

run:
	docker run -p 5000:80 -d scheduler-ui:${VERSION}
#!/bin/sh
# Download the Spark container logs for an EMR application from S3.
# Usage: <script> <cluster-id> <application-id> <env>
CLUSTER_ID=$1
APPLICATION_ID=$2
ENV=$3
(mkdir $CLUSTER_ID || true) && cd $CLUSTER_ID
echo "aws s3 cp s3://bworks-bi-emr-logs/$ENV/spark/$CLUSTER_ID/containers/$APPLICATION_ID/ . --recursive --profile blastworks"
aws s3 cp s3://bworks-bi-emr-logs/$ENV/spark/$CLUSTER_ID/containers/$APPLICATION_ID/ . --recursive --profile blastworks
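For reference, a rough boto3 equivalent of the same recursive download, sketched in Python; the bucket, prefix, and profile come from the script above, while the local directory layout is an assumption:

# Hypothetical boto3 sketch mirroring the aws s3 cp --recursive call above.
import os
import boto3

def download_container_logs(cluster_id, application_id, env,
                            profile="blastworks", bucket="bworks-bi-emr-logs"):
    prefix = "{}/spark/{}/containers/{}/".format(env, cluster_id, application_id)
    s3 = boto3.Session(profile_name=profile).client("s3")
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):
            local_path = os.path.join(cluster_id, os.path.relpath(obj["Key"], prefix))
            os.makedirs(os.path.dirname(local_path) or ".", exist_ok=True)
            s3.download_file(bucket, obj["Key"], local_path)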
import logging

from pykafka import KafkaClient

# Turn on pykafka debug logging before connecting so broker
# connection messages are captured on the console as well.
kafkalogger = logging.getLogger('pykafka')
kafkalogger.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.DEBUG)
kafkalogger.addHandler(console_handler)  # assumed completion: the preview never attaches the handler

client = KafkaClient(hosts="172.32.5.12:9092,172.32.15.116:9092,172.32.12.49:9092")
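A short usage sketch for the client above, following pykafka's documented consumer loop; the topic name is a placeholder:

# Hypothetical usage; the topic name is an assumption.
topic = client.topics[b'example.topic']
consumer = topic.get_simple_consumer()
for message in consumer:
    if message is not None:
        print(message.offset, message.value)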