Skip to content

Instantly share code, notes, and snippets.

View oluies's full-sized avatar

Örjan Angré (Lundberg) oluies

  • Sweden
  • 18:16 (UTC +02:00)
  • X @oluies
View GitHub Profile
# Plotly offline/notebook setup: import the plotting API, then enable
# inline rendering and silence urllib3 TLS warnings raised via `requests`.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import requests
# Suppress urllib3 warning noise (e.g. InsecureRequestWarning when cert
# verification is disabled) emitted through the vendored requests bundle.
requests.packages.urllib3.disable_warnings()
# Render figures inline in the Jupyter notebook; connected=True loads
# plotly.js from the CDN instead of embedding it in the notebook.
init_notebook_mode(connected=True)
import com.typesafe.config.ConfigFactory
import twitter4j.{RateLimitStatus, TwitterFactory}
import twitter4j.conf.ConfigurationBuilder
import java.util.{Timer, TimerTask}
import scala.collection.JavaConverters._
import scala.collection.mutable
object AllListContentFollowers {
def main(args : Array[String]): Unit = {
/**
 * Returns the value at column [c] of row [r] of Pascal's triangle
 * (both zero-indexed), computed by the recursive identity
 * pascal(c, r) = pascal(c-1, r-1) + pascal(c, r-1).
 *
 * @throws IllegalArgumentException if [c] or [r] is negative.
 */
fun pascal(c: Int, r: Int): Int {
    // Original messages claimed "larger than zero" while the checks accept
    // zero; the messages now match the actual >= 0 contract.
    require(c >= 0) { "column must be non-negative" }
    require(r >= 0) { "row must be non-negative" }
    return when {
        r == 0 -> 1
        r == 1 -> 1
        c == 0 -> 1
        c == r -> 1
        else -> pascal(c - 1, r - 1) + pascal(c, r - 1)
    }
}
> $ twurl /1.1/lists/members.json --data 'count=700&owner_screen_name=Europarl_EN&slug=all-meps-on-twitter' | jq '.users[].id' > meps.txt
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.combient.sparkjob.tedsds
/**
* Created by olu on 09/03/16.
*/
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
// Read each input directory as a DataFrame, union them all, and rewrite the
// combined data as `numOutputFiles` parquet files under `destDir`.
// The original filled a mutable list via `map` used purely for its side
// effect (and `MutableList` was not in scope as written); building the
// sequence directly with `map` is both correct and idiomatic.
// NOTE(review): assumes `sourceDirsToConsolidate` is non-empty — `reduce`
// throws on an empty collection; confirm with the caller.
val dfSeq = sourceDirsToConsolidate.map(dir => sqlContext.parquetFile(dir))
val masterDf = dfSeq.reduce((df1, df2) => df1.unionAll(df2))
masterDf.coalesce(numOutputFiles).write.mode(saveMode).parquet(destDir)
2016-02-21 22:16:19,395 - [INFO] - from play in main
Application started (Prod)
2016-02-21 22:16:19,522 - [INFO] - from play in main
Listening for HTTP on /0:0:0:0:0:0:0:0:9000
2016-02-21 22:19:12,913 - [DEBUG] - from application in New I/O worker #1
Notebooks directory in the config is referring ./notebooks. Does it exist? true
2016-02-21 22:19:13,037 - [INFO] - from application in New I/O worker #1
#!/usr/bin/env bash
### ------------------------------- ###
### Helper methods for BASH scripts ###
### ------------------------------- ###
# die MESSAGE... — print the message to stderr and abort with status 1.
die() {
  echo "$@" >&2
  exit 1
}
# Pull in the base configuration; the keys below layer on top of it.
include "application"
# spark-notebook manager settings.
manager {
notebooks {
# Filesystem directory where spark-notebook stores notebook files.
dir = "/usr/share/spark-notebook/notebooks"
# Per-notebook overrides applied on top of the default Spark configuration.
override {
sparkConf = {
# HDP stack version must be handed to both the driver and the YARN AM JVMs
# so the cluster can resolve its version-stamped classpath.
spark.driver.extraJavaOptions: "-Dhdp.version=2.4.0.0-169",
spark.yarn.am.extraJavaOptions: "-Dhdp.version=2.4.0.0-169"
}