Skip to content

Instantly share code, notes, and snippets.

View oluies's full-sized avatar

Örjan Angré (Lundberg) oluies

  • Sweden
  • 18:16 (UTC +02:00)
  • X @oluies
View GitHub Profile
# Plotly offline/notebook setup: import the plotting API, then enable
# inline rendering and silence urllib3 TLS warnings raised via `requests`.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import requests
# Suppress urllib3 warning noise (e.g. InsecureRequestWarning when cert
# verification is disabled) emitted through the vendored requests bundle.
requests.packages.urllib3.disable_warnings()
# Render figures inline in the Jupyter notebook; connected=True loads
# plotly.js from the CDN instead of embedding it in the notebook.
init_notebook_mode(connected=True)
import com.typesafe.config.ConfigFactory
import twitter4j.{RateLimitStatus, TwitterFactory}
import twitter4j.conf.ConfigurationBuilder
import java.util.{Timer, TimerTask}
import scala.collection.JavaConverters._
import scala.collection.mutable
object AllListContentFollowers {
def main(args : Array[String]): Unit = {
/**
 * Returns the value at column [c] of row [r] of Pascal's triangle
 * (both zero-indexed), computed by the recursive identity
 * pascal(c, r) = pascal(c-1, r-1) + pascal(c, r-1).
 *
 * @throws IllegalArgumentException if [c] or [r] is negative.
 */
fun pascal(c: Int, r: Int): Int {
    // Original messages claimed "larger than zero" while the checks accept
    // zero; the messages now match the actual >= 0 contract.
    require(c >= 0) { "column must be non-negative" }
    require(r >= 0) { "row must be non-negative" }
    return when {
        r == 0 -> 1
        r == 1 -> 1
        c == 0 -> 1
        c == r -> 1
        else -> pascal(c - 1, r - 1) + pascal(c, r - 1)
    }
}
> $ twurl /1.1/lists/members.json --data 'count=700&owner_screen_name=Europarl_EN&slug=all-meps-on-twitter' | jq '.users[].id' > meps.txt
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.combient.sparkjob.tedsds
/**
* Created by olu on 09/03/16.
*/
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
// Read each input directory as a DataFrame, union them all, and rewrite the
// combined data as `numOutputFiles` parquet files under `destDir`.
// The original filled a mutable list via `map` used purely for its side
// effect (and `MutableList` was not in scope as written); building the
// sequence directly with `map` is both correct and idiomatic.
// NOTE(review): assumes `sourceDirsToConsolidate` is non-empty — `reduce`
// throws on an empty collection; confirm with the caller.
val dfSeq = sourceDirsToConsolidate.map(dir => sqlContext.parquetFile(dir))
val masterDf = dfSeq.reduce((df1, df2) => df1.unionAll(df2))
masterDf.coalesce(numOutputFiles).write.mode(saveMode).parquet(destDir)
2016-02-21 22:16:19,395 - [INFO] - from play in main
Application started (Prod)
2016-02-21 22:16:19,522 - [INFO] - from play in main
Listening for HTTP on /0:0:0:0:0:0:0:0:9000
2016-02-21 22:19:12,913 - [DEBUG] - from application in New I/O worker #1
Notebooks directory in the config is referring ./notebooks. Does it exist? true
2016-02-21 22:19:13,037 - [INFO] - from application in New I/O worker #1
#!/usr/bin/env bash
### ------------------------------- ###
### Helper methods for BASH scripts ###
### ------------------------------- ###
# die MESSAGE... — print the message to stderr and abort with status 1.
die() {
  echo "$@" >&2
  exit 1
}
# Pull in the base configuration; the keys below layer on top of it.
include "application"
# spark-notebook manager settings.
manager {
notebooks {
# Filesystem directory where spark-notebook stores notebook files.
dir = "/usr/share/spark-notebook/notebooks"
# Per-notebook overrides applied on top of the default Spark configuration.
override {
sparkConf = {
# HDP stack version must be handed to both the driver and the YARN AM JVMs
# so the cluster can resolve its version-stamped classpath.
spark.driver.extraJavaOptions: "-Dhdp.version=2.4.0.0-169",
spark.yarn.am.extraJavaOptions: "-Dhdp.version=2.4.0.0-169"
}