Örjan Angré (Lundberg) oluies

  • Sweden
  • X @oluies
package com.combient.sparkjob.tedsds
/**
* Created by olu on 09/03/16.
*/
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
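
These imports pair HiveContext with Window: in Spark 1.x, window expressions required a HiveContext. A minimal sketch of the usual pattern — the column names (id, cycle, value) are hypothetical, not taken from the gist:

// Hypothetical illustration of the imports above (not part of the original
// gist): a lag window partitioned per id and ordered by cycle.
val sc = new SparkContext(new SparkConf().setAppName("tedsds"))
val sqlContext = new HiveContext(sc)
import sqlContext.implicits._

val readings = Seq((1, 1, 10.0), (1, 2, 12.0), (2, 1, 9.0)).toDF("id", "cycle", "value")
val byIdOverCycles = Window.partitionBy($"id").orderBy($"cycle")
readings.withColumn("prev_value", lag($"value", 1).over(byIdOverCycles)).show()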
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
> $ twurl /1.1/lists/members.json --data 'count=700&owner_screen_name=Europarl_EN&slug=all-meps-on-twitter' | jq '.users[].id' > meps.txt
// Recursive Pascal's triangle: pascal(c, r) is the entry in column c of row r,
// e.g. pascal(2, 4) == 6 since row 4 is 1 4 6 4 1.
fun pascal(c: Int, r: Int): Int {
    require(c >= 0) { "column must not be negative" }
    require(r >= 0) { "row must not be negative" }
    require(c <= r) { "column must not exceed the row" }
    return when {
        c == 0 || c == r -> 1
        else -> pascal(c - 1, r - 1) + pascal(c, r - 1)
    }
}
import com.typesafe.config.ConfigFactory
import twitter4j.{RateLimitStatus, TwitterFactory}
import twitter4j.conf.ConfigurationBuilder
import java.util.{Timer, TimerTask}
import scala.collection.JavaConverters._
import scala.collection.mutable
object AllListContentFollowers {
  def main(args: Array[String]): Unit = {
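    // The gist preview ends at main's opening brace. What follows is a sketch,
    // not the original body: one plausible continuation that builds a twitter4j
    // client from Typesafe config keys (hypothetical names) and pages through a
    // list's members while respecting the rate limit; the list id is assumed to
    // arrive as args(0).
    val conf = ConfigFactory.load()
    val twitter = new TwitterFactory(new ConfigurationBuilder()
      .setOAuthConsumerKey(conf.getString("twitter.consumerKey"))
      .setOAuthConsumerSecret(conf.getString("twitter.consumerSecret"))
      .setOAuthAccessToken(conf.getString("twitter.accessToken"))
      .setOAuthAccessTokenSecret(conf.getString("twitter.accessTokenSecret"))
      .build()).getInstance()

    val members = mutable.ArrayBuffer[twitter4j.User]()
    var cursor = -1L
    while (cursor != 0) {
      val page = twitter.getUserListMembers(args(0).toLong, cursor)
      members ++= page.asScala
      val rl: RateLimitStatus = page.getRateLimitStatus
      if (rl != null && rl.getRemaining == 0)
        Thread.sleep(rl.getSecondsUntilReset.toLong * 1000L)
      cursor = page.getNextCursor
    }
    members.foreach(u => println(s"${u.getId}\t${u.getScreenName}"))
  }
}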
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import requests
requests.packages.urllib3.disable_warnings()
init_notebook_mode(connected=True)
@oluies
oluies / spark_unix_timestamp.scala
Created August 19, 2016 11:57
Spark: transform timestamp text to timestamp and extract some parts
import org.apache.spark.sql.functions.{dayofmonth, month, unix_timestamp, year}

// the second row is deliberately unparseable and yields null timestamps
val df = Seq((1L, "03JUN2015 19.28.00"), (2L, "#$@#@#")).toDF("id", "dts")

df.withColumn("ts", unix_timestamp($"dts", "ddMMMyyyy HH.mm.ss").cast("timestamp"))
  .withColumn("dom", dayofmonth($"ts"))
  .withColumn("month", month($"ts"))
  .withColumn("year", year($"ts"))
  .show(2, false)
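
Only the first row parses; the second yields nulls. The expected show() output, give or take Spark's timestamp rendering:

+---+------------------+---------------------+----+-----+----+
|id |dts               |ts                   |dom |month|year|
+---+------------------+---------------------+----+-----+----+
|1  |03JUN2015 19.28.00|2015-06-03 19:28:00.0|3   |6    |2015|
|2  |#$@#@#            |null                 |null|null |null|
+---+------------------+---------------------+----+-----+----+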
@oluies
oluies / vectorsum.scala
Created September 12, 2016 13:04
summarize a vector in spark
import org.apache.spark.sql.Row
import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.mllib.linalg.{Vector, Vectors}

val t_df = sqlContext.read.parquet("/user/s89718/Pivoted_cust_weekday_total_with_Clusters.parquet")
val tm_df = t_df.select("IP_ID", "assembled")

// zero value for the fold below: one slot per weekday
val emptyVector = BDV(Array.fill(7)(0.0))
val zeVector = tm_df
  .rdd
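  // (sketch: the gist preview ends at .rdd) turn each row's mllib Vector into
  // a Breeze vector and sum them all, with emptyVector as the zero value
  .map(row => BDV(row.getAs[Vector]("assembled").toArray))
  .fold(emptyVector)(_ + _)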
// Luhn check of a Swedish personnummer on the form "YYMMDD-NNNC": weight the
// first nine digits alternately by 2 and 1, digit-sum each product, and
// verify that the check digit C makes the total a multiple of 10.
def checkSEPnr(pnr: String): Boolean = {
  val digits = pnr.filter(_ != '-').map(_ - '0')
  val luhnSum: Int = digits.take(9).foldLeft((0, 2)) { (r, c) =>
    // (c * weight) / 10 + (c * weight) % 10 is the digit sum, since c * 2 <= 18
    (r._1 + (c * r._2) / 10 + (c * r._2) % 10, if (r._2 == 2) 1 else 2)
  }._1 % 10
  (10 - luhnSum) % 10 == digits(9)
}
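
A quick check with a sample number: for "811218-9876" the weighted digit sums are 7+1+2+2+2+8+9+8+5 = 44, so the check digit must be (10 - 4) % 10 = 6, which matches the tenth digit:

checkSEPnr("811218-9876")  // true
checkSEPnr("811218-9870")  // false (wrong check digit)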