Örjan Angré (Lundberg) oluies

  • Sweden
  • X @oluies
sqlloggik4_df = """
SELECT *
, CAST(id AS BIGINT) * 10000 + SUM(new_session)
  OVER (PARTITION BY id ORDER BY starttid)
  AS session_id
FROM (
  SELECT *,
    unix_timestamp(l.starttid) - LAG(unix_timestamp(l.starttid)) OVER (PARTITION BY l.id ORDER BY l.starttid) AS timesincelast,
    CASE
      WHEN unix_timestamp(l.starttid) - LAG(unix_timestamp(l.starttid)) OVER (PARTITION BY l.id ORDER BY l.starttid) >= 30 * 60
      THEN 1 ELSE 0
    END AS new_session
  -- the gist is truncated after the CASE condition; the THEN/ELSE/END,
  -- this FROM (table name is a placeholder), and the closing quotes
  -- are a reconstruction
  FROM loggik l
) t
"""
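A minimal usage sketch for the query above (Scala, to match the other gists here; assumes the query string is bound to a val sqlloggik4_df and that the log table named in the FROM clause is registered as a temp table):

// Run the sessionization query: rows more than 30 minutes apart start a
// new session, and session_id = id * 10000 + running session count.
val sessions = sqlContext.sql(sqlloggik4_df)
sessions.select("id", "starttid", "timesincelast", "session_id").show(5)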
import org.apache.spark.sql.types.{DoubleType,LongType,ShortType, IntegerType, StructField,TimestampType, StructType,StringType,NumericType,BooleanType}
import org.apache.hadoop.fs.{FileSystem,Path}
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
def csvToDF(file: Path, delimiter: String, charset: String = "UTF8", useHeader: Boolean = true, schema: Option[StructType] = None) = {
  // spark-csv reader; the gist is truncated mid-match, so the body below
  // is a reconstruction: apply the caller's schema if given, else infer one.
  val reader = sqlContext.read.format("com.databricks.spark.csv")
    .option("header", useHeader.toString).option("delimiter", delimiter).option("charset", charset)
  schema match {
    case Some(s) => reader.schema(s).load(file.toString)
    case None    => reader.option("inferSchema", "true").load(file.toString)
  }
}
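A hypothetical call (the path and delimiter are made-up values):

val trades = csvToDF(new Path("/data/example.csv"), delimiter = ";")
trades.printSchema()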
import org.apache.spark.sql.types.{DoubleType,LongType,ShortType, IntegerType, StructField,TimestampType, StructType,StringType,NumericType,BooleanType}
import org.apache.hadoop.fs.{FileSystem,Path}
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
def getschemametod(): StructType = {
  StructType(
    Seq(
      // the gist is truncated here; these fields are placeholders
      StructField("id", LongType, nullable = false),
      StructField("starttid", TimestampType, nullable = true)
    )
  )
}
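If this snippet is used together with the csvToDF gist above, the hand-built schema can be passed in instead of relying on inference (file path again a placeholder):

val df = csvToDF(new Path("/data/example.csv"), ";", schema = Some(getschemametod()))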
import java.net.InetAddress
// Convert a dotted-quad IPv4 string, e.g. "192.168.0.1", to its Long value.
def IPv4ToLong(dottedIP: String): Long = {
  val addrArray: Array[String] = dottedIP.split("\\.")
  var num: Long = 0
  var i: Int = 0
  while (i < addrArray.length) {
    val power: Int = 3 - i
    num = num + ((addrArray(i).toInt % 256) * Math.pow(256, power)).toLong
    i += 1
  }
  num  // the gist cuts off mid-function; the return value and brace complete it
}
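A quick sanity check (example values):

IPv4ToLong("192.168.0.1")  // 3232235521L
IPv4ToLong("0.0.0.1")      // 1L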
# Split the '#'-separated records onto separate lines, then print
# "row column value" for each ';'-separated field (1 1 15 ... 2 3 89):
echo "15;23;35#18;14;89" | tr '#' '\012' | awk -F';' '{for (i=1;i<=NF;i++){print NR, i, $(i)} }'
# Dictionary to map Spark data types to Hive
d = {'StringType':'STRING', 'DoubleType':'DOUBLE', 'IntegerType': 'INT', 'DateType':'DATE', 'LongType': 'BIGINT'}
# Convert to Hive schema
schemastring = ', '.join([field.name + ' ' + d[str(field.dataType)] for field in df.schema.fields])
hivetablename='mortgage_all'
output_path='path'
filename='filename'
# Create Hive table
ddl = """CREATE EXTERNAL TABLE IF NOT EXISTS %s(%s) STORED AS ORC LOCATION '%s'""" % (hivetablename, schemastring, output_path + filename)
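To actually create the table, the generated DDL has to go through a Hive-aware context. A minimal sketch (Scala shown to match the other gists here, Spark 1.x; the Python above builds the equivalent string):

// Assumes a DDL string like the `ddl` built above; a HiveContext, not a
// plain SQLContext, is needed for CREATE EXTERNAL TABLE.
val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
hiveContext.sql(ddl)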
@oluies
oluies / exclude_targetdirs.sh
Created January 21, 2017 18:30 — forked from viktorklang/exclude_targetdirs.sh
Adds all your sbt target dirs as path excludes for Time Machine
#WARNING: Use at your own risk. No warranties expressed or implied. YMMV. Drive responsibly. Eat healthy.
#First, `cd` into the parent dir for all of your `sbt`/`maven` projects (I assume you have one of those)
find "$(cd ..; pwd)" -type d -name "target" -exec sudo tmutil addexclusion -p {} +
@oluies
oluies / sqlserver_schema_to_spark.sql
Created February 20, 2017 14:07
create spark StructFields from a SQL Server schema
use [database_ONE_two]
select 'val ' + so.name + '_sch = StructType(Seq( ' + o.list + '))'
from sysobjects so
cross apply
(SELECT
' StructField("'+column_name+'", ' +
case data_type
when 'char' then 'StringType'
when 'varchar' then 'StringType'
when 'nvarchar' then 'StringType'
-- gist truncated above; the rest of this CASE and the close of the CROSS APPLY are a reconstruction
when 'int' then 'IntegerType'
when 'bigint' then 'LongType'
when 'datetime' then 'TimestampType'
else 'StringType'
end + ', true),'
FROM information_schema.columns WHERE table_name = so.name
FOR XML PATH('')
) o (list)
where so.xtype = 'U'
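The emitted lines are meant to paste straight into a Spark shell; illustratively (table and column names invented, modulo a trailing comma to trim), the output looks like:

val mortgage_sch = StructType(Seq(
  StructField("id", LongType, true),
  StructField("name", StringType, true)
))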
@oluies
oluies / after.csv
Created August 8, 2017 12:34 — forked from bfritz/after.csv
rapture-csv in Ammonite REPL
territory_id,first_name,last_name,email,employee_id
XMOWSM54,Peter,Alexander,[email protected],E00QTOF
XMRNBM47,Samuel,Lopez,[email protected],E00UBFA
XMOWMF87,Elizabeth,Stone,[email protected],E00WDYK
XMZWPW22,William,Carroll,[email protected],E00VDYQ
XMOWRW46,Carolyn,Little,[email protected],E00HUPR
XMZNDX26,Marilyn,Robinson,[email protected],E00ZJGS
XMZNAI68,Christopher,Rogers,[email protected],E00DCHF
XMONCD74,Anthony,Allen,[email protected],E00ACEQ
XMRNMD81,Martin,Baker,[email protected],E00DKRZ
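The original gist parses this file with rapture-csv in the Ammonite REPL; as a plain-Scala stand-in (not the rapture API), rows like these can be read with:

// Read after.csv into case classes; assumes comma-separated fields with a header.
import scala.io.Source
case class Rep(territoryId: String, firstName: String, lastName: String, email: String, employeeId: String)
val reps = Source.fromFile("after.csv").getLines().drop(1).map { line =>
  val Array(t, f, l, e, id) = line.split(',')
  Rep(t, f, l, e, id)
}.toList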