This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split a '#'-delimited list of ';'-separated records into one output line per
# field, printed as "<record-number> <field-index> <value>" (tr '\012' = newline,
# so each '#'-group becomes its own awk record).
# NOTE(review): the command ends with a trailing '|' — the rest of the pipeline
# appears cut off in this capture; confirm against the original gist.
echo "15;23;35#18;14;89" | tr '#' '\012' | awk -F';' '{for (i=1;i<=NF;i++){print NR, i, $(i)} }' |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Converts a dotted-quad IPv4 string (e.g. "192.168.0.1") into its 32-bit
// numeric value as a Long: each octet is weighted by 256^(3 - position),
// most-significant octet first.
// NOTE(review): the snippet ends right after the while-loop — the trailing
// `num` result expression and the function's closing brace appear truncated
// in this capture; confirm against the original.
// NOTE(review): `.toInt` throws NumberFormatException on non-numeric octets,
// and `% 256` silently wraps out-of-range octets (e.g. "300") instead of
// rejecting the address as invalid.
import java.net.InetAddress | |
def IPv4ToLong(dottedIP: String): Long = { | |
// Split on a literal '.' (regex metacharacter, hence the escape).
val addrArray: Array[String] = dottedIP.split("\\.") | |
var num: Long = 0 | |
var i: Int = 0 | |
while (i < addrArray.length) { | |
// Weight 256^3 for the first octet down to 256^0 for the last.
val power: Int = 3 - i | |
num = num + ((addrArray(i).toInt % 256) * Math.pow(256, power)).toLong | |
i += 1 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Spark-shell preamble plus the start of a helper that builds an explicit
// StructType schema (presumably for CSV loading — see the sibling csvToDF
// snippet, which accepts an Option[StructType]).
// NOTE(review): the snippet is truncated immediately after `Seq(` — the
// actual StructField list and closing braces are missing from this capture.
import org.apache.spark.sql.types.{DoubleType,LongType,ShortType, IntegerType, StructField,TimestampType, StructType,StringType,NumericType,BooleanType} | |
import org.apache.hadoop.fs.{FileSystem,Path} | |
// `sc` is the spark-shell-provided SparkContext.
val sqlContext = new org.apache.spark.sql.SQLContext(sc) | |
import sqlContext.implicits._ | |
def getschemametod(): StructType = { | |
StructType( | |
Seq( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Loads a delimited text file into a DataFrame. When `schema` is Some(...)
// the caller-supplied StructType is applied; otherwise (in the truncated
// remainder) the reader presumably infers the schema — TODO confirm.
// NOTE(review): the snippet is truncated mid-`match` (right after
// `sqlContext.read`); the reader options (delimiter, charset, header) and
// the None branch are missing from this capture.
import org.apache.spark.sql.types.{DoubleType,LongType,ShortType, IntegerType, StructField,TimestampType, StructType,StringType,NumericType,BooleanType} | |
import org.apache.hadoop.fs.{FileSystem,Path} | |
// `sc` is the spark-shell-provided SparkContext.
val sqlContext = new org.apache.spark.sql.SQLContext(sc) | |
import sqlContext.implicits._ | |
def csvToDF(file: Path, delimiter : String,charset: String = "UTF8", useHeader: Boolean = true, schema: Option[StructType] = None) = { | |
val df = schema match { | |
case Some(schema) => sqlContext.read |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sessionization query: assigns a session_id per user (`id`) by starting a new
# session whenever the gap since the previous event (`starttid`) reaches
# 30 minutes, then numbering sessions as id*10000 + running SUM(new_session)
# ordered by event time.
# NOTE(review): the snippet is truncated mid-CASE; the `new_session` column
# that the outer SELECT sums must be defined in the cut-off remainder of the
# inner query — confirm against the original.
# (No comments can be added inside the SQL string itself without changing it.)
sqlloggik4_df = """ | |
SELECT * | |
, CAST(id as BIGINT) *10000 + SUM(new_session) | |
OVER (PARTITION BY id ORDER BY starttid) | |
AS session_id | |
FROM( | |
SELECT *, | |
unix_timestamp(l.starttid) - LAG(unix_timestamp(l.starttid)) OVER (PARTITION BY l.id ORDER BY l.starttid) timesincelast, | |
CASE | |
WHEN unix_timestamp(l.starttid) - LAG(unix_timestamp(l.starttid)) OVER (PARTITION BY l.id ORDER BY l.starttid) >= 30 * 60 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Luhn checksum for a Swedish personal identity number (personnummer):
// strips '-' separators, maps each character to its digit value, then over
// the first 9 digits alternately multiplies by 2 and 1 and adds the digit
// sum of each product — `(c*w)/10 + (c*w)%10` is the digit sum, valid
// because c*w <= 18 — finally reducing the total mod 10.
// NOTE(review): the snippet ends after computing `lunsum`; the comparison
// against the 10th (check) digit appears truncated in this capture.
def checkSEPnr(pnr:String) = { | |
val chars = pnr.toList | |
// Lazy views avoid building intermediate lists for the filter/map steps.
val removeMinus = chars.view.filter(_ != '-') | |
val charToInt = removeMinus.view.map(_ - '0')  | |
// foldLeft state: (running sum, next weight); weight alternates 2,1,2,1,...
val lunsum: Int = charToInt.take(9).foldLeft( (0,2) ){ | |
(r,c) => | |
(r._1 + (c * r._2) / 10 + (c * r._2) % 10, if (r._2 == 2) 1 else 2) | |
}._1 % 10 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Loads a pivoted customer/weekday dataset from Parquet and prepares a
// 7-element zero vector (one slot per weekday) as a fallback value.
// BUG(review): `BDV` is never defined or imported here — the import above
// brings in `breeze.linalg.DenseVector`, so this should read
// `DenseVector(Array.fill(7)(0.0))` (the corrected copy of this snippet
// below does exactly that). As written this line will not compile.
// NOTE(review): the snippet is truncated right after `.rdd`.
import org.apache.spark.sql.Row | |
import breeze.linalg.DenseVector | |
import org.apache.spark.mllib.linalg.{Vector, Vectors} | |
val t_df = sqlContext.read.parquet("/user/s89718/Pivoted_cust_weekday_total_with_Clusters.parquet") | |
val tm_df = t_df.select("IP_ID","assembled") | |
val emptyVector = BDV(Array.fill(7)(0.0)) | |
val zeVector = tm_df | |
.rdd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Corrected variant of the snippet above: loads the pivoted customer/weekday
// Parquet dataset and prepares a 7-element breeze DenseVector of zeros (one
// slot per weekday) as a fallback value — here `DenseVector` is used
// directly, matching the import, instead of the undefined `BDV` alias.
// NOTE(review): the snippet is truncated right after `.rdd`; the map/reduce
// over the RDD rows is missing from this capture.
import org.apache.spark.sql.Row | |
import breeze.linalg.DenseVector | |
import org.apache.spark.mllib.linalg.{Vector, Vectors} | |
val t_df = sqlContext.read.parquet("/user/_/Pivoted_cust_weekday_total_with_Clusters.parquet") | |
val tm_df = t_df.select("IP_ID","assembled") | |
val emptyVector = DenseVector (Array.fill(7)(0.0)) | |
val zeVector = tm_df | |
.rdd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Demo: parse a "ddMMMyyyy HH.mm.ss" timestamp string into a proper Spark
// timestamp and derive day-of-month / month / year columns. The malformed
// second row ("#$@#@#") parses to null, so every derived column is null for
// it rather than failing the job — useful for showing lenient parsing.
val df = Seq((1L, "03JUN2015 19.28.00"), (2L, "#$@#@#")).toDF("id", "dts")

// FIX: `month` and `year` were called below but never imported in the
// original snippet; import everything this pipeline actually uses.
import org.apache.spark.sql.functions.{dayofmonth, month, unix_timestamp, year}

// FIX: the pattern was "ddMMMyy" while the data carries a 4-digit year
// ("2015"); the explicit "yyyy" form avoids relying on SimpleDateFormat's
// lenient handling of non-two-digit years.
df.withColumn("ts", unix_timestamp($"dts", "ddMMMyyyy HH.mm.ss").cast("timestamp"))
  .withColumn("dom", dayofmonth($"ts"))
  .withColumn("month", month($"ts"))
  .withColumn("year", year($"ts")) // FIX: column name was misspelled "yesar"
  .show(2, false)