oluies’s gists

oluies / StatsCount.scala

Last active December 18, 2017 09:26

HBase: extract kv._2 into a map of column family name -> map of (column qualifier name -> value)

	import java.time.Instant
	import java.{lang, util}

	import org.apache.hadoop.conf.Configuration
	import org.apache.hadoop.hbase.{Cell, HBaseConfiguration}
	import org.apache.hadoop.hbase.client.Result
	import org.apache.hadoop.hbase.io.ImmutableBytesWritable
	import org.apache.hadoop.hbase.mapreduce.TableInputFormat
	import org.apache.hadoop.hbase.util.Bytes
	import org.apache.hadoop.hbase.util.Bytes.toBytes

oluies / spark-read-from-csv-string.scala

Created December 1, 2017 08:32

spark-read-from-string


	val csv:String = """
	\|rowKeyLineage \|eventTimestamp\|dataEventId\|dataDictId \|sourceKey \|transport\|eventType \|recordId\|processingTime\|application\|entity \|product\|lifecycle \|lineDate \|dayOfMonth\|dayOfYear\|
	\|1488327264000:2001200:MUREX:DK_LEI_6000:BOND:CONTRACT \|1488327264000 \|2001200 \|MUREXCONTRACT_DELTA \|20170301MUREXCONTRACT.csv \|FTP \|DISTRIBUTE\| \|1512074938523 \|MUREX \|DK_LEI_6000\|BOND \|CONTRACT \|2017-03-01 01:14:24\|1 \|60 \|
	\|1488327264000:2001200:MUREX:DK_LEI_6100:REPO:CONTRACT \|1488327264000 \|2001200 \|MUREXCONTRACT_DELTA \|20170301MUREXCONTRACT.csv \|FTP \|DISTRIBUTE\| \|1512074938523 \|MUREX \|DK_LEI_6100\|REPO \|CONTRACT \|2017-03-01 01:14:24\|1 \|60 \|
	\|1488327264000:2001200:MUREX:DK_LEI_6200:BOND:CONTRACT \|1488327264000 \|2001200 \|MURE

oluies / beam_elastic.jva

Created November 1, 2017 13:44

	public static void main(String[] args) throws IOException {


	PipelineOptionsFactory.register(PipelineCmdLineOptions.class);
	PipelineCmdLineOptions options =
	PipelineOptionsFactory.fromArgs(args).as(PipelineCmdLineOptions.class);


	//Config config = ConfigFactory.parseFile(new File(args[0]));
	//LOG.info(config.root().render(ConfigRenderOptions.concise().setFormatted(true)));

oluies / brews.txt

Created September 14, 2017 19:34 — forked from anonymous/brews.txt

oluies / gist:babe7f169fea228a5c564a9831c6ca68

Created September 1, 2017 19:41


	test("interpolateColor. Above 60") {
	assert(Visualization.interpolateColor(Color.threasholds, 60.001) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 60.01) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 60.1) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 61) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 62) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 63) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 100) === Color(255, 255, 255))
	assert(Visualization.interpolateColor(Color.threasholds, 1000) === Color(255, 255, 255))

oluies / TemperatureReading

Created August 9, 2017 20:18

	/** A station record: two optional integer identifiers plus a location.
	  *
	  * @param STN         optional integer identifier (presumably the station number — TODO confirm)
	  * @param WBAN        optional integer identifier (presumably the WBAN number — TODO confirm)
	  * @param GPSLocation the station's position as a `Location`
	  */
	case class Station(STN: Option[Int], WBAN: Option[Int], GPSLocation: Location)

	/** A single temperature observation tagged with station identifiers and a calendar day.
	  *
	  * NOTE(review): `stn` and `wban` are plain strings here, while `Station` models the
	  * similarly named fields as `Option[Int]` — confirm how the two are meant to be joined.
	  *
	  * @param stn         station identifier, as a string
	  * @param wban        WBAN identifier, as a string
	  * @param month       month of the reading (presumably 1-12 — TODO confirm)
	  * @param day         day of the reading (presumably day of month — TODO confirm)
	  * @param temperature the measured temperature (unit not visible here — TODO confirm)
	  */
	case class TemperatureReading(stn: String,
	wban: String,
	month: Int,
	day: Int,
	temperature: Double)


	case class Location(lat: Double, lon: Double){

oluies / after.csv

Created August 8, 2017 12:34 — forked from bfritz/after.csv

rapture-csv in Ammonite REPL

oluies / sqlserver_schema_to_spark.sql

Created February 20, 2017 14:07

Create Spark StructFields from a SQL Server schema

	use [database_ONE_two]
	select 'val ' + so.name + '_sch = StructType(Seq( ' + o.list + '))'
	from sysobjects so
	cross apply
	(SELECT
	' StructField("'+column_name+'", ' +
	case data_type
	when 'char' then 'StringType'
	when 'varchar' then 'StringType'
	when 'nvarchar' then 'StringType'

oluies / exclude_targetdirs.sh

Created January 21, 2017 18:30 — forked from viktorklang/exclude_targetdirs.sh

Adds all your sbt target dirs as path excludes for Time Machine

	#WARNING: Use at your own risk. No warranties expressed or implied. YMMV. Drive responsibly. Eat healthy.

	#First, `cd` into the parent dir for all of your `sbt`/`maven` projects (I assume you have one of those)
	# Finds every directory named "target" under the search root and hands the matches,
	# batched by `-exec ... +`, to `sudo tmutil addexclusion -p` (Time Machine path excludes).
	# NOTE(review): the search root is "$(cd ..; pwd)", i.e. the PARENT of the current working
	# directory, so the search starts one level above wherever you `cd`'d to — confirm this is intended.
	# NOTE(review): any directory named "target" matches, not only sbt/maven build output.
	find "$(cd ..; pwd)" -type d -name "target" -exec sudo tmutil addexclusion -p {} +

oluies / SpakDFtoHiveDDL.py

Created January 3, 2017 08:19

	# Builds a Hive `CREATE EXTERNAL TABLE` DDL string from the schema of a Spark DataFrame.
	# `df` is not defined in this snippet; it must already exist in the session.

	# Dictionary to map Spark data types to Hive
	d = {'StringType':'STRING', 'DoubleType':'DOUBLE', 'IntegerType': 'INT', 'DateType':'DATE', 'LongType': 'BIGINT'}

	# Convert to Hive schema
	# Key the lookup on the data type's class name instead of str(field.dataType): the string
	# rendering of a DataType differs between PySpark releases (newer ones append "()", e.g.
	# 'StringType()'), which made every lookup raise KeyError. The class name is stable.
	# Types that are not in `d` still raise KeyError, as before.
	schemastring = ', '.join([field.name + ' ' + d[type(field.dataType).__name__] for field in df.schema.fields])
	hivetablename='mortgage_all'
	output_path='path'
	filename='filename'
	# Create Hive table
	# NOTE(review): output_path and filename are concatenated with no separator — confirm that
	# output_path is expected to end with '/'.
	ddl = """CREATE EXTERNAL TABLE IF NOT EXISTS %s(%s) STORED AS ORC LOCATION '%s'""" % (hivetablename, schemastring, output_path + filename)


	val csv:String = """
	\|rowKeyLineage \|eventTimestamp\|dataEventId\|dataDictId \|sourceKey \|transport\|eventType \|recordId\|processingTime\|application\|entity \|product\|lifecycle \|lineDate \|dayOfMonth\|dayOfYear\|
	\|1488327264000:2001200:MUREX:DK_LEI_6000:BOND:CONTRACT \|1488327264000 \|2001200 \|MUREXCONTRACT_DELTA \|20170301MUREXCONTRACT.csv \|FTP \|DISTRIBUTE\| \|1512074938523 \|MUREX \|DK_LEI_6000\|BOND \|CONTRACT \|2017-03-01 01:14:24\|1 \|60 \|
	\|1488327264000:2001200:MUREX:DK_LEI_6100:REPO:CONTRACT \|1488327264000 \|2001200 \|MUREXCONTRACT_DELTA \|20170301MUREXCONTRACT.csv \|FTP \|DISTRIBUTE\| \|1512074938523 \|MUREX \|DK_LEI_6100\|REPO \|CONTRACT \|2017-03-01 01:14:24\|1 \|60 \|
	\|1488327264000:2001200:MUREX:DK_LEI_6200:BOND:CONTRACT \|1488327264000 \|2001200 \|MURE

Örjan Lundberg oluies