Skip to content

Instantly share code, notes, and snippets.

View marcintustin's full-sized avatar

marcintustin

View GitHub Profile
@marcintustin
marcintustin / explicitStatic.scala
Last active January 1, 2018 19:52
Spark code with explicit schemas
/**
 * Explicit schema for one viewership record.
 *
 * Field names are snake_case rather than the usual lowerCamelCase; presumably they
 * mirror the column names of the dataset this class is mapped onto (TODO confirm).
 * `Date` and `Timestamp` are whatever the enclosing file imports under those names.
 */
case class Viewership(
  date: Date,
  segment_time: Timestamp,
  station_id: Integer,
  start_time: Timestamp,
  end_time: Timestamp,
  viewership_property: String
)
/**
 * Explicit schema for one schedule record: a program identifier plus the date,
 * segment time, station and start/end times it applies to.
 *
 * Field names are snake_case rather than the usual lowerCamelCase; presumably they
 * mirror the column names of the dataset this class is mapped onto (TODO confirm).
 */
case class Schedule(
  program_id: Integer,
  date: Date,
  segment_time: Timestamp,
  station_id: Integer,
  start_time: Timestamp,
  end_time: Timestamp
)
/**
 * Explicit schema for a viewership record combined with schedule information.
 *
 * Carries the same six fields as `Viewership`, in the same order, followed by the
 * `program_id` that also appears on `Schedule`. Field names are snake_case;
 * presumably they mirror the column names of the underlying dataset (TODO confirm).
 */
case class ViewershipWithSchedule(
  date: Date,
  segment_time: Timestamp,
  station_id: Integer,
  start_time: Timestamp,
  end_time: Timestamp,
  viewership_property: String,
  program_id: Integer
)
@marcintustin
marcintustin / perfect_migration.sql
Last active December 30, 2017 15:12
Perfect, downtime-free table migrations in redshift, hive, and other columnar stores
-- Step 1: wrap the migration in a transaction where the engine supports it.
BEGIN TRANSACTION; -- omit if not supported
-- Step 2: create the replacement table with the new layout.
-- Only three columns are shown here; a real table may have many more columns below.
CREATE TABLE "production"."events_new" ("referer" character varying(1024) ENCODE zstd,"subscriptions" character varying(1024) ENCODE zstd,"smtpid" character varying(1024) ENCODE zstd) ;
-- Step 3: copy the data across one time segment at a time.
-- Inserts segmented by time allow bringing the data over incrementally,
-- which facilitates restarting and recovering from problems.
-- Also, if transaction locking occurs, rows will be locked incrementally (assuming you have row-level locking).
-- NOTE(review): SELECT * relies on both tables having the same columns in the same order; confirm before running.
-- NOTE(review): the ::date cast is applied to the column itself; check that your engine can still prune by sort key/partition with this predicate.
INSERT INTO "production"."events_new" SELECT * FROM "production"."events" WHERE _metadata__timestamp::date = '2017-06-29';
-- Repeat the INSERT above for as many further time segments as needed.
-- Also, if you want to minimize the latency in the new table, you could finish with a final insert based on the full timestamp.
-- ONLY do this if you have either a sortkey (Redshift) or partition/bucket (Hive, many other database systems) on the date.
https://605tv.slack.com/files/U6LNCLM6J/F7G6126DQ/wcbubel-ratelimitchallenge3_solution__1_.zip
/**
 * Circe decoder for `ExtendedGreeting`.
 *
 * Reads the `language` field as a `String` first; only if that succeeds is the
 * `greeting` field decoded as a `Greeting` (which needs a `Decoder[Greeting]` in
 * implicit scope). The two values are then combined into an `ExtendedGreeting`.
 */
implicit val ExtendedGreetingDecoder: Decoder[ExtendedGreeting] =
  Decoder.instance { cursor: io.circe.HCursor =>
    cursor.downField("language").as[String].flatMap { language =>
      cursor.downField("greeting").as[Greeting].map { greeting =>
        ExtendedGreeting(greeting, language)
      }
    }
  }
val service = HttpService {
// pure example
case GET -> Root / "hello" / name =>
Ok(Json.obj("message" -> Json.fromString(s"Hello, ${name}")))
case request @ POST -> Root / "hello" / name =>
for {
// Decode a user request - simplest way to access the request body
greeting <- request.as(jsonOf[Greeting])
"POST -> /hello/bobo" should "say hello with the greeting specified in the body json" in {
// org.http4s.Request.withBody returns a task that can be run to generate the request
val request = org.http4s.Request(Method.POST, uri("/hello/bobo")).withBody(
"""{"greeting": "hola"}""").run
// run runs the service against the request
// Note the return type
val responseTask: scalaz.concurrent.Task[org.http4s.Response] = HelloWorld.service.run(request)
import boto3
import csv
import itertools
import re
def find_multiplier(regex, text):
"""
segregate allocations here
"""
match = regex.match(text)
@marcintustin
marcintustin / constant_batch.py
Created April 5, 2017 18:37
Boto3 S3 constant batch example
import boto3
# Maximum number of bytes requested from the object stream per read.
BATCH_SIZE = 1024
# boto3 resource-style handle for S3.
s3 = boto3.resource('s3')
# Handle for the object with key 'example_object' in bucket 'test-bucket'.
s3_object = s3.Object('test-bucket', 'example_object')
# get() returns a response mapping; its 'Body' entry is the readable content stream.
stream = s3_object.get()['Body']
# Read the first batch: at most BATCH_SIZE bytes from the stream.
batch = stream.read(BATCH_SIZE)
@marcintustin
marcintustin / print_object_counts.rb
Created March 22, 2017 23:15
This snippet will print the top 40 object counts
# Drop this snippet wherever you want a readout of object counts.
# It tallies every object that ObjectSpace yields by its class, then prints the
# 40 most numerous classes as "<class> <count>", smallest count first.
counts = {}
ObjectSpace.each_object { |obj| counts[obj.class] = (counts[obj.class] || 0) + 1 }
counts.entries
      .sort_by { |_klass, total| total }
      .last(40)
      .each { |klass, total| puts "#{klass} #{total}" }
"""
Tests taken from http://www.petrounias.org/articles/2014/09/16/pickling-python-collections-with-non-built-in-type-keys-and-cycles/
Also includes additional testing steps to assist with adding compatibility to jsonpickle
"""
from collections import OrderedDict
from unittest import TestCase
from jsonpickle import encode, decode
class World(object):