Skip to content

Instantly share code, notes, and snippets.

View alexanderdean's full-sized avatar

Alexander Dean alexanderdean

View GitHub Profile
{
"schema": "iglu:com.channel2.vod/video_played/jsonschema/1-0-0",
"data": {
"length": 213,
"id": "hY7gQrO"
}
}
{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Schema for a video_played event",
"self": {
"vendor": "com.channel2.vod",
"name": "video_played",
"format": "jsonschema",
"version": "1-0-0"
},
"type": "object",
-- Load the data at the path substituted for $INPUT (no USING clause, so the
-- default load function applies), declaring one chararray field named `line`.
input_lines = LOAD '$INPUT' AS (line:chararray);
-- Extract words from each line and put them into a pig bag
-- datatype, then flatten the bag to get one word on each row
words = FOREACH input_lines GENERATE FLATTEN(TOKENIZE(line)) AS word;
-- Keep only tokens made up entirely of word characters (regex \w+).
-- MATCHES has to match the whole string, so this drops blank tokens and
-- also any token that contains punctuation or other non-word characters.
filtered_words = FILTER words BY word MATCHES '\\w+';
-- create a group for each word
// JSON Schema criterion that the request body is expected to satisfy.
// NOTE(review): the trailing 1, 0 presumably pin payload_data to 1-0-* — confirm against SchemaCriterion.
private val PayloadDataSchema = SchemaCriterion(
  "com.snowplowanalytics.snowplow",
  "payload_data",
  "jsonschema",
  1,
  0
)
// Check JSON is a payload_data version 1-0-*, and verify it against the schema
val body: ValidatedNel[JsonNode] = bodyNode.verifySchemaAndValidate(schemaCriterion)
/*
* Copyright (c) 2015 Tim Harper.
*/
import sbt._
import Keys._
import xerial.sbt.Pack._
object SamzaTasks {