This is a draft pending Release 81.
When a raw event fails Scala Hadoop Enrich validation, it is written out to a bad rows bucket in a format which looks something like this:
{
    "line": "2015-06-09\t09:56:09\tNRT12\t831\t211.14.8.250\tGET\t...",
    "errors": [{
        "level": "error",
        "message": "Field [sr]: [3799x1847] is not a valid screen resolution"
    }]
}
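A quick way to see which validation failures dominate is to tally the error messages across a local copy of the bad rows files. The following is a minimal sketch, assuming the bad rows have been downloaded as newline-delimited JSON into a bad-rows/ directory (the directory name is illustrative; the field names follow the format above):

#!/usr/bin/env python
import glob
import json
from collections import Counter

# Tally error messages across locally downloaded bad row files
# (assumes one JSON bad row per line; "bad-rows/" is an illustrative path)
counts = Counter()
for path in glob.glob("bad-rows/*"):
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            row = json.loads(line)
            for error in row.get("errors", []):
                counts[error["message"]] += 1

# Show the ten most common failure messages
for message, n in counts.most_common(10):
    print("%6d  %s" % (n, message))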
<script>
// If this tag fires more than once (e.g. page view followed by ecommerce action),
// we don't want to repeat the trackPageView here
if (!window.SNOWPLOW_NAME_HERE) {
  ;(function(p,l,o,w,i,n,g){if(!p[i]){p.GlobalSnowplowNamespace=p.GlobalSnowplowNamespace||[];
  p.GlobalSnowplowNamespace.push(i);p[i]=function(){(p[i].q=p[i].q||[]).push(arguments)
  };p[i].q=p[i].q||[];n=l.createElement(o);g=l.getElementsByTagName(o)[0];n.async=1;
  n.src=w;g.parentNode.insertBefore(n,g)}}(window,document,"script","//d1fc8wv8zag5ca.cloudfront.net/2.6.1/sp.js","SNOWPLOW_NAME_HERE"));
  // ... tracker setup (newTracker, trackPageView, etc.) would follow here ...
}
</script>
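The guard on window.SNOWPLOW_NAME_HERE ensures the asynchronous loader is injected only once per page, however many times the tag fires.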
#!/usr/bin/python
"""
Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.

This program is licensed to you under the Apache License Version 2.0,
and you may not use this file except in compliance with the Apache License
Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
http://www.apache.org/licenses/LICENSE-2.0.

Unless required by applicable law or agreed to in writing,
software distributed under the Apache License Version 2.0 is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the Apache License Version 2.0 for the specific
language governing permissions and limitations there under.
"""
# For each Scala source file, delete each import line in turn and recompile;
# if the project still compiles, the import was unused, so log it to "output".
for srcf in $(find src/main -name '*.scala'); do
  for lineno in $(grep -n '^import' $srcf | sed 's/:/ /g' | grep -v '{' | awk '{print $1}'); do
    echo $srcf $lineno
    sed -i "${lineno}d" $srcf
    sbt compile
    if [ $? -eq 0 ]; then
      echo $srcf $lineno | tee -a output
    fi
    git checkout .
  done
done
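Note that this brute-force approach runs a full sbt compile once per import line, so it can be very slow on a large codebase; the git checkout . after each attempt ensures every deletion is tested against a pristine tree.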
var page = require('webpage').create();
var system = require('system');
if (system.args.length === 1) {
    console.log('Usage: you must provide some URL to check');
    phantom.exit();
}
page.open(system.args[1], function(status) {
    var usesSnowplow;
    var information;
    if (status === "success") {
        // Assumption: detect Snowplow by checking for its global namespace in the page
        usesSnowplow = page.evaluate(function() {
            return Boolean(window.GlobalSnowplowNamespace);
        });
        console.log("Uses Snowplow: " + usesSnowplow);
    } else {
        console.log("Failed to load " + system.args[1]);
    }
    phantom.exit();
});
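Assuming the script is saved as check-snowplow.js (the filename is illustrative), it can be run with: phantomjs check-snowplow.js http://example.com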
#!/usr/bin/env python
import fileinput
import sys
import re

domain = None
ignored_lines = 0
for line in fileinput.input():
    if line == "\n":
        break
#!/usr/bin/env python3
#
# link-validator.py
# Brandon Amos <http://bamos.io>

import argparse
import re
from functools import lru_cache
from urllib.request import urlopen
from urllib.parse import urljoin, urldefrag
#!/bin/bash
set -e

path=$1
schema=$2

# String manipulation to extract "s3://first_part_of_path/"
path_prefix=$(echo $1 | sed 's_\(^s3://[^/]*/\).*$_\1_g')
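For example, given the path s3://my-bucket/enriched/bad/run=2016-01-01-00-00-00/, the sed expression captures everything up to and including the first slash after the bucket name, leaving path_prefix as s3://my-bucket/.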
#!/usr/bin/python
import base64
import sys

input = sys.argv[1]
# A string can start at any of three byte offsets within base64-encoded data;
# pad with 0-2 leading bytes to simulate each offset, then drop the leading
# characters that depend on the unknown preceding bytes and strip the '=' padding.
drop = [0, 2, 3]
def get_combinations(s):
    return [(base64.urlsafe_b64encode('a' * i + s)[drop[i]:]).replace('=', '') for i in range(3)]

for combination in get_combinations(input):
    print(combination)
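Each of the three printed strings corresponds to one possible alignment of the search term within a base64-encoded payload, so grepping the bad rows for any of them will find the term regardless of what precedes it in the encoded data.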
#!/bin/sh
# List only the shards of the given Kinesis stream that are still open
aws kinesis describe-stream --stream-name $1 |
  jq '.StreamDescription.Shards | map(select(.SequenceNumberRange.EndingSequenceNumber == null))'
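A shard that has been closed by a merge or split retains an EndingSequenceNumber, so selecting for a null value returns only the shards that are still open for writes.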