Skip to content

Instantly share code, notes, and snippets.

{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Meta-schema for self-describing JSON schema",
"self": {
"vendor": "com.snowplowanalytics.self-desc",
"name": "schema",
"format": "jsonschema",
"version": "2-0-0"
},
"allOf": [
{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Schema for a knock out event",
"self": {
"vendor": "com.gamescompany.fighting",
"name": "knock_out",
"format": "jsonschema",
"version": "1-0-0"
},
"metadata": {
-- first let's extract the "entries" array
select admission_data->>'entries' from joint_records limit 1;
-- Following query attempts to explode JSON based on the position of objects in the entries array
-- Note that the results show you cannot depend on the position because some entries have more json objects than others
-- Therefore we're going to have to find a way to identify the object in the array with the key e.g. `BabyCryTriage` and then return the corresponding value
select
admission_data->'entries'->0->'key' as "is_BabyCryTriage?",
admission_data->'entries'->0->'values'->0->'label' as "BabyCryTriage",
@yalisassoon
yalisassoon / neotree-deduplicate.sql
Last active February 25, 2020 09:04
Neotree db explore
-- First we create a view where we deduplicate the sessions
create materialized view deduplicated_sessions as
(
with latest_sessions as (
select
scriptid,
uid,
max(id) as id -- This takes the most recent upload
-- of the session as the deduplicated record.
-- We could replace with min(id) to take the
[{
"schema": "iglu:com.optimizely/experiment/jsonschema/1-0-0",
"data": {
"id": "3623930456",
"name": "",
"code": "PDP-Desktop-Rearrange colour size",
"manual": true,
"conditional": false,
"variationIds": [
"3646160533",
@yalisassoon
yalisassoon / raw-optimizely-data-object.json
Created January 20, 2016 19:11
Example Optimizely data object (raw)
{
"audiences": {
"3528620749": {
"conditions": [
"and",
[
"or",
[
"not",
[
@yalisassoon
yalisassoon / Upgrade atomic.events
Created April 20, 2015 15:59
SQL upgrade to release 64 from release 62
-- Copyright (c) 2015 Snowplow Analytics Ltd. All rights reserved.
--
-- This program is licensed to you under the Apache License Version 2.0,
-- and you may not use this file except in compliance with the Apache License Version 2.0.
-- You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the Apache License Version 2.0 is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
-- For every event, identify the previous event timestamp and session index.
-- LAG defaults to an offset of 1, i.e. the immediately preceding row within
-- each user's partition ordered by device timestamp; the first event per user
-- gets NULL for both "previous" columns.
SELECT
    dvce_tstamp AS current_tstamp,
    LAG(dvce_tstamp)
        OVER (PARTITION BY domain_userid ORDER BY dvce_tstamp) AS previous_tstamp,
    domain_sessionidx AS current_session_index,
    LAG(domain_sessionidx)
        OVER (PARTITION BY domain_userid ORDER BY dvce_tstamp) AS previous_session_index
FROM atomic.events;
-- The following query orders events by `domain_userid`, `domain_sessionidx` and `dvce_tstamp`, then checks the difference
-- (in minutes) between corresponding timestamps. If these are greater than 30 minutes for events in the same session
-- (i.e. `domain_userid` is the same on both rows and `domain_sessionidx` is the same on both rows) then there must be a problem
-- in the Javascript that determines when to increment the `domain_sessionidx` value.
SELECT
domain_userid,
LAG(domain_userid) OVER (ORDER BY domain_userid, domain_sessionidx, dvce_tstamp) AS previous_duserid,
domain_sessionidx AS current_dsessionidx,
LAG(domain_sessionidx) OVER (ORDER BY domain_userid, domain_sessionidx, dvce_tstamp) AS previous_dsessionidx,
@yalisassoon
yalisassoon / gist:8283380
Last active January 2, 2016 09:29
Hive table definition to analyze Amazon Cloudfront Access logs
CREATE EXTERNAL TABLE cloudfront_access_logs (
dt STRING,
tm STRING,
x_edge_location STRING,
sc_bytes STRING,
c_ip STRING,
cs_method STRING,
cs_host STRING,
cs_uri_stem STRING,
sc_status STRING,