Skip to content

Instantly share code, notes, and snippets.

CREATE TEMP FUNCTION term_years(term STRING)
RETURNS ARRAY<STRUCT<year INT64, term_frequency INT64>> AS (
(
SELECT years
FROM `gcp-pdp-words-dev.sandbox.eng_bert_preprocessed_year_term_frequencies`
WHERE preprocessed_term = term
)
-- SELECT ARRAY_AGG(STRUCT<year INT64, term_frequency INT64>(z.year, z.term_frequency + IFNULL(t.term_frequency, 0)))
-- FROM
-- (
@allenday
allenday / worldpop_density.sql
Created September 17, 2021 09:16
worldpop population density per country 2020
SELECT
country_name,
COUNT(1) AS hectares,
SUM(population) AS total_pop,
SUM(population)/COUNT(1) AS pop_per_hectare
FROM
`bigquery-public-data.geo_worldpop.WorldPop_GP_100m` AS pop
,`bigquery-public-data.country_codes.country_codes` AS codes
WHERE TRUE
AND pop.country_code = codes.alpha_3_code
@allenday
allenday / profanity.sh
Created July 13, 2021 16:05
run profanity for x sec to generate a vanity key, then exit
( cmdpid=$BASHPID; (sleep 3600; kill $cmdpid) & exec ./profanity.x64 --matching 888888XXXXXXXXXXXXXXXXXXXXXXXXXXXX888888 | grep --line-buffered Score >> keys.txt )
@allenday
allenday / geohash-precipitation-noaa_gsod.sql
Created July 3, 2021 07:44
get total precipitation within a date range for a geohash polygon
#standardSQL
CREATE TEMPORARY FUNCTION geohashDecode(geohash STRING)
RETURNS STRUCT<bbox ARRAY<FLOAT64>, lat FLOAT64, lng FLOAT64>
LANGUAGE js AS """
if (!geohash) return null;
var base32 = '0123456789bcdefghjkmnpqrstuvwxyz';
geohash = geohash.toLowerCase();
var evenBit = true;
var latMin = -90, latMax = 90;
var lonMin = -180, lonMax = 180;
@allenday
allenday / bq-geohash-udf.sql
Created July 2, 2021 02:59 — forked from vladaman/bq-geohash-udf.sql
Google BigQuery standardSQL UDF function to decode geohash string into bounding box and coordinates
#standardSQL
CREATE TEMPORARY FUNCTION geohashDecode(geohash STRING)
RETURNS STRUCT<bbox ARRAY<FLOAT64>, lat FLOAT64, lng FLOAT64>
LANGUAGE js AS """
if (!geohash) return null;
var base32 = '0123456789bcdefghjkmnpqrstuvwxyz';
geohash = geohash.toLowerCase();
var evenBit = true;
var latMin = -90, latMax = 90;
@allenday
allenday / download-worldpop.sh
Created March 27, 2021 03:57
earth-engine-to-bigquery
#!/bin/sh
CREDENTIALS="credentials.json"
COLLECTION="projects/earthengine-public/assets/WorldPop/GP/100m/pop"
YEAR=2020
export "GOOGLE_APPLICATION_CREDENTIALS=$CREDENTIALS"
IMAGES=$(ogrinfo -ro -al "EEDA:" -oo "COLLECTION=$COLLECTION" -where "year=$YEAR" | grep 'gdal_dataset (String) = ' | cut -d '=' -f2 | tr -d ' ')
for IMAGE in $IMAGES
do
#!/usr/bin/env python3
# See: https://medium.com/@lakshmanok/how-to-load-geojson-files-into-bigquery-gis-9dc009802fb4
# Processes output from: https://code.earthengine.google.com/49115c966c4c83a3ca7ac52bd5aba4f6
import json
from geolib import geohash
# check the accuracy
# PostGIS check from https://postgis.net/docs/ST_GeoHash.html
WITH nearby_health AS (
SELECT pop.geo_id, pop.population, CAST(ST_DISTANCE(ST_CENTROID(pop.geog),layer.geometry)/1000 AS INT64) AS distance
FROM
`bigquery-public-data.worldpop.population_grid_1km` AS pop,
`bigquery-public-data.geo_openstreetmap.planet_layers` AS layer
WHERE TRUE
AND pop.country_name = 'Singapore'
AND layer_name IN ('hospital','doctors')
AND ST_INTERSECTSBOX(layer.geometry, longitude_centroid-5, latitude_centroid-5, longitude_centroid+5, latitude_centroid+5)
AND longitude_centroid > -175 AND longitude_centroid < 175 AND latitude_centroid > -85 AND latitude_centroid < 85

overall structure

  • resources (multiple audio files, video files, and other resources referenced in the sequence)
  • project
    • sequence (a group of multiple assets that compose a single timeline)
      • spine' (a timeline. this is the OUTER spine in which everything else is contained)
        • marker
          • complete="0" is a TODO marker
        • video (ignore these)
        • asset-clip' (a segment of a video or audio file)
  • spine (this is a floating timeline that contains one or more of...)
WITH all_edits AS (
SELECT
CAST(osm_timestamp AS DATE) AS d, 0 AS code, COUNT(*) AS count
FROM `gcp-pdp-osm-dev.osm_to_bq_history.history_layers`
GROUP BY d
),
-- HEALTH
health_creates AS (
SELECT