janbenetka’s gists

janbenetka / geohash_to_polygon.py

Created March 22, 2021 11:47

[Geohash to polygon] #python #geo

	import sys
	!{sys.executable} -m pip install polygon-geohasher;

	from polygon_geohasher.polygon_geohasher import *
	from shapely import geometry


	str(geohash_to_polygon('u2ce02j'))

janbenetka / percentiles.sql

Created February 12, 2021 02:31

[Percentiles in BigQuery] #bigquery

	SELECT
	percentiles[offset(10)] as p10,
	percentiles[offset(25)] as p25,
	percentiles[offset(50)] as p50,
	percentiles[offset(75)] as p75,
	percentiles[offset(90)] as p90,
	FROM (
	SELECT APPROX_QUANTILES(home_duration, 100) as percentiles
	FROM `uc-prox-core-dev.21_day_retention.hw_test6`
	);

janbenetka / median.sql

Created February 12, 2021 02:31

[Median in BigQuery] #bigquery

	SELECT
	APPROX_QUANTILES(home_duration, 100)[OFFSET(50)] as approx_median
	FROM `uc-prox-core-dev.21_day_retention.hw_test6`

janbenetka / hist_bq.sql

Last active November 4, 2022 12:19

[Histogram in BigQuery] #bigquery

	DECLARE HIST_STEPS INT64 DEFAULT 20;

	WITH data AS (
	SELECT green_fraction as value
	FROM `uc-x-data.14_day_retention.green_counties`
	)
	, stats AS (
	SELECT min+stepi min, min+step(i+1)max
	FROM (
	SELECT max-min diff, min, max, (max-min)/HIST_STEPS step, GENERATE_ARRAY(0, HIST_STEPS, 1) i

janbenetka / employee_count.sparql

Created January 31, 2021 17:23

[SPARQL for employee count on DBPedia] #sparql #dbpedia

	PREFIX dbpedia2: <http://dbpedia.org/property/>
	PREFIX dbo: <http://dbpedia.org/ontology/>
	PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>

	SELECT *
	WHERE {
	?s1 dbo:numberOfEmployees ?employee_count .
	?s1 geo:long ?long .
	?s1 geo:lat ?lat .
	OPTIONAL {?s1 dbp:locationCountry ?country .}

janbenetka / example_tracts.txt

Last active December 7, 2020 09:09

[Example Tracts] #geo

	36061000202 -> New York, lower east side
	01063060200 -> Census Tract 602, Greene County, Alabama
	53033008100 -> Seattle, Census Tract 81, King County, Washington
	53033032203 -> Seattle, Census Tract 322.03, King County, Washington
	08035014202 -> Colorado, Census Tract 142.02, Douglas County, Colorado
	08031002000 -> Denver, Colorado, Census Tract 20, Denver County, Colorado

janbenetka / group_concat.sql

Created October 28, 2020 21:49

[BigQuery: Concat all text of grouped by rows] #bigquery #groupby

	SELECT url, COUNT(*) AS popularity, GROUP_CONCAT(tweet)
	FROM Table GROUP BY url ORDER BY popularity

janbenetka / parallel_plot.py

Created October 15, 2020 16:16

[Parallel plot for ranked data] #plotly #ranking #pandas

	import plotly.express as px
	df = px.data.iris()
	fig = px.parallel_coordinates(df, color="species_id",
	dimensions=['sepal_width', 'sepal_length', 'petal_width',
	'petal_length'],
	color_continuous_scale=px.colors.diverging.Tealrose,
	color_continuous_midpoint=2)
	fig.show()

	#-----------------------------------

janbenetka / hist_grouping.sql

Created October 14, 2020 00:48

[Historgram grouping in BigQuery] #bigquery #sql

	with count_per_identifier AS (
	SELECT
	identifier,
	COUNT(DISTINCT home_area_id) home_count
	FROM `uc-prox-core-dev.30_day_retention.home_identifier_weekly_8w_20d_100h`
	GROUP BY identifier
	)
	SELECT home_count, SUM(count) count FROM (SELECT 1 as count, home_count FROM count_per_identifier)
	GROUP BY home_count
	ORDER BY home_count

janbenetka / group_ratio.sql

Created October 7, 2020 05:53

[Ratio per group in BigQuery] #bigquery

	with per_os AS (
	SELECT
	osType,
	fips,
	count(*) count,
	any_value(geog) geog
	FROM `uc-puzzle-data.clusters_dwh.cluster_US` clusters
	, `uc-atlas.maps_us.census_tracts` tract
	WHERE
	localEventDate BETWEEN "2020-01-15" AND "2020-01-30"

Jan Benetka janbenetka