brito · May 23, 2020 02:16 · brito · May 23, 2020
diff --git a/aftoanastochasmos.sql b/aftoanastochasmos.sql
 /*       __ _                              _             _                           __      
   __ _ / _| |_ ___   __ _ _ __   __ _ ___| |_ ___   ___| |__   __ _ ___ _ __ ___   /_/  ___ 
  / _` | |_| __/ _ \ / _` | '_ \ / _` / __| __/ _ \ / __| '_ \ / _` / __| '_ ` _ \ / _ \/ __|
 | (_| |  _| || (_) | (_| | | | | (_| \__ \ || (_) | (__| | | | (_| \__ \ | | | | | (_) \__ \
  \__,_|_|  \__\___/ \__,_|_| |_|\__,_|___/\__\___/ \___|_| |_|\__,_|___/_| |_| |_|\___/|___/ 
  αυτοαναστοχασμός ergo legit */

 select
  _column, _type,
  _nulls,
  _slider(
    (_nulls::float/max(_nulls) over () * 30)::int,
    (_distinct::float/max(_distinct) over () * 30)::int)
    as "nulls vs distinct",
  _distinct,
  
  _range[1]::text,
  
  _bar(_pct_nulls::int) as "% nulls",
  
  _tally.*,
  _iqt.*,
  _histogram.*,
  
  concat_ws(' ± ',
    _avg_length[1]::text::numeric,
    ceil((_max_length[1]::text::int - _min_length[1]::text::int)/2))
    as _width,
    
  _schema,
  _relname,
  _kind

 from _meta (
    '{app_public}',
    '{data_report_session,data_raw_session,data_report}'),   
  -- tally columns
  _tally(_schema,_relname,_column) as _tally,
  
  -- interquartile
  _iqt(_schema,_relname,_column,_type) as _iqt,
  
  -- histogram buckets
  _histogram(_schema,_relname,_column,_type) as _histogram,
    
  -- count distinct values
  round((xpath('/row/count/text()', query_to_xml(format($$
    select count(*) from (select distinct %3$I from %1$I.%2$I) d
  $$,_schema,_relname,_column), false, true, '')))[1]::text::int) as _distinct,
  
  -- count nulls
  round((xpath('/row/count/text()', query_to_xml(format($$
    select count(*) from %I.%I where %I is null
  $$,_schema,_relname,_column), false, true, '')))[1]::text::int) as _nulls,
  
  -- percent null
  round((xpath('/row/p/text()', query_to_xml(format($$
    select round(100*count(*) filter (where %3$I is null) / count(*)) as p from %1$I.%2$I
  $$,_schema,_relname,_column), false, true, '')))[1]::text::int) as _pct_nulls,
  
  -- count zeroes
  -- FIXME not very useful either zeroes or the scale at which "near zero" is calculated
 --   round((xpath('/row/count/text()', query_to_xml(format($$
 --     select count(*) from %1$I.%2$I
 --     where '%4$I' ~ 'int|float' and %3$I::text ~ '^(-|00:00:)0'
 --   $$,_schema,_relname,_column,_type), false, true, '')))[1]::text::int) as _near_zeroes,
  
  -- range
  xpath('/row/r/text()', query_to_xml(format(case
    -- transforming a uuid into a number just to get a range is a bit unnecessary
 --     when typname = '_uuid' then $$
 --       select numrange(
 --         min(('x'||translate(left(%2$I::text,18),'-',''))::bit(64)::bigint),
 --         max(('x'||translate(left(%2$I::text,18),'-',''))::bit(64)::bigint),'[]')
 --     as r from %3$I.%4$I $$
  
    when _type ~ 'int\d|float|numeric' then $$
      select numrange(min(%3$I::numeric),max(%3$I::numeric),'[]')
    as r from %1$I.%2$I $$
      
    when _type = '_timestamptz' then $$
      select tstzrange(min(%3$I),max(%3$I))
    as r from %1$I.%2$I $$
      
    when _type = '_tstzrange' then $$
      select tstzrange(min(lower(%3$I)),max(upper(%3$I)))
    as r from %1$I.%2$I $$
      
    when _type = '_interval' then $$
      select concat_ws(',',min(%3$I),max(%3$I))
    as r from %1$I.%2$I $$
      
    when _type = '_bool' then $$
    select to_json(array[
      count(*) filter (where %3$I = true),
      count(*) filter (where %3$I = false)])
    as r from %1$I.%2$I $$
      
    when _type = '_text' and '%2$I' ~ '^\d\.?\d*' then $$
      select numrange(min(%3$I::numeric),max(%3$I::numeric))
    as r from %1$I.%2$I $$
    
    when _type = '_text' and not '%2$I' ~ '^\d\.?\d*' then $$
      select numrange(min(length(%3$I)),max(length(%3$I)))
    as r from %1$I.%2$I $$
    
  end,_schema,_relname,_column,_type), false, true, '')) as _range,
  
  -- min length as text
  xpath('/row/min/text()', query_to_xml(format($$
    select min(length(%3$I::text)) from %1$I.%2$I
  $$,_schema,_relname,_column), false, true, '')) as _min_length,
  
  -- avg length as text
  xpath('/row/round/text()', query_to_xml(format($$
    select round(avg(length(%3$I::text))) from %1$I.%2$I
  $$,_schema,_relname,_column), false, true, '')) as _avg_length,
  
  -- max length as text
  xpath('/row/max/text()', query_to_xml(format($$
    select max(length(%3$I::text)) from %1$I.%2$I
  $$,_schema,_relname,_column), false, true, '')) as _max_length
 ;
	/* __ _ _ _ __
	__ _ / _\| \|_ ___ __ _ _ __ __ _ ___\| \|_ ___ ___\| \|__ __ _ ___ _ __ ___ /_/ ___
	/ _` \| \|_\| __/ _ \ / _` \| '_ \ / _` / __\| __/ _ \ / __\| '_ \ / _` / __\| '_ ` _ \ / _ \/ __\|
	\| (_\| \| _\| \|\| (_) \| (_\| \| \| \| \| (_\| \__ \ \|\| (_) \| (__\| \| \| \| (_\| \__ \ \| \| \| \| \| (_) \__ \
	\__,_\|_\| \__\___/ \__,_\|_\| \|_\|\__,_\|___/\__\___/ \___\|_\| \|_\|\__,_\|___/_\| \|_\| \|_\|\___/\|___/
	αυτοαναστοχασμός ergo legit */

	select
	_column, _type,
	_nulls,
	_slider(
	(_nulls::float/max(_nulls) over () * 30)::int,
	(_distinct::float/max(_distinct) over () * 30)::int)
	as "nulls vs distinct",
	_distinct,

	_range[1]::text,

	_bar(_pct_nulls::int) as "% nulls",

	_tally.*,
	_iqt.*,
	_histogram.*,

	concat_ws(' ± ',
	_avg_length[1]::text::numeric,
	ceil((_max_length[1]::text::int - _min_length[1]::text::int)/2))
	as _width,

	_schema,
	_relname,
	_kind

	from _meta (
	'{app_public}',
	'{data_report_session,data_raw_session,data_report}'),
	-- tally columns
	_tally(_schema,_relname,_column) as _tally,

	-- interquartile
	_iqt(_schema,_relname,_column,_type) as _iqt,

	-- histogram buckets
	_histogram(_schema,_relname,_column,_type) as _histogram,

	-- count distinct values
	round((xpath('/row/count/text()', query_to_xml(format($$
	select count(*) from (select distinct %3$I from %1$I.%2$I) d
	$$,_schema,_relname,_column), false, true, '')))[1]::text::int) as _distinct,

	-- count nulls
	round((xpath('/row/count/text()', query_to_xml(format($$
	select count(*) from %I.%I where %I is null
	$$,_schema,_relname,_column), false, true, '')))[1]::text::int) as _nulls,

	-- percent null
	round((xpath('/row/p/text()', query_to_xml(format($$
	select round(100count() filter (where %3$I is null) / count(*)) as p from %1$I.%2$I
	$$,_schema,_relname,_column), false, true, '')))[1]::text::int) as _pct_nulls,

	-- count zeroes
	-- FIXME not very useful either zeroes or the scale at which "near zero" is calculated
	-- round((xpath('/row/count/text()', query_to_xml(format($$
	-- select count(*) from %1$I.%2$I
	-- where '%4$I' ~ 'int\|float' and %3$I::text ~ '^(-\|00:00:)0'
	-- $$,_schema,_relname,_column,_type), false, true, '')))[1]::text::int) as _near_zeroes,

	-- range
	xpath('/row/r/text()', query_to_xml(format(case
	-- transforming a uuid into a number just to get a range is a bit unnecessary
	-- when typname = '_uuid' then $$
	-- select numrange(
	-- min(('x'\|\|translate(left(%2$I::text,18),'-',''))::bit(64)::bigint),
	-- max(('x'\|\|translate(left(%2$I::text,18),'-',''))::bit(64)::bigint),'[]')
	-- as r from %3$I.%4$I $$

	when _type ~ 'int\d\|float\|numeric' then $$
	select numrange(min(%3$I::numeric),max(%3$I::numeric),'[]')
	as r from %1$I.%2$I $$

	when _type = '_timestamptz' then $$
	select tstzrange(min(%3$I),max(%3$I))
	as r from %1$I.%2$I $$

	when _type = '_tstzrange' then $$
	select tstzrange(min(lower(%3$I)),max(upper(%3$I)))
	as r from %1$I.%2$I $$

	when _type = '_interval' then $$
	select concat_ws(',',min(%3$I),max(%3$I))
	as r from %1$I.%2$I $$

	when _type = '_bool' then $$
	select to_json(array[
	count(*) filter (where %3$I = true),
	count(*) filter (where %3$I = false)])
	as r from %1$I.%2$I $$

	when _type = '_text' and '%2$I' ~ '^\d\.?\d*' then $$
	select numrange(min(%3$I::numeric),max(%3$I::numeric))
	as r from %1$I.%2$I $$

	when _type = '_text' and not '%2$I' ~ '^\d\.?\d*' then $$
	select numrange(min(length(%3$I)),max(length(%3$I)))
	as r from %1$I.%2$I $$

	end,_schema,_relname,_column,_type), false, true, '')) as _range,

	-- min length as text
	xpath('/row/min/text()', query_to_xml(format($$
	select min(length(%3$I::text)) from %1$I.%2$I
	$$,_schema,_relname,_column), false, true, '')) as _min_length,

	-- avg length as text
	xpath('/row/round/text()', query_to_xml(format($$
	select round(avg(length(%3$I::text))) from %1$I.%2$I
	$$,_schema,_relname,_column), false, true, '')) as _avg_length,

	-- max length as text
	xpath('/row/max/text()', query_to_xml(format($$
	select max(length(%3$I::text)) from %1$I.%2$I
	$$,_schema,_relname,_column), false, true, '')) as _max_length
	;