JSON
CREATE TABLE raw_crashes (
uuid uuid NOT NULL,
raw_crash json NOT NULL,
date_processed timestamp with time zone
);
-- (9.2 compatible code)
CREATE TEMPORARY TABLE new_android_devices
AS
SELECT DISTINCT
json_object_field_text(raw_crash, 'Android_CPU_ABI') as android_cpu_abi
, json_object_field_text(raw_crash, 'Android_Manufacturer') as android_manufacturer
, json_object_field_text(raw_crash, 'Android_Model') as android_model
, json_object_field_text(raw_crash, 'Android_Version') as android_version
FROM raw_crashes
WHERE
date_processed >= updateday
AND date_processed <= (updateday + 1)
GROUP BY
android_cpu_abi
, android_manufacturer
, android_model
, android_version
;
Some syntax:
SELECT '{"key": "value"}'::json->'key';
?column?
----------
"value"
(1 row)
HLL
CREATE TABLE daily_uniques (
report_date date UNIQUE,
users hll
);
UPDATE daily_uniques
SET users = hll_add(users, hll_hash_text('123.123.123.123'))
WHERE report_date = current_date;
Creating a report that retrieves a count of same uptime_string
in a selection of crash signatures for some period of time.
Using subqueries instead of CTEs:
SELECT category
, report_count
, round((report_count * 100::numeric)/total_count,3)::TEXT
as percentage
FROM
(
SELECT
category
, report_count
, sum(report_count) OVER () as total_count
FROM
(
SELECT uptime_string AS category
, sum(report_count) AS report_count
FROM signature_summary_uptime
JOIN signatures USING (signature_id)
WHERE
signatures.signature = 'Fake Signature #1'
AND report_date >= '2013-08-05T00:00:00+00:00'::timestamptz
AND report_date < '2013-08-12T00:00:00+00:00'::timestamptz
AND product_name IN ('Firefox') AND version_string IN ('1')
GROUP BY category
) as crashes
) as totals
ORDER BY report_count DESC
;
Using CTES:
WITH crashes AS (
SELECT uptime_string AS category
, sum(report_count) AS report_count
FROM signature_summary_uptime
JOIN signatures USING (signature_id)
WHERE
signatures.signature = 'Fake Signature #1'
AND report_date >= '2013-08-05T00:00:00+00:00'::timestamptz
AND report_date < '2013-08-12T00:00:00+00:00'::timestamptz
AND product_name IN ('Firefox') AND version_string IN ('1')
GROUP BY category
),
totals AS (
SELECT
category
, report_count
, sum(report_count) OVER () as total_count
FROM crashes
)
SELECT category
, report_count
, round((report_count * 100::numeric)/total_count,3)::TEXT
as percentage
FROM totals
ORDER BY report_count DESC
;
- http://www.postgresql.org/docs/9.3/static/tutorial-window.html
- http://www.postgresql.org/docs/9.3/static/functions-window.html
The following functions are also defined in addition to the standard aggregate functions like sum()
and avg()
:
row_number()
rank()
dense_rank()
percent_rank()
cume_dist()
ntile(num_buckets integer) possible
lag(value any [, offset integer [, default any ]])
lead(value any [, offset integer [, default any ]])
first_value(value any)
last_value(value any)
nth_value(value any, nth integer)
Example of a simple moving average:
CREATE TABLE lag_log (
replica_name text NOT NULL,
moment timestamp with time zone NOT NULL,
lag integer NOT NULL,
master text NOT NULL
);
-- Populate the data
insert into lag_log ( SELECT 'test1', generate_series(now(), now()+'1 hour'::interval, '5 minutes'), generate_series(1, 12, 1), 'master');
select moment, lag, avg(lag)
OVER ( order by moment desc rows between 1 following and 12 following )
FROM lag_log;
- Socorro supports 9.2 and higher - We have a reporting database you can get access to
- Amazon RDS Postgres - https://aws.amazon.com/rds/postgresql/
- Heroku Postgres - https://www.heroku.com/postgres
- Engine Yard - https://www.engineyard.com/resources/database-support
Also, Vertica supports WITH
queries and many key Postgres SQL features.