Last active
October 3, 2025 11:09
-
-
Save geotheory/7449a5b08392200300e58fab10732706 to your computer and use it in GitHub Desktop.
postgresql-datetime-histogram.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(tidyverse) | |
#> Loading required package: tidyverse | |
require(DBI) | |
#> Loading required package: DBI | |
require(RPostgres) | |
#> Loading required package: RPostgres | |
con = dbConnect(RPostgres::Postgres(), dbname = "xxx") | |
# toy dataset with numeric and datetime fields | |
d = diamonds %>% mutate(timestamp = Sys.time() + scales::rescale(diamonds$carat, to=c(0,365*24*3600))) %>% select(timestamp, price) | |
dbWriteTable(con, "diamonds", d, row.names=FALSE, append=FALSE, overwrite=TRUE) | |
# Example with price | |
price_hist = dbGetQuery(con, " | |
with price_stats as ( | |
select min(price) as min, | |
max(price) as max | |
from diamonds | |
), | |
histogram as ( | |
select width_bucket(price, min, max, 10) as bucket, | |
int4range(min(price), max(price), '[]') as range, | |
count(*) as freq | |
from diamonds, price_stats | |
group by bucket | |
order by bucket | |
) | |
select bucket, range, freq | |
from histogram;") %>% as_tibble() %>% | |
mutate(bin_min = str_extract(range, '[0-9]+(?=,)') %>% as.numeric(), | |
bin_max = str_extract(range, '(?<=,)[0-9]+') %>% as.numeric(), | |
n = as.integer(freq)) %>% | |
select(bucket, bin_min, bin_max, n) | |
print(price_hist) | |
#> # A tibble: 11 x 4 | |
#> bucket bin_min bin_max n | |
#> <int> <dbl> <dbl> <int> | |
#> 1 1 326 2176 25335 | |
#> 2 2 2176 4026 9328 | |
#> 3 3 4026 5873 7393 | |
#> 4 4 5876 7725 3878 | |
#> 5 5 7725 9575 2364 | |
#> 6 6 9576 11423 1745 | |
#> 7 7 11427 13268 1306 | |
#> 8 8 13275 15123 1002 | |
#> 9 9 15124 16971 863 | |
#> 10 10 16975 18819 725 | |
#> 11 11 18823 18824 1 | |
# datetime fail | |
dbGetQuery(con, " | |
with timestamp_stats as ( | |
select min(timestamp) as min, | |
max(timestamp) as max | |
from diamonds | |
), | |
histogram as ( | |
select width_bucket(timestamp, min, max, 10) as bucket, | |
int4range(min(timestamp), max(timestamp), '[]') as range, | |
count(*) as freq | |
from diamonds, timestamp_stats | |
group by bucket | |
order by bucket | |
) | |
select bucket, range, freq | |
from histogram;") | |
#> Error: Failed to prepare query: ERROR: function width_bucket(timestamp with time zone, timestamp with time zone, timestamp with time zone, integer) does not exist | |
#> LINE 8: select width_bucket(timestamp, min, max, 10) as bucket, | |
#> ^ | |
#> HINT: No function matches the given name and argument types. You might need to add explicit type casts. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment