Skip to content

Instantly share code, notes, and snippets.

@geotheory
Last active October 3, 2025 11:09
Show Gist options
  • Save geotheory/7449a5b08392200300e58fab10732706 to your computer and use it in GitHub Desktop.
Save geotheory/7449a5b08392200300e58fab10732706 to your computer and use it in GitHub Desktop.
postgresql-datetime-histogram.R
require(tidyverse)
#> Loading required package: tidyverse
require(DBI)
#> Loading required package: DBI
require(RPostgres)
#> Loading required package: RPostgres
con = dbConnect(RPostgres::Postgres(), dbname = "xxx")
# toy dataset with numeric and datetime fields
d = diamonds %>% mutate(timestamp = Sys.time() + scales::rescale(diamonds$carat, to=c(0,365*24*3600))) %>% select(timestamp, price)
dbWriteTable(con, "diamonds", d, row.names=FALSE, append=FALSE, overwrite=TRUE)
# Example with price
price_hist = dbGetQuery(con, "
with price_stats as (
select min(price) as min,
max(price) as max
from diamonds
),
histogram as (
select width_bucket(price, min, max, 10) as bucket,
int4range(min(price), max(price), '[]') as range,
count(*) as freq
from diamonds, price_stats
group by bucket
order by bucket
)
select bucket, range, freq
from histogram;") %>% as_tibble() %>%
mutate(bin_min = str_extract(range, '[0-9]+(?=,)') %>% as.numeric(),
bin_max = str_extract(range, '(?<=,)[0-9]+') %>% as.numeric(),
n = as.integer(freq)) %>%
select(bucket, bin_min, bin_max, n)
print(price_hist)
#> # A tibble: 11 x 4
#> bucket bin_min bin_max n
#> <int> <dbl> <dbl> <int>
#> 1 1 326 2176 25335
#> 2 2 2176 4026 9328
#> 3 3 4026 5873 7393
#> 4 4 5876 7725 3878
#> 5 5 7725 9575 2364
#> 6 6 9576 11423 1745
#> 7 7 11427 13268 1306
#> 8 8 13275 15123 1002
#> 9 9 15124 16971 863
#> 10 10 16975 18819 725
#> 11 11 18823 18824 1
# datetime fail
dbGetQuery(con, "
with timestamp_stats as (
select min(timestamp) as min,
max(timestamp) as max
from diamonds
),
histogram as (
select width_bucket(timestamp, min, max, 10) as bucket,
int4range(min(timestamp), max(timestamp), '[]') as range,
count(*) as freq
from diamonds, timestamp_stats
group by bucket
order by bucket
)
select bucket, range, freq
from histogram;")
#> Error: Failed to prepare query: ERROR: function width_bucket(timestamp with time zone, timestamp with time zone, timestamp with time zone, integer) does not exist
#> LINE 8: select width_bucket(timestamp, min, max, 10) as bucket,
#> ^
#> HINT: No function matches the given name and argument types. You might need to add explicit type casts.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment