select
by,
sum(score) as sum_score
from
hn
where
time >= now() - interval '7 days'
group by
by
order by
sum_score desc
limit
15
+---------------+-----------+
| by | sum_score |
+---------------+-----------+
| Pulcinella | 245 |
| meetpateltech | 179 |
| jmsflknr | 171 |
| surprisetalk | 164 |
| molli | 163 |
| vouwfietsman | 145 |
| james_pm | 133 |
| tcarambat1010 | 130 |
| darthShadow | 124 |
| belter | 123 |
| luu | 122 |
| Brajeshwar | 106 |
| pseudolus | 100 |
| cmdalsanto | 91 |
| tosh | 89 |
+---------------+-----------+
SELECT
by,
SUM(score) AS sum_score
FROM
hn
WHERE
time >= strftime('%s', 'now') - 7 * 24 * 60 * 60
GROUP BY
by
ORDER BY
sum_score DESC
LIMIT
15;
by sum_score
------------ ---------
thunderbong 10107
todsacerdoti 9645
rntn 9608
Brajeshwar 8211
belter 7979
tosh 7328
bookofjoe 5785
PaulHoule 5539
geox 4441
rbanffy 4198
pseudolus 3923
speckx 3602
Bluestein 3394
fanf2 3276
doener 3028
sqlite> SELECT
by,
SUM(score) AS sum_score
FROM
hn
WHERE
time >= datetime('now', '-7 days')
GROUP BY
by
ORDER BY
sum_score DESC
LIMIT
15
...> ;
by sum_score
------------- ---------
Pulcinella 245
meetpateltech 179
jmsflknr 171
surprisetalk 164
molli 163
vouwfietsman 145
james_pm 133
tcarambat1010 130
darthShadow 124
belter 123
luu 122
Brajeshwar 106
pseudolus 100
cmdalsanto 91
tosh 89
chatgpt / duckdb ("DuckDB supports NOW() and INTERVAL syntax similarly to PostgreSQL, so no major changes are necessary.")
SELECT
by,
SUM(score) AS sum_score
FROM
hn
WHERE
time >= NOW() - INTERVAL '7 days'
GROUP BY
by
ORDER BY
sum_score DESC
LIMIT
15;
┌───────────────┬───────────┐
│ by │ sum_score │
│ varchar │ int128 │
├───────────────┼───────────┤
│ Pulcinella │ 245 │
│ meetpateltech │ 179 │
│ jmsflknr │ 171 │
│ surprisetalk │ 164 │
│ molli │ 163 │
│ vouwfietsman │ 145 │
│ james_pm │ 133 │
│ tcarambat1010 │ 130 │
│ darthShadow │ 124 │
│ belter │ 123 │
│ luu │ 122 │
│ Brajeshwar │ 106 │
│ pseudolus │ 100 │
│ cmdalsanto │ 91 │
│ tosh │ 89 │
├───────────────┴───────────┤
│ 15 rows 2 columns │
└───────────────────────────┘
SELECT
by,
SUM(score) AS sum_score
FROM
hn
WHERE
time >= now() - INTERVAL '7 days'
GROUP BY
by
ORDER BY
sum_score DESC
LIMIT
15
┌───────────────┬───────────┐
│ by │ sum_score │
│ varchar │ int128 │
├───────────────┼───────────┤
│ Pulcinella │ 245 │
│ meetpateltech │ 179 │
│ jmsflknr │ 171 │
│ surprisetalk │ 164 │
│ molli │ 163 │
│ vouwfietsman │ 145 │
│ james_pm │ 133 │
│ tcarambat1010 │ 130 │
│ darthShadow │ 124 │
│ belter │ 123 │
│ luu │ 122 │
│ Brajeshwar │ 106 │
│ pseudolus │ 100 │
│ cmdalsanto │ 91 │
│ tosh │ 89 │
├───────────────┴───────────┤
│ 15 rows 2 columns │
└───────────────────────────┘
with domains as (
select
url,
substring(url from 'http[s]*://([^/$]+)') as domain
from
hn
where
url is not null
),
avg_and_max as (
select
substring(url from 'http[s]*://([^/$]+)') as domain,
max(score::int) as max_score
from
hn
group by
substring(url from 'http[s]*://([^/$]+)')
),
counted as (
select
domain,
count(*)
from
domains
group by
domain
order by
count desc
)
select
a.domain,
c.count,
a.max_score
from
avg_and_max a
join
counted c
using
(domain)
where
c.count > 5
order by
max_score desc
limit 5
+------------------+-------+-----------+
| domain | count | max_score |
+------------------+-------+-----------+
| openai.com | 53 | 1130 |
| www.ftc.gov | 17 | 954 |
| www.space.com | 87 | 830 |
| www.apple.com | 43 | 821 |
| en.wikipedia.org | 1070 | 801 |
+------------------+-------+-----------+
WITH domains AS (
SELECT
url,
substr(url, instr(url, '//') + 2, instr(substr(url, instr(url, '//') + 2), '/') - 1) AS domain
FROM
hn
WHERE
url IS NOT NULL
),
avg_and_max AS (
SELECT
substr(url, instr(url, '//') + 2, instr(substr(url, instr(url, '//') + 2), '/') - 1) AS domain,
MAX(score) AS max_score
FROM
hn
GROUP BY
substr(url, instr(url, '//') + 2, instr(substr(url, instr(url, '//') + 2), '/') - 1)
),
counted AS (
SELECT
domain,
COUNT(*) AS count
FROM
domains
GROUP BY
domain
ORDER BY
count DESC
)
SELECT
a.domain,
c.count,
a.max_score
FROM
avg_and_max a
JOIN
counted c
USING
(domain)
WHERE
c.count > 5
ORDER BY
max_score DESC
LIMIT 5;
domain count max_score
------------- ----- ---------
3786
openai.com 53 1130
/ 1624 1071
www.ftc.gov 17 954
www.space.com 87 830
WITH domains AS ( SELECT url, SUBSTR(url, INSTR(url, '://') + 3) AS full_domain, CASE WHEN INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') > 0 THEN SUBSTR(SUBSTR(url, INSTR(url, '://') + 3), 1, INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') - 1) ELSE SUBSTR(url, INSTR(url, '://') + 3) END AS domain FROM hn WHERE url IS NOT NULL AND url LIKE 'http%' ), avg_and_max AS ( SELECT CASE WHEN INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') > 0 THEN SUBSTR(SUBSTR(url, INSTR(url, '://') + 3), 1, INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') - 1) ELSE SUBSTR(url, INSTR(url, '://') + 3) END AS domain, MAX(CAST(score AS INTEGER)) AS max_score FROM hn WHERE url IS NOT NULL AND url LIKE 'http%' GROUP BY domain ), counted AS ( SELECT domain, COUNT(*) AS count FROM domains GROUP BY domain ORDER BY count DESC ) SELECT a.domain, c.count, a.max_score FROM avg_and_max a JOIN counted c USING (domain) WHERE c.count > 5 ORDER BY max_score DESC LIMIT 5
domain count max_score
openai.com 53 1130 www.ftc.gov 17 954 www.space.com 87 830 www.apple.com 43 821 en.wikipedia.org 1070 801
"To translate this query into DuckDB, the main adjustment you need is to work with DuckDB's support for regular expressions and type casting. DuckDB supports regular expressions, so you can retain the substring() with regex matching, and ::int typecasting works similarly."
WITH domains AS (
SELECT
url,
regexp_extract(url, 'http[s]*://([^/$]+)', 1) AS domain
FROM
hn
WHERE
url IS NOT NULL
),
avg_and_max AS (
SELECT
regexp_extract(url, 'http[s]*://([^/$]+)', 1) AS domain,
MAX(CAST(score AS INTEGER)) AS max_score
FROM
hn
GROUP BY
regexp_extract(url, 'http[s]*://([^/$]+)', 1)
),
counted AS (
SELECT
domain,
COUNT(*) AS count
FROM
domains
GROUP BY
domain
ORDER BY
count DESC
)
SELECT
a.domain,
c.count,
a.max_score
FROM
avg_and_max a
JOIN
counted c
USING
(domain)
WHERE
c.count > 5
ORDER BY
max_score DESC
LIMIT 5;
┌──────────────────┬───────┬───────────┐
│ domain │ count │ max_score │
│ varchar │ int64 │ int32 │
├──────────────────┼───────┼───────────┤
│ openai.com │ 53 │ 1130 │
│ www.ftc.gov │ 17 │ 954 │
│ www.space.com │ 87 │ 830 │
│ www.apple.com │ 43 │ 821 │
│ en.wikipedia.org │ 1070 │ 801 │
└──────────────────┴───────┴───────────┘
"This query will work as-is in DuckDB."
It didn't.
Binder Error: Referenced column "count" not found in FROM clause!
Candidate bindings: "a.domain"
LINE 28: count desc
"The issue is in the counted CTE where we're using count as both an aggregate function and a column alias, which is causing the conflict. Let's fix this by giving the count a specific alias."