Skip to content

Instantly share code, notes, and snippets.

@judell
Last active September 11, 2024 21:18
Show Gist options
  • Save judell/30b9c308310485a75eddfd7a6366735c to your computer and use it in GitHub Desktop.
Save judell/30b9c308310485a75eddfd7a6366735c to your computer and use it in GitHub Desktop.
translations

q1

   select
          by,
          sum(score) as sum_score
        from
          hn
        where
          time >= now() - interval '7 days'
        group by
          by
        order by
          sum_score desc
        limit
          15
+---------------+-----------+
| by            | sum_score |
+---------------+-----------+
| Pulcinella    | 245       |
| meetpateltech | 179       |
| jmsflknr      | 171       |
| surprisetalk  | 164       |
| molli         | 163       |
| vouwfietsman  | 145       |
| james_pm      | 133       |
| tcarambat1010 | 130       |
| darthShadow   | 124       |
| belter        | 123       |
| luu           | 122       |
| Brajeshwar    | 106       |
| pseudolus     | 100       |
| cmdalsanto    | 91        |
| tosh          | 89        |
+---------------+-----------+

chatgpt / sqlite

 SELECT
  by,
  SUM(score) AS sum_score
FROM
  hn
WHERE
  time >= strftime('%s', 'now') - 7 * 24 * 60 * 60
GROUP BY
  by
ORDER BY
  sum_score DESC
LIMIT
  15;
by            sum_score
------------  ---------
thunderbong   10107
todsacerdoti  9645
rntn          9608
Brajeshwar    8211
belter        7979
tosh          7328
bookofjoe     5785
PaulHoule     5539
geox          4441
rbanffy       4198
pseudolus     3923
speckx        3602
Bluestein     3394
fanf2         3276
doener        3028

claude / sqlite

sqlite> SELECT
    by,
    SUM(score) AS sum_score
FROM
    hn
WHERE
    time >= datetime('now', '-7 days')
GROUP BY
    by
ORDER BY
    sum_score DESC
LIMIT
    15
   ...> ;

by             sum_score
-------------  ---------
Pulcinella     245
meetpateltech  179
jmsflknr       171
surprisetalk   164
molli          163
vouwfietsman   145
james_pm       133
tcarambat1010  130
darthShadow    124
belter         123
luu            122
Brajeshwar     106
pseudolus      100
cmdalsanto     91
tosh           89

chatgpt / duckdb ("DuckDB supports NOW() and INTERVAL syntax similarly to PostgreSQL, so no major changes are necessary.")

SELECT
    by,
    SUM(score) AS sum_score
  FROM
    hn
  WHERE
    time >= NOW() - INTERVAL '7 days'
  GROUP BY
    by
  ORDER BY
    sum_score DESC
  LIMIT
    15;
┌───────────────┬───────────┐
│      by       │ sum_score │
│    varchar    │  int128   │
├───────────────┼───────────┤
│ Pulcinella    │       245 │
│ meetpateltech │       179 │
│ jmsflknr      │       171 │
│ surprisetalk  │       164 │
│ molli         │       163 │
│ vouwfietsman  │       145 │
│ james_pm      │       133 │
│ tcarambat1010 │       130 │
│ darthShadow   │       124 │
│ belter        │       123 │
│ luu           │       122 │
│ Brajeshwar    │       106 │
│ pseudolus     │       100 │
│ cmdalsanto    │        91 │
│ tosh          │        89 │
├───────────────┴───────────┤
│ 15 rows         2 columns │
└───────────────────────────┘

claude / duckdb ("This query will work as-is in DuckDB. ")

SELECT 
    by, 
    SUM(score) AS sum_score 
FROM 
    hn 
WHERE 
    time >= now() - INTERVAL '7 days' 
GROUP BY 
    by 
ORDER BY 
    sum_score DESC 
LIMIT 
    15
┌───────────────┬───────────┐
│      by       │ sum_score │
│    varchar    │  int128   │
├───────────────┼───────────┤
│ Pulcinella    │       245 │
│ meetpateltech │       179 │
│ jmsflknr      │       171 │
│ surprisetalk  │       164 │
│ molli         │       163 │
│ vouwfietsman  │       145 │
│ james_pm      │       133 │
│ tcarambat1010 │       130 │
│ darthShadow   │       124 │
│ belter        │       123 │
│ luu           │       122 │
│ Brajeshwar    │       106 │
│ pseudolus     │       100 │
│ cmdalsanto    │        91 │
│ tosh          │        89 │
├───────────────┴───────────┤
│ 15 rows         2 columns │
└───────────────────────────┘

q1

    with domains as (
      select
        url,
        substring(url from 'http[s]*://([^/$]+)') as domain
    from 
      hn
    where
      url is not null
    ),
    avg_and_max as (
      select
        substring(url from 'http[s]*://([^/$]+)') as domain,
        max(score::int) as max_score
      from
        hn
      group by
        substring(url from 'http[s]*://([^/$]+)')
    ),
    counted as (
      select 
        domain,
        count(*)
      from 
        domains
      group by
        domain
      order by
        count desc
    )
    select
      a.domain,
      c.count,
      a.max_score
    from
      avg_and_max a
    join
      counted c 
    using 
      (domain)
    where
      c.count > 5
    order by
      max_score desc
    limit 5

+------------------+-------+-----------+
| domain           | count | max_score |
+------------------+-------+-----------+
| openai.com       | 53    | 1130      |
| www.ftc.gov      | 17    | 954       |
| www.space.com    | 87    | 830       |
| www.apple.com    | 43    | 821       |
| en.wikipedia.org | 1070  | 801       |
+------------------+-------+-----------+

chatgpt / sqlite

WITH domains AS (
  SELECT
    url,
    substr(url, instr(url, '//') + 2, instr(substr(url, instr(url, '//') + 2), '/') - 1) AS domain
  FROM 
    hn
  WHERE
    url IS NOT NULL
),
avg_and_max AS (
  SELECT
    substr(url, instr(url, '//') + 2, instr(substr(url, instr(url, '//') + 2), '/') - 1) AS domain,
    MAX(score) AS max_score
  FROM
    hn
  GROUP BY
    substr(url, instr(url, '//') + 2, instr(substr(url, instr(url, '//') + 2), '/') - 1)
),
counted AS (
  SELECT 
    domain,
    COUNT(*) AS count
  FROM 
    domains
  GROUP BY
    domain
  ORDER BY
    count DESC
)
SELECT
  a.domain,
  c.count,
  a.max_score
FROM
  avg_and_max a
JOIN
  counted c 
USING 
  (domain)
WHERE
  c.count > 5
ORDER BY
  max_score DESC
LIMIT 5;

domain         count  max_score
-------------  -----  ---------
               3786
openai.com     53     1130
/              1624   1071
www.ftc.gov    17     954
www.space.com  87     830

claude / sqlite

WITH domains AS ( SELECT url, SUBSTR(url, INSTR(url, '://') + 3) AS full_domain, CASE WHEN INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') > 0 THEN SUBSTR(SUBSTR(url, INSTR(url, '://') + 3), 1, INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') - 1) ELSE SUBSTR(url, INSTR(url, '://') + 3) END AS domain FROM hn WHERE url IS NOT NULL AND url LIKE 'http%' ), avg_and_max AS ( SELECT CASE WHEN INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') > 0 THEN SUBSTR(SUBSTR(url, INSTR(url, '://') + 3), 1, INSTR(SUBSTR(url, INSTR(url, '://') + 3), '/') - 1) ELSE SUBSTR(url, INSTR(url, '://') + 3) END AS domain, MAX(CAST(score AS INTEGER)) AS max_score FROM hn WHERE url IS NOT NULL AND url LIKE 'http%' GROUP BY domain ), counted AS ( SELECT domain, COUNT(*) AS count FROM domains GROUP BY domain ORDER BY count DESC ) SELECT a.domain, c.count, a.max_score FROM avg_and_max a JOIN counted c USING (domain) WHERE c.count > 5 ORDER BY max_score DESC LIMIT 5

domain count max_score


openai.com 53 1130 www.ftc.gov 17 954 www.space.com 87 830 www.apple.com 43 821 en.wikipedia.org 1070 801

chatgpt / duckdb

"To translate this query into DuckDB, the main adjustment you need is to work with DuckDB's support for regular expressions and type casting. DuckDB supports regular expressions, so you can retain the substring() with regex matching, and ::int typecasting works similarly."

WITH domains AS (
  SELECT
    url,
    regexp_extract(url, 'http[s]*://([^/$]+)', 1) AS domain
  FROM 
    hn
  WHERE
    url IS NOT NULL
),
avg_and_max AS (
  SELECT
    regexp_extract(url, 'http[s]*://([^/$]+)', 1) AS domain,
    MAX(CAST(score AS INTEGER)) AS max_score
  FROM
    hn
  GROUP BY
    regexp_extract(url, 'http[s]*://([^/$]+)', 1)
),
counted AS (
  SELECT 
    domain,
    COUNT(*) AS count
  FROM 
    domains
  GROUP BY
    domain
  ORDER BY
    count DESC
)
SELECT
  a.domain,
  c.count,
  a.max_score
FROM
  avg_and_max a
JOIN
  counted c 
USING 
  (domain)
WHERE
  c.count > 5
ORDER BY
  max_score DESC
LIMIT 5;

┌──────────────────┬───────┬───────────┐
│      domain      │ count │ max_score │
│     varchar      │ int64 │   int32   │
├──────────────────┼───────┼───────────┤
│ openai.com       │    53 │      1130 │
│ www.ftc.gov      │    17 │       954 │
│ www.space.com    │    87 │       830 │
│ www.apple.com    │    43 │       821 │
│ en.wikipedia.org │  1070 │       801 │
└──────────────────┴───────┴───────────┘

claude / duckdb

"This query will work as-is in DuckDB."

It didn't.

Binder Error: Referenced column "count" not found in FROM clause!
Candidate bindings: "a.domain"
LINE 28:         count desc

"The issue is in the counted CTE where we're using count as both an aggregate function and a column alias, which is causing the conflict. Let's fix this by giving the count a specific alias."

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment