Last active
August 4, 2025 18:09
-
-
Save joswr1ght/cf8283844e644faee1f53d33a220e842 to your computer and use it in GitHub Desktop.
ClickHouse Analysis of Repeated Usernames with Password Disclosure from COMB List
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ``` | |
| ### Create a table to store breach credentials with support for statistical sampling | |
| Mac.localdomain :) CREATE TABLE credentials ( | |
| username String, | |
| password String | |
| ) ENGINE = MergeTree() | |
| ORDER BY (username, cityHash64(username)) | |
| SAMPLE BY cityHash64(username); | |
| CREATE TABLE credentials | |
| ( | |
| `username` String, | |
| `password` String | |
| ) | |
| ENGINE = MergeTree | |
| ORDER BY (username, cityHash64(username)) | |
| SAMPLE BY cityHash64(username) | |
| Query id: 956062ab-4b7f-4f81-9e91-787cd51fbb89 | |
| Ok. | |
| 0 rows in set. Elapsed: 0.005 sec. | |
| ### Insert the COMB data into the table | |
| Mac.localdomain :) INSERT INTO credentials SELECT | |
| splitByChar(':', line)[1] as username, | |
| splitByChar(':', line)[2] as password | |
| FROM file('/Users/jwright/Hack/comb.txt', 'LineAsString', 'line String'); | |
| INSERT INTO credentials SELECT | |
| splitByChar(':', line)[1] AS username, | |
| splitByChar(':', line)[2] AS password | |
| FROM file('/Users/jwright/Hack/comb.txt', 'LineAsString', 'line String') | |
| Query id: 71b9d1b0-9c51-4072-bdb9-9b581374b6ac | |
| Ok. | |
| 0 rows in set. Elapsed: 436.611 sec. Processed 3.52 billion rows, 123.88 GB (8.06 million rows/s., 283.73 MB/s.) | |
| Peak memory usage: 454.37 MiB. | |
| ### | |
| # This query: | |
| # Gets unique username/password combinations with '@' in username | |
| # Groups by username and counts distinct passwords per username | |
| # Calculates what percentage of unique usernames have 4+ different passwords | |
| ### | |
| Mac.localdomain :) WITH unique_creds AS (SELECT DISTINCT username, password FROM credentials SAMPLE 0.01 WHERE username LIKE '%@%'), username_stats AS (SELECT username, count() as password_count FROM unique_creds GROUP BY username) SELECT countIf(password_count >= 4) * 100.0 / count() AS percentage, count() as total_unique_usernames, countIf(password_count >= 4) as usernames_with_4plus_passwords FROM username_stats; | |
| WITH | |
| unique_creds AS | |
| ( | |
| SELECT DISTINCT | |
| username, | |
| password | |
| FROM credentials | |
| SAMPLE 1 / 100 | |
| WHERE username LIKE '%@%' | |
| ), | |
| username_stats AS | |
| ( | |
| SELECT | |
| username, | |
| count() AS password_count | |
| FROM unique_creds | |
| GROUP BY username | |
| ) | |
| SELECT | |
| (countIf(password_count >= 4) * 100.) / count() AS percentage, | |
| count() AS total_unique_usernames, | |
| countIf(password_count >= 4) AS usernames_with_4plus_passwords | |
| FROM username_stats | |
| Query id: 2f029696-1c56-4208-be9b-f7ce4eb4604d | |
| ┌────────percentage─┬─total_unique_usernames─┬─usernames_with_4plus_passwords─┐ | |
| 1. │ 3.797438926163903 │ 23313502 │ 885316 │ | |
| └───────────────────┴────────────────────────┴────────────────────────────────┘ | |
| 1 row in set. Elapsed: 23.423 sec. Processed 3.52 billion rows, 167.35 GB (150.17 million rows/s., 7.14 GB/s.) | |
| Peak memory usage: 5.51 GiB. | |
| ### | |
| # Similar query, but for 10 or more passwords for a given username | |
| ### | |
| Mac.localdomain :) WITH unique_creds AS (SELECT DISTINCT username, password FROM credentials SAMPLE 0.01 WHERE username LIKE '%@%'), username_stats AS (SELECT username, count() as password_count FROM unique_creds GROUP BY username) SELECT countIf(password_count >= 10) * 100.0 / count() AS percentage, count() as total_unique_usernames, countIf(password_count >= 10) as usernames_with_10plus_passwords FROM username_stats; | |
| WITH | |
| unique_creds AS | |
| ( | |
| SELECT DISTINCT | |
| username, | |
| password | |
| FROM credentials | |
| SAMPLE 1 / 100 | |
| WHERE username LIKE '%@%' | |
| ), | |
| username_stats AS | |
| ( | |
| SELECT | |
| username, | |
| count() AS password_count | |
| FROM unique_creds | |
| GROUP BY username | |
| ) | |
| SELECT | |
| (countIf(password_count >= 10) * 100.) / count() AS percentage, | |
| count() AS total_unique_usernames, | |
| countIf(password_count >= 10) AS usernames_with_10plus_passwords | |
| FROM username_stats | |
| Query id: 1b4e2a4c-cf18-44a3-905e-308339217a14 | |
| ┌─────────percentage─┬─total_unique_usernames─┬─usernames_with_10plus_passwords─┐ | |
| 1. │ 0.7765199754202522 │ 23313502 │ 181034 │ | |
| └────────────────────┴────────────────────────┴─────────────────────────────────┘ | |
| 1 row in set. Elapsed: 23.385 sec. Processed 3.52 billion rows, 167.35 GB (150.42 million rows/s., 7.16 GB/s.) | |
| Peak memory usage: 5.50 GiB. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment