Created
November 3, 2024 01:41
-
-
Save notwedtm/020877299ee9adab4492a91c767df6d3 to your computer and use it in GitHub Desktop.
PRISM: Copy Cat Tokens
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- use database | |
use birdeye; | |
WITH filtered_tokens AS ( | |
SELECT | |
name, | |
symbol, | |
liquidity, | |
lowerUTF8(name) AS normalized_name, | |
lengthUTF8(name) AS name_length, | |
liquidity_added_at, | |
-- Extract only alphanumeric characters for comparison | |
replaceRegexpAll(lowerUTF8(name), '[^a-z0-9]', '') AS clean_name | |
FROM token_listings | |
WHERE liquidity > 0 | |
AND liquidity_added_at >= now() - INTERVAL 24 HOUR | |
), | |
similar_tokens AS ( | |
SELECT | |
t1.name AS original_name, | |
t2.name AS potential_copycat_name, | |
t1.symbol AS original_symbol, | |
t2.symbol AS copycat_symbol, | |
t1.liquidity AS original_liquidity, | |
t2.liquidity AS copycat_liquidity, | |
t1.liquidity_added_at AS original_created_at, | |
t2.liquidity_added_at AS copycat_created_at, | |
levenshteinDistance(t1.normalized_name, t2.normalized_name) AS name_distance, | |
-- Calculate time difference in minutes | |
dateDiff('minute', t1.liquidity_added_at, t2.liquidity_added_at) AS minutes_between_creation, | |
-- Check if names are identical when stripped of special characters | |
t1.clean_name = t2.clean_name AS same_base_name, | |
-- Calculate how many extra characters were added | |
lengthUTF8(t2.name) - lengthUTF8(t1.name) AS char_difference | |
FROM filtered_tokens t1 | |
CROSS JOIN filtered_tokens t2 | |
WHERE t1.liquidity_added_at < t2.liquidity_added_at -- t1 is the original, t2 is the potential copycat | |
AND t1.name != t2.name -- Different names | |
AND ( | |
-- Names are very similar when cleaned | |
levenshteinDistance(t1.clean_name, t2.clean_name) <= 2 | |
-- Or same name with added characters | |
OR position(t1.clean_name, t2.clean_name) > 0 | |
OR position(t2.clean_name, t1.clean_name) > 0 | |
) | |
-- Created within 6 hours of each other | |
AND dateDiff('minute', t1.liquidity_added_at, t2.liquidity_added_at) <= 360 | |
) | |
SELECT | |
original_name, | |
potential_copycat_name, | |
original_symbol, | |
copycat_symbol, | |
round(original_liquidity, 2) AS original_liquidity, | |
round(copycat_liquidity, 2) AS copycat_liquidity, | |
formatDateTime(original_created_at, '%Y-%m-%d %H:%M:%S') as original_created_at, | |
formatDateTime(copycat_created_at, '%Y-%m-%d %H:%M:%S') as copycat_created_at, | |
minutes_between_creation, | |
name_distance, | |
char_difference, | |
-- Calculate suspicion score (higher = more suspicious) | |
round( | |
(100 - minutes_between_creation/3.6) * -- Time proximity (max 100) | |
(1 + (char_difference > 0)::Int) * -- Penalty for added characters | |
(1 + same_base_name::Int), -- Extra weight if same base name | |
2 | |
) AS suspicion_score | |
FROM similar_tokens | |
WHERE minutes_between_creation > 0 -- Ensure original came first | |
ORDER BY | |
suspicion_score DESC, -- Most suspicious first | |
minutes_between_creation ASC -- Then by how quickly they were created after original | |
LIMIT 100; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment