Skip to content

Instantly share code, notes, and snippets.

@notwedtm
Created November 3, 2024 01:41
Show Gist options
  • Save notwedtm/020877299ee9adab4492a91c767df6d3 to your computer and use it in GitHub Desktop.
Save notwedtm/020877299ee9adab4492a91c767df6d3 to your computer and use it in GitHub Desktop.
PRISM: Copy Cat Tokens
-- use database
use birdeye;
WITH filtered_tokens AS (
SELECT
name,
symbol,
liquidity,
lowerUTF8(name) AS normalized_name,
lengthUTF8(name) AS name_length,
liquidity_added_at,
-- Extract only alphanumeric characters for comparison
replaceRegexpAll(lowerUTF8(name), '[^a-z0-9]', '') AS clean_name
FROM token_listings
WHERE liquidity > 0
AND liquidity_added_at >= now() - INTERVAL 24 HOUR
),
similar_tokens AS (
SELECT
t1.name AS original_name,
t2.name AS potential_copycat_name,
t1.symbol AS original_symbol,
t2.symbol AS copycat_symbol,
t1.liquidity AS original_liquidity,
t2.liquidity AS copycat_liquidity,
t1.liquidity_added_at AS original_created_at,
t2.liquidity_added_at AS copycat_created_at,
levenshteinDistance(t1.normalized_name, t2.normalized_name) AS name_distance,
-- Calculate time difference in minutes
dateDiff('minute', t1.liquidity_added_at, t2.liquidity_added_at) AS minutes_between_creation,
-- Check if names are identical when stripped of special characters
t1.clean_name = t2.clean_name AS same_base_name,
-- Calculate how many extra characters were added
lengthUTF8(t2.name) - lengthUTF8(t1.name) AS char_difference
FROM filtered_tokens t1
CROSS JOIN filtered_tokens t2
WHERE t1.liquidity_added_at < t2.liquidity_added_at -- t1 is the original, t2 is the potential copycat
AND t1.name != t2.name -- Different names
AND (
-- Names are very similar when cleaned
levenshteinDistance(t1.clean_name, t2.clean_name) <= 2
-- Or same name with added characters
OR position(t1.clean_name, t2.clean_name) > 0
OR position(t2.clean_name, t1.clean_name) > 0
)
-- Created within 6 hours of each other
AND dateDiff('minute', t1.liquidity_added_at, t2.liquidity_added_at) <= 360
)
SELECT
original_name,
potential_copycat_name,
original_symbol,
copycat_symbol,
round(original_liquidity, 2) AS original_liquidity,
round(copycat_liquidity, 2) AS copycat_liquidity,
formatDateTime(original_created_at, '%Y-%m-%d %H:%M:%S') as original_created_at,
formatDateTime(copycat_created_at, '%Y-%m-%d %H:%M:%S') as copycat_created_at,
minutes_between_creation,
name_distance,
char_difference,
-- Calculate suspicion score (higher = more suspicious)
round(
(100 - minutes_between_creation/3.6) * -- Time proximity (max 100)
(1 + (char_difference > 0)::Int) * -- Penalty for added characters
(1 + same_base_name::Int), -- Extra weight if same base name
2
) AS suspicion_score
FROM similar_tokens
WHERE minutes_between_creation > 0 -- Ensure original came first
ORDER BY
suspicion_score DESC, -- Most suspicious first
minutes_between_creation ASC -- Then by how quickly they were created after original
LIMIT 100;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment