Skip to content

Instantly share code, notes, and snippets.

@notwedtm
Created October 30, 2024 19:51
Show Gist options
  • Save notwedtm/e8b6236b60d4f71fe443dfe4191d13da to your computer and use it in GitHub Desktop.
Save notwedtm/e8b6236b60d4f71fe443dfe4191d13da to your computer and use it in GitHub Desktop.
Copy Cat Tokens
use birdeye;
WITH filtered_tokens AS (
SELECT *
FROM token_listings
WHERE liquidity > 0
AND liquidity_added_at >= now() - INTERVAL 24 HOUR
)
SELECT
t1.address AS original_address,
t2.address AS copycat_address,
t1.name AS original_name,
t2.name AS potential_copycat_name,
t1.symbol AS original_symbol,
t2.symbol AS copycat_symbol,
round(t1.liquidity, 2) AS original_liquidity,
round(t2.liquidity, 2) AS copycat_liquidity,
t1.liquidity_added_at AS original_created_at,
t2.liquidity_added_at AS copycat_created_at,
dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) AS ms_between_creation,
concat(
toString(dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)), ' ms',
' (',
toString(round(dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)/1000.0, 2)), ' seconds)'
) AS time_difference,
levenshteinDistance(lowerUTF8(t1.name), lowerUTF8(t2.name)) AS name_distance,
lengthUTF8(t2.name) - lengthUTF8(t1.name) AS char_difference,
round(
(100 - dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)/216000) *
(1 + (lengthUTF8(t2.name) - lengthUTF8(t1.name) > 0)::Int) *
(1 + (replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', '') = replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', ''))::Int) *
(1 + (dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) < 5000)::Int),
2
) AS suspicion_score
FROM filtered_tokens AS t1
CROSS JOIN filtered_tokens AS t2
WHERE
-- Time and basic filtering conditions
t1.liquidity_added_at < t2.liquidity_added_at
AND t1.name != t2.name
AND dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) <= 21600000
AND dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) > 0
-- Name similarity conditions
AND (
levenshteinDistance(
replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', ''),
replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', '')
) <= 2
OR position(
replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', ''),
replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', '')
) > 0
OR position(
replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', ''),
replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', '')
) > 0
)
ORDER BY suspicion_score DESC, ms_between_creation ASC
LIMIT 100;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment