notwedtm · October 30, 2024 19:51
diff --git a/simple.sql b/simple.sql
 use birdeye;

 WITH filtered_tokens AS (
    SELECT *
    FROM token_listings
    WHERE liquidity > 0
        AND liquidity_added_at >= now() - INTERVAL 24 HOUR
 )
 SELECT
    t1.address AS original_address,
    t2.address AS copycat_address,
    t1.name AS original_name,
    t2.name AS potential_copycat_name,
    t1.symbol AS original_symbol,
    t2.symbol AS copycat_symbol,
    round(t1.liquidity, 2) AS original_liquidity,
    round(t2.liquidity, 2) AS copycat_liquidity,
    t1.liquidity_added_at AS original_created_at,
    t2.liquidity_added_at AS copycat_created_at,
    dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) AS ms_between_creation,
    concat(
        toString(dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)), ' ms',
        ' (',
        toString(round(dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)/1000.0, 2)), ' seconds)'
    ) AS time_difference,
    levenshteinDistance(lowerUTF8(t1.name), lowerUTF8(t2.name)) AS name_distance,
    lengthUTF8(t2.name) - lengthUTF8(t1.name) AS char_difference,
    round(
        (100 - dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)/216000) *
        (1 + (lengthUTF8(t2.name) - lengthUTF8(t1.name) > 0)::Int) *
        (1 + (replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', '') = replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', ''))::Int) *
        (1 + (dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) < 5000)::Int),
        2
    ) AS suspicion_score
 FROM filtered_tokens AS t1
 CROSS JOIN filtered_tokens AS t2
 WHERE
    -- Time and basic filtering conditions
    t1.liquidity_added_at < t2.liquidity_added_at
    AND t1.name != t2.name
    AND dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) <= 21600000
    AND dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) > 0
    -- Name similarity conditions
    AND (
        levenshteinDistance(
            replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', ''),
            replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', '')
        ) <= 2
        OR position(
            replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', ''),
            replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', '')
        ) > 0
        OR position(
            replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', ''),
            replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', '')
        ) > 0
    )
 ORDER BY suspicion_score DESC, ms_between_creation ASC
 LIMIT 100;
	use birdeye;

	WITH filtered_tokens AS (
	SELECT *
	FROM token_listings
	WHERE liquidity > 0
	AND liquidity_added_at >= now() - INTERVAL 24 HOUR
	)
	SELECT
	t1.address AS original_address,
	t2.address AS copycat_address,
	t1.name AS original_name,
	t2.name AS potential_copycat_name,
	t1.symbol AS original_symbol,
	t2.symbol AS copycat_symbol,
	round(t1.liquidity, 2) AS original_liquidity,
	round(t2.liquidity, 2) AS copycat_liquidity,
	t1.liquidity_added_at AS original_created_at,
	t2.liquidity_added_at AS copycat_created_at,
	dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) AS ms_between_creation,
	concat(
	toString(dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)), ' ms',
	' (',
	toString(round(dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)/1000.0, 2)), ' seconds)'
	) AS time_difference,
	levenshteinDistance(lowerUTF8(t1.name), lowerUTF8(t2.name)) AS name_distance,
	lengthUTF8(t2.name) - lengthUTF8(t1.name) AS char_difference,
	round(
	(100 - dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at)/216000) *
	(1 + (lengthUTF8(t2.name) - lengthUTF8(t1.name) > 0)::Int) *
	(1 + (replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', '') = replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', ''))::Int) *
	(1 + (dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) < 5000)::Int),
	2
	) AS suspicion_score
	FROM filtered_tokens AS t1
	CROSS JOIN filtered_tokens AS t2
	WHERE
	-- Time and basic filtering conditions
	t1.liquidity_added_at < t2.liquidity_added_at
	AND t1.name != t2.name
	AND dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) <= 21600000
	AND dateDiff('millisecond', t1.liquidity_added_at, t2.liquidity_added_at) > 0
	-- Name similarity conditions
	AND (
	levenshteinDistance(
	replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', ''),
	replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', '')
	) <= 2
	OR position(
	replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', ''),
	replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', '')
	) > 0
	OR position(
	replaceRegexpAll(lowerUTF8(t2.name), '[^a-z0-9]', ''),
	replaceRegexpAll(lowerUTF8(t1.name), '[^a-z0-9]', '')
	) > 0
	)
	ORDER BY suspicion_score DESC, ms_between_creation ASC
	LIMIT 100;