Skip to content

Instantly share code, notes, and snippets.

@normanlmfung
Last active September 26, 2023 20:32
Show Gist options
  • Save normanlmfung/22e7c7a4bdfdc951f06b8441ef0acfb7 to your computer and use it in GitHub Desktop.
Save normanlmfung/22e7c7a4bdfdc951f06b8441ef0acfb7 to your computer and use it in GitHub Desktop.
Crypto market making: scan exchanges for spread-gap opportunities
from datetime import datetime
import time
from itertools import combinations
import pandas as pd
from ccxt import bybit
from ccxt import okx
from ccxt import gateio
# Output CSV written at the end of the scan with the ranked/filtered candidates.
CANDIDATE_FILE_NAME = "mm_candidates.csv"
'''
Idea is, post limit orders in exchanges with wider spreads, and hedge where spread is tighter (Generally more established, bigger exchanges).
Script below scans spread between best bid/offer on orderbooks, from selected exchanges. Then find the gap in spread between unique combinations of exchanges for pairs they have in common.
It will rank the universe according to this gap (in bps). Then filter according to strategy parameters below.
Parameters:
min_volume_usdt: Try to trade where there's actually volume and capacity. This determines the scalability of your strategy.
To put it into perspective, if you make 10bps pnl on a trade. 10bps of $1000 is $1 pnl.
min_gap_bps: Minimum abs(gap_bps) between the two legs. This spread needs to cover commission.
Say min_gap_bps=30bps, and you pay 5bps per leg in execution. Pnl capacity max 30bps - 10bps = 20bps. You still need to allow for slippage.
how_many_minutes: The idea is we calculate rolling average (and standard deviation) over 'gap_bps' over a period of time. The script is scanning the markets on 60 sec intervals. 'how_many_minutes' specifies how many minutes (or how many intervals you wish to scan for).
Ideally I'd scan for a few days and look at stability on 'gap_bps' for given candidate. For purpose of this test, I just set to 10 (i.e. ten minutes)
'''
# Strategy filter parameters; see the explanation above.
strategy_param = {
'min_volume_usdt' : 1000,  # min top-of-book size (USDT) required on every side of both legs
'min_gap_bps' : 30,  # min abs(gap_bps) between the legs; must cover fees plus slippage
'how_many_minutes' : 10  # number of 60-second scan intervals (rolling-stats horizon)
}
# Instantiate exchanges. We research linear perpetual markets (preferred over
# spot: higher volumes and lower trade fees, although spot generally lists more
# pairs to trade).
#
# BUG FIX: ccxt reads exchange-level defaults from the nested 'options' dict;
# a top-level 'defaultType' entry in the constructor config is ignored, so the
# original would silently default to spot markets on some venues.
exchanges = [
    okx({'options': {'defaultType': 'swap'}}),
    bybit({'options': {'defaultType': 'linear'}}),
    gateio({'options': {'defaultType': 'swap'}})
]
def log(msg):
    """Print *msg* to stdout, prefixed with the current wall-clock timestamp."""
    stamp = datetime.now()
    print(str(stamp) + " " + str(msg))
# Load market metadata for every exchange so we can intersect their listings.
for exchange in exchanges:
    exchange.load_markets()

# Unique unordered pairs of exchanges to compare.
# (Renamed from `combinations` to stop shadowing the itertools import.)
exchange_pairs = list(combinations(exchanges, 2))
log([f"{x[0].name} vs {x[1].name}" for x in exchange_pairs])

# Step 1. Scan for pairs the two exchanges have in common: build the universe.
# Each entry is "ExchangeA-SYMBOL ExchangeB-SYMBOL"; Step 2 parses it back with
# split(' ') then split('-').
universe = []
for leg1_exchange, leg2_exchange in exchange_pairs:
    for leg1_market in leg1_exchange.markets:
        if leg1_market in leg2_exchange.markets:
            # Only keep markets flagged linear (USDT-margined perp) on BOTH venues.
            if leg1_exchange.markets[leg1_market]['linear'] and leg2_exchange.markets[leg1_market]['linear']:
                # BUG FIX: the original interpolated the exchange OBJECT
                # (f"...{leg2_exchange}-...") instead of its name, producing keys
                # the downstream split-based parsing cannot handle.
                universe.append(f"{leg1_exchange.name}-{leg1_market} {leg2_exchange.name}-{leg1_market}")
def _calc_spread_gap(x):
gap_bps = x[1]['leg1_spread_bps'] - x[1]['leg2_spread_bps']
x[1]['gap_bps'] = gap_bps
return gap_bps
def _top_of_book(order_book):
    """Return (bid_px, bid_vol, ask_px, ask_vol, mid, spread_bps) for a ccxt order book.

    ccxt returns bids sorted descending and asks ascending, so the top of
    book is the first entry on each side. Spread is quoted in basis points
    relative to the best bid.
    """
    best_bid_px, best_bid_volume = order_book['bids'][0][0], order_book['bids'][0][1]
    best_ask_px, best_ask_volume = order_book['asks'][0][0], order_book['asks'][0][1]
    mid = (best_ask_px + best_bid_px) / 2
    spread_bps = ((best_ask_px / best_bid_px) - 1) * 10000
    return best_bid_px, best_bid_volume, best_ask_px, best_ask_volume, mid, spread_bps

order_book_stats = []
iteration = 0
while iteration < strategy_param['how_many_minutes']:
    # Step 2. Calculate spreads for every candidate pair in the universe.
    for key in universe:
        try:
            # Keys look like "ExchangeA-SYMBOL ExchangeB-SYMBOL" (built in Step 1).
            # maxsplit=1 keeps symbols containing '-' intact.
            leg1_key, leg2_key = key.split(' ')
            leg1_exchange_name, leg1_ticker = leg1_key.split('-', 1)
            leg2_exchange_name, leg2_ticker = leg2_key.split('-', 1)
            leg1_exchange = next(x for x in exchanges if x.name == leg1_exchange_name)
            leg2_exchange = next(x for x in exchanges if x.name == leg2_exchange_name)

            leg1_ob = leg1_exchange.fetch_order_book(leg1_ticker, limit=5)
            (leg1_best_bid_px, leg1_best_bid_volume,
             leg1_best_ask_px, leg1_best_ask_volume,
             leg1_mid, leg1_spread_bps) = _top_of_book(leg1_ob)

            # BUG FIX: the original scaled leg2's spread by 100000 (one extra
            # zero) instead of 10000, inflating leg2_spread_bps 10x and
            # corrupting gap_bps; both legs now use the shared helper.
            leg2_ob = leg2_exchange.fetch_order_book(leg2_ticker, limit=5)
            (leg2_best_bid_px, leg2_best_bid_volume,
             leg2_best_ask_px, leg2_best_ask_volume,
             leg2_mid, leg2_spread_bps) = _top_of_book(leg2_ob)

            # We rank by spread gap, but record top-of-book volume too since
            # it indicates how much capacity the pair could support.
            order_book_stats.append({
                'key' : key,
                'iteration' : iteration,
                'ticker' : leg1_ticker,
                'leg1_exchange_name' : leg1_exchange_name,
                'leg2_exchange_name' : leg2_exchange_name,
                'leg1_spread_bps' : leg1_spread_bps,
                'leg1_best_bid_volume' : leg1_best_bid_volume,
                'leg1_best_bid_volume_usdt' : leg1_best_bid_volume * leg1_mid,
                'leg1_best_ask_volume' : leg1_best_ask_volume,
                'leg1_best_ask_volume_usdt' : leg1_best_ask_volume * leg1_mid,
                'leg2_spread_bps' : leg2_spread_bps,
                'leg2_best_bid_volume' : leg2_best_bid_volume,
                'leg2_best_bid_volume_usdt' : leg2_best_bid_volume * leg2_mid,
                'leg2_best_ask_volume' : leg2_best_ask_volume,
                'leg2_best_ask_volume_usdt' : leg2_best_ask_volume * leg2_mid,
                'leg1_ob_ts' : leg1_ob['timestamp'],
                'leg1_ob_dt' : datetime.fromtimestamp(leg1_ob['timestamp']/1000),
                'leg2_ob_ts' : leg2_ob['timestamp'],
                'leg2_ob_dt' : datetime.fromtimestamp(leg2_ob['timestamp']/1000),
                'gap_bps' : leg1_spread_bps - leg2_spread_bps
            })
        except Exception as ticker_error:
            # BUG FIX: the original logged an undefined name `ticker`, which
            # raised NameError inside the handler and aborted the scan.
            log(f"Oops {key} {ticker_error}")
    iteration += 1
    log(f"iteration: {iteration}")
    time.sleep(60)  # Wait 60 seconds between scan passes
# Step 3. Filter the observations, compute rolling stability stats per
# candidate, and persist the result to CSV.
pd_sorted_universe = pd.DataFrame(order_book_stats)

# Keep rows whose absolute spread gap clears the minimum (must cover fees).
gap_ok = abs(pd_sorted_universe.gap_bps) >= strategy_param['min_gap_bps']
pd_sorted_universe = pd_sorted_universe[gap_ok]

# Require tradable top-of-book size (in USDT) on every side of both legs.
min_vol = strategy_param['min_volume_usdt']
volume_ok = (
    (pd_sorted_universe.leg1_best_bid_volume_usdt >= min_vol)
    & (pd_sorted_universe.leg1_best_ask_volume_usdt >= min_vol)
    & (pd_sorted_universe.leg2_best_bid_volume_usdt >= min_vol)
    & (pd_sorted_universe.leg2_best_ask_volume_usdt >= min_vol)
)
pd_sorted_universe = pd_sorted_universe[volume_ok]

# Rolling mean/std of gap_bps per candidate key over half the scan horizon,
# as a crude stability measure.
# NOTE(review): rows were filtered above, so a rolling window may span
# non-consecutive iterations — confirm this is intended.
window_size = int(strategy_param['how_many_minutes'] / 2)
pd_sorted_universe['rolling_avg_gap_bps'] = pd_sorted_universe.groupby('key')['gap_bps'].rolling(window=window_size).mean().reset_index(level=0, drop=True)
pd_sorted_universe['rolling_std_gap_bps'] = pd_sorted_universe.groupby('key')['gap_bps'].rolling(window=window_size).std().reset_index(level=0, drop=True)

pd_sorted_universe.to_csv(CANDIDATE_FILE_NAME)
log(f"Done filter universe, check candidates file {CANDIDATE_FILE_NAME}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment