Skip to content

Instantly share code, notes, and snippets.

@nicklatin
Last active May 3, 2022 02:04
Show Gist options
  • Save nicklatin/77c60c2b27a5e972b3ac912f4529f3ce to your computer and use it in GitHub Desktop.
Save nicklatin/77c60c2b27a5e972b3ac912f4529f3ce to your computer and use it in GitHub Desktop.
Extract, transform, load functions for python-binance API.
import pandas as pd
import logging
# initialize binance client
client = Client(os.environ.get('BINANCE_API_KEY'), os.environ.get('BINANCE_API_SECRET'))
# get metadata from binance exchange
def get_metadata_bin(quote='usdt', as_list=False):
"""
Pulls metadata from python-binance API.
Parameters
----------
quote: str, {'usdt', 'busd', 'btc', 'eth'}, default 'usdt'
Quote currency.
as_list: bool, default False
Returns pairs/index as list
Returns
-------
meta: DataFrame or list
Metadata from Binance exchange for selected quote currency pairs.
"""
# get pairs metadata from API
meta = pd.DataFrame(client.get_exchange_info()['symbols'])
# rename cols and set index
meta.rename(columns={'symbol' :'ticker'}, inplace=True)
meta.set_index('ticker', inplace=True)
# get quote pairs
if quote is not None:
meta = meta[(meta.quoteAsset == quote.upper())]
# list of assets
if as_list:
meta = meta.baseAsset.to_list()
return meta
# pull OHLCV data from Binance exchange
@timethis
def pull_ohlcv_bin(tickers='BTCUSDT', freq='1d', start_date=None, end_date=None, limit=1000):
"""
Pulls historical OHLCV prices from python-binance.
Parameters
----------
tickers: List or str
List of ticker symbols.
frequency: str, {'1m', '5m', '30m', '1h', '2h', '4h', '8h', '1d', '1w', '1m'}, default '1d'
Frequency of data observations.
start_date: str, optional, default None
Start date from which to pull data in 'yyyy-mm-dd' format, e.g. '2010-01-01'.
end_date: str, optional, default None
End date until which to pull data in 'yyyy-mm-dd' format, e.g. '2020-12-31'.
limit: int, default 1000
Max data observations to return.
Returns
-------
data: DataFrame - MultiIndex
Dataframe with DatetimeIndex (level 0), tickers (level 1) and OHLCV data (cols).
"""
# convert ticker str to list
if type(tickers) is str:
ticker_list = []
ticker_list.append(tickers)
tickers = ticker_list
# create empty dict
data_dict = {}
# loop through tickers list
for ticker in tickers:
# uppercase ticker
ticker = ticker.upper()
# earliest start date timestamp
earliest_tmsp = client._get_earliest_valid_timestamp(ticker, '1d')
# set start date
if start_date is not None and (pd.to_datetime(earliest_tmsp, unit='ms') < pd.to_datetime(start_date)):
start = round(pd.Timestamp(pd.to_datetime(start_date), unit='ms').timestamp() * 1000)
else:
start = earliest_tmsp
# end date
if pd.to_datetime(end_date) < pd.to_datetime(datetime.utcnow()):
end = round(pd.Timestamp(pd.to_datetime(end_date), unit='ms').timestamp() * 1000)
# create empty ohlcv df
df = pd.DataFrame()
# set number of attempts and bool for while loop
attempts, not_pulled = 1, True
# run a while loop to get ohlcv prices in case the pull attempt fails
while not_pulled:
try:
# request historical candle (or klines) data
data = client.get_historical_klines(ticker, freq, str(start), str(end), limit=limit)
not_pulled = False
except:
logging.warning("Failed to pull OHLCV data for: " + ticker + ". Attempt #" + str(attempts))
attempts += 1
if attempts >= 3:
print("Failed to pull OHLCV data " + ticker + " after " + str(attempts) + " attempts")
not_pulled = False
# create df with data
df = pd.DataFrame(data).astype(float)
# col names
df.columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume', 'trades',
'tbav', 'tbqav', 'q']
# round close time to date timestamp
df['date'] = pd.to_datetime(df['close_time'], unit='ms').dt.round('1s')
# add ticker info
df['ticker'] = ticker
# reorder cols and set index
df = df[['date', 'ticker', 'open', 'high', 'low', 'close', 'volume']]
df.set_index(['date', 'ticker'], inplace=True)
# add ohlcv df to data dict
data_dict[ticker] = df
# logging.info
logging.info(f"OHLCV data for {ticker} was pulled from {df.index[0][0]} to {df.index[-1][0]}.")
# create multiIndex df from data dict
ohlcv_df = pd.concat(data_dict).droplevel(0)
return ohlcv_df.sort_index()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment