Last active
May 3, 2022 02:04
-
-
Save nicklatin/77c60c2b27a5e972b3ac912f4529f3ce to your computer and use it in GitHub Desktop.
Extract, transform, load functions for python-binance API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import logging | |
# initialize binance client | |
client = Client(os.environ.get('BINANCE_API_KEY'), os.environ.get('BINANCE_API_SECRET')) | |
# get metadata from binance exchange | |
def get_metadata_bin(quote='usdt', as_list=False): | |
""" | |
Pulls metadata from python-binance API. | |
Parameters | |
---------- | |
quote: str, {'usdt', 'busd', 'btc', 'eth'}, default 'usdt' | |
Quote currency. | |
as_list: bool, default False | |
Returns pairs/index as list | |
Returns | |
------- | |
meta: DataFrame or list | |
Metadata from Binance exchange for selected quote currency pairs. | |
""" | |
# get pairs metadata from API | |
meta = pd.DataFrame(client.get_exchange_info()['symbols']) | |
# rename cols and set index | |
meta.rename(columns={'symbol' :'ticker'}, inplace=True) | |
meta.set_index('ticker', inplace=True) | |
# get quote pairs | |
if quote is not None: | |
meta = meta[(meta.quoteAsset == quote.upper())] | |
# list of assets | |
if as_list: | |
meta = meta.baseAsset.to_list() | |
return meta | |
# pull OHLCV data from Binance exchange | |
@timethis | |
def pull_ohlcv_bin(tickers='BTCUSDT', freq='1d', start_date=None, end_date=None, limit=1000): | |
""" | |
Pulls historical OHLCV prices from python-binance. | |
Parameters | |
---------- | |
tickers: List or str | |
List of ticker symbols. | |
frequency: str, {'1m', '5m', '30m', '1h', '2h', '4h', '8h', '1d', '1w', '1m'}, default '1d' | |
Frequency of data observations. | |
start_date: str, optional, default None | |
Start date from which to pull data in 'yyyy-mm-dd' format, e.g. '2010-01-01'. | |
end_date: str, optional, default None | |
End date until which to pull data in 'yyyy-mm-dd' format, e.g. '2020-12-31'. | |
limit: int, default 1000 | |
Max data observations to return. | |
Returns | |
------- | |
data: DataFrame - MultiIndex | |
Dataframe with DatetimeIndex (level 0), tickers (level 1) and OHLCV data (cols). | |
""" | |
# convert ticker str to list | |
if type(tickers) is str: | |
ticker_list = [] | |
ticker_list.append(tickers) | |
tickers = ticker_list | |
# create empty dict | |
data_dict = {} | |
# loop through tickers list | |
for ticker in tickers: | |
# uppercase ticker | |
ticker = ticker.upper() | |
# earliest start date timestamp | |
earliest_tmsp = client._get_earliest_valid_timestamp(ticker, '1d') | |
# set start date | |
if start_date is not None and (pd.to_datetime(earliest_tmsp, unit='ms') < pd.to_datetime(start_date)): | |
start = round(pd.Timestamp(pd.to_datetime(start_date), unit='ms').timestamp() * 1000) | |
else: | |
start = earliest_tmsp | |
# end date | |
if pd.to_datetime(end_date) < pd.to_datetime(datetime.utcnow()): | |
end = round(pd.Timestamp(pd.to_datetime(end_date), unit='ms').timestamp() * 1000) | |
# create empty ohlcv df | |
df = pd.DataFrame() | |
# set number of attempts and bool for while loop | |
attempts, not_pulled = 1, True | |
# run a while loop to get ohlcv prices in case the pull attempt fails | |
while not_pulled: | |
try: | |
# request historical candle (or klines) data | |
data = client.get_historical_klines(ticker, freq, str(start), str(end), limit=limit) | |
not_pulled = False | |
except: | |
logging.warning("Failed to pull OHLCV data for: " + ticker + ". Attempt #" + str(attempts)) | |
attempts += 1 | |
if attempts >= 3: | |
print("Failed to pull OHLCV data " + ticker + " after " + str(attempts) + " attempts") | |
not_pulled = False | |
# create df with data | |
df = pd.DataFrame(data).astype(float) | |
# col names | |
df.columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume', 'trades', | |
'tbav', 'tbqav', 'q'] | |
# round close time to date timestamp | |
df['date'] = pd.to_datetime(df['close_time'], unit='ms').dt.round('1s') | |
# add ticker info | |
df['ticker'] = ticker | |
# reorder cols and set index | |
df = df[['date', 'ticker', 'open', 'high', 'low', 'close', 'volume']] | |
df.set_index(['date', 'ticker'], inplace=True) | |
# add ohlcv df to data dict | |
data_dict[ticker] = df | |
# logging.info | |
logging.info(f"OHLCV data for {ticker} was pulled from {df.index[0][0]} to {df.index[-1][0]}.") | |
# create multiIndex df from data dict | |
ohlcv_df = pd.concat(data_dict).droplevel(0) | |
return ohlcv_df.sort_index() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment