import os
import sys
import shutil
import multiprocessing as mp
from itertools import repeat
import dateparser
import pytz
import csv
import datetime
from dateutil.rrule import rrule, DAILY
from binance.client import Client
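# Resolve the directory this script was launched from; all output is written relative to it.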
pathname = os.path.dirname(sys.argv[0])
platform = sys.platform
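# Append the OS-appropriate path separator to a directory path.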
def os_file_prefix(platform, intended_dir):
    if platform == 'win32':
        return '{}\\'.format(intended_dir)
    else:
        return '{}/'.format(intended_dir)
full_path = os_file_prefix(platform, os.path.abspath(pathname))
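# Ask whether to pull data for a single currency pair or many.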
def one_or_many():
    data_pull_quantity = input('Would you like to pull data for one currency pair or many? (answer: 1 or many) ')
    if data_pull_quantity.lower() in ['1', 'one']:
        return 'one'
    else:
        return 'many'
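# Ask whether the user will supply an explicit pair list; anything other than 'list' means pull by base currency.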
def list_currency_option():
    list_or_base_query = input('Would you like to provide a list of pairs, or pull all available pairs by base currency? (answer: list or base) ')
    if list_or_base_query.lower() == 'list':
        return True
    return False
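# Prompt for a comma-separated pair list and validate each entry against Binance's tickers.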
def provide_pair_list():
    currency_pair_list = list()
    info = Client(None, None)
    pair_query = info.get_all_tickers()
    binance_pair_list = set(currency_pair['symbol'] for currency_pair in pair_query)
    pair_list_input = input('Please provide a comma separated list of currency pairs you would like to collect data for: (i.e. BATBTC,ARKETH,BTCUSDT) ')
    for pair in pair_list_input.split(','):
        if pair.strip().upper() in binance_pair_list:
            currency_pair_list.append(pair.strip().upper())
        else:
            raise ValueError('INVALID INPUT: {} is not a valid currency pair'.format(str(pair)))
    return currency_pair_list
def grab_base_currency():
    base_currency_preference = input('Which base currency would you like to grab data for? (BTC, ETH, BNB, USDT, PAX, TUSD, XRP, or USDC) ')
    return base_currency_preference.upper()
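# Build the Binance symbol by concatenating quote and base, e.g. ETH + BTC -> ETHBTC.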
def grab_quote_currency(base_currency):
    pair_list = list()
    quote_currency_preference = input('Which quote currency would you like to grab data for? ')
    pair_list.append('{}{}'.format(quote_currency_preference.upper(), base_currency))
    return pair_list
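# Collect every Binance symbol whose name ends with one of the requested base currencies.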
def grab_currencies_list():
    currencies_list = list()
    info = Client(None, None)
    pair_query = info.get_all_tickers()
    list_preference = input('Which base currencies would you like to grab data for? (e.g. BTC or ETH,BNB or BTC,ETH,BNB,USDT) ')
    for base_currency in list_preference.split(','):
        base = base_currency.strip().upper()
        for currency_pair in pair_query:
            if currency_pair['symbol'].endswith(base):
                currencies_list.append(currency_pair['symbol'])
    return currencies_list
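# Walk the prompts above and return the final list of pairs to download.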
def currency_data_user_input():
    pull_quantity = one_or_many()
    if pull_quantity == 'one':
        base_currency = grab_base_currency()
        pair_list = grab_quote_currency(base_currency)
    else:
        if list_currency_option():
            pair_list = provide_pair_list()
        else:
            pair_list = grab_currencies_list()
    return pair_list
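# Prompt for a date range; Binance has no data from before it opened in July 2017.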
def grab_date_interval():
    start_date_input = input('What date range would you like to pull data from?\nIn MM/DD/YYYY format, except you can enter now for the end date to get the most recent data.\nStart date: ')
    start_date_month, start_date_day, start_date_year = start_date_input.replace('.', '/').split('/')
    start_date = datetime.datetime(int(start_date_year), int(start_date_month), int(start_date_day))
    binance_start_date = datetime.datetime(2017, 7, 1)
    if start_date < binance_start_date:
        raise ValueError('INVALID START DATE: Binance opened in July of 2017, please try a date later than 07/01/2017')
    end_date_input = input('End date: ')
    if end_date_input.lower() in ['now']:
        end_date = datetime.datetime.now()
    else:
        end_date_month, end_date_day, end_date_year = end_date_input.replace('.', '/').split('/')
        end_date = datetime.datetime(int(end_date_year), int(end_date_month), int(end_date_day))
    return start_date, end_date
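# Prompt for a kline (candlestick) interval and reject unsupported values.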
def grab_kline_interval():
    kline_interval = input('What kline interval would you prefer? Options: 1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h ')
    valid_kline_intervals = ['1m', '3m', '5m', '15m', '30m', '1h', '2h', '4h', '6h', '8h', '12h']
    if kline_interval in valid_kline_intervals:
        return kline_interval
    else:
        raise ValueError('INVALID KLINE INTERVAL: {} is an invalid option, please try again'.format(str(kline_interval)))
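# Create historical_price_data/<start>_<end>_<interval>/<pair> directories, ignoring any that already exist.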
def create_directories(pair_list, kline_interval, start_date, end_date):
    start_date = start_date.strftime('%Y-%m-%d')
    end_date = end_date.strftime('%Y-%m-%d')
    historical_price_data_directory = '{}historical_price_data'.format(str(full_path))
    try:
        os.makedirs(historical_price_data_directory)
    except OSError:
        pass
    kline_interval_directory = ''.join([os_file_prefix(platform, historical_price_data_directory), '{}_{}_{}'.format(str(start_date), str(end_date), str(kline_interval))])
    try:
        os.makedirs(kline_interval_directory)
    except OSError:
        pass
    for p in pair_list:
        pair_directory = ''.join([os_file_prefix(platform, kline_interval_directory), '{}'.format(str(p))])
        try:
            os.makedirs(pair_directory)
        except OSError:
            pass
    return kline_interval_directory
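# Convert a human-readable date string to a UTC timestamp in milliseconds.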
def date_to_milliseconds(date_str):
    epoch = datetime.datetime.utcfromtimestamp(0).replace(tzinfo=pytz.utc)
    d = dateparser.parse(date_str)
    if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
        d = d.replace(tzinfo=pytz.utc)
    return int((d - epoch).total_seconds() * 1000.0)
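# Convert an interval string such as '1m' or '4h' to milliseconds; returns None if it cannot be parsed.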
def interval_to_milliseconds(interval):
    ms = None
    seconds_per_unit = {'m': 60, 'h': 60 * 60, 'd': 24 * 60 * 60, 'w': 7 * 24 * 60 * 60}
    unit = interval[-1]
    if unit in seconds_per_unit:
        try:
            ms = int(interval[:-1]) * seconds_per_unit[unit] * 1000
        except ValueError:
            pass
    return ms
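# Page through Binance's kline endpoint, 1000 candles per request, until the requested range is exhausted.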
def get_historical_klines(symbol, interval, start_str, end_str=None):
    client = Client(None, None)
    output_data = []
    limit = 1000
    timeframe = interval_to_milliseconds(interval)
    start_ts = date_to_milliseconds(start_str)
    end_ts = None
    if end_str:
        end_ts = date_to_milliseconds(end_str)
    idx = 0
    symbol_existed = False
    while True:
        temp_data = []  # initialized so a failed request ends the loop instead of raising NameError
        try:
            temp_data = client.get_klines(symbol=symbol, interval=interval, limit=limit, startTime=start_ts, endTime=end_ts)
            if not symbol_existed and len(temp_data):
                symbol_existed = True
            if symbol_existed:
                output_data += temp_data
                start_ts = temp_data[len(temp_data) - 1][0] + timeframe
            else:
                start_ts += timeframe
            idx += 1
        except Exception as e:
            print(str(e))
            idx += 1
        if len(temp_data) < limit:
            break
    return output_data
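# Expand the date range into one datetime per day, padded by a day so the final day is included.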
def process_dates(start_date, end_date):
    end_date = end_date + datetime.timedelta(days=1)
    dates = list(rrule(DAILY, dtstart=start_date, until=end_date))
    return dates
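# Download one day of klines at a time for a single pair, writing each day to its own dated CSV.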
def grab_data(pair, start_date, end_date, dates, kline_interval_directory, interval, csv_file_info):
    titles = ('Date', 'Open', 'High', 'Low', 'Close', 'Volume')
    partial_path = ''.join([os_file_prefix(platform, kline_interval_directory), os_file_prefix(platform, pair)])
    for x, date in enumerate(dates):
        if date != dates[-1]:
            # format each endpoint from its own datetime so month boundaries parse correctly
            start = date.strftime('%d %b, %Y')
            end = dates[x + 1].strftime('%d %b, %Y')
            print('currency pair: {} start: {} end: {}'.format(pair, start, end))
            klines = get_historical_klines(pair, interval, start, end)
            if klines:
                # strftime zero-pads the month and day, so the daily files sort correctly
                results_csv = '{}{}_{}.csv'.format(str(partial_path), date.strftime('%Y-%m-%d'), str(interval))
                with open(results_csv, 'a') as f:
                    writer = csv.writer(f)
                    writer.writerow(titles)
                    for k in klines:
                        if k != klines[-1]:
                            open_timestamp, open_, high, low, close_, volume, close_timestamp, quote_asset_volume, num_trades, taker_buy_base_asset_volume, taker_buy_quote_asset_volume, ignore = k
                            open_time = datetime.datetime.utcfromtimestamp(float(open_timestamp) / 1000).strftime('%Y-%m-%d %H:%M:%S.%f')
                            writer.writerow((open_time, open_, high, low, close_, volume))
    file_retrieval_info = pair, partial_path, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), interval
    csv_file_info.append(file_retrieval_info)
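# Stitch each pair's daily CSVs into a single file, archiving any previous concatenated CSV and the daily files.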
def concatenate_csvs(csv_file_info):
    for file_info in csv_file_info:
        pair, partial_path, start_date, end_date, interval = file_info
        individual_csvs_directory = '{}individual_csvs'.format(str(partial_path))
        try:
            os.makedirs(individual_csvs_directory)
        except OSError:
            pass
        csv_files = sorted(f for f in os.listdir(partial_path) if os.path.isfile(os.path.join(partial_path, f)))
        concat_csv = '{}_{}_{}_{}.csv'.format(str(pair), str(start_date), str(end_date), str(interval))
        if concat_csv in csv_files:
            old_concat_csvs_path = '{}old_concatenated_csvs'.format(str(partial_path))
            try:
                os.makedirs(old_concat_csvs_path)
            except OSError:
                pass
            shutil.move('{}{}'.format(str(partial_path), str(concat_csv)), ''.join([os_file_prefix(platform, old_concat_csvs_path), str(concat_csv)]))
            csv_files = sorted(f for f in os.listdir(partial_path) if os.path.isfile(os.path.join(partial_path, f)))
        if csv_files:
            outpath = '{}{}'.format(str(partial_path), str(concat_csv))
            with open(outpath, 'a') as fout:
                writer = csv.writer(fout, lineterminator='\n')
                for x, csv_file in enumerate(csv_files):
                    full_file_path = '{}{}'.format(str(partial_path), str(csv_file))
                    with open(full_file_path) as f:
                        if x != 0:
                            next(f)  # keep the header row from the first file only
                        for line in f:
                            if len(line) > 1:
                                timestamp, open_, high, low, close_, volume = line.split(',')
                                writer.writerow([timestamp, open_, high, low, close_, volume.strip()])
                    shutil.move(full_file_path, ''.join([os_file_prefix(platform, individual_csvs_directory), csv_file]))
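# Gather user input, then fan the per-pair downloads out across a multiprocessing pool.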
def main():
    csv_file_info = mp.Manager().list()
    pair_list = currency_data_user_input()
    interval = grab_kline_interval()
    start_date, end_date = grab_date_interval()
    kline_interval_directory = create_directories(pair_list, interval, start_date, end_date)
    dates = process_dates(start_date, end_date)
    pool = mp.Pool(processes=mp.cpu_count())
    pool.starmap(grab_data, zip(pair_list, repeat(start_date), repeat(end_date), repeat(dates), repeat(kline_interval_directory), repeat(interval), repeat(csv_file_info)))
    pool.close()
    pool.join()
    concatenate_csvs(list(set(csv_file_info)))
if __name__ == '__main__':
    main()
Thanks a lot!
Appreciate the kind words. I really hacked it together at first, but I spent a little time tonight cleaning it up, and the date-parsing parts should be considerably cleaner now. I haven't done a ton of testing, but it has handled everything so far: no more iterating over future dates with no data to be collected, and the like.
Great work, thanks
Line 82
IndentationError: unexpected unindent
Hi Ashger, I don't really maintain this one anymore as I released a more fully functional python package you can access at the link below:
https://github.com/uneasyguy/binance_data
Thank you. Is there a way to donate?
The package is not working properly; can you please help?
Thanks a lot!