Skip to content

Instantly share code, notes, and snippets.

@buswedg
Created April 8, 2020 03:51
Show Gist options
  • Select an option

  • Save buswedg/e870dd5909d18947c3f1b87fc3ee3020 to your computer and use it in GitHub Desktop.

Select an option

Save buswedg/e870dd5909d18947c3f1b87fc3ee3020 to your computer and use it in GitHub Desktop.
pulling_free_us_stock_market_data\tracker_generator
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"import logging\n",
"\n",
"import datetime as dt\n",
"from datetime import date\n",
"\n",
"import string\n",
"import re\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def check_or_create_dir(directory):\n",
"    \"\"\"Create ``directory`` (and any missing parents) if it does not exist.\n",
"\n",
"    Does nothing when the path already exists.\n",
"    \"\"\"\n",
"    if not check_exists(directory):\n",
"        # exist_ok guards against the directory appearing between the\n",
"        # existence check above and the creation call.\n",
"        os.makedirs(directory, exist_ok=True)\n",
" \n",
" \n",
"def check_exists(path):\n",
"    \"\"\"Report whether ``path`` exists, as determined by ``os.path.exists``.\"\"\"\n",
"    path_is_present = os.path.exists(path)\n",
"    return path_is_present\n",
"\n",
"\n",
"def get_latest_file(dir):\n",
"    \"\"\"Return the name of the most recently dated file in ``dir``.\n",
"\n",
"    File names are expected to start with a ``YYYYMMDD`` date followed by an\n",
"    underscore (e.g. ``20200408_tracker.csv``). Entries that are not regular\n",
"    files, or whose prefix does not parse as such a date, are ignored.\n",
"\n",
"    Returns the bare file name (not the full path), or None when ``dir``\n",
"    contains no dated file.\n",
"    \"\"\"\n",
"    dated_files = []\n",
"\n",
"    for f in os.listdir(dir):\n",
"        if not os.path.isfile(os.path.join(dir, f)):\n",
"            continue\n",
"\n",
"        try:\n",
"            file_date = dt.datetime.strptime(f.split('_')[0], '%Y%m%d')\n",
"        except ValueError:\n",
"            # Not a date-prefixed file; skip it instead of aborting the lookup.\n",
"            continue\n",
"\n",
"        dated_files.append((file_date, f))\n",
"\n",
"    # Newest date wins; ties on the date are broken by file name.\n",
"    latest_file = max(dated_files)[1] if dated_files else None\n",
"\n",
"    return latest_file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def aggregate_lists(ls_cols_list, list_dirs, dic_dir):\n",
"    \"\"\"Combine the latest list file of each directory into one DataFrame.\n",
"\n",
"    Args:\n",
"        ls_cols_list: Column names to keep; also the column order of the result.\n",
"        list_dirs: Directories, relative to ``dic_dir['base']``, that each hold\n",
"            date-prefixed, semicolon-separated list files.\n",
"        dic_dir: Mapping that provides the ``'base'`` directory.\n",
"\n",
"    Returns:\n",
"        A DataFrame with exactly the columns in ``ls_cols_list``. Columns that\n",
"        a source file lacks are filled with NaN for that file's rows. The row\n",
"        index of each source file is kept as read.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: If one of the directories contains no list file.\n",
"    \"\"\"\n",
"    frames = []\n",
"\n",
"    for list_dir in list_dirs:\n",
"        list_dir = os.path.join(dic_dir['base'], list_dir)\n",
"\n",
"        df_list_file = get_latest_file(list_dir)\n",
"        if df_list_file is None:\n",
"            # Fail with a clear message instead of a TypeError from os.path.join.\n",
"            raise FileNotFoundError(\"No list file found in \" + list_dir)\n",
"        df_list_path = os.path.join(list_dir, df_list_file)\n",
"\n",
"        df_list = pd.read_csv(df_list_path, header=0, sep=';', quotechar='\"')\n",
"\n",
"        # Keep only the requested columns that this file actually provides.\n",
"        frames.append(df_list[[col for col in ls_cols_list if col in df_list.columns]])\n",
"\n",
"    if not frames:\n",
"        return pd.DataFrame([], columns=ls_cols_list)\n",
"\n",
"    # One concat instead of repeated DataFrame.append (removed in pandas 2.0);\n",
"    # reindex restores the full requested column set and order.\n",
"    return pd.concat(frames, sort=False).reindex(columns=ls_cols_list)\n",
"\n",
"\n",
"def to_csv(df, dic_dir, dic_args):\n",
"    \"\"\"Write ``df`` to a dated tracker CSV in the resolved tracker directory.\n",
"\n",
"    The columns ``last_ticker_pull`` and ``last_indicator_calc`` are set on\n",
"    ``df`` (in place) to 1990-01-01 before writing. The target directory is\n",
"    ``dic_dir['base']`` joined with ``dic_args['tracker_dir']``, with every\n",
"    ``{name}`` placeholder replaced by ``dic_args[name]``; it is stored back\n",
"    into ``dic_dir['tracker_dir']`` and created if missing. The file is named\n",
"    ``<YYYYMMDD>_tracker.csv`` after today's date.\n",
"    \"\"\"\n",
"    initial_stamp = dt.datetime(1990, 1, 1)\n",
"    df['last_ticker_pull'] = initial_stamp\n",
"    df['last_indicator_calc'] = initial_stamp\n",
"\n",
"    dic_dir['tracker_dir'] = os.path.join(dic_dir['base'], dic_args['tracker_dir'])\n",
"\n",
"    # Placeholder names are collected once, from the unexpanded template.\n",
"    placeholder_names = re.findall(r'\\{(.*?)\\}', dic_dir['tracker_dir'])\n",
"    for name in placeholder_names:\n",
"        token = '{' + name + '}'\n",
"        dic_dir['tracker_dir'] = dic_dir['tracker_dir'].replace(token, dic_args[name])\n",
"\n",
"    today_stamp = date.today().strftime('%Y%m%d')\n",
"    file_name = \"\".join([today_stamp, \"_tracker\", \".csv\"])\n",
"\n",
"    target_dir = dic_dir['tracker_dir']\n",
"    target_path = os.path.join(target_dir, file_name)\n",
"\n",
"    check_or_create_dir(target_dir)\n",
"\n",
"    df.to_csv(\n",
"        target_path,\n",
"        sep=';',\n",
"        quotechar='\"',\n",
"        date_format='%Y-%m-%d %H:%M:%S.%f',\n",
"        index=False,\n",
"    )\n",
"\n",
"\n",
"def create_list(dic_dir, dic_args):\n",
"    \"\"\"Build the entity tracker and hand it to ``to_csv``.\n",
"\n",
"    Aggregates the exchange component lists and the index component lists,\n",
"    then adds one column per distinct ``composite_symbol`` of the index lists\n",
"    to the exchange frame: ``Y`` where the row's ``entity_symbol`` belongs to\n",
"    that index, ``N`` otherwise. The frame is sorted by ``entity_symbol``\n",
"    before it is written.\n",
"\n",
"    Reads the module-level ``ls_cols_list`` for the columns to keep.\n",
"    \"\"\"\n",
"    exchange_df = aggregate_lists(ls_cols_list, dic_args['exchange_components_list_dirs'], dic_dir)\n",
"    index_df = aggregate_lists(ls_cols_list, dic_args['index_components_list_dirs'], dic_dir)\n",
"\n",
"    for index_symbol in index_df['composite_symbol'].unique():\n",
"        rows_of_index = index_df['composite_symbol'] == index_symbol\n",
"        members = index_df.loc[rows_of_index, 'entity_symbol'].values\n",
"\n",
"        # Default every entity to non-member, then flag the members.\n",
"        exchange_df.loc[:, index_symbol] = \"N\"\n",
"        is_member = exchange_df['entity_symbol'].isin(members)\n",
"        exchange_df.loc[is_member, index_symbol] = \"Y\"\n",
"\n",
"    tracker_df = exchange_df.sort_values(by=['entity_symbol'])\n",
"\n",
"    to_csv(tracker_df, dic_dir, dic_args)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Root directory that all relative data paths are joined onto.\n",
"base_dir = r\"\\\\dir\" if sys.platform == \"win32\" else \"/dir\"  # win / non-win machines\n",
"\n",
"\n",
"# Columns kept from the source list files, in output order.\n",
"ls_cols_list = [\n",
"    'pr_key', 'entity_symbol', 'company_name',\n",
"    'region', 'composite_symbol', 'sector',\n",
"    'sub_sector', 'market_cap', 'market_weight',\n",
"]\n",
"\n",
"\n",
"def _make_args(func, data_type, data_source):\n",
"    \"\"\"Build one argument set; only these three fields differ between sets.\"\"\"\n",
"    return {\n",
"        'func': func,\n",
"        'region': 'US',\n",
"        'data_cat': 'tracker',\n",
"        'data_subcat': 'entity_tracker',\n",
"        'data_type': data_type,\n",
"        'data_source': data_source,\n",
"        # '{...}' placeholders are filled from this same dict when writing.\n",
"        'tracker_dir': os.path.join('{region}', '{data_cat}', '{data_subcat}', '{data_type}', '{data_source}'),\n",
"        'exchange_components_list_dirs': [\n",
"            os.path.join('US', 'list', 'exchange_components_list', 'NYSE', 'eod_http'),\n",
"            os.path.join('US', 'list', 'exchange_components_list', 'NASDAQ', 'eod_http'),\n",
"        ],\n",
"        'index_components_list_dirs': [\n",
"            os.path.join('US', 'list', 'index_components_list', 'SP500', 'wiki_http'),\n",
"        ],\n",
"    }\n",
"\n",
"\n",
"# One entry per tracker to generate; processed in insertion order.\n",
"dic_args_set = {\n",
"    'args1': _make_args('US_tracker_gen', 'price_data', 'yfinance'),\n",
"    'args2': _make_args('US_tracker_gen', 'fund_data', 'yfinance'),\n",
"    'args3': _make_args('US_entity_gen', 'fund_data', 'yahoofinance'),\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Driver: build one tracker file per argument set, logging each run to its\n",
"# own dated log file under <base_dir>/_logs.\n",
"logs_dir = os.path.join(base_dir, '_logs')\n",
"check_or_create_dir(logs_dir)\n",
"\n",
"root_logger = logging.getLogger()\n",
"root_logger.setLevel(logging.INFO)\n",
"\n",
"for dic_args in dic_args_set.values():\n",
"    creation_date = date.today().strftime('%Y%m%d')\n",
"    log_file = creation_date + \"_\" + dic_args['func'] + \".log\"\n",
"\n",
"    # Fresh dict per run because to_csv() stores 'tracker_dir' in it.\n",
"    dic_dir = {'base': base_dir,\n",
"               'logs': logs_dir}\n",
"\n",
"    log_path = os.path.join(dic_dir['logs'], log_file)\n",
"\n",
"    # logging.basicConfig() does nothing once the root logger has a handler,\n",
"    # so only the first run would get its log file. Attach a dedicated file\n",
"    # handler per run instead, using basicConfig's default format.\n",
"    file_handler = logging.FileHandler(log_path)\n",
"    file_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))\n",
"    root_logger.addHandler(file_handler)\n",
"\n",
"    # NOTE(review): data_type is not part of this label, so argument sets that\n",
"    # differ only in data_type produce identical log lines - confirm intended.\n",
"    log_str = \"{}({}, {}, {}, {})\".format(dic_args['func'], dic_args['region'], dic_args['data_cat'],\n",
"                                          dic_args['data_subcat'], dic_args['data_source'])\n",
"\n",
"    try:\n",
"        print(\"\\n\" + \"STARTING: \" + log_str)\n",
"        logging.info(\"\\n\" + \"STARTING: \" + log_str)\n",
"\n",
"        create_list(dic_dir, dic_args)\n",
"\n",
"        print(\"COMPLETED: \" + log_str)\n",
"        logging.info(\"COMPLETED: \" + log_str)\n",
"\n",
"    except Exception as e:\n",
"        # Top-level boundary: report the failure and continue with the next set.\n",
"        print(str(e))\n",
"        # logging.exception records the traceback along with the message.\n",
"        logging.exception(str(e))\n",
"\n",
"        print(\"FAILED: \" + log_str)\n",
"        logging.error(\"FAILED: \" + log_str)\n",
"\n",
"    finally:\n",
"        # Detach and close so the next run writes only to its own file.\n",
"        root_logger.removeHandler(file_handler)\n",
"        file_handler.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment