Created
February 12, 2024 14:26
-
-
Save sz55net/c88efd4c367579bb5f1fabd46322ebe5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Configs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# offset: index in the symbol list at which the download loop starts\n", | |
"offset = 0\n", | |
"# limit: how many symbols to process starting at offset; a falsy value means all remaining symbols\n", | |
"limit = 3000\n", | |
"# period: history window handed to yf.download for every symbol\n", | |
"period = 'max' # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Download all NASDAQ-traded symbols" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"total number of symbols traded = 8867\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"data = pd.read_csv(\"http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt\", sep='|')\n", | |
"data_clean = data[data['Test Issue'] == 'N']\n", | |
"symbols = data_clean['NASDAQ Symbol'].tolist()\n", | |
"print('total number of symbols traded = {}'.format(len(symbols)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Download historical data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"! pip install yfinance > /dev/null 2>&1\n", | |
"! mkdir hist" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import yfinance as yf\n", | |
"import os, contextlib" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total number of valid symbols downloaded = 2733\n", | |
"CPU times: user 9min 34s, sys: 10.8 s, total: 9min 44s\n", | |
"Wall time: 20min 3s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"limit = limit if limit else len(symbols)\n", | |
"end = min(offset + limit, len(symbols))\n", | |
"is_valid = [False] * len(symbols)\n", | |
"# force silencing of verbose API\n", | |
"with open(os.devnull, 'w') as devnull:\n", | |
" with contextlib.redirect_stdout(devnull):\n", | |
" for i in range(offset, end):\n", | |
" s = symbols[i]\n", | |
" data = yf.download(s, period=period)\n", | |
" if len(data.index) == 0:\n", | |
" continue\n", | |
" \n", | |
" is_valid[i] = True\n", | |
" data.to_csv('hist/{}.csv'.format(s))\n", | |
"\n", | |
"print('Total number of valid symbols downloaded = {}'.format(sum(is_valid)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"valid_data = data_clean[is_valid]\n", | |
"valid_data.to_csv('symbols_valid_meta.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Separating ETFs and Stocks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!mkdir stocks\n", | |
"!mkdir etfs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"etfs = valid_data[valid_data['ETF'] == 'Y']['NASDAQ Symbol'].tolist()\n", | |
"stocks = valid_data[valid_data['ETF'] == 'N']['NASDAQ Symbol'].tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import shutil\n", | |
"from os.path import isfile, join\n", | |
"\n", | |
"def move_symbols(symbols, dest):\n", | |
" for s in symbols:\n", | |
" filename = '{}.csv'.format(s)\n", | |
" shutil.move(join('hist', filename), join(dest, filename))\n", | |
" \n", | |
"move_symbols(etfs, \"etfs\")\n", | |
"move_symbols(stocks, \"stocks\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# rmdir only deletes an empty directory, so hist/ stays in place if any CSV was not moved out\n", | |
"! rmdir hist" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment