Created
February 3, 2021 12:37
-
-
Save maurobaraldi/ff85febbf360c7f1aaff1932592b8104 to your computer and use it in GitHub Desktop.
Yahoo Finance Stock Price Scraper - Mosaic of 40 Brazilian stocks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from mpl_finance import candlestick_ohlc | |
import numpy as np | |
from matplotlib import pyplot | |
import pandas as pd | |
from bs4 import BeautifulSoup | |
import urllib.request | |
import re | |
# Global look of the mosaic: ggplot theme on one 12.80 x 7.20 inch figure.
# A larger alternative that was tried is (25.60, 14.40).
FIGURE_SIZE = (12.80, 7.20)

pyplot.style.use("ggplot")
pyplot.figure(figsize=FIGURE_SIZE)
# Splits a rendered <td> element on tag delimiters (and literal '+'); the
# pattern is kept exactly as the original scraper used it.
_TAG_SPLIT = re.compile("[>+<]")

# Order in which the scraped values repeat inside the flat cell list.
_HISTORY_COLUMNS = ("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume")


def _classify_cell(text):
    """Return *text* as an int (comma-grouped volume), a float (price) or None."""
    try:
        return int(text.replace(",", ""))
    except ValueError:
        pass
    try:
        return float(text)
    except ValueError:
        return None


def _parse_history_cells(cells):
    """Turn the markup of consecutive <td> cells into one flat list of values."""
    values = []
    # Every cell is examined together with its successor, so the very last
    # cell of the page is never emitted on its own (same as the original loop).
    for current, following in zip(cells, cells[1:]):
        parts = _TAG_SPLIT.split(current)
        if len(parts) == 5:
            # Five fragments with an empty text slot: a day with zero volume.
            if parts[4] == "":
                values.append(0)
        elif len(parts) == 9:
            # Nine fragments: a regular cell whose text sits in slot 4.
            number = _classify_cell(parts[4])
            if number is not None:
                # Appended exactly once. The original appended a value twice
                # when it parsed as both int and float (e.g. a volume of
                # "500"), which shifted every later value out of its column.
                values.append(number)
            elif len(_TAG_SPLIT.split(following)) < 10:
                # Non-numeric text is a date, unless the next cell is a long
                # one (dividend rows split into 17 fragments), in which case
                # the row is skipped.
                values.append(parts[4])
    return values


def getstk(code):
    """Scrape the Yahoo Finance price-history table of one ticker.

    Downloads ``https://finance.yahoo.com/quote/<code>/history``, reads the
    text of every ``<td>`` cell and reshapes the flat list of values into a
    DataFrame, assuming the values repeat in groups of seven
    (Date, Open, High, Low, Close, Adj.Close, Volume).

    Args:
        code: Ticker symbol exactly as it appears in the Yahoo Finance URL.

    Returns:
        A pandas DataFrame with the seven scraped columns plus ``Dif``
        (Adj.Close - Close) and the dividend-adjusted ``Open.Adj``,
        ``High.Adj`` and ``Low.Adj``. Rows are in the reverse of page order
        (presumably oldest first -- confirm against the live page), rows with
        zero volume or NaN are removed, and the index runs 0..n-1.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        ValueError: Raised by pandas when the scraped values do not split
            into seven columns of equal length.
    """
    url = "https://finance.yahoo.com/quote/" + code + "/history"
    # Parse while the response is open, then release the connection.
    with urllib.request.urlopen(url) as webpage:
        soup = BeautifulSoup(webpage, "lxml")

    cells = [str(td) for td in soup.find_all("td")]
    tf = _parse_history_cells(cells)

    stockFile = pd.DataFrame()
    for offset, name in enumerate(_HISTORY_COLUMNS):
        # Every 7th value starting at `offset` belongs to this column; the
        # list is reversed so the last row of the page becomes the first.
        stockFile[name] = tf[offset::7][::-1]

    # Adjusting the prices for dividends
    stockFile["Dif"] = stockFile["Adj.Close"] - stockFile["Close"]
    stockFile["Open.Adj"] = stockFile["Open"] + stockFile["Dif"]
    stockFile["High.Adj"] = stockFile["High"] + stockFile["Dif"]
    stockFile["Low.Adj"] = stockFile["Low"] + stockFile["Dif"]

    stockFile = stockFile.query('Volume !=0')  # excluding days without movement (volume = 0)
    stockFile = stockFile.dropna()
    stockFile.index = range(len(stockFile))  # Adjusting the index if a drop is done
    return stockFile
def grafico(stk, eixo):
    """Draw the candlestick panel of one stock and return its last date.

    Fetches the history of ``stk + '.sa'`` through ``getstk``, prints a short
    summary (stock, last date, last close, last 20-day moving average) and
    draws the last 16 rows as dividend-adjusted candlesticks, together with a
    horizontal line at the latest moving-average level and a label holding
    the last adjusted close.

    Args:
        stk: Stock code without the exchange suffix.
        eixo: Matplotlib axes that receives the candlesticks.

    Returns:
        The value of the ``Date`` column in the last row of the history.

    NOTE(review): the line, label, title and tick settings go through
    ``pyplot`` (current axes), not through ``eixo`` -- this assumes ``eixo``
    is the current axes when the function is called.
    NOTE(review): with fewer than 16 rows the start index is negative and
    ``iloc`` wraps around to the end of the frame -- confirm this cannot
    happen.
    """
    ticker = stk + '.sa'  # .sa = local code (Bovespa)
    stock = getstk(ticker)
    total = len(stock)
    last_label = total - 1
    dia = stock["Date"][last_label]

    print()
    print("Stock : ", stk)
    print("Last date : ", dia)
    print("Last price : ", stock["Close"][last_label])

    # 20-day moving average of the close; only its latest value is used.
    moving_average = stock["Close"].rolling(window=20).mean().tolist()
    latest_average = moving_average[-1]

    prd = 15  # days window for each stock
    eixoX = np.linspace(1, prd + 1, num=prd + 1)
    eixoY = [latest_average] * (prd + 1)
    print("Moving average : ", round(latest_average, 2))

    # One (position, open, high, low, close) tuple per plotted day; positions
    # start at 1 so the last candle sits at x = prd + 1.
    OHLC = [
        (
            position,
            stock['Open.Adj'].iloc[row],
            stock['High.Adj'].iloc[row],
            stock['Low.Adj'].iloc[row],
            stock['Adj.Close'].iloc[row],
        )
        for position, row in enumerate(range(total - prd - 1, total), start=1)
    ]

    label_box = {
        "boxstyle": "round,pad=0.1",
        "fc": "white",
        "ec": "black",
        "lw": 0.5,
        "alpha": 0.35,
    }
    last_close = OHLC[-1][4]

    candlestick_ohlc(eixo, OHLC, width=0.7, colorup='green', colordown='red')
    # last day moving average level
    pyplot.plot(eixoX, eixoY, color="darkorange", alpha=0.5)
    # last price label
    pyplot.text(
        prd + 1,
        last_close,
        str(round(last_close, 2)),
        ha="center",
        va="center",
        size=6.0,
        bbox=label_box,
    )

    current_axes = pyplot.gca()
    pyplot.title(stk, fontsize=7.6)
    current_axes.set_xticklabels([])
    current_axes.set_yticklabels([])
    return dia
# Stock list (40)
lista = [
    'abev3', 'bbas3', 'bbdc4', 'bbse3', 'brap4', 'brfs3', 'brkm5', 'btow3', 'ccro3', 'ciel3',
    'cmig4', 'cple6', 'csna3', 'dtex3', 'elet6', 'embr3', 'eztc3', 'ggbr4', 'grnd3', 'itub4',
    'irbr3', 'klbn4', 'lame4', 'mdia3', 'mglu3', 'natu3', 'pcar4', 'petr3', 'pssa3', 'sbsp3',
    'suzb3', 'timp3', 'trpl4', 'radl3', 'rapt4', 'tots3', 'usim5', 'vale3', 'vivt4', 'wege3',
]

data = []  # last date reported for each stock, in plotting order
grid = (4, 10)  # 4 lines 10 columns
n_rows, n_cols = grid

# Fill the mosaic row by row: cell number ct maps to (ct // n_cols, ct % n_cols).
for ct in range(n_rows * n_cols):
    cell = divmod(ct, n_cols)
    eixo = pyplot.subplot2grid(grid, cell, colspan=1, rowspan=1)
    dia = grafico(lista[ct], eixo)
    data.append(dia)
    pyplot.draw()

# The figure title shows the last date of the last stock plotted.
pyplot.suptitle(dia, fontsize=8)
# minor chart windows adjustments
pyplot.subplots_adjust(
    left=0.03, right=0.97, top=0.93, bottom=0.03, hspace=0.20, wspace=0.14
)
pyplot.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment