Save scrapehero-code/6d87e1e1369ee701dcea8880b4b620e9 to your computer and use it in GitHub Desktop.
import argparse
import json
from collections import OrderedDict

import requests
from lxml import html
def get_headers():
    """Return browser-like HTTP headers for requests to Yahoo Finance.

    Mimics a Chrome 81 desktop browser so the site serves the normal
    quote page rather than a bot-detection response.
    """
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-GB,en;q=0.9,en-US;q=0.8,ml;q=0.7",
        "cache-control": "max-age=0",
        "dnt": "1",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "none",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
    }
    return headers
def parse(ticker):
    """Scrape summary data for *ticker* from Yahoo Finance.

    Merges the key/value rows of the quote page's summary table with a few
    fields (1y target price, trailing EPS, earnings dates) taken from the
    quoteSummary JSON API.

    Returns an OrderedDict of scraped values on success, or a plain dict
    with an ``"error"`` key on failure.
    """
    url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
    response = requests.get(
        url, verify=False, headers=get_headers(), timeout=30)
    print("Parsing %s" % (url))
    parser = html.fromstring(response.text)
    summary_table = parser.xpath(
        '//div[contains(@data-test,"summary-table")]//tr')
    summary_data = OrderedDict()
    # BUG FIX: the original URL contained the mojibake "®ion=US" (an
    # HTML-entity-decoded "&reg"); the API needs the literal "&region=US".
    other_details_json_link = (
        "https://query2.finance.yahoo.com/v10/finance/quoteSummary/{0}"
        "?formatted=true&lang=en-US&region=US"
        "&modules=summaryProfile%2CfinancialData%2CrecommendationTrend"
        "%2CupgradeDowngradeHistory%2Cearnings%2CdefaultKeyStatistics"
        "%2CcalendarEvents&corsDomain=finance.yahoo.com").format(ticker)
    # Use the same browser headers and timeout as the first request so the
    # API call is not treated differently (the original omitted both).
    summary_json_response = requests.get(
        other_details_json_link, headers=get_headers(), timeout=30)
    try:
        json_loaded_summary = json.loads(summary_json_response.text)
        summary = json_loaded_summary["quoteSummary"]["result"][0]
        # Some tickers have no mean target price / trailing EPS; fall back
        # to None instead of raising so the remaining data is still
        # returned (previously this produced an error-only output file).
        y_Target_Est = (summary.get("financialData") or {}).get(
            "targetMeanPrice", {}).get("raw")
        eps = (summary.get("defaultKeyStatistics") or {}).get(
            "trailingEps", {}).get("raw")
        earnings_list = (summary.get("calendarEvents") or {}).get(
            "earnings", {})
        datelist = [i['fmt'] for i in earnings_list.get('earningsDate', [])]
        earnings_date = ' to '.join(datelist)
        for table_data in summary_table:
            raw_table_key = table_data.xpath(
                './/td[1]//text()')
            raw_table_value = table_data.xpath(
                './/td[2]//text()')
            table_key = ''.join(raw_table_key).strip()
            table_value = ''.join(raw_table_value).strip()
            summary_data.update({table_key: table_value})
        summary_data.update({'1y Target Est': y_Target_Est, 'EPS (TTM)': eps,
                             'Earnings Date': earnings_date, 'ticker': ticker,
                             'url': url})
        return summary_data
    except ValueError:
        print("Failed to parse json response")
        return {"error": "Failed to parse json response"}
    except (KeyError, IndexError, TypeError) as exc:
        # Narrowed from a bare ``except:`` — only the lookup errors the
        # dict/list access above can realistically raise are swallowed.
        print("Unhandled error: %s" % exc)
        return {"error": "Unhandled Error"}
if __name__ == "__main__":
    # Command line: a single positional stock-ticker argument.
    cli = argparse.ArgumentParser()
    cli.add_argument('ticker', help='')
    parsed = cli.parse_args()
    ticker = parsed.ticker
    print("Fetching data for %s" % (ticker))
    scraped_data = parse(ticker)
    print("Writing data to output file")
    # Dump the scraped summary as pretty-printed JSON, e.g. AAPL-summary.json.
    with open('%s-summary.json' % (ticker), 'w') as fp:
        json.dump(scraped_data, fp, indent=4)
This is very helpful, but I have noticed that the JSON output file contains an error and is otherwise empty if the target price is not available.
Hi,
I noticed the first request to url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
results in the data protection privacy consent page.
This results in summary_table
being empty.
So the results .json of AAPL is not the complete list as stated above..
{ "1y Target Est": 425.17, "EPS (TTM)": 13.185, "Earnings Date": "2020-10-28 to 2020-11-02", "ticker": "AAPL", "url": "https://finance.yahoo.com/quote/AAPL?p=AAPL" }
Hi,
I noticed the first request to url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
results in the data protection privacy consent page.
This results in summary_table
being empty.
So the results .json of AAPL is not the complete list as stated above..
{ "1y Target Est": 425.17, "EPS (TTM)": 13.185, "Earnings Date": "2020-10-28 to 2020-11-02", "ticker": "AAPL", "url": "https://finance.yahoo.com/quote/AAPL?p=AAPL" }
Same issue here!
I had the same problem and found an alternative solution that grabs the same data in one line of code:
from yahoo_fin import stock_info as si
si.get_quote_table("aapl")
Very helpful and very easy to use! Thanks for sharing
line 51: table_key = ''.join(raw_table_key).strip() is giving me an invalid syntax error. Anyone know why?
How to Run the Scraper
Example: running the script with a ticker argument, e.g. `python yahoo_finance.py AAPL`,
will create a file called
AAPL-summary.json
Full tutorial at How to Scrape Yahoo Finance.