Created
July 12, 2018 14:14
-
-
Save danielecook/f62cf2a55be16af389fa2fdd0c3887c1 to your computer and use it in GitHub Desktop.
Fetch a company's profile summary from Yahoo Finance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html
import re

import requests
from bs4 import BeautifulSoup
def boolify(s):
    """Convert the exact strings ``'True'`` / ``'False'`` to booleans.

    Args:
        s: Value to interpret.

    Returns:
        ``True`` when ``s == 'True'``, ``False`` when ``s == 'False'``.

    Raises:
        ValueError: If ``s`` is anything else. The comparison is exact, so
            variants such as ``'true'`` or ``'FALSE'`` are rejected too.
    """
    if s == 'True':
        return True
    if s == 'False':
        return False
    # Include the offending value so the failure is diagnosable when this
    # helper is used on its own.
    raise ValueError(f"not a boolean literal: {s!r}")
def autoconvert(s):
    """Coerce ``s`` to the first type that accepts it.

    The converters are tried in order: ``boolify``, ``int``, ``float``.
    A converter that raises ``ValueError`` is skipped; any other exception
    propagates to the caller.

    Args:
        s: Value to convert.

    Returns:
        The result of the first converter that succeeds, or ``s`` itself
        when every converter raises ``ValueError``.
    """
    converters = (boolify, int, float)
    for convert in converters:
        try:
            converted = convert(s)
        except ValueError:
            # This converter rejected the value; try the next one.
            continue
        return converted
    return s
def extract_key(key, json_text):
    """Pull the value that follows ``"key":`` out of a JSON-like string.

    This is a regex scrape, not a JSON parse: it finds the first occurrence
    of ``"<key>":`` and captures the run of characters after it (optionally
    skipping one opening quote) up to the next ``"``, ``{`` or ``}``.

    Args:
        key: Field name to look for. It is matched literally.
        json_text: Text to search.

    Returns:
        The captured value after HTML-unescaping, replacing the literal
        sequence ``\\u002F`` with ``/``, stripping leading/trailing commas,
        and passing the result through ``autoconvert``. ``None`` when the
        key is not found.
    """
    # Escape the key so regex metacharacters in it cannot alter the pattern.
    pattern = r"\"%s\":[\"]?([^\"\{\}]+)[\"]?" % (re.escape(key),)
    match = re.search(pattern, json_text)
    if match is None:
        return None

    # Unquoted values (e.g. numbers) are captured together with the comma
    # that separates them from the next field, hence the strip(",").
    raw = html.unescape(match.group(1)).replace("\\u002F", "/").strip(",")

    # NOTE(review): autoconvert also applies to values that were quoted
    # strings in the source text, so an all-digit string such as "02134"
    # comes back as the int 2134. Confirm callers are fine with that.
    return autoconvert(raw)
def company_info(symbol):
    """Fetch profile fields for a company from Yahoo Finance.

    Downloads the profile page for ``symbol``, locates the ``<script>``
    element whose text contains ``address1``, and scrapes a fixed set of
    fields out of that script's text with ``extract_key``.

    Args:
        symbol: Ticker symbol, interpolated into the profile URL.

    Returns:
        A dict mapping each field name (``website``, ``address1``,
        ``address2``, ``city``, ``state``, ``country``, ``zip``, ``phone``,
        ``longBusinessSummary``, ``sector``, ``industry``,
        ``fullTimeEmployees``) to its extracted value, or to ``None`` when
        the field is not found. If no matching script element exists on the
        page, every field maps to ``None``.
    """
    page = requests.get(f"https://finance.yahoo.com/quote/{symbol}/profile?p={symbol}")
    soup = BeautifulSoup(page.content, 'html.parser')
    script = soup.find("script", text=re.compile("address1"))

    # find() returns None when nothing matches; fall back to empty text so
    # the result is a dict of None values instead of an AttributeError.
    json_text = script.text if script is not None else ""

    # 'city' and 'state' must be separate entries: without the comma Python
    # concatenates adjacent string literals into the single key 'citystate'.
    key_set = [
        'website',
        'address1',
        'address2',
        'city',
        'state',
        'country',
        'zip',
        'phone',
        'longBusinessSummary',
        'sector',
        'industry',
        'fullTimeEmployees',
    ]
    return {k: extract_key(k, json_text) for k in key_set}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment