Skip to content

Instantly share code, notes, and snippets.

@jvkersch
Created June 11, 2015 10:29
Show Gist options
  • Save jvkersch/3546d3f56a5ba95dbdbe to your computer and use it in GitHub Desktop.
Save jvkersch/3546d3f56a5ba95dbdbe to your computer and use it in GitHub Desktop.
""" Obtain a quote of the Dow Jones Industrial Average from Bloomberg.
"""
import json

from bs4 import BeautifulSoup
import requests

url = 'http://www.bloomberg.com/quote/INDU:IND'

# Get the entire Bloomberg web page from the web.  Fail loudly on an HTTP
# error (or a hung connection) so that an error page is never cached or parsed
# as if it were a quote.
resp = requests.get(url, timeout=30)
resp.raise_for_status()
data = resp.text

# Save a copy of the HTML (possibly to cache subsequent requests).  The file
# is overwritten rather than appended to, so it always holds exactly one page.
# Writing encoded bytes in binary mode behaves the same on Python 2 and 3.
with open('INDU_IND.html', 'wb') as f:
    f.write(data.encode('utf-8'))

# Name the parser explicitly; otherwise BeautifulSoup picks whichever parser
# happens to be installed, which can change the parse tree between machines.
soup = BeautifulSoup(data, 'html.parser')

# Find quote information by CSS class attribute.
quote = soup.find('div', class_='schema-org-financial-quote')
if quote is None:
    raise RuntimeError(
        "no 'schema-org-financial-quote' element found at {}".format(url))

# Extract the meta tags from inside the 'schema-org-financial-quote' tag. These
# tags hold the actual financial info; each meta tag has attributes
# 'itemprop' and 'content', describing the category (e.g. 'tickerSymbol') and
# the corresponding value (e.g. 'INDU').  Meta tags missing either attribute
# carry no usable key/value pair and are skipped.
quote_info = {
    meta['itemprop']: meta['content']
    for meta in quote.find_all('meta')
    if meta.has_attr('itemprop') and meta.has_attr('content')
}

# Print out a report of the data obtained so far.  print() with a single
# argument produces the same output on Python 2 and Python 3.
print('Quote Summary')
print('=============')
for attribute, value in quote_info.items():
    print('{:<20}: {}'.format(attribute, value))

# Encode the quote info to JSON and save for further processing.  Overwrite
# the file: appending a second JSON document would make it unparseable.
with open('INDU_IND.json', 'w') as f:
    json.dump(quote_info, f)
""" Process historical stock records in CSV.
Source: https://uk.finance.yahoo.com/q/hp?s=AAPL
"""
# This script assumes that a set of stock records has been downloaded to the
# current folder as table.csv .
import csv
import sys


def _open_csv(path, mode):
    """Open *path* in the way the csv module expects on this Python version.

    *mode* is 'r' or 'w'.  Python 2's csv module wants binary files, while
    Python 3's wants text files opened with newline='' so that the module
    does its own line-ending handling.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


# Read the file once: DictReader takes the column names from the first row
# and yields every following row as a dict keyed by those names.
with _open_csv('table.csv', 'r') as csvfile:
    csvreader = csv.DictReader(csvfile)
    header = csvreader.fieldnames
    stockdata = list(csvreader)

if not header:
    raise ValueError('table.csv is empty: no header row found')

# Loop over the data, and add a data member indicating how much the stock
# fluctuated over one day.
for record in stockdata:
    quote_hi = float(record['High'])
    quote_lo = float(record['Low'])
    record['Rel Difference'] = (quote_hi - quote_lo) / quote_lo

# Write processed data back to file.
fieldnames = list(header) + ['Rel Difference']
with _open_csv('table-processed.csv', 'w') as csvfile:
    csvwriter = csv.DictWriter(csvfile, fieldnames)
    csvwriter.writeheader()
    csvwriter.writerows(stockdata)
import tables
class StockRecord(tables.IsDescription):
    """Row layout for a stock table: a date plus that date's high and low."""

    # Date kept as text in a fixed-width string column of size 16.
    date = tables.StringCol(itemsize=16)
    # High and low values, each stored as a 64-bit float.
    high = tables.Float64Col()
    low = tables.Float64Col()
# Create the HDF5 file and fill one table per ticker symbol.  The `with`
# block closes the file even if creating or filling a table raises, so a
# failed run does not leave the file handle open.
with tables.open_file("stocks.h5", mode="w", title="Stock data") as h5file:
    group = h5file.create_group("/", 'stocks', 'Stock info')

    for name in ['AAPL', 'GOOG', 'FB']:
        table = h5file.create_table(group, name, StockRecord, "Simple example")

        # table.row is a reusable row buffer: fill in the fields, then
        # append() queues a copy of the current values for writing.
        stockrecord = table.row
        for i in range(1, 31):
            stockrecord['date'] = '2015-1-{}'.format(i)
            stockrecord['high'] = 100.0 + i
            stockrecord['low'] = 99.0 + i
            stockrecord.append()

        # Push the queued rows of this table out to the file.
        table.flush()
# On Python 2 the fast C implementation lives in cElementTree.  Python 3.3+
# uses the accelerator automatically through ElementTree, and Python 3.9
# removed the cElementTree alias, so fall back when it is not importable.
try:
    import xml.etree.cElementTree as etree
except ImportError:
    import xml.etree.ElementTree as etree

# curl -O http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml
tree = etree.parse('hamlet.xml')
root = tree.getroot()
# print() with a single argument behaves the same on Python 2 and Python 3.
print(root)

# Print all characters in the play.
for persona_tag in root.iter('PERSONA'):
    print(persona_tag.text)

# Rewrite the entry for Claudius and mark it with a 'bad' attribute.
for persona_tag in root.iter('PERSONA'):
    # An empty <PERSONA/> element has text None; skip it instead of crashing.
    if persona_tag.text and persona_tag.text.startswith('CLAUDIUS'):
        persona_tag.text = 'CLAUDIUS (he did it)'
        persona_tag.set('bad', 'yes')

# Save the modified tree under a new name, leaving hamlet.xml untouched.
tree.write('hamlet_modified.xml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment