jvkersch · June 11, 2015 10:29
diff --git a/bs4_demo.py b/bs4_demo.py
 """ Obtain a quote of the Dow Jones Industrial Average from Bloomberg.

 """

 import io
 import json

 from bs4 import BeautifulSoup
 import requests

 url = 'http://www.bloomberg.com/quote/INDU:IND'

 # Get the entire Bloomberg web page from the web. The timeout keeps the script
 # from hanging forever on an unresponsive server, and raise_for_status() stops
 # us from parsing (and caching) an HTTP error page.
 resp = requests.get(url, timeout=30)
 resp.raise_for_status()
 data = resp.text

 # Save a copy of the HTML (possibly to cache subsequent requests). The file is
 # opened in 'w' mode so that each run replaces the previous copy instead of
 # appending a second document to it; io.open takes care of the UTF-8 encoding
 # on both Python 2 and Python 3.
 with io.open('INDU_IND.html', 'w', encoding='utf-8') as f:
     f.write(data)

 # Name the parser explicitly so the result does not depend on which optional
 # parser libraries happen to be installed.
 soup = BeautifulSoup(data, 'html.parser')

 # Find quote information by CSS class attribute.
 quotes = soup.find_all('div', class_='schema-org-financial-quote')
 if not quotes:
     # Exit with a readable message rather than a bare IndexError below.
     raise SystemExit('No schema-org-financial-quote element found at ' + url)
 quote = quotes[0]

 # Extract the meta tags from inside the 'schema-org-financial-quote' tag. These
 # tags hold the actual financial info; each meta tag has attributes
 # 'itemprop' and 'content', describing the category (e.g. 'tickerSymbol') and
 # the corresponding value (e.g. 'INDU').
 quote_info = {
     meta['itemprop']: meta['content'] for meta in quote.find_all('meta')
 }

 # Print out a report of the data obtained so far. Calling print with a single
 # parenthesised argument behaves the same on Python 2 and Python 3.
 print('Quote Summary')
 print('=============')
 for attribute, value in quote_info.items():
     print('{:<20}: {}'.format(attribute, value))

 # Encode the quote info to JSON and save for further processing. 'w' mode
 # keeps the file a single valid JSON document no matter how often we run.
 with open('INDU_IND.json', 'w') as f:
     json.dump(quote_info, f)
diff --git a/csv_demo.py b/csv_demo.py
 """ Process historical stock records in CSV.

 Source: https://uk.finance.yahoo.com/q/hp?s=AAPL

 """

 # This script assumes that a set of stock records has been downloaded to the
 # current folder as table.csv .

 import csv

 # Read the file once with a plain reader to get the header row and raw rows.
 # Files are opened in binary mode, as the Python 2 csv module expects.
 with open('table.csv', 'rb') as csvfile:
     csvreader = csv.reader(csvfile)
     # The next() builtin with a default avoids an uncaught StopIteration when
     # the file is empty.
     header = next(csvreader, None)
     rows = list(csvreader)

 if header is None:
     raise SystemExit('table.csv is empty: no header row found')

 # Read it again with a DictReader, giving one dict per record keyed by the
 # column names from the header row.
 with open('table.csv', 'rb') as csvfile:
     csvreader = csv.DictReader(csvfile)
     stockdata = list(csvreader)

 # Loop over the data, and add a data member indicating how much the stock
 # fluctuated over one day.
 for quote in stockdata:
     quote_hi = float(quote['High'])
     quote_lo = float(quote['Low'])
     if quote_lo:
         quote['Rel Difference'] = (quote_hi - quote_lo) / quote_lo
     else:
         # A zero low price would divide by zero; leave the cell blank.
         quote['Rel Difference'] = ''

 # Write processed data back to file.
 fieldnames = header + ['Rel Difference']
 with open('table-processed.csv', 'wb') as csvfile:
     csvwriter = csv.DictWriter(csvfile, fieldnames)
     csvwriter.writeheader()
     csvwriter.writerows(stockdata)
diff --git a/h5_demo.py b/h5_demo.py
 import tables


 # Row layout of one stock table: a date string of up to 16 bytes plus the
 # high and low values as 64-bit floats.
 class StockRecord(tables.IsDescription):
     date = tables.StringCol(16)
     high = tables.Float64Col()
     low = tables.Float64Col()


 # Use the file as a context manager so it is closed even if creating a group
 # or table, or appending a row, raises part-way through.
 with tables.open_file("stocks.h5", mode="w", title="Stock data") as h5file:
     group = h5file.create_group("/", 'stocks', 'Stock info')

     # One table per ticker symbol, each filled with 30 synthetic records.
     for name in ['AAPL', 'GOOG', 'FB']:
         table = h5file.create_table(group, name, StockRecord, "Simple example")

         # table.row is a reusable row buffer: fill in the fields, then call
         # append() to queue the record.
         stockrecord = table.row
         for i in range(1, 31):
             stockrecord['date'] = '2015-1-{}'.format(i)
             stockrecord['high'] = 100.0 + i
             stockrecord['low'] = 99.0 + i
             stockrecord.append()

         # Push the queued rows of this table out to the file.
         table.flush()
diff --git a/xml_parsing.py b/xml_parsing.py
 try:
     import xml.etree.cElementTree as etree  # C-accelerated parser on Python 2
 except ImportError:
     # cElementTree was removed in Python 3.9; plain ElementTree replaces it.
     import xml.etree.ElementTree as etree

 # curl -O http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml

 tree = etree.parse('hamlet.xml')
 root = tree.getroot()
 # Calling print with a single parenthesised argument behaves the same on
 # Python 2 and Python 3.
 print(root)

 # Print all characters in the play.
 for persona_tag in root.iter('PERSONA'):
     print(persona_tag.text)

 # Rewrite the entry for Claudius and flag it with an extra attribute.
 for persona_tag in root.iter('PERSONA'):
     # An element without text content has text None; treat it as empty so the
     # prefix test cannot raise AttributeError.
     if (persona_tag.text or '').startswith('CLAUDIUS'):
         persona_tag.text = 'CLAUDIUS (he did it)'
         persona_tag.set('bad', 'yes')

 tree.write('hamlet_modified.xml')
	""" Obtain a quote of the Dow Jones Industrial Average from Bloomberg.

	"""

	import io
	import json

	from bs4 import BeautifulSoup
	import requests

	url = 'http://www.bloomberg.com/quote/INDU:IND'

	# Get the entire Bloomberg web page from the web. The timeout keeps the script
	# from hanging forever on an unresponsive server, and raise_for_status() stops
	# us from parsing (and caching) an HTTP error page.
	resp = requests.get(url, timeout=30)
	resp.raise_for_status()
	data = resp.text

	# Save a copy of the HTML (possibly to cache subsequent requests). The file is
	# opened in 'w' mode so that each run replaces the previous copy instead of
	# appending a second document to it; io.open takes care of the UTF-8 encoding
	# on both Python 2 and Python 3.
	with io.open('INDU_IND.html', 'w', encoding='utf-8') as f:
	    f.write(data)

	# Name the parser explicitly so the result does not depend on which optional
	# parser libraries happen to be installed.
	soup = BeautifulSoup(data, 'html.parser')

	# Find quote information by CSS class attribute.
	quotes = soup.find_all('div', class_='schema-org-financial-quote')
	if not quotes:
	    # Exit with a readable message rather than a bare IndexError below.
	    raise SystemExit('No schema-org-financial-quote element found at ' + url)
	quote = quotes[0]

	# Extract the meta tags from inside the 'schema-org-financial-quote' tag. These
	# tags hold the actual financial info; each meta tag has attributes
	# 'itemprop' and 'content', describing the category (e.g. 'tickerSymbol') and
	# the corresponding value (e.g. 'INDU').
	quote_info = {
	    meta['itemprop']: meta['content'] for meta in quote.find_all('meta')
	}

	# Print out a report of the data obtained so far. Calling print with a single
	# parenthesised argument behaves the same on Python 2 and Python 3.
	print('Quote Summary')
	print('=============')
	for attribute, value in quote_info.items():
	    print('{:<20}: {}'.format(attribute, value))

	# Encode the quote info to JSON and save for further processing. 'w' mode
	# keeps the file a single valid JSON document no matter how often we run.
	with open('INDU_IND.json', 'w') as f:
	    json.dump(quote_info, f)
	""" Process historical stock records in CSV.

	Source: https://uk.finance.yahoo.com/q/hp?s=AAPL

	"""

	# This script assumes that a set of stock records has been downloaded to the
	# current folder as table.csv .

	import csv

	# Read the file once with a plain reader to get the header row and raw rows.
	# Files are opened in binary mode, as the Python 2 csv module expects.
	with open('table.csv', 'rb') as csvfile:
	    csvreader = csv.reader(csvfile)
	    # The next() builtin with a default avoids an uncaught StopIteration when
	    # the file is empty.
	    header = next(csvreader, None)
	    rows = list(csvreader)

	if header is None:
	    raise SystemExit('table.csv is empty: no header row found')

	# Read it again with a DictReader, giving one dict per record keyed by the
	# column names from the header row.
	with open('table.csv', 'rb') as csvfile:
	    csvreader = csv.DictReader(csvfile)
	    stockdata = list(csvreader)

	# Loop over the data, and add a data member indicating how much the stock
	# fluctuated over one day.
	for quote in stockdata:
	    quote_hi = float(quote['High'])
	    quote_lo = float(quote['Low'])
	    if quote_lo:
	        quote['Rel Difference'] = (quote_hi - quote_lo) / quote_lo
	    else:
	        # A zero low price would divide by zero; leave the cell blank.
	        quote['Rel Difference'] = ''

	# Write processed data back to file.
	fieldnames = header + ['Rel Difference']
	with open('table-processed.csv', 'wb') as csvfile:
	    csvwriter = csv.DictWriter(csvfile, fieldnames)
	    csvwriter.writeheader()
	    csvwriter.writerows(stockdata)
	import tables


	# Row layout of one stock table: a date string of up to 16 bytes plus the
	# high and low values as 64-bit floats.
	class StockRecord(tables.IsDescription):
	    date = tables.StringCol(16)
	    high = tables.Float64Col()
	    low = tables.Float64Col()


	# Use the file as a context manager so it is closed even if creating a group
	# or table, or appending a row, raises part-way through.
	with tables.open_file("stocks.h5", mode="w", title="Stock data") as h5file:
	    group = h5file.create_group("/", 'stocks', 'Stock info')

	    # One table per ticker symbol, each filled with 30 synthetic records.
	    for name in ['AAPL', 'GOOG', 'FB']:
	        table = h5file.create_table(group, name, StockRecord, "Simple example")

	        # table.row is a reusable row buffer: fill in the fields, then call
	        # append() to queue the record.
	        stockrecord = table.row
	        for i in range(1, 31):
	            stockrecord['date'] = '2015-1-{}'.format(i)
	            stockrecord['high'] = 100.0 + i
	            stockrecord['low'] = 99.0 + i
	            stockrecord.append()

	        # Push the queued rows of this table out to the file.
	        table.flush()
	try:
	    import xml.etree.cElementTree as etree  # C-accelerated parser on Python 2
	except ImportError:
	    # cElementTree was removed in Python 3.9; plain ElementTree replaces it.
	    import xml.etree.ElementTree as etree

	# curl -O http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml

	tree = etree.parse('hamlet.xml')
	root = tree.getroot()
	# Calling print with a single parenthesised argument behaves the same on
	# Python 2 and Python 3.
	print(root)

	# Print all characters in the play.
	for persona_tag in root.iter('PERSONA'):
	    print(persona_tag.text)

	# Rewrite the entry for Claudius and flag it with an extra attribute.
	for persona_tag in root.iter('PERSONA'):
	    # An element without text content has text None; treat it as empty so the
	    # prefix test cannot raise AttributeError.
	    if (persona_tag.text or '').startswith('CLAUDIUS'):
	        persona_tag.text = 'CLAUDIUS (he did it)'
	        persona_tag.set('bad', 'yes')

	tree.write('hamlet_modified.xml')