Last active
October 26, 2018 22:46
-
-
Save cbjorgol/dbef282749c3432590d868b10efe7db6 to your computer and use it in GitHub Desktop.
Simple script to pull redfin entries and plot a price histogram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser | |
import numpy as np | |
from matplotlib import pyplot as plt | |
from matplotlib.ticker import FuncFormatter | |
# Local file so none of you see my actual search | |
from rss_feed_url import url | |
def get_houses(url):
    """
    Fetch a Redfin RSS feed and hand back its 'entries' item

    Parameters
    ----------
    url : str
        Redfin RSS URL to pull from (e.g. 'http://www.redfin.com/stingray/do/rss-saved-search/11111111')

    Return
    ------
    List : One item per house in 'entries'
    """
    # Parse and index in one step; the parsed feed itself is not needed afterwards
    return feedparser.parse(url)['entries']
def get_prices(entries):
    """
    Extract the prices from the RSS feed. I didn't see
    a great way to get the price other than the title.

    The price is taken from the first space-delimited token of each
    title: its first character is dropped (expected to be the currency
    symbol), thousands separators are removed, and the rest is parsed
    as an integer.

    Parameters
    ----------
    entries : list
        'entries' item from Redfin RSS feed; each item must provide
        a 'title' key

    Return
    ------
    List of int : Price of houses, in the same order as `entries`

    Raises
    ------
    ValueError
        If the leading token of a title is not an integer price
    """
    prices = []
    for entry in entries:
        # partition() yields the whole title when it contains no space.
        # The previous find(' ') slice used -1 in that case and silently
        # chopped the last digit off the price.
        price_token = entry['title'].partition(' ')[0]
        prices.append(int(price_token[1:].replace(',', '')))
    return prices
def plot_prices(prices, figsize=(10, 5), bins=None,
                range_text=None, range_low=None, range_high=None):
    """
    Plot a histogram of house prices

    Parameters
    ----------
    prices : list
        A list of house prices
    figsize : tuple, optional (default = (10, 5))
        (width, height) of plot
    bins : list or tuple, optional, default is None
        X bins for histogram
    range_text : str, optional, default is None
        Caption for a highlighted price range. When given, `range_low`
        and `range_high` must be given as well.
    range_low : number, optional, default is None
        Lower price bound of the highlighted range
    range_high : number, optional, default is None
        Upper price bound of the highlighted range

    Return
    ------
    fig, ax
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Add a plot
    ax.hist(prices, bins=bins, color='#0066ff', edgecolor='white')

    if range_text is not None:
        # Add some commentary, centred horizontally over the highlighted
        # range at a fixed height of y = 12
        ax.text(np.mean([range_low, range_high]),
                12,
                range_text,
                ha='center',
                fontsize=12,
                color='red')
        ax.axvspan(range_low, range_high, facecolor='lightgrey',
                   alpha=0.5, lw=0)

    # Labels
    ax.set_title("# of Eligible Houses by Price Range")
    ax.set_ylabel('Number of houses in price range')
    ax.set_xlabel('House Price')

    # Formatting stuff
    # tick_params expects booleans; the string 'off' is truthy and would
    # leave the tick marks visible.
    ax.tick_params(axis='both', which='both',
                   top=False, left=False, right=False, bottom=False)
    ax.xaxis.set_major_formatter(FuncFormatter(thousands))
    return fig, ax
def thousands(x, pos):
    """
    Tick formatter: render `x` as whole thousands of dollars.

    Don't need to use the `pos` parameter; it is accepted only because
    the formatter is called with both the value and the position.
    """
    in_thousands = x / 1000.
    return '$' + format(in_thousands, ',.0f') + 'K'
if __name__ == '__main__':
    # Pull the saved-search feed and reduce it to a list of prices
    prices = get_prices(get_houses(url))

    # Shaded band and caption marking the preferred price window
    highlight = dict(range_text='Ideal price range',
                     range_low=475000,
                     range_high=550000)

    fig, ax = plot_prices(prices,
                          bins=range(300000, 650000, 25000),
                          **highlight)
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello cbjorgol: Does this feed for Redfin still work? I have tried putting in different values at the end of http://www.redfin.com/stingray/do/rss-saved-search/11111111, and I get different answers, but I can't figure out what these values correlate with or how to link a feed to my saved searches. Thanks, Steve