Skip to content

Instantly share code, notes, and snippets.

@cbjorgol
Last active October 26, 2018 22:46
Show Gist options
  • Save cbjorgol/dbef282749c3432590d868b10efe7db6 to your computer and use it in GitHub Desktop.
Save cbjorgol/dbef282749c3432590d868b10efe7db6 to your computer and use it in GitHub Desktop.
Simple script to pull redfin entries and plot a price histogram
import feedparser
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter
# Local file so none of you see my actual search
from rss_feed_url import url
def get_houses(url):
    """
    Fetch a Redfin RSS feed and hand back its 'entries' item

    Parameters
    ----------
    url : str
        Address of the Redfin RSS feed
        (e.g. 'http://www.redfin.com/stingray/do/rss-saved-search/11111111')

    Return
    ------
    List : the feed's 'entries', one item per house
    """
    # Parse and index in one step; feedparser does the fetching itself.
    return feedparser.parse(url)['entries']
def get_prices(entries):
    """
    Extract the prices from the RSS feed entries. The price is read
    from each entry's title because no better field was found.

    Parameters
    ----------
    entries : list
        'entries' item from Redfin RSS feed. Each entry must support
        ``entry['title']``; the title is assumed to start with the
        price (e.g. '$450,000 ...') -- this matched the author's
        subset of houses, not a documented format.

    Return
    ------
    List of int : price of each house, in the order of `entries`

    Raises
    ------
    ValueError
        If the first token of a title is not an integer price.
    """
    prices = []
    for entry in entries:
        # The price is the first space-delimited token. partition()
        # keeps the whole title when it has no space at all, where
        # slicing up to find(' ') == -1 would chop off the last digit.
        token = entry['title'].strip().partition(' ')[0]
        # Drop one leading currency symbol, but never a leading digit.
        if token and not token[0].isdigit():
            token = token[1:]
        prices.append(int(token.replace(',', '')))
    return prices
def plot_prices(prices, figsize=(10, 5), bins=None,
                range_text=None, range_low=None, range_high=None,
                range_text_y=12):
    """
    Plot a histogram of house prices, optionally highlighting a
    price range with a shaded band and a text label

    Parameters
    ----------
    prices : list
        A list of house prices
    figsize : tuple, optional (default = (10, 5))
        (width, height) of the figure, passed to `plt.subplots`
    bins : list or tuple, optional, default is None
        X bins for histogram, passed to `ax.hist`
    range_text : str, optional, default is None
        Label for the highlighted range. When None, nothing is
        highlighted and `range_low` / `range_high` are ignored.
    range_low : number, optional, default is None
        Lower x bound of the highlighted range
    range_high : number, optional, default is None
        Upper x bound of the highlighted range
    range_text_y : number, optional (default = 12)
        Y position (in data coordinates) of the range label

    Return
    ------
    fig, ax

    Raises
    ------
    ValueError
        If `range_text` is given without both `range_low` and
        `range_high`.
    """
    if range_text is not None and (range_low is None or range_high is None):
        raise ValueError(
            "range_low and range_high are required when range_text is given")

    fig, ax = plt.subplots(figsize=figsize)

    # Add a plot
    ax.hist(prices, bins=bins, color='#0066ff', edgecolor='white')

    if range_text is not None:
        # Add some commentary, centred horizontally over the range
        ax.text((range_low + range_high) / 2.0,
                range_text_y,
                range_text,
                ha='center',
                fontsize=12,
                color='red')
        ax.axvspan(range_low, range_high, facecolor='lightgrey',
                   alpha=0.5, lw=0)

    # Labels
    ax.set_title("# of Eligible Houses by Price Range")
    ax.set_ylabel('Number of houses in price range')
    ax.set_xlabel('House Price')

    # Formatting stuff: hide the tick marks on every side. Real booleans
    # are used because the 'off' strings are not reliably read as False
    # by newer Matplotlib releases.
    ax.tick_params(axis='both', which='both',
                   top=False, left=False, right=False, bottom=False)

    # `thousands` is the module-level tick formatter defined in this file
    ax.xaxis.set_major_formatter(FuncFormatter(thousands))

    return fig, ax
def thousands(x, pos):
    """Format `x` as dollars in thousands, e.g. 450000 -> '$450K'.

    `pos` is accepted but not used.
    """
    in_thousands = x / 1000.
    return '$' + format(in_thousands, ',.0f') + 'K'
if __name__ == '__main__':
    # Pull the listings from the feed and reduce them to prices
    entries = get_houses(url)
    prices = get_prices(entries)

    # Settings for the shaded band and its label
    highlight = dict(range_text='Ideal price range',
                     range_low=475000,
                     range_high=550000)

    fig, ax = plot_prices(prices, bins=range(300000, 650000, 25000),
                          **highlight)
    plt.show()
@selster
Copy link

selster commented Jun 8, 2018

Hello cbjorgol: Does this feed for Redfin still work? I have tried putting in different values at the end of http://www.redfin.com/stingray/do/rss-saved-search/11111111 and I get different answers, but I can't figure out what these values correlate with or how to link a feed to my saved searches. Thanks, Steve

@jwelham
Copy link

jwelham commented Oct 26, 2018

Hi! I'd also like to know where you get these RSS link values, as @selster mentions above.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment