Created
August 6, 2012 07:07
-
-
Save ptone/3271822 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

from datetime import datetime, timedelta

from lxml.html.soupparser import fromstring
import requests
# --- Search configuration ---------------------------------------------------
# First calendar date to look at, and the (exclusive) end of the search range.
start_date = datetime(2012, 8, 6)
end_date = start_date + timedelta(days=45)
# Calendar page template; {caldate} is the first day shown (MM/DD/YYYY) and
# {page_index} is the offset of the first site row on the page.
url = 'http://www.reserveamerica.com/campsiteCalendar.do?page=calendar&contractCode=CA&parkId=120030&calarvdate={caldate}&sitepage=true&startIdx={page_index}&sitefilter=STANDARD'

WINDOW_DAYS = 14        # each request is treated as covering 14 days
PAGE_SIZE = 25          # startIdx is advanced by this much per page
MAX_START_INDEX = 125   # stop paging once startIdx reaches this value
WEEKEND_NIGHTS = (4, 5)  # datetime.weekday(): 4 = Friday, 5 = Saturday
REQUEST_TIMEOUT = 60    # seconds; the original request had no timeout


def window_starts(first, last, step_days=WINDOW_DAYS):
    """Yield calendar start dates from *first* up to, but excluding, *last*.

    Consecutive dates are *step_days* apart.
    """
    current = first
    while current < last:
        yield current
        current = current + timedelta(days=step_days)


def window_days(calendar_start, length=WINDOW_DAYS):
    """Return the *length* consecutive dates beginning at *calendar_start*."""
    return [calendar_start + timedelta(days=offset) for offset in range(length)]


def report_weekend_openings(tree, days):
    """Print every ``td.status.a`` cell in *tree* that falls on a weekend night.

    *tree* is the parsed calendar page and *days* the dates of its columns.
    Each site row is scanned once, even when it holds several matching
    cells, so a given site/date pair is reported a single time per page.
    """
    available_cells = tree.cssselect('td.status.a')
    available_lookup = set(available_cells)  # identity-based membership test
    seen_sites = set()
    for item in available_cells:
        row = item.getparent()
        # NOTE(review): assumes the first cell of the row holds the site
        # label and that the cells from index 2 onward map one-to-one onto
        # *days* -- confirm against the live page markup.
        site_id = row[0].text_content().split('\n')[0]
        if site_id in seen_sites:
            continue
        seen_sites.add(site_id)
        # zip() stops at the shorter sequence, so a row with extra trailing
        # cells cannot index past the end of *days*.
        for cell_date, cell in zip(days, row[2:]):
            if cell not in available_lookup:
                continue
            if cell_date.weekday() in WEEKEND_NIGHTS:
                print("booya -- site: {} on {}".format(site_id, cell_date))


def main():
    """Download each calendar page, save it to disk and report openings.

    Pages are written to ``page<N>.html`` in the current directory, numbered
    in download order starting at 1.
    """
    page_downloaded = 1
    for calendar_start in window_starts(start_date, end_date):
        days = window_days(calendar_start)
        calendar_start_str = calendar_start.strftime("%m/%d/%Y")
        for page_index in range(0, MAX_START_INDEX, PAGE_SIZE):
            page_url = url.format(caldate=calendar_start_str,
                                  page_index=page_index)
            print(page_url)
            # The old ``session.config`` / ``prefetch=`` knobs were removed
            # in Requests 1.0; the body is downloaded eagerly by default.
            page = requests.get(page_url, timeout=REQUEST_TIMEOUT).content
            # ``content`` is bytes, so the file must be opened in binary mode.
            with open("page{}.html".format(page_downloaded), 'wb') as f:
                f.write(page)
            page_downloaded += 1
            report_weekend_openings(fromstring(page), days)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment