Created
September 23, 2012 06:54
-
-
Save michellesun/3769146 to your computer and use it in GitHub Desktop.
visa wait days
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import operator
import pprint
import re

import requests
from bs4 import BeautifulSoup
def get_cities(soup):
    """Collect the ``value`` attribute of every ``<option>`` element in *soup*.

    Args:
        soup: Parsed document exposing ``find_all(name)``; every element it
            returns must support ``.get('value')``.

    Returns:
        list: The non-empty ``value`` attributes, in document order. Options
        that have no ``value`` attribute, or an empty one, are left out.
    """
    option_values = (option.get('value') for option in soup.find_all('option'))
    # ``.get`` returns None for an option without a ``value`` attribute; the
    # callers in this file call ``city.split()`` on every entry, so a None
    # would raise AttributeError there. Empty strings are dropped as well
    # because they do not name a city.
    return [value for value in option_values if value]
def make_url(raw_cities):
    """Build one wait-time page URL per entry of *raw_cities*.

    Each name is split on whitespace and re-joined with ``+`` before being
    substituted into the ``post`` query parameter, so leading/trailing
    whitespace disappears and inner runs of whitespace become a single ``+``.

    Args:
        raw_cities: Iterable of city-name strings.

    Returns:
        list: URL strings, in the same order as *raw_cities*.
    """
    url_template = 'http://travel.state.gov/visa/temp/wait/wait_4788.html?post=%s'
    url_list = []
    for raw_city in raw_cities:
        plus_joined = "+".join(raw_city.split())
        url_list.append(url_template % plus_joined)
    return url_list
def get_wait_days(soup):
    """Return the wait-day figure from the first highlighted ``<font>`` tag.

    Searches *soup* for ``<font color="#76a9d8">`` elements. Each element's
    markup is split on whitespace and the run of digits ending the
    second-to-last token is read, which matches markup shaped like
    ``...>12 Days</font>``. The first element that yields a number wins.

    Args:
        soup: Parsed document exposing ``find_all(name, **attrs)``; the
            returned elements are converted with ``str()``.

    Returns:
        str or None: The digits as a string (e.g. ``'12'``), or ``None``
        when no matching element carries a number. The value is a string,
        not an int; convert it before comparing numerically.
    """
    # NOTE(review): the indentation of this function was lost in the source,
    # so whether it returned the first or the last matching tag is unclear.
    # This keeps the "first match" reading - confirm against the live page.
    for tag in soup.find_all('font', color='#76a9d8'):
        tokens = str(tag).split()
        if len(tokens) < 2:
            # No second-to-last token to read a number from.
            continue
        # Take the whole trailing digit run: indexing only the last
        # character would turn "12" into "2".
        trailing_number = re.search(r'[0-9]+$', tokens[-2])
        if trailing_number:
            return trailing_number.group()
    return None
def shortest_exchange_wait(dict_exchange):
    """Return the ten entries of *dict_exchange* with the smallest values.

    Args:
        dict_exchange: Mapping of key to value. The values must be mutually
            comparable, because they are the sort key.

    Returns:
        list: At most ten ``(key, value)`` tuples in ascending value order.
        Entries with equal values keep the mapping's iteration order
        (``sorted`` is stable).
    """
    # ``items()`` instead of ``iteritems()``: the latter exists only on
    # Python 2 dicts, while ``items()`` works on both Python 2 and 3.
    ranked = sorted(dict_exchange.items(), key=operator.itemgetter(1))
    return ranked[:10]
def main():
    """Fetch the wait time for every listed post and print the ten shortest.

    The Beijing page is downloaded first because its ``<option>`` elements
    supply the list of cities. Each city's page is then fetched, its wait
    figure extracted with ``get_wait_days``, and the ten smallest
    ``(city, days)`` pairs are pretty-printed.

    Raises:
        requests.RequestException: If any page cannot be fetched (including
            a timeout).
    """
    # Without a timeout, requests can block indefinitely on a stalled server.
    request_timeout = 30  # seconds

    first = 'http://travel.state.gov/visa/temp/wait/wait_4788.html?post=Beijing'
    page = requests.get(first, timeout=request_timeout)
    soup = BeautifulSoup(page.content)

    # Options without a usable ``value`` come back as None/''; they cannot be
    # turned into a URL, so leave them out.
    cities = [city for city in get_cities(soup) if city]

    dict_exchange = {}
    # make_url yields one URL per city in the same order, so zip pairs them.
    for city, url in zip(cities, make_url(cities)):
        each_soup = BeautifulSoup(
            requests.get(url, timeout=request_timeout).content)
        raw_wait = get_wait_days(each_soup)
        try:
            # Store ints so the ranking is numeric; as strings, '10' would
            # sort before '2'.
            wait_days = int(raw_wait)
        except (TypeError, ValueError):
            # No figure (None) or a non-numeric one: the city cannot be
            # ranked, so skip it.
            continue
        dict_exchange[city] = wait_days

    shortest = shortest_exchange_wait(dict_exchange)
    pprint.pprint(shortest)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment