Last active
December 1, 2016 04:31
-
-
Save rana-ahmed/5bc4ec1491b407691c44cb20d66f5b9e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pprint | |
| import requests | |
| from bs4 import BeautifulSoup | |
| ''' | |
| JSON Format: {"Dhaka":{"Motijheel":{"BangabhabanTSO":"1222","DilkushaTSO":"1223"}}} | |
| ''' | |
# Division name -> URL of that division's post-code listing page.
# The DivID query values run 1..7 in the order the names are listed here.
url_list = dict(
    (name, 'http://www.bangladeshpost.gov.bd/PostCodeList.asp?DivID=%d' % div_id)
    for div_id, name in enumerate(
        ('Barisal', 'Chittagong', 'Dhaka', 'Khulna',
         'Rajshahi', 'Sylhet', 'Rangpur'),
        1
    )
)

# Pretty-printer used to dump the collected data at the end of the run.
pp = pprint.PrettyPrinter(indent=4)

# Accumulator filled by parseData: district -> thana -> post office -> code.
final_data = dict()
def parseData(content, division):
    """Parse one post-code listing page and merge its rows into ``final_data``.

    Each data row is read as four cells, in order: district, thana,
    post office and post code. The values are stored in the module-level
    ``final_data`` dict as ``final_data[district][thana][post_office] = code``.

    Args:
        content: HTML of the listing page (passed straight to BeautifulSoup).
        division: Name of the division the page belongs to. It is not used
            as a key in ``final_data``; it only appears in the error message.

    Raises:
        ValueError: If the expected listing table is not present in the page.
    """
    # NOTE(review): no parser is named, so bs4 picks the best one installed;
    # results on malformed HTML may differ between machines.
    soup = BeautifulSoup(content)
    table = soup.find("table", width='85%', bordercolor='#669966', border='1')
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'post code table not found in page for division %r' % (division,)
        )

    # The first row is skipped -- presumably the table's header row.
    for row in table.findAll("tr")[1:]:
        cells = row.findAll('td')
        if len(cells) < 4:
            # Not a complete data row (e.g. a spacer or merged-cell row).
            continue

        # get_text() also works when a cell holds nested markup, where
        # ``.string`` would be None and ``.strip()`` would crash.
        district, thana, post_office, code = [
            cell.get_text().strip() for cell in cells[:4]
        ]

        # Create the district and thana levels on first sight, then record
        # the post office's code.
        thanas = final_data.setdefault(district, {})
        thanas.setdefault(thana, {})[post_office] = code
def fetchData(url, division, timeout=30):
    """Download one division's listing page and hand it to ``parseData``.

    Args:
        url: Address of the division's post-code listing page.
        division: Division name, forwarded unchanged to ``parseData``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The HTTP status code of the response. The page is parsed only when
        the status is 200; for any other status nothing is parsed.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        return r.status_code

    # parseData receives the decoded body re-encoded as UTF-8 bytes;
    # characters that cannot be encoded are dropped.
    content = r.text.encode('utf-8', 'ignore')
    parseData(content, division)
    return r.status_code
# Fetch and parse the listing page of every configured division; each call
# adds that division's rows to final_data. The returned status code is not
# inspected here.
for division in url_list:
    fetchData(url_list[division], division)

# Dump the merged district -> thana -> post office -> code mapping.
pp.pprint(final_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment