Created
March 17, 2023 16:00
-
-
Save 2minchul/6ebd790f20cca7b48d1c3d3b71ac051d to your computer and use it in GitHub Desktop.
Kakao 우편번호 서비스 for Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from typing import Optional, List, Tuple, Dict | |
import requests | |
from bs4 import BeautifulSoup | |
def search_many_postcode(queries: List[Tuple[str, str]]) -> Dict[tuple, Optional[dict]]:
    """
    Look up several (keyword, zipcode) pairs concurrently.

    Every pair is handed to ``search_postcode`` on a thread pool of up to
    10 workers; all lookups go through one shared ``requests.Session``.

    :param queries: list of tuple(keyword, zipcode)
    :return: {tuple(keyword, zipcode): dict of address}; the value is
        whatever ``search_postcode`` returned for that pair (None when no
        result matched the zipcode)
    """
    found = {}
    with ThreadPoolExecutor(max_workers=10) as executor, requests.Session() as http:
        # Map each submitted task back to the query that produced it.
        pending = {}
        for query in queries:
            task = executor.submit(search_postcode, query[0], query[1], http)
            pending[task] = query
        # Collect results in completion order; a failed task re-raises here.
        for task in as_completed(pending):
            found[pending[task]] = task.result()
    return found
def search_postcode(keyword: str, zipcode: str, session: Optional[requests.Session] = None) -> Optional[dict]:
    """
    Search for *keyword* and return the first result whose zonecode is *zipcode*.

    :param keyword: address text passed to ``request_search_postcode``
    :param zipcode: value compared against each result's ``'zonecode'`` entry
    :param session: optional session forwarded to ``request_search_postcode``
    :return: the first matching address dict, or None when nothing matches
    """
    page = request_search_postcode(keyword, session)
    matches = (
        entry
        for entry in parse_iter_postcode(page, keyword)
        if entry.get('zonecode') == zipcode
    )
    # The generator is lazy, so parsing stops at the first match.
    return next(matches, None)
def request_search_postcode(keyword: str, session: Optional[requests.Session] = None) -> str:
    """
    Fetch the HTML of the postcode search page for *keyword*.

    :param keyword: address text to search for; it is percent-encoded before
        being placed in the query string
    :param session: optional ``requests.Session`` used to send the request;
        when omitted, a one-off ``requests.get`` call is made instead
    :return: the response body as text
    :raises requests.HTTPError: if the response has an error status
        (raised by ``raise_for_status``)
    :raises requests.Timeout: if the server does not answer within the timeout
    """
    from urllib.parse import quote

    headers = {
        'authority': 'postcode.map.daum.net',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'referer': 'https://postcode.map.daum.net',
        'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'iframe',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
    }
    # Escape the keyword so characters such as '&', '#', '=' or '%' cannot
    # break out of their query parameter.
    query = quote(keyword, safe='')
    # NOTE: '&regionid=&regionname=' was previously mangled into
    # '®ionid=®ionname=' (the '&reg' prefix decoded as an HTML entity).
    url = 'https://postcode.map.daum.net/search?' \
          f'region_name={query}&cq={query}&cpage=1&origin=https://postcode.map.daum.net&' \
          f'isp=N&isgr=N&isgj=N&ongr=&ongj=&regionid=&regionname=&roadcode=&roadname=&banner=on&' \
          f'ubl=on&indaum=off&vt=popup&amr=on&amj=on&ani=off&mode=transmit&sd=on&fi=on&fc=on&hmb=off&' \
          f'heb=off&asea=off&smh=off&zo=on&theme=&bit=&sit=&sgit=&sbit=&pit=&mit=&lcit=&plrg=&plrgt=1.5&' \
          f'us=on&msi=10&ahs=off&whas=500&zn=Y&sm=on&CWinWidth=500&sptype=&sporgq=&a51=off'

    # Reuse the caller's session when one is supplied, otherwise fall back
    # to the module-level helper.
    send = session.get if session is not None else requests.get
    # Without a timeout a stalled connection would block the calling thread
    # forever.
    response: requests.Response = send(url, headers=headers, timeout=10)
    with response:
        response.raise_for_status()
        return response.text
def _get_data_attrs(tag): | |
return {k[5:]: v for k, v in tag.attrs.items() if k.startswith('data-')} | |
def _address_span_attrs(address_dl, main_class, rel_class):
    """Return the data-* attributes of the ``span.txt_address`` inside the main (or, failing that, related) ``<dd>``."""
    dd = address_dl.find('dd', attrs={'class': main_class}) or address_dl.find(
        'dd', attrs={'class': rel_class})
    if not dd:
        return {}
    span = dd.find('span', attrs={'class': 'txt_address'})
    # A <dd> without the expected span yields no attributes rather than
    # raising AttributeError on None.
    if span is None:
        return {}
    return _get_data_attrs(span)


def parse_iter_postcode(html, keyword=''):
    """
    Yield one address dict per result item found in postcode-search HTML.

    The parser looks for ``<ul class="list_post">`` and walks its ``<li>``
    items. Each item's own ``data-*`` attributes are combined with those of
    the road-address and jibun-address spans inside its
    ``<dl class="list_address">``. Items without that ``<dl>`` are skipped.

    :param html: HTML text of the search result page
    :param keyword: stored unchanged under the ``'query'`` key of each result
    :return: generator of dicts; yields nothing when the result list is absent
    """
    soup = BeautifulSoup(html, 'html.parser')
    ul = soup.find('ul', attrs={'class': 'list_post'})
    if not ul:
        return

    for li_tag in ul.find_all('li'):
        searched = _get_data_attrs(li_tag)
        address_dl = li_tag.find('dl', attrs={'class': 'list_address'})
        if not address_dl:
            continue

        road = _address_span_attrs(address_dl, 'main_road', 'rel_road')
        jibun = _address_span_attrs(address_dl, 'main_jibun', 'rel_jibun')

        data = {
            'query': keyword,
            '_from': 'html',
            'addressType': searched.get('addr_type', ''),
            'userSelectedType': searched.get('addr_type', ''),
            'address': searched.get('addr', ''),
            'addressEnglish': searched.get('addr_eng', ''),
            'bcode': searched.get('bcode', ''),
            'bname': searched.get('bname', ''),
            'bnameEnglish': searched.get('bname_eng', ''),
            'bname1': searched.get('bname1', ''),
            'bname1English': searched.get('bname1_eng', ''),
            'bname2': searched.get('bname2', ''),
            'bname2English': searched.get('bname2_eng', ''),
            # Building fields fall back from the item itself to the road
            # span and then to the jibun span.
            'buildingCode': searched.get('building_code', '') or road.get('building_code', '') or jibun.get(
                'building_code', ''),
            'buildingName': searched.get('building_name', '') or road.get('building_name', '') or jibun.get(
                'building_name', ''),
            'hname': searched.get('hname', ''),
            'apartment': searched.get('is_multi_building', '') == 'true',
            # Road-name fields fall back from the item to the road span.
            'roadname': searched.get('roadname', '') or road.get('roadname', ''),
            'roadnameCode': searched.get('roadname_code', '') or road.get('roadname_code', ''),
            'roadnameEnglish': searched.get('roadname_eng', '') or road.get('roadname_eng', ''),
            'sido': searched.get('sido', ''),
            'sidoEnglish': searched.get('sido_eng', ''),
            'sigungu': searched.get('sigungu', ''),
            'sigunguCode': searched.get('sigungu_code', ''),
            'sigunguEnglish': searched.get('sigungu_eng', ''),
            'zonecode': searched.get('zonecode', ''),
            'jibunAddress': jibun.get('addr', ''),
            'jibunAddressEnglish': jibun.get('addr_eng', ''),
            'roadAddress': road.get('addr', ''),
            'roadAddressEnglish': road.get('addr_eng', ''),
        }
        yield data
if __name__ == '__main__':
    from pprint import pprint

    # Demo run: (keyword, zipcode) pairs looked up concurrently. The sample
    # output recorded below shows None for the last pair.
    sample_queries = [
        ('서울 강남구 가로수길 5', '06035'),
        ('서울 동작구 동작동 316', '06905'),
        ('없는주소', '12345'),
    ]
    pprint(search_many_postcode(sample_queries))
"""output | |
{('서울 강남구 가로수길 5', '06035'): {'_from': 'html', | |
'address': '서울 강남구 가로수길 5', | |
'addressEnglish': '5, Garosu-gil, Gangnam-gu, ' | |
'Seoul, Korea', | |
'addressType': 'R', | |
'apartment': False, | |
'bcode': '1168010700', | |
'bname': '신사동', | |
'bname1': '', | |
'bname1English': '', | |
'bname2': '신사동', | |
'bname2English': 'Sinsa-dong', | |
'bnameEnglish': 'Sinsa-dong', | |
'buildingCode': '1168010700105370005011918', | |
'buildingName': '', | |
'hname': '', | |
'jibunAddress': '서울 강남구 신사동 537-5', | |
'jibunAddressEnglish': '537-5, Sinsa-dong, ' | |
'Gangnam-gu, Seoul, Korea', | |
'query': '서울 강남구 가로수길 5', | |
'roadAddress': '서울 강남구 가로수길 5', | |
'roadAddressEnglish': '5, Garosu-gil, ' | |
'Gangnam-gu, Seoul, Korea', | |
'roadname': '가로수길', | |
'roadnameCode': '4858362', | |
'roadnameEnglish': 'Garosu-gil', | |
'sido': '서울', | |
'sidoEnglish': 'Seoul', | |
'sigungu': '강남구', | |
'sigunguCode': '11680', | |
'sigunguEnglish': 'Gangnam-gu', | |
'userSelectedType': 'R', | |
'zonecode': '06035'}, | |
('서울 동작구 동작동 316', '06905'): {'_from': 'html', | |
'address': '서울 동작구 동작동 316', | |
'addressEnglish': '316, Dongjak-dong, ' | |
'Dongjak-gu, Seoul, Korea', | |
'addressType': 'J', | |
'apartment': False, | |
'bcode': '1159010600', | |
'bname': '동작동', | |
'bname1': '', | |
'bname1English': '', | |
'bname2': '동작동', | |
'bname2English': 'Dongjak-dong', | |
'bnameEnglish': 'Dongjak-dong', | |
'buildingCode': '1159010600103160000000001', | |
'buildingName': '반포수난구조대', | |
'hname': '사당2동', | |
'jibunAddress': '서울 동작구 동작동 316', | |
'jibunAddressEnglish': '316, Dongjak-dong, ' | |
'Dongjak-gu, Seoul, ' | |
'Korea', | |
'query': '서울 동작구 동작동 316', | |
'roadAddress': '서울 동작구 동작대로 335-1', | |
'roadAddressEnglish': '335-1, Dongjak-daero, ' | |
'Dongjak-gu, Seoul, Korea', | |
'roadname': '동작대로', | |
'roadnameCode': '2005009', | |
'roadnameEnglish': 'Dongjak-daero', | |
'sido': '서울', | |
'sidoEnglish': 'Seoul', | |
'sigungu': '동작구', | |
'sigunguCode': '11590', | |
'sigunguEnglish': 'Dongjak-gu', | |
'userSelectedType': 'J', | |
'zonecode': '06905'}, | |
('없는주소', '12345'): None} | |
""" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment