Skip to content

Instantly share code, notes, and snippets.

@2minchul
Created March 17, 2023 16:00
Show Gist options
  • Save 2minchul/6ebd790f20cca7b48d1c3d3b71ac051d to your computer and use it in GitHub Desktop.
Save 2minchul/6ebd790f20cca7b48d1c3d3b71ac051d to your computer and use it in GitHub Desktop.
Kakao 우편번호 서비스 (postcode service) for Python
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional, List, Tuple, Dict
import requests
from bs4 import BeautifulSoup
def search_many_postcode(queries: List[Tuple[str, str]]) -> Dict[tuple, Optional[dict]]:
    """
    Look up several (keyword, zipcode) pairs concurrently.

    Each pair is passed to ``search_postcode`` on a worker thread (at most 10
    workers), and every lookup shares a single ``requests.Session``.

    :param queries: list of tuple(keyword, zipcode)
    :return: {tuple(keyword, zipcode): dict of address}; each value is whatever
        ``search_postcode`` returned for that pair
    """
    with ThreadPoolExecutor(max_workers=10) as executor, requests.Session() as http:
        # Map each submitted task back to the query that produced it.
        pending = {}
        for query in queries:
            task = executor.submit(search_postcode, query[0], query[1], http)
            pending[task] = query
        # Collect results in completion order; an exception raised by a
        # lookup propagates from ``task.result()``.
        return {pending[task]: task.result() for task in as_completed(pending)}
def search_postcode(keyword: str, zipcode: str, session: Optional[requests.Session] = None) -> Optional[dict]:
    """
    Search for ``keyword`` and return the first result whose zonecode matches.

    :param keyword: address text to search for
    :param zipcode: value the result's ``'zonecode'`` entry must equal
    :param session: optional session forwarded to ``request_search_postcode``
    :return: the first matching address dict, or None when nothing matches
    """
    page = request_search_postcode(keyword, session)
    candidates = parse_iter_postcode(page, keyword)
    # The generator is consumed lazily, so parsing stops at the first match.
    return next((entry for entry in candidates if entry.get('zonecode') == zipcode), None)
def request_search_postcode(keyword: str, session: Optional[requests.Session] = None,
                            timeout: float = 10.0) -> str:
    """
    Fetch the postcode search result page for ``keyword`` and return its HTML.

    :param keyword: address text to search for; it is percent-encoded before
        being placed in the query string
    :param session: optional ``requests.Session`` to send the request with;
        when None the module-level ``requests.get`` is used
    :param timeout: seconds to wait for the server before giving up
    :return: response body as text
    :raises requests.HTTPError: when the response status is an error
        (via ``raise_for_status``)
    :raises requests.Timeout: when the server does not answer within ``timeout``
    """
    from urllib.parse import quote

    headers = {
        'authority': 'postcode.map.daum.net',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'referer': 'https://postcode.map.daum.net',
        'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'iframe',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
    }
    # Encode the keyword so characters such as '&', '#', '=' or '+' cannot
    # terminate or corrupt the query parameter they are embedded in.
    encoded = quote(keyword, safe='')
    url = (
        'https://postcode.map.daum.net/search?'
        f'region_name={encoded}&cq={encoded}&cpage=1&origin=https://postcode.map.daum.net&'
        'isp=N&isgr=N&isgj=N&ongr=&ongj=&regionid=&regionname=&roadcode=&roadname=&banner=on&'
        'ubl=on&indaum=off&vt=popup&amr=on&amj=on&ani=off&mode=transmit&sd=on&fi=on&fc=on&hmb=off&'
        'heb=off&asea=off&smh=off&zo=on&theme=&bit=&sit=&sgit=&sbit=&pit=&mit=&lcit=&plrg=&plrgt=1.5&'
        'us=on&msi=10&ahs=off&whas=500&zn=Y&sm=on&CWinWidth=500&sptype=&sporgq=&a51=off'
    )
    http_get = requests.get if session is None else session.get
    # Without a timeout a stalled connection would block the caller (and, in
    # search_many_postcode, a pool worker) indefinitely.
    with http_get(url, headers=headers, timeout=timeout) as response:
        response.raise_for_status()
        return response.text
def _get_data_attrs(tag):
return {k[5:]: v for k, v in tag.attrs.items() if k.startswith('data-')}
def _address_span_attrs(address_dl, main_class, rel_class):
    """Return the data-* attributes of the ``txt_address`` span under the main (or, failing that, related) ``<dd>``; {} if absent."""
    dd = address_dl.find('dd', attrs={'class': main_class}) or address_dl.find(
        'dd', attrs={'class': rel_class})
    if not dd:
        return {}
    span = dd.find('span', attrs={'class': 'txt_address'})
    # The <dd> may lack the span; skip it rather than crash on None.
    if span is None:
        return {}
    return _get_data_attrs(span)


def parse_iter_postcode(html, keyword=''):
    """
    Parse a postcode search result page and yield one dict per listed address.

    Looks for ``<ul class="list_post">`` and walks its ``<li>`` items. Each
    item's own ``data-*`` attributes are combined with those of the road and
    jibun ``txt_address`` spans found in its ``<dl class="list_address">``.
    Items without that ``<dl>`` are skipped. Missing values come back as ''.

    :param html: HTML text of the search result page
    :param keyword: stored unchanged under the ``'query'`` key of every result
    :return: generator of address dicts; yields nothing when the result list
        is not present in ``html``
    """
    soup = BeautifulSoup(html, 'html.parser')
    ul = soup.find('ul', attrs={'class': 'list_post'})
    if not ul:
        return
    for li_tag in ul.find_all('li'):
        searched = _get_data_attrs(li_tag)
        address_dl = li_tag.find('dl', attrs={'class': 'list_address'})
        if not address_dl:
            continue
        road = _address_span_attrs(address_dl, 'main_road', 'rel_road')
        jibun = _address_span_attrs(address_dl, 'main_jibun', 'rel_jibun')
        yield {
            'query': keyword,
            '_from': 'html',
            'addressType': searched.get('addr_type', ''),
            'userSelectedType': searched.get('addr_type', ''),
            'address': searched.get('addr', ''),
            'addressEnglish': searched.get('addr_eng', ''),
            'bcode': searched.get('bcode', ''),
            'bname': searched.get('bname', ''),
            'bnameEnglish': searched.get('bname_eng', ''),
            'bname1': searched.get('bname1', ''),
            'bname1English': searched.get('bname1_eng', ''),
            'bname2': searched.get('bname2', ''),
            'bname2English': searched.get('bname2_eng', ''),
            # Building and road fields fall back from the <li> to the road
            # span and then (for building fields) to the jibun span.
            'buildingCode': searched.get('building_code', '') or road.get('building_code', '') or jibun.get(
                'building_code', ''),
            'buildingName': searched.get('building_name', '') or road.get('building_name', '') or jibun.get(
                'building_name', ''),
            'hname': searched.get('hname', ''),
            'apartment': searched.get('is_multi_building', '') == 'true',
            'roadname': searched.get('roadname', '') or road.get('roadname', ''),
            'roadnameCode': searched.get('roadname_code', '') or road.get('roadname_code', ''),
            'roadnameEnglish': searched.get('roadname_eng', '') or road.get('roadname_eng', ''),
            'sido': searched.get('sido', ''),
            'sidoEnglish': searched.get('sido_eng', ''),
            'sigungu': searched.get('sigungu', ''),
            'sigunguCode': searched.get('sigungu_code', ''),
            'sigunguEnglish': searched.get('sigungu_eng', ''),
            'zonecode': searched.get('zonecode', ''),
            'jibunAddress': jibun.get('addr', ''),
            'jibunAddressEnglish': jibun.get('addr_eng', ''),
            'roadAddress': road.get('addr', ''),
            'roadAddressEnglish': road.get('addr_eng', ''),
        }
if __name__ == '__main__':
    # Demo run: two real addresses plus one query ('없는주소') that the
    # sample output below shows resolving to None.
    from pprint import pprint

    sample_queries = [
        ('서울 강남구 가로수길 5', '06035'),
        ('서울 동작구 동작동 316', '06905'),
        ('없는주소', '12345'),
    ]
    result = search_many_postcode(sample_queries)
    pprint(result)
"""output
{('서울 강남구 가로수길 5', '06035'): {'_from': 'html',
'address': '서울 강남구 가로수길 5',
'addressEnglish': '5, Garosu-gil, Gangnam-gu, '
'Seoul, Korea',
'addressType': 'R',
'apartment': False,
'bcode': '1168010700',
'bname': '신사동',
'bname1': '',
'bname1English': '',
'bname2': '신사동',
'bname2English': 'Sinsa-dong',
'bnameEnglish': 'Sinsa-dong',
'buildingCode': '1168010700105370005011918',
'buildingName': '',
'hname': '',
'jibunAddress': '서울 강남구 신사동 537-5',
'jibunAddressEnglish': '537-5, Sinsa-dong, '
'Gangnam-gu, Seoul, Korea',
'query': '서울 강남구 가로수길 5',
'roadAddress': '서울 강남구 가로수길 5',
'roadAddressEnglish': '5, Garosu-gil, '
'Gangnam-gu, Seoul, Korea',
'roadname': '가로수길',
'roadnameCode': '4858362',
'roadnameEnglish': 'Garosu-gil',
'sido': '서울',
'sidoEnglish': 'Seoul',
'sigungu': '강남구',
'sigunguCode': '11680',
'sigunguEnglish': 'Gangnam-gu',
'userSelectedType': 'R',
'zonecode': '06035'},
('서울 동작구 동작동 316', '06905'): {'_from': 'html',
'address': '서울 동작구 동작동 316',
'addressEnglish': '316, Dongjak-dong, '
'Dongjak-gu, Seoul, Korea',
'addressType': 'J',
'apartment': False,
'bcode': '1159010600',
'bname': '동작동',
'bname1': '',
'bname1English': '',
'bname2': '동작동',
'bname2English': 'Dongjak-dong',
'bnameEnglish': 'Dongjak-dong',
'buildingCode': '1159010600103160000000001',
'buildingName': '반포수난구조대',
'hname': '사당2동',
'jibunAddress': '서울 동작구 동작동 316',
'jibunAddressEnglish': '316, Dongjak-dong, '
'Dongjak-gu, Seoul, '
'Korea',
'query': '서울 동작구 동작동 316',
'roadAddress': '서울 동작구 동작대로 335-1',
'roadAddressEnglish': '335-1, Dongjak-daero, '
'Dongjak-gu, Seoul, Korea',
'roadname': '동작대로',
'roadnameCode': '2005009',
'roadnameEnglish': 'Dongjak-daero',
'sido': '서울',
'sidoEnglish': 'Seoul',
'sigungu': '동작구',
'sigunguCode': '11590',
'sigunguEnglish': 'Dongjak-gu',
'userSelectedType': 'J',
'zonecode': '06905'},
('없는주소', '12345'): None}
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment