Created
March 28, 2020 13:00
-
-
Save ntuaha/73edd0479bf318db8202a655f2dc1824 to your computer and use it in GitHub Desktop.
擷取台灣地址NER
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import re" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Taiwanese address pattern. Every component is an optional named group, so match() always\n", | |
"# succeeds and components that are absent from the input come back as None.\n", | |
"# Components are only recognised in this fixed order:\n", | |
"#   zipcode (5 or 3 leading digits), 縣市, 鄉鎮市區, 村里, 路街道段, 鄰, 巷, 弄, 號, 樓\n", | |
"# Whatever text is left after the last recognised component is captured in 其他.\n", | |
"# The pattern is a raw string so that \\d and \\D reach the regex engine unchanged instead of\n", | |
"# being treated as (invalid) string escapes, which triggers a warning on Python 3.6+.\n", | |
"r = re.compile(r'(?P<zipcode>^\\d{5}|^\\d{3})?(?P<縣市>\\D+?[縣市])?(?P<鄉鎮市區>\\D+?[鄉鎮市區])?(?P<村里>\\D+?[村里])?(?P<路街道段>\\D+[路街道段])?(?P<鄰>\\d+鄰)?(?P<巷>\\d+巷)?(?P<弄>\\d+弄)?(?P<號>\\d+號)?(?P<樓>\\d+樓)?(?P<其他>.+)?')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sample address exercising the zipcode, 縣市, 路街道段, 號 and 樓 groups.\n", | |
"sample_address = '123新北市建中街152號7樓'\n", | |
"g = r.match(sample_address)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'zipcode': '123',\n", | |
" '縣市': '新北市',\n", | |
" '鄉鎮市區': None,\n", | |
" '村里': None,\n", | |
" '路街道段': '建中街',\n", | |
" '鄰': None,\n", | |
" '巷': None,\n", | |
" '弄': None,\n", | |
" '號': '152號',\n", | |
" '樓': '7樓',\n", | |
" '其他': None}" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Named groups as a dict; components that did not take part in the match map to None.\n", | |
"g.groupdict(default=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<re.Match object; span=(0, 15), match='123新北市建中街152號7樓'>" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Echo the match object to inspect the overall span and the matched text.\n", | |
"g" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment