Last active
August 29, 2015 14:07
-
-
Save t0mst0ne/a065623ca4644b84f86b to your computer and use it in GitHub Desktop.
Parse the pharmacy information from data.fda.gov.tw
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:UTF-8
"""Geocode the Hualien County pharmacies listed in the data.fda.gov.tw feed.

The script downloads the pharmacy XML feed, loads every record into a
pandas DataFrame, builds a full street address for each record whose county
column equals COUNTY, geocodes each address, and writes the coordinates,
address and pharmacy name to ``HL_Address.csv``.

NOTE: this is Python 2 code (``urllib.urlopen``, ``cElementTree``).
"""
import time
import urllib
import xml.etree.cElementTree as ET

import pandas as pd
from pygeocoder import Geocoder

# NOTE(review): the URL embeds a ``jsessionid``; presumably it is tied to a
# server session and may go stale -- confirm before relying on it.
FEED_URL = "http://data.fda.gov.tw/cacheData/35_1.xml;jsessionid=C8F4570A382FF8BFC9CC901CD802F0EF"
# County whose pharmacies are geocoded; also used as the address prefix.
COUNTY = u'花蓮縣'
# Pause before every geocoding request.
GEOCODE_DELAY_SECONDS = 0.5

# Download and parse the feed; always release the HTTP response afterwards.
feed = urllib.urlopen(FEED_URL)
try:
    tree = ET.parse(feed)
finally:
    feed.close()
root = tree.getroot()

# One dict per child of the root element, mapping each sub-element's tag to
# its text; pandas turns the tags into columns.
mylist = [{child2.tag: child2.text for child2 in child_of_root}
          for child_of_root in root]
data = pd.DataFrame(mylist)

# Select the county's rows once and reuse the selection everywhere below.
in_county = data[data[u'地址縣市別'] == COUNTY]

# Full address = county + township/district + street address.
HL = COUNTY + in_county[u'地址鄉鎮市區'] + in_county[u'地址街道巷弄號']

# Find the coordinates of every address. Iterate over the Series values
# rather than a hard-coded label range, so the loop covers exactly the rows
# in HL regardless of where they sit in the feed or how many there are.
G_HL = []
for address in HL:
    time.sleep(GEOCODE_DELAY_SECONDS)
    # Keep the coordinates of the first geocoding result for this address.
    G_HL.append(Geocoder.geocode(address)[0].coordinates)

# Build the output frame: coordinate columns, then address and pharmacy name.
# G_HL, HL and pharma_name all come from the same rows in the same order, so
# the columns line up.
Add_HL = pd.DataFrame(G_HL)
pharma_name = list(in_county[u'機構名稱'])
Add_HL['Address'] = list(HL)
Add_HL['pharma_name'] = pharma_name
Add_HL.to_csv('HL_Address.csv', sep=',', encoding='utf-8')
HL_Address.csv output:
0,23.9783678,121.609356,花蓮縣花蓮市復興街56號,建生藥局
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The `data` DataFrame looks like this:
地址縣市別 地址街道巷弄號 地址鄉鎮市區 是否為健保特約藥局 機構名稱 機構狀態 負責人姓名 負責人性別 電話
0 臺北市 金華街47號 中正區 是 五福藥局 開業 陳錦山 男 02-23928934
1 臺北市 齊東街88號 中正區 是 崇信藥局 開業 黃榮欽 男 02-23417557
2 臺北市 濟南路二段45-1號1樓 中正區 是 民生藥局 開業 吳肇修 男 02-23416470
3 臺北市 信陽街3號1樓 中正區 否 新德星中西藥局 開業 鍾權隆 男 02-23614598