Skip to content

Instantly share code, notes, and snippets.

@t0mst0ne
Last active August 29, 2015 14:04
Show Gist options
  • Save t0mst0ne/f40aab166fb536ae3e1b to your computer and use it in GitHub Desktop.
Save t0mst0ne/f40aab166fb536ae3e1b to your computer and use it in GitHub Desktop.
Parsing ER information from KCGMH
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import re
import time
import json
import os
os.environ['TZ'] = 'ROC'
html = urllib2.urlopen("https://www.cgmh.org.tw/bed/erd/index.asp?loc=2").read().decode("Big5")
query_time = re.findall(u'更新時間: (.*)</td>', html)
update_time = int(time.mktime(time.strptime(query_time[0], "%Y/%m/%d %H:%M:%S")))
pattern = ur'<img.*?<FONT.*?>(.*?)</td>'
results = re.findall(pattern, html)
if results[0] == u'是' :
full_reported = True
else:
full_reported = False
pending_doctor = int(results[1])
pending_bed = int(results[2])
pending_ward = int(results[3])
pending_icu = int(results[4])
hospital_sn = str(1111060015)
report = [{"full_reported":full_reported, "pending_doctor":pending_doctor, "pending_bed":pending_bed,"pending_ward":pending_ward, "pending_icu":pending_icu, "update_time":update_time, "hospital_sn":hospital_sn }]
print json.dumps(report, ensure_ascii=False)
@audreyt
Copy link

audreyt commented Aug 4, 2014

#!/usr/bin/env python
#coding:UTF-8
"""Draft scraper for the cgmh.org.tw emergency-room status page (loc=2).

Downloads the page, decodes it from Big5, pulls each field out with a
regular expression and prints a one-element JSON list.  Every scraped value
in the output is the raw list returned by re.findall(), not a scalar.
"""
import urllib2
import re
import json

html = urllib2.urlopen("https://www.cgmh.org.tw/bed/erd/index.asp?loc=2").read().decode("Big5")

# Capture the timestamp that follows the "更新時間:" ("update time") label.
# Without a capture group findall() would only return the label itself.
Grabtime = re.findall(u'更新時間:\s*(.*?)</td>', html)
# Each field is located between two consecutive row icons (itemN.jpg) and
# read from the cell styled with the 微軟正黑體 (Microsoft JhengHei) font.
Full_reported = re.findall(u'item1.jpg.*微軟正黑體">(.*)</td>.*item2.jpg',html)
Pending_doctor = re.findall(u'item2.jpg.*微軟正黑體">(.*)</td>.*item3.jpg',html)
Pending_bed = re.findall(u'item3.jpg.*微軟正黑體">(.*)</td>.*item4.jpg',html)
Pending_ward = re.findall(u'item4.jpg.*微軟正黑體">(.*)</td>.*item5.jpg',html)
# NOTE(review): this pattern ends with a literal space after </tr>; kept
# as-is because the page markup is not visible here -- confirm it matches.
Pending_ICU = re.findall(u'item5.jpg.*微軟正黑體">(.*)</td></tr> ',html)
# Field legend (translated from the page's Chinese labels):
#   report/update time                         -> Grabtime
#   full capacity already reported to 119      -> Full_reported
#       (author's open question: the characters "是"/"否" come out in a
#        different encoding; unsure how to change that)
#   number of patients waiting to be seen      -> Pending_doctor
#   number of patients waiting for a bed       -> Pending_bed
#   number of patients waiting for admission   -> Pending_ward
#   number of patients waiting for the ICU     -> Pending_ICU
#   hospital code                              -> Hospital_SN
#       (Keelung Chang Gung Memorial Hospital: 1111060015)

report = [{"Hospital_SN":1111060015, "Grabtime":Grabtime, "Full_reported":Full_reported, "Pending_doctor":Pending_doctor, "Pending_bed":Pending_bed, "Pending_ward":Pending_ward, "Pending_ICU":Pending_ICU }]

print(json.dumps(report, ensure_ascii=False))

@thewayiam
Copy link

from datetime import datetime
import time

# Example: turn a "YYYY/MM/DD" date string into integer epoch seconds,
# interpreting the date as midnight in the process's local time zone.
# time.mktime() is used rather than strftime('%s') because '%s' is a
# C-library extension, not a directive documented by Python, and is not
# available on every platform.
t = "2014/08/05"
epoch_seconds = int(time.mktime(datetime.strptime(t, "%Y/%m/%d").timetuple()))
print(epoch_seconds)
1407168000

@thewayiam
Copy link

Another solution for your reference

#!/usr/bin/env python
# coding:UTF-8
"""Alternative scraper for the cgmh.org.tw emergency-room page (loc=2).

Uses `requests` to download the page, extracts the values with regular
expressions and prints a single JSON object whose keys are listed in
`keys`.  Raises ValueError if fewer than four counters are found and
IndexError if the yes/no flag or the timestamp is missing.
"""
import re
import json
import time
import requests
from datetime import datetime

# Output keys, in the order the values are collected below.
keys = ['pending_doctor', 'pending_bed', 'pending_ward', 'pending_icu', 'full_reported', 'update_time']
r = requests.get("https://www.cgmh.org.tw/bed/erd/index.asp?loc=2")
r.encoding = 'big5'

# Every run of digits that directly follows a '>' is taken as a counter.
# Only the first four are kept so the two values appended below always line
# up with 'full_reported' and 'update_time' when zipped with `keys`.
# NOTE(review): assumes the first four such numbers on the page are the
# doctor/bed/ward/ICU counters, in that order -- confirm against the markup.
match = [int(value) for value in re.findall(u'>(\d+)', r.text)][:4]
if len(match) < 4:
    raise ValueError("expected 4 counters, found %d" % len(match))
# u'是' means "yes", u'否' means "no".
match_report = re.findall(u'>(是|否)', r.text)[0]
# Capture exactly the timestamp that follows the "更新時間:" (update time)
# label, so any markup after it on the same line cannot break strptime().
match_time = re.findall(u'更新時間:\s*(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{2}:\d{2})', r.text)[0]
match.append(match_report == u'是')
# Integer epoch seconds, with the timestamp read as local time.
# time.mktime() replaces the non-portable strftime('%s').
match.append(int(time.mktime(datetime.strptime(match_time, '%Y/%m/%d %H:%M:%S').timetuple())))
print(json.dumps(dict(zip(keys, match)), ensure_ascii=False))

@t0mst0ne
Copy link
Author

t0mst0ne commented Aug 7, 2014

import time

# Parse the first scraped "YYYY/MM/DD HH:MM:SS" string, then convert the
# resulting struct_time (read as local time by mktime) to whole epoch seconds.
_parsed_time = time.strptime(query_time[0], '%Y/%m/%d %H:%M:%S')
update_time = int(time.mktime(_parsed_time))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment