tamlt2704 · June 6, 2018 12:56
diff --git a/data visualisation b/data visualisation
 from __future__ import division

 import pandas as pd
 import matplotlib.pyplot as plt
 from datetime import timedelta

 %matplotlib inline

 # Largest gap, in whole minutes, between two consecutive sleep runs that
 # get_sleep_duration still merges into a single sleep period.
 SLEEP_INTERVAL = 5 # minutes
 # https://www.timeanddate.com/sun/singapore/singapore?month=4&year=2018
 # Sunrise / sunset as 'HH:MM' strings; not referenced by the code visible here.
 SUNRISE_TIME, SUNSET_TIME = '07:04', '19:12'

 # https://en.wikipedia.org/wiki/Heart_rate
 # Thresholds used by get_hr_lvl: below HR_LOW_LVL is "low", above HR_REST_LVL
 # is "high", anything in between (inclusive) is "normal".
 HR_LOW_LVL = 50
 HR_REST_LVL = 100

 # Open the workbook once; the bare expression below only has a visible effect
 # in an interactive session (presumably a notebook cell listing the sheets).
 xl = pd.ExcelFile('./patient_1.xlsx')
 xl.sheet_names

 # Load the 'data' sheet and make sure timestamps are real datetimes.
 data = xl.parse('data')
 data['timestamp'] = pd.to_datetime(data.timestamp)
 # Reference point: 12:00 noon on the calendar date of the earliest timestamp.
 min_datetime = pd.to_datetime('{} 12:00:00'.format(data.timestamp.min().date()))
 # 'day' = whole days elapsed since that noon, so each day index covers a
 # noon-to-noon window; rows before the first noon get day -1.
 data['day'] = data.timestamp.apply(lambda x: (x - min_datetime).days)
 data = data.sort_values(['day', 'timestamp'])
 data.head()


 # Column names of interest; this list is not used by the code visible here.
 columns = ['Activity', 'hr', 'rr', 'temp', 'sleep', 'timestamp', 'Alarm', 'AF', 'sym', 'day']
 # Restrict the analysis to the noon-to-noon window with day index 1.
 df = data[data.day == 1]


 def get_sleep_duration(daily_data, max_gap_minutes=None):
    """Return the sleep periods found in ``daily_data``.

    Consecutive rows with ``sleep == 1`` form a sleep run. Runs separated by
    a gap of at most ``max_gap_minutes`` whole minutes are merged into one
    period.

    Args:
        daily_data: DataFrame with a ``sleep`` column (1 while asleep) and a
            ``timestamp`` column. Rows are assumed to be in chronological
            order -- TODO confirm against the caller. The frame is not
            modified.
        max_gap_minutes: Largest gap (in whole minutes) that is still bridged
            when merging runs. Defaults to the module-level SLEEP_INTERVAL.

    Returns:
        DataFrame with one row per merged sleep period and the columns
        ``start`` and ``end``; empty when no row has ``sleep == 1``.
    """
    if max_gap_minutes is None:
        max_gap_minutes = SLEEP_INTERVAL

    # A new run starts on every row whose sleep flag differs from the previous
    # row's. Kept as a standalone Series so the caller's frame is not mutated.
    run_id = (daily_data['sleep'].diff().fillna(0) != 0).cumsum()
    asleep = daily_data['sleep'] == 1
    if not asleep.any():
        return pd.DataFrame([], columns=['start', 'end'])

    # First and last timestamp of every asleep run. A list of function names
    # plus a rename is used because the dict-renaming form of agg() on a
    # single column is rejected by current pandas.
    runs = (daily_data.loc[asleep, 'timestamp']
            .groupby(run_id[asleep].values)
            .agg(['min', 'max'])
            .rename(columns={'min': 'start', 'max': 'end'}))

    # Gap between the end of the previous run and the start of this one,
    # floored to whole minutes; the first run has no predecessor, so gap 0.
    gap = (runs['start'] - runs['end'].shift(1)).fillna(pd.Timedelta(0))
    gap_minutes = (gap.dt.total_seconds() // 60).astype(int)

    # Runs share a period id until a gap exceeds the threshold.
    period_id = (gap_minutes > max_gap_minutes).cumsum()
    durations = [(period['start'].min(), period['end'].max())
                 for _, period in runs.groupby(period_id)]
    return pd.DataFrame(durations, columns=['start', 'end'])

 def get_hr_lvl(hr):
    """Map a heart-rate reading to a level code.

    Returns 1 (low) when ``hr`` is below HR_LOW_LVL, 2 (normal) when it lies
    between HR_LOW_LVL and HR_REST_LVL inclusive, 3 (high) when it is above
    HR_REST_LVL, and None when none of the comparisons holds (e.g. NaN).
    """
    level = None  # stays None if no comparison below is true
    if hr < HR_LOW_LVL:
        level = 1  # low
    elif HR_LOW_LVL <= hr <= HR_REST_LVL:
        level = 2  # normal
    elif hr > HR_REST_LVL:
        level = 3  # high
    return level
 # def get_hr_duration(daily_data)
 # NOTE(review): the commented-out signature above suggests the statements
 # below were meant to become a function -- confirm.
 # Work on a copy so the heart-rate helper columns are not added to df.
 daily_data = df.copy()
 # Level code per reading: 1 low, 2 normal, 3 high (see get_hr_lvl).
 daily_data['hr_lvl'] = df.hr.apply(get_hr_lvl)
 # Run counter: increases on every row whose level differs from the previous
 # row's, so consecutive rows at the same level share one hr_instance value.
 daily_data['hr_instance'] = (daily_data['hr_lvl'].diff().fillna(0) != 0).cumsum()

 # One (start, end, hr_lvl) tuple per run of consecutive equal levels.
 durations = []
 for k , g in daily_data.groupby('hr_instance'):
    # First and last timestamp of the run.
    start, end = g.timestamp.min(), g.timestamp.max()
    # Most frequent level inside the run, used as the run's level.
    hr_lvl = g['hr_lvl'].mode().iloc[0]
    durations.append((start, end, hr_lvl))
 # Bare expression: the summary table is built but not bound to a name.
 pd.DataFrame(durations, columns=['start', 'end', 'hr_lvl'])


 # Sleep periods for the same day; df itself (not the copy) is passed in.
 get_sleep_duration(df)
	from __future__ import division

	import pandas as pd
	import matplotlib.pyplot as plt
	from datetime import timedelta

	%matplotlib inline

	# Largest gap, in whole minutes, between two consecutive sleep runs that
	# get_sleep_duration still merges into a single sleep period.
	SLEEP_INTERVAL = 5 # minutes
	# https://www.timeanddate.com/sun/singapore/singapore?month=4&year=2018
	# Sunrise / sunset as 'HH:MM' strings; not referenced by the code visible here.
	SUNRISE_TIME, SUNSET_TIME = '07:04', '19:12'

	# https://en.wikipedia.org/wiki/Heart_rate
	# Thresholds used by get_hr_lvl: below HR_LOW_LVL is "low", above HR_REST_LVL
	# is "high", anything in between (inclusive) is "normal".
	HR_LOW_LVL = 50
	HR_REST_LVL = 100

	# Open the workbook once; the bare expression below only has a visible effect
	# in an interactive session (presumably a notebook cell listing the sheets).
	xl = pd.ExcelFile('./patient_1.xlsx')
	xl.sheet_names

	# Load the 'data' sheet and make sure timestamps are real datetimes.
	data = xl.parse('data')
	data['timestamp'] = pd.to_datetime(data.timestamp)
	# Reference point: 12:00 noon on the calendar date of the earliest timestamp.
	min_datetime = pd.to_datetime('{} 12:00:00'.format(data.timestamp.min().date()))
	# 'day' = whole days elapsed since that noon, so each day index covers a
	# noon-to-noon window; rows before the first noon get day -1.
	data['day'] = data.timestamp.apply(lambda x: (x - min_datetime).days)
	data = data.sort_values(['day', 'timestamp'])
	data.head()


	# Column names of interest; this list is not used by the code visible here.
	columns = ['Activity', 'hr', 'rr', 'temp', 'sleep', 'timestamp', 'Alarm', 'AF', 'sym', 'day']
	# Restrict the analysis to the noon-to-noon window with day index 1.
	df = data[data.day == 1]


	def get_sleep_duration(daily_data, max_gap_minutes=None):
	    """Return the sleep periods found in ``daily_data``.

	    Consecutive rows with ``sleep == 1`` form a sleep run. Runs separated
	    by a gap of at most ``max_gap_minutes`` whole minutes are merged into
	    one period.

	    Args:
	        daily_data: DataFrame with a ``sleep`` column (1 while asleep) and
	            a ``timestamp`` column. Rows are assumed to be in chronological
	            order -- TODO confirm against the caller. The frame is not
	            modified.
	        max_gap_minutes: Largest gap (in whole minutes) that is still
	            bridged when merging runs. Defaults to the module-level
	            SLEEP_INTERVAL.

	    Returns:
	        DataFrame with one row per merged sleep period and the columns
	        ``start`` and ``end``; empty when no row has ``sleep == 1``.
	    """
	    if max_gap_minutes is None:
	        max_gap_minutes = SLEEP_INTERVAL

	    # A new run starts on every row whose sleep flag differs from the
	    # previous row's. Kept as a standalone Series so the caller's frame is
	    # not mutated.
	    run_id = (daily_data['sleep'].diff().fillna(0) != 0).cumsum()
	    asleep = daily_data['sleep'] == 1
	    if not asleep.any():
	        return pd.DataFrame([], columns=['start', 'end'])

	    # First and last timestamp of every asleep run. A list of function
	    # names plus a rename is used because the dict-renaming form of agg()
	    # on a single column is rejected by current pandas.
	    runs = (daily_data.loc[asleep, 'timestamp']
	            .groupby(run_id[asleep].values)
	            .agg(['min', 'max'])
	            .rename(columns={'min': 'start', 'max': 'end'}))

	    # Gap between the end of the previous run and the start of this one,
	    # floored to whole minutes; the first run has no predecessor, so gap 0.
	    gap = (runs['start'] - runs['end'].shift(1)).fillna(pd.Timedelta(0))
	    gap_minutes = (gap.dt.total_seconds() // 60).astype(int)

	    # Runs share a period id until a gap exceeds the threshold.
	    period_id = (gap_minutes > max_gap_minutes).cumsum()
	    durations = [(period['start'].min(), period['end'].max())
	                 for _, period in runs.groupby(period_id)]
	    return pd.DataFrame(durations, columns=['start', 'end'])

	def get_hr_lvl(hr):
	    """Map a heart-rate reading to a level code.

	    Returns 1 (low) when ``hr`` is below HR_LOW_LVL, 2 (normal) when it
	    lies between HR_LOW_LVL and HR_REST_LVL inclusive, 3 (high) when it is
	    above HR_REST_LVL, and None when none of the comparisons holds
	    (e.g. NaN).
	    """
	    if hr < HR_LOW_LVL:
	        return 1 # low
	    if (hr >= HR_LOW_LVL) and (hr <= HR_REST_LVL):
	        return 2 # normal
	    if (hr > HR_REST_LVL):
	        return 3 # high
	    # Reached only when every comparison above is false.
	    return None
	# def get_hr_duration(daily_data)
	# NOTE(review): the commented-out signature above suggests the statements
	# below were meant to become a function -- confirm.
	# Work on a copy so the heart-rate helper columns are not added to df.
	daily_data = df.copy()
	# Level code per reading: 1 low, 2 normal, 3 high (see get_hr_lvl).
	daily_data['hr_lvl'] = df.hr.apply(get_hr_lvl)
	# Run counter: increases on every row whose level differs from the previous
	# row's, so consecutive rows at the same level share one hr_instance value.
	daily_data['hr_instance'] = (daily_data['hr_lvl'].diff().fillna(0) != 0).cumsum()

	# One (start, end, hr_lvl) tuple per run of consecutive equal levels.
	durations = []
	for k, g in daily_data.groupby('hr_instance'):
	    # First and last timestamp of the run.
	    start, end = g.timestamp.min(), g.timestamp.max()
	    # Most frequent level inside the run, used as the run's level.
	    hr_lvl = g['hr_lvl'].mode().iloc[0]
	    durations.append((start, end, hr_lvl))
	# Bare expression: the summary table is built but not bound to a name.
	pd.DataFrame(durations, columns=['start', 'end', 'hr_lvl'])


	# Sleep periods for the same day; df itself (not the copy) is passed in.
	get_sleep_duration(df)