Created
November 12, 2019 21:11
-
-
Save shaabhishek/ec13106a699b8d2110ee6de51aac9a14 to your computer and use it in GitHub Desktop.
dense_rank use case for assigning the visit_counter to each hospital visit for each patient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## imports and other stuff | |
# Boilerplate for setting things up | |
query_args = {'dbname': dbname, 'host': host, 'port': port, 'user': dbusername} | |
conn = psycopg2.connect(**query_args) | |
cur = conn.cursor() | |
cur.execute('SET search_path to ' + schema_name) | |
# Setup the query- note it is ALMOST the same as the sql file's query | |
query = \ | |
""" | |
with visitcnts as | |
( | |
select d.subject_id, count(distinct d.hadm_id) as visits | |
from diagnoses_icd d | |
group by d.subject_id | |
), diag as | |
( | |
select visitcnts.subject_id, d2.hadm_id, d2.icd9_code | |
from visitcnts | |
inner join diagnoses_icd d2 | |
on visitcnts.subject_id = d2.subject_id | |
where visits > 1 | |
) | |
select diag.*, a.admittime | |
from diag | |
inner join admissions a | |
on diag.hadm_id = a.hadm_id | |
""" | |
# read table into dataframe | |
data_df = pd.read_sql_query(query, conn) | |
# extract out only the unique subject-hospital_admission-admittime combinations | |
hadm_times = data_df.drop('icd9_code', axis=1) | |
hadm_times = hadm_times.drop_duplicates() | |
## group by subject - create a list of indices for each hospital visit - assign each (subject,hadm) pair an index | |
# works because group keys are sorted | |
visit_idxs = hadm_times.groupby(['subject_id']).apply(lambda x: pd.Series(np.arange(len(x)))).values | |
hadm_times = hadm_times.sort_values(['subject_id', 'admittime']) | |
hadm_times['visit_idx'] = visit_idxs | |
# don't really need admittimes anymore | |
hadm_times.drop('admittime', axis=1, inplace=True) | |
data_df.drop('admittime', axis=1, inplace=True) | |
# join the visit-ranking with the full table | |
data_df = hadm_times.merge(data_df, on=['subject_id', 'hadm_id'], how='inner') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with visitcnts as | |
( | |
select d.subject_id, count(distinct d.hadm_id) as visits | |
from diagnoses_icd d | |
group by d.subject_id | |
), diag as | |
( | |
select visitcnts.subject_id, d2.hadm_id, d2.icd9_code | |
from visitcnts | |
inner join diagnoses_icd d2 | |
on visitcnts.subject_id = d2.subject_id | |
where visits > 1 | |
) | |
select diag.*, a.admittime, | |
dense_rank() over ( | |
partition by diag.subject_id | |
order by a.admittime | |
) visit_idx | |
from diag | |
inner join admissions a | |
on diag.hadm_id = a.hadm_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment