Troubleshoot omission of all PRESCIENT cases from ampscz_psychs01_{baseline,month_1,month_2}
#!/usr/bin/env python
# cd /data/predict1/to_nda/nda-submissions/network_combined
import json
from os.path import basename

import pandas as pd

# subject -> network lookup
df = pd.read_csv('/data/predict1/home/dm1447/data/ampscz_all_subjects.csv')
df.set_index('subject_id', inplace=True)

# PRESCIENT form-status tracker
dfpre = pd.read_excel('form_status_tracker_PRESCIENT.xlsx')
dfpre.set_index('subject', inplace=True)
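
# --- optional sanity check (a sketch, not part of the original session): confirm the
# lookup covers both networks and the tracker index is unique before the loops below
print(df['network'].value_counts())
print('unique tracker index:', dfpre.index.is_unique)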

# the baseline psychs submission under diagnosis
dfpsychs = pd.read_csv('original/ampscz_psychs01_baseline.csv', header=1, dtype=str)

# print the follow-up baseline form status for every PRESCIENT subject in the submission
for i, row in dfpsychs.iterrows():
    s = row['src_subject_id']
    if df.loc[s, 'network'] == 'PRESCIENT':
        print(s, dfpre.loc[s, ['psychs_p1p8_fu_baseline', 'psychs_p9ac32_fu_baseline']].values)

# how many of the original PRESCIENT subjects have empty psychs_screening columns?
count = 0
for i, row in dfpsychs.iterrows():
    s = row['src_subject_id']
    if df.loc[s, 'network'] == 'PRESCIENT':
        if pd.isna(dfpre.loc[s, 'psychs_p1p8_screening']) and pd.isna(dfpre.loc[s, 'psychs_p9ac32_screening']):
            count += 1
            print(s, dfpre.loc[s, ['psychs_p1p8_fu_baseline', 'psychs_p9ac32_fu_baseline']].values)
print(count)
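
# --- optional cross-check (a sketch): the same tally via vectorized pandas ops,
# assuming every src_subject_id in dfpsychs appears in both df and dfpre
pre_ids = [s for s in dfpsychs['src_subject_id'] if df.loc[s, 'network'] == 'PRESCIENT']
both_empty = dfpre.loc[pre_ids, ['psychs_p1p8_screening', 'psychs_p9ac32_screening']].isna().all(axis=1)
print(both_empty.sum())  # should match `count` above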

# how many of the original ProNET subjects have empty psychs_screening columns?
dfpro = pd.read_excel('form_status_tracker_PRONET.xlsx')
dfpro.set_index('subject', inplace=True)

count = 0
for i, row in dfpsychs.iterrows():
    s = row['src_subject_id']
    if df.loc[s, 'network'] == 'ProNET':
        if pd.isna(dfpro.loc[s, 'psychs_p1p8_screening']) and pd.isna(dfpro.loc[s, 'psychs_p9ac32_screening']):
            count += 1
            print(s, dfpro.loc[s, ['psychs_p1p8_fu_baseline', 'psychs_p9ac32_fu_baseline']].values)
print(count)

# tracker files are saved in /tmp/ of dn020
tracker = pd.read_csv('/tmp/combined_tracker.csv')
tracker.set_index('subject', inplace=True)

# print the tracker's baseline psychs status for every PRESCIENT subject in the submission
for i, row in dfpsychs.iterrows():
    s = row['src_subject_id']
    if df.loc[s, 'network'] == 'PRESCIENT':
        print(tracker.loc[s, 'psychs_baseline'])

# the output shows that all original/ampscz_psychs01_baseline.csv PRESCIENT cases are "omit";
# the next step is to diagnose why they are all "omit"
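
# --- sketch of that next step: tally the tracker's psychs_baseline values for all
# PRESCIENT subjects in the submission, confirming "omit" is universal rather than
# subject-specific (uses only names already defined above)
pre_ids = [s for s in dfpsychs['src_subject_id'] if df.loc[s, 'network'] == 'PRESCIENT']
print(tracker.loc[pre_ids, 'psychs_baseline'].value_counts(dropna=False))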

# =======================================================================================
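
# --- assumption: p1p8_files and vars below are defined elsewhere in the session.
# One hypothetical way they could be built (the glob pattern and definition file
# name are guesses, not taken from the original):
# from glob import glob
# p1p8_files = sorted(glob('Prescient*/raw/*/surveys/*psychs_p1p8*.csv'))
# vars = pd.read_csv('psychs01_definitions.csv')['ElementName'].tolist()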

# working code for loading p1p8 CSV files and generating one combined CSV
dfcomb = None
for f1 in p1p8_files:
    s = basename(f1).split('_')[0]
    df1 = pd.read_csv(f1, dtype=str)

    # extract the baseline record (visit == '2')
    _df1 = df1.set_index('visit')
    try:
        __df1 = _df1.loc['2']
    except KeyError:
        # no baseline visit for this subject
        continue
    if _df1.loc[['2']].shape[0] > 1:
        # more than one baseline record; flag the subject and skip it
        print(s)
        continue

    dict1 = {}
    dict1['src_subject_id'] = s
    for v in vars:
        if v in df1.columns:
            dict1[v] = __df1[v]

    dfbase = pd.DataFrame.from_dict([dict1])
    if dfcomb is None:
        dfcomb = dfbase.copy()
    else:
        dfcomb = pd.concat([dfcomb, dfbase], axis=0, sort=False)
print(dfcomb.shape)

# working code for loading JSONs and generating one combined CSV
dfcomb2 = None
for s in dfcomb['src_subject_id'].values:
    f1 = f'Prescient{s[:2]}/raw/{s}/surveys/{s}.Prescient.json'
    with open(f1) as f:
        data = json.load(f)

    dict1 = {}
    dict1['src_subject_id'] = s
    for d in data:
        if d['redcap_event_name'] == 'baseline_arm_1':
            for v in vars:
                # note: the original tested `if v in dict1`, which is always false for
                # anything but src_subject_id; testing the JSON record is the likely intent
                if v in d:
                    dict1[v] = d[v]
            break

    dfbase = pd.DataFrame.from_dict([dict1])
    if dfcomb2 is None:
        dfcomb2 = dfbase.copy()
    else:
        dfcomb2 = pd.concat([dfcomb2, dfbase], axis=0, sort=False)
print(dfcomb2.shape)

# keep only the columns common to both tables
drop = []
for c in dfcomb2.columns:
    if c not in dfcomb.columns:
        drop.append(c)
dfcomb2.drop(drop, axis=1, inplace=True)
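
# --- optional comparison (a sketch): presumably the point of building both tables is
# to cross-check the CSV-derived values against the JSONs; this assumes src_subject_id
# is unique in each table so the rows can be aligned by it
common = [c for c in dfcomb.columns if c in dfcomb2.columns and c != 'src_subject_id']
a = dfcomb.set_index('src_subject_id').sort_index()[common]
b = dfcomb2.set_index('src_subject_id').sort_index()[common]
print(a.compare(b))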

# the issue with this approach is that HC follow-up forms got mixed into the combined df
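# --- possible mitigation (a sketch, not verified): exclude follow-up form files
# before combining; the '_fu' filename test is an assumption about the naming scheme
# p1p8_files = [f for f in p1p8_files if '_fu' not in basename(f)]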