Skip to content

Instantly share code, notes, and snippets.

@tashrifbillah
Last active October 13, 2022 15:50
Show Gist options
  • Save tashrifbillah/7645b653531dcc145efed3834cfb8ca9 to your computer and use it in GitHub Desktop.
Save tashrifbillah/7645b653531dcc145efed3834cfb8ca9 to your computer and use it in GitHub Desktop.
Script for comparing some REDCap and RPMS forms
#!/usr/bin/env python
from os import getcwd, chdir
import pandas as pd
import json
from glob import glob
# Data-dictionary layouts this script knows how to read. Each preset gives the
# CSV path, its text encoding (None means the pandas default) and the names of
# the columns that hold the variable name, form name, field type and the
# validation/choices text.
DICT_SOURCES = {
    'ampscz_dict_20221011': {
        'path': '/data/predict/utility/dict_diff/ampscz/ampscz_dict_20221011.csv',
        'encoding': 'ISO-8859-1',
        'var_col': 'field_name',
        'form_col': 'form_name',
        'type_col': 'field_type',
        'value_col': 'text_validation_type_or_show_slider_number',
    },
    'form_repository_2022-10-13': {
        'path': '/data/predict/utility/amp-scz-form/orig/AMPSCZFormRepository_DataDictionary_2022-10-13_checkbox.csv',
        'encoding': None,
        'var_col': 'Variable / Field Name',
        'form_col': 'Form Name',
        'type_col': 'Field Type',
        'value_col': 'Choices, Calculations, OR Slider Labels',
    },
}

# Previously both layouts were assigned one after the other, so the first one
# (including its read_csv call) was always overwritten and never used. Select
# the layout explicitly instead so only the active dictionary is loaded.
ACTIVE_SOURCE = 'form_repository_2022-10-13'

_source = DICT_SOURCES[ACTIVE_SOURCE]
var_col = _source['var_col']
form_col = _source['form_col']
type_col = _source['type_col']
value_col = _source['value_col']

df = pd.read_csv(_source['path'], encoding=_source['encoding'])

# One group of dictionary rows per form.
groups = df.groupby(form_col)

# Forms to compare, and the field types whose values are extracted.
forms = [
    'family_interview_for_genetic_studies_figs',
    'psychs_p1p8',
    'psychs_p9ac32',
    'scid5_psychosis_mood_substance_abuse',
]
types = ['text', 'radio', 'checkbox', 'dropdown', 'yesno']
# REDCap event to read for each entry of `forms` (same order), per study arm.
_ARM_1_EVENTS = ['screening_arm_1', 'screening_arm_1', 'screening_arm_1', 'baseline_arm_1']
_ARM_2_EVENTS = ['screening_arm_2', 'screening_arm_2', 'screening_arm_2', 'baseline_arm_2']

# Survey JSON of each REDCap subject (path relative to the directory entered
# below) mapped to the events to look up for that subject.
json_event = {
    'Pronet/PHOENIX/PROTECTED/PronetYA/raw/YA01508/surveys/YA01508.Pronet.json': list(_ARM_1_EVENTS),
    'Pronet/PHOENIX/PROTECTED/PronetPI/raw/PI01355/surveys/PI01355.Pronet.json': list(_ARM_2_EVENTS),
    'Pronet/PHOENIX/PROTECTED/PronetWU/raw/WU05257/surveys/WU05257.Pronet.json': list(_ARM_1_EVENTS),
}

# Accumulator for every extracted value: one row per (variable, subject, value).
dfv = pd.DataFrame(columns=['variable', 'subject', 'value'])

# Remember where the script was started so it can be returned to at the end,
# then work relative to the data root.
dirbak = getcwd()
chdir('/data/predict/data_from_nda/')
# ---- Collect values from the REDCap survey JSON files ----

# Next free row label in dfv; later sections of the script keep counting
# from wherever this loop leaves it.
i = 0

# Parse every survey JSON once up front. Re-opening and re-parsing each file
# for every dictionary variable is loop-invariant work.
redcap_records = {}
for json_path in json_event:
    with open(json_path) as f:
        redcap_records[json_path] = json.load(f)

for j, form in enumerate(forms):
    print(form)

    # set_index() without inplace returns a new frame, so the slice handed
    # out by the groupby is not mutated.
    dfg = groups.get_group(form).set_index(var_col)

    # For each subject, keep only the records of the event configured for
    # this form (position j in that subject's event list).
    event_records = {
        json_path: [
            d for d in records
            if d['redcap_event_name'] == json_event[json_path][j]
        ]
        for json_path, records in redcap_records.items()
    }

    for v, row in dfg.iterrows():
        # skip the main variable in checkbox
        # only use the expanded ones
        # (look the type up through type_col rather than a hard-coded
        # column name so it follows the configured dictionary layout)
        if row[type_col] == 'checkbox' and '___' not in v:
            continue
        if row[type_col] not in types:
            continue

        for json_path, records in event_records.items():
            # subject ID is the file name without the '.Pronet.json' suffix
            subject = json_path.split('/')[-1].split('.Pronet.json')[0]
            for d in records:
                # extract value; a record without this variable is skipped
                try:
                    value = d[v]
                except KeyError:
                    continue
                dfv.loc[i] = [v, subject, value]
                i += 1

print(dfv.shape)
# ---- Collect values from the RPMS per-form CSV files ----

# Raw directory of each RPMS subject mapped to the `visit` value to read for
# each entry of `forms` (same order).
rpms_dirs = {
    'Prescient/PHOENIX/PROTECTED/PrescientME/raw/ME21922': [1, 1, 1, 2],
    'Prescient/PHOENIX/PROTECTED/PrescientME/raw/ME78581': [1, 1, 1, 2],
    'Prescient/PHOENIX/PROTECTED/PrescientME/raw/ME22598': [1, 1, 1, 2],
}

for j, form in enumerate(forms):
    print(form)

    # set_index() without inplace returns a new frame, so the slice handed
    # out by the groupby is not mutated.
    dfg = groups.get_group(form).set_index(var_col)

    # Locate and read each subject's CSV for this form once, keeping only the
    # rows of the configured visit. Doing this per dictionary variable would
    # re-glob and re-parse the same file over and over.
    visit_rows = {}
    for subject_dir, visits in rpms_dirs.items():
        matches = glob(f'{subject_dir}/surveys/*{form}.csv')
        if not matches:
            # the form does not exist for this subject
            continue
        dfsub = pd.read_csv(matches[0])
        visit_rows[subject_dir] = [
            subrow for _, subrow in dfsub.iterrows()
            if int(subrow['visit']) == visits[j]
        ]

    for v, row in dfg.iterrows():
        # skip the main variable in checkbox
        # only use the expanded ones
        # (look the type up through type_col rather than a hard-coded
        # column name so it follows the configured dictionary layout)
        if row[type_col] == 'checkbox' and '___' not in v:
            continue
        if row[type_col] not in types:
            continue

        for subject_dir, subrows in visit_rows.items():
            # subject ID is the last component of the raw directory
            subject = subject_dir.split('/')[-1]
            for subrow in subrows:
                # extract value; a CSV without this column is skipped
                try:
                    value = subrow[v]
                except KeyError:
                    continue
                # dfv and i are the accumulator and running row label set up
                # earlier in the script
                dfv.loc[i] = [v, subject, value]
                i += 1
# Write every collected (variable, subject, value) row to CSV. The file name is
# relative, so it is resolved against the current working directory, which has
# not yet been switched back to `dirbak` at this point.
dfv.to_csv('redcap_values_to_compare.csv',index=False)
# Return to the directory that was saved in `dirbak`.
chdir(dirbak)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment