Created
June 20, 2023 22:14
-
-
Save tashrifbillah/8241393de66554f26f85a3909a9c4570 to your computer and use it in GitHub Desktop.
Check status of AVL combined QC records and transcripts' existence
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Execute it from within /data/predict1/data_from_nda/Prescient/PHOENIX/GENERAL:
# every glob pattern below is relative to the current working directory.
#
# For each *_combinedQCRecords.csv, take the `day` of its first record and
# check that exactly one matching redacted open-interview transcript exists
# in the sibling `transcripts/` directory.
from glob import glob
from os.path import dirname, join as pjoin

import pandas as pd


def first_day(qc_file):
    """Return the ``day`` value of the first record in *qc_file* as an int.

    Only the first data row is parsed. Besides being cheaper, this keeps a
    blank ``day`` cell in a later row from turning the whole column into
    floats, which would make the zero-padded day render as ``12.0`` instead
    of ``0012``.

    Returns ``None`` when the file is empty, has no data rows, has no
    ``day`` column, or its first ``day`` cell is blank or non-numeric.
    """
    try:
        df = pd.read_csv(qc_file, nrows=1)
    except pd.errors.EmptyDataError:
        # Zero-byte file: there is not even a header to parse.
        return None

    if df.empty or 'day' not in df.columns:
        return None

    try:
        return int(df['day'].iloc[0])
    except (TypeError, ValueError):
        # Blank (NaN) or non-numeric first cell.
        return None


def transcript_template(qc_file, day):
    """Return the glob pattern for the transcript(s) of *qc_file* on *day*.

    The pattern points into the ``transcripts`` directory next to *qc_file*;
    *day* is zero-padded to four digits.
    """
    return pjoin(
        dirname(qc_file),
        f'transcripts/*_interviewAudioTranscript_open_day{day:04}_session*_REDACTED.txt',
    )


# glob returns paths in arbitrary order; sort so the report is reproducible.
files = sorted(glob('*/processed/*/interviews/open/*_combinedQCRecords.csv'))

# Number of QC files that have exactly one matching transcript.
j = 0
for qc_file in files:
    day = first_day(qc_file)
    if day is None:
        # Report and keep going rather than aborting the whole scan.
        print(qc_file, 'has no usable day value')
        continue

    template = transcript_template(qc_file, day)
    transcript_file = glob(template)

    if len(transcript_file) == 1:
        j += 1
    elif transcript_file:
        # More than one session transcript for the same day: list them all.
        print(transcript_file)
    else:
        print(template, 'does not exist')

print('Total combinedQCRecords.csv', len(files))
print('Total transcript files', j)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment