Skip to content

Instantly share code, notes, and snippets.

@dmd
Created April 23, 2021 17:26
Show Gist options
  • Save dmd/a2d1b7d6f02a2428ac4eba35cd7c76c8 to your computer and use it in GitHub Desktop.
Save dmd/a2d1b7d6f02a2428ac4eba35cd7c76c8 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import re
import datetime
def title(s, d):
    """Derive a cleaned title and a slug from a raw study description.

    Args:
        s: Raw study description string.
        d: Date string in ``%Y%m%d`` form (e.g. ``'20210423'``).

    Returns:
        A ``(title, slug)`` tuple. ``title`` is ``s`` with every run of
        non-alphanumeric characters turned into one space, the boilerplate
        phrases below removed (case-insensitively) and whitespace
        normalized; ``slug`` is ``title`` lower-cased with spaces replaced
        by underscores. ``('do_not_file', 'do_not_file')`` is returned when
        the description is rejected:

        * ``d`` is earlier than 2003-01-01;
        * nothing is left after cleaning;
        * no boilerplate phrase was removed and the result is not one of
          the special-cased names;
        * ``s`` starts with ``3P`` or ``3T``, or contains ``clinical``
          (case-insensitive).

    Raises:
        ValueError: If ``d`` does not match ``%Y%m%d``.
    """
    do_not_file = ('do_not_file', 'do_not_file')

    if datetime.datetime.strptime(d, '%Y%m%d') < datetime.datetime(2003, 1, 1, 0, 0):
        return do_not_file

    ret = re.sub(r'[^0-9a-zA-Z]+', ' ', s)

    # Order matters: phrases are removed one after another, so longer
    # phrases are listed before the shorter ones they contain.
    wordlist = (
        'mri research',
        'research',
        'converted',
        'mclean',
        'development',
        'investigators',
        'investigator',
        'run this one',
        'archived protocols',
        'archivedprotocols',
    )
    changed = False
    for word in wordlist:
        old = ret
        ret = re.sub(re.escape(word), '', ret, flags=re.IGNORECASE)
        if old != ret:
            changed = True

    # NOTE(review): the indentation of the original was lost; whitespace
    # normalization is assumed to run once, after all phrases are removed.
    # At this point ret holds only alphanumerics and spaces, so split/join
    # collapses runs of spaces and trims both ends.
    ret = ' '.join(ret.split())
    if ret == '':
        return do_not_file

    # Names accepted even when no boilerplate phrase was stripped.
    special = ('EAOLSON', 'Silveri', 'Forester', 'Frederic', 'Ongur')
    if not changed and ret not in special:
        return do_not_file

    if s.startswith(('3P', '3T')) or 'clinical' in s.lower():
        return do_not_file

    return (ret, ret.replace(' ', '_').lower())
# Read "<date>\t<study description>" lines and, for every description that
# title() does not reject, print "<slug>\t<cleaned title>\t<original description>".
with open("studynames-mostrecentdate.txt") as f:
    for line in f:
        # Skip blank lines (such as a trailing empty line) rather than
        # failing on the two-way unpack below.
        if not line.strip():
            continue
        # Split on the first tab only, so a tab inside the description
        # does not raise "too many values to unpack".
        d, raw = line.split('\t', 1)
        raw = raw.rstrip()
        name, slug = title(raw, d)
        if name != 'do_not_file':
            print(slug, name, raw, sep='\t')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment