Created
April 23, 2021 17:26
-
-
Save dmd/a2d1b7d6f02a2428ac4eba35cd7c76c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import re | |
import datetime | |
# Sentinel returned (in both tuple slots) for studies that must not be filed.
_DO_NOT_FILE = ('do_not_file', 'do_not_file')

# Boilerplate phrases stripped from a study description, in this exact order
# (longer phrases come before the shorter phrases they contain).
_NOISE_PATTERNS = tuple(
    re.compile(re.escape(phrase), re.IGNORECASE)
    for phrase in (
        'mri research',
        'research',
        'converted',
        'mclean',
        'development',
        'investigators',
        'investigator',
        'run this one',
        'archived protocols',
        'archivedprotocols',
    )
)

# Names accepted as project titles even when no boilerplate phrase was removed.
_SPECIAL_NAMES = ('EAOLSON', 'Silveri', 'Forester', 'Frederic', 'Ongur')

# Studies dated before this are never filed.
_EARLIEST_DATE = datetime.datetime(2003, 1, 1, 0, 0)


def title(s, d):
    """Derive a project title and slug from a raw study description.

    Args:
        s: Raw study description string.
        d: Date string in ``%Y%m%d`` format.

    Returns:
        A ``(title, slug)`` tuple, where ``slug`` is the title lower-cased
        with spaces replaced by underscores, or
        ``('do_not_file', 'do_not_file')`` when the study is rejected.

    Raises:
        ValueError: If ``d`` does not parse as ``%Y%m%d``.
    """
    if datetime.datetime.strptime(d, '%Y%m%d') < _EARLIEST_DATE:
        return _DO_NOT_FILE

    # Every run of non-alphanumeric characters becomes a single space.
    cleaned = re.sub(r'[^0-9a-zA-Z]+', ' ', s)

    # Strip the boilerplate phrases one after another, remembering whether
    # any of them actually occurred.
    changed = False
    for pattern in _NOISE_PATTERNS:
        cleaned, hits = pattern.subn('', cleaned)
        if hits:
            changed = True

    # Collapse whitespace runs and trim both ends (only letters, digits and
    # plain spaces remain at this point).
    cleaned = ' '.join(cleaned.split())

    if not cleaned:
        return _DO_NOT_FILE
    # A description with no boilerplate removed is only kept for known names.
    if not changed and cleaned not in _SPECIAL_NAMES:
        return _DO_NOT_FILE
    # These checks look at the raw description, not the cleaned one.
    if s.startswith(('3P', '3T')) or 'clinical' in s.lower():
        return _DO_NOT_FILE

    return (cleaned, cleaned.replace(' ', '_').lower())
# Driver: read "<date>\t<StudyDescription>" lines and print one tab-separated
# row "<slug>\t<title>\t<original StudyDescription>" for every study that
# title() does not reject.
# print ('Most recently used\toriginal StudyDescription\tProject')
with open("studynames-mostrecentdate.txt") as f:
    for line in f:
        # A blank line (e.g. a trailing one) has no tab to split on and
        # would otherwise abort the whole run with ValueError.
        if not line.strip():
            continue
        date_str, raw = line.split('\t')
        raw = raw.rstrip()
        project, slug = title(raw, date_str)
        if project != 'do_not_file':
            print(slug, project, raw, sep='\t')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment