Last active
March 16, 2019 02:49
-
-
Save xsthunder/35f507b02fc6cc24f9e57fade2c57aa4 to your computer and use it in GitHub Desktop.
Split a string on separators, then search each piece with a regex and return its capture groups under named keys.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Background on splitting a string with a regex:
# https://blog.csdn.net/programmer_at/article/details/77409507

# Separators handed to f(); the sample text is cut wherever one of them occurs.
spliters = [
    r';',
]
# Pattern searched in every piece: group 1 matches 胰腺 (pancreas) or
# 肝脏 (liver), group 2 captures the rest of the piece.
reg = r'(胰腺|肝脏)(.+)'
# Output dictionary keys, paired in order with the capture groups of `reg`.
key_names = ['部位', '情况']
# Sample input: two findings followed by a piece ('yes') that `reg` cannot match.
text = '胰腺未见局灶性占位。; 肝脏未见明显局灶性占位。;yes'
def f(spliters, reg, key_names, text, throw_on_unmatch=False):
    """Split ``text`` on literal separators and regex-parse every piece.

    Args:
        spliters: Iterable of separator strings. Each one is escaped, so it
            is matched literally rather than as a regular expression.
        reg: Regular expression searched in every piece. It must define at
            least ``len(key_names)`` capture groups.
        key_names: Keys of the result dictionaries; the i-th key receives
            capture group ``i + 1`` (group 0 is the whole match).
        text: The string to split and parse.
        throw_on_unmatch: When true, a piece that ``reg`` does not match
            raises instead of producing empty values.

    Returns:
        A list with one dict per non-blank piece, in order of appearance.
        Pieces that do not match map every key to ``''``.

    Raises:
        ValueError: If ``throw_on_unmatch`` is true and a piece does not
            match ``reg``.
    """
    # Drop empty separators: an empty alternative in the split pattern matches
    # at every position, which would chop the text apart character by character
    # instead of leaving it whole.
    escaped = [re.escape(sep) for sep in spliters if sep]
    pieces = re.split('|'.join(escaped), text) if escaped else [text]

    # Compile once instead of looking the pattern up for every piece.
    pattern = re.compile(reg)

    def parse_piece(one_text):
        """Map ``key_names`` onto the capture groups found in one piece."""
        match = pattern.search(one_text)
        if match is None:
            if throw_on_unmatch:
                raise ValueError(
                    "unmatch pattern:'%s' in '%s'" % (reg, one_text)
                )
            return {name: '' for name in key_names}
        # Groups are 1-based; group 0 is the entire matched string.
        return {
            name: match.group(index + 1)
            for index, name in enumerate(key_names)
        }

    # Surrounding whitespace is removed first, so blank pieces are skipped.
    stripped = (piece.strip() for piece in pieces)
    return [parse_piece(piece) for piece in stripped if piece]
# Demo call on the sample data defined above. The result is
# [{'部位': '胰腺', '情况': '未见局灶性占位。'},
#  {'部位': '肝脏', '情况': '未见明显局灶性占位。'},
#  {'部位': '', '情况': ''}]
# It is discarded when run as a plain script.
f(spliters, reg, key_names, text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment