mengdiwang · April 25, 2018 05:56
diff --git a/regex.py b/regex.py
 # coding=utf8
 # the above tag defines encoding for this document and is for Python 2.x compatibility

 import re

 # Pattern for a single <a target="_blank" ...> element.  The three lazy
 # groups capture, in order: the title attribute, the href attribute and
 # everything between the opening tag and the closing </a>.
 regex = r'<a target="_blank" title="(.*?)" href="(.*?)">(.*?)</a>'

 # HTML fragment that is scanned with the pattern above.
 test_str = "<p>　　二〇一八年四月二十日<br><br><a target=\"_blank\" title=\"上海证券交易所股票上市规则（2018年4月修订）\" href=\"/aboutus/mediacenter/hotandd/a/20180420/4a6cd527bc7bcac21dd66a2abb60d3d5.doc\">上海证券交易所股票上市规则（2018年4月修订）<br></a><a target=\"_blank\" title=\"《上海证券交易所股票上市规则（2018年4月修订）》修订说明\" href=\"/aboutus/mediacenter/hotandd/a/20180420/f086a86397ee054f984443a8ac9d1f3a.doc\">《上海证券交易所股票上市规则（2018年4月修订）》修订说明</a><br><br type=\"_moz\">&nbsp;</p>"

 # Lazy iterator over every non-overlapping occurrence of the pattern.
 matches = re.finditer(regex, test_str, re.MULTILINE | re.UNICODE)

 # Matches are reported with 1-based numbering.
 for matchNum, match in enumerate(matches, 1):
     # Whole match: its position in test_str and the matched text.
     print ("Match {matchNum} was found at {start}-{end}: {match}".format(
         matchNum = matchNum,
         start = match.span()[0],
         end = match.span()[1],
         match = match.group(0)))

     # Capture groups are numbered from 1; group 0 is the whole match
     # and has already been printed above.
     for groupNum in range(1, len(match.groups()) + 1):
         print ("Group {groupNum} found at {start}-{end}: {group}".format(
             groupNum = groupNum,
             start = match.span(groupNum)[0],
             end = match.span(groupNum)[1],
             group = match.group(groupNum)))

 # Note: for Python 2.7 compatibility, use ur'' to prefix the regex and u"" to prefix the test string and substitution.
	# coding=utf8
	# the above tag defines encoding for this document and is for Python 2.x compatibility

	import re

	# Pattern for a single <a target="_blank" ...> element.  Each group must be
	# the lazy "(.*?)": with "(.?)" a group can hold at most one character, so
	# the title and href attributes of the sample below would never match.
	# Groups, in order: title attribute, href attribute, inner HTML.
	regex = r'<a target="_blank" title="(.*?)" href="(.*?)">(.*?)</a>'

	# HTML fragment that is scanned with the pattern above.
	test_str = "<p>　　二〇一八年四月二十日<br><br><a target=\"_blank\" title=\"上海证券交易所股票上市规则（2018年4月修订）\" href=\"/aboutus/mediacenter/hotandd/a/20180420/4a6cd527bc7bcac21dd66a2abb60d3d5.doc\">上海证券交易所股票上市规则（2018年4月修订）<br></a><a target=\"_blank\" title=\"《上海证券交易所股票上市规则（2018年4月修订）》修订说明\" href=\"/aboutus/mediacenter/hotandd/a/20180420/f086a86397ee054f984443a8ac9d1f3a.doc\">《上海证券交易所股票上市规则（2018年4月修订）》修订说明</a><br><br type=\"_moz\"> </p>"

	# Flags are combined with a plain bitwise OR; a backslash before the "|"
	# is not valid Python.
	matches = re.finditer(regex, test_str, re.MULTILINE | re.UNICODE)

	# Matches are reported with 1-based numbering.
	for matchNum, match in enumerate(matches, 1):
		# Whole match: its position in test_str and the matched text.
		print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

		# Capture groups are numbered from 1; group 0 is the whole match
		# and has already been printed above.
		for groupNum in range(1, len(match.groups()) + 1):
			print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))

	# Note: for Python 2.7 compatibility, use ur'' to prefix the regex and u"" to prefix the test string and substitution.
No results found