quxiaowei · July 14, 2016 01:41
diff --git a/delete_tag.py b/delete_tag.py
 #  delete specific tags from html file
 #  by quxiaowei 
 #  @ 20160702

 def strip_tags_by_scan(html, keep, *, trace=False):
     """Return *html* with every tag whose name is not in *keep* removed.

     The input is walked one character at a time.  A tag runs from a '<'
     to the first following '>'; its name is the first space-separated
     word between them (so a closing tag is named e.g. '/p').  Text
     outside tags is always preserved, tags named in *keep* are copied
     verbatim, and every other tag is dropped.  An unterminated '<...'
     at the end of the input is dropped as well.

     html  -- the markup to filter (str).
     keep  -- container of tag names to preserve, e.g. {'p', '/p'}.
     trace -- when true, print each tag name as it is encountered.
     """
     pieces = []
     seg_start = 0      # start of the text run or tag not yet emitted
     in_tag = False
     for pos, ch in enumerate(html):
         if ch == '<' and not in_tag:
             pieces.append(html[seg_start:pos])
             seg_start = pos
             in_tag = True
         elif ch == '>' and in_tag:
             name = html[seg_start + 1:pos].strip().split(' ')[0]
             if trace:
                 print(name)
             if name in keep:
                 pieces.append(html[seg_start:pos + 1])
             seg_start = pos + 1
             in_tag = False
     # Text after the final '>' used to be lost; emit it unless the input
     # ended in the middle of a tag.
     if not in_tag:
         pieces.append(html[seg_start:])
     return ''.join(pieces)


 def strip_tags_by_index(html, keep):
     """Same filtering as strip_tags_by_scan, using str.find to jump
     between angle brackets instead of visiting every character.

     html -- the markup to filter (str).
     keep -- container of tag names to preserve, e.g. {'p', '/p'}.
     """
     pieces = []
     pos = 0            # first character not yet consumed
     while True:
         lt = html.find('<', pos)
         if lt == -1:
             # No further tags: the rest is plain text.
             pieces.append(html[pos:])
             break
         pieces.append(html[pos:lt])
         gt = html.find('>', lt)
         if gt == -1:
             # Unterminated tag: drop the fragment.  The preceding text has
             # already been emitted, so nothing is duplicated.
             break
         name = html[lt + 1:gt].strip().split(' ')[0]
         if name in keep:
             pieces.append(html[lt:gt + 1])
         pos = gt + 1
     return ''.join(pieces)


 # Sample input: an HTML fragment that ends in an unterminated <font ...> tag.
 text = """
 <p class="p"><b><span style="font-family: 'Times New Roman': font-size: 12pt;">一、岗位及人数</span></b><span style="font-family: 'Times New Roman'; font-size: 12pt;"><o:p></o:p></span></p><p class="p"><span style="font-family: 'Times New Roman'; font-size: 12pt;">办公室工作人员，<font face="Times New Roman">1</font><font face="微软雅黑">名；教师，</font><font face="Times New Roman">4</font><font face="微软雅黑">名。</font></span><font class="   
 """

 # Names of the tags to keep; every other tag is deleted from the output.
 tags = {'p', '/p', 'span', '/span'}

 # First approach: character scan, printing each tag name it meets.
 text_o = strip_tags_by_scan(text, tags, trace=True)
 print(text_o)

 # Second approach: index-based search; yields the same result.
 text_o = strip_tags_by_index(text, tags)
 print(text_o)
	# delete specific tags from html file
	# by quxiaowei
	# @ 20160702

	def strip_tags_by_scan(html, keep, *, trace=False):
	    """Return *html* with every tag whose name is not in *keep* removed.

	    The input is walked one character at a time.  A tag runs from a '<'
	    to the first following '>'; its name is the first space-separated
	    word between them (so a closing tag is named e.g. '/p').  Text
	    outside tags is always preserved, tags named in *keep* are copied
	    verbatim, and every other tag is dropped.  An unterminated '<...'
	    at the end of the input is dropped as well.

	    html  -- the markup to filter (str).
	    keep  -- container of tag names to preserve, e.g. {'p', '/p'}.
	    trace -- when true, print each tag name as it is encountered.
	    """
	    pieces = []
	    seg_start = 0      # start of the text run or tag not yet emitted
	    in_tag = False
	    for pos, ch in enumerate(html):
	        if ch == '<' and not in_tag:
	            pieces.append(html[seg_start:pos])
	            seg_start = pos
	            in_tag = True
	        elif ch == '>' and in_tag:
	            name = html[seg_start + 1:pos].strip().split(' ')[0]
	            if trace:
	                print(name)
	            if name in keep:
	                pieces.append(html[seg_start:pos + 1])
	            seg_start = pos + 1
	            in_tag = False
	    # Emit the text after the final '>' unless the input ended in the
	    # middle of a tag.
	    if not in_tag:
	        pieces.append(html[seg_start:])
	    return ''.join(pieces)


	def strip_tags_by_index(html, keep):
	    """Same filtering as strip_tags_by_scan, using str.find to jump
	    between angle brackets instead of visiting every character.

	    html -- the markup to filter (str).
	    keep -- container of tag names to preserve, e.g. {'p', '/p'}.
	    """
	    pieces = []
	    pos = 0            # first character not yet consumed
	    while True:
	        lt = html.find('<', pos)
	        if lt == -1:
	            # No further tags: the rest is plain text.
	            pieces.append(html[pos:])
	            break
	        pieces.append(html[pos:lt])
	        gt = html.find('>', lt)
	        if gt == -1:
	            # Unterminated tag: drop the fragment.  The preceding text has
	            # already been emitted, so nothing is duplicated.
	            break
	        name = html[lt + 1:gt].strip().split(' ')[0]
	        if name in keep:
	            pieces.append(html[lt:gt + 1])
	        pos = gt + 1
	    return ''.join(pieces)


	# Sample input: an HTML fragment that ends in an unterminated <font ...> tag.
	text = """
	<p class="p"><b><span style="font-family: 'Times New Roman': font-size: 12pt;">一、岗位及人数</span></b><span style="font-family: 'Times New Roman'; font-size: 12pt;"><o:p></o:p></span></p><p class="p"><span style="font-family: 'Times New Roman'; font-size: 12pt;">办公室工作人员，<font face="Times New Roman">1</font><font face="微软雅黑">名；教师，</font><font face="Times New Roman">4</font><font face="微软雅黑">名。</font></span><font class="
	"""

	# Names of the tags to keep; every other tag is deleted from the output.
	tags = {'p', '/p', 'span', '/span'}

	# First approach: character scan, printing each tag name it meets.
	text_o = strip_tags_by_scan(text, tags, trace=True)
	print(text_o)

	# Second approach: index-based search; yields the same result.
	text_o = strip_tags_by_index(text, tags)
	print(text_o)