Last active
December 15, 2015 21:49
-
-
Save greatghoul/5328784 to your computer and use it in GitHub Desktop.
BeautifulSoup 获取标签属性
http://segmentfault.com/q/1010000000185964
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from BeautifulSoup import BeautifulSoup | |
import re | |
def get_timestamp(html):
    """Print the timestamp stored in each post header's ``<span title=...>``.

    Parses *html* with BeautifulSoup and visits every ``<div class="authi">``.
    Inside each one it looks up the first ``<span>`` whose ``title``
    attribute matches a ``20YY-M-D HH:MM:SS`` style pattern and prints that
    title on its own line.  ``authi`` blocks that contain no such span are
    skipped instead of raising ``AttributeError``.

    :param html: HTML markup to search.
    :returns: None; matching titles are written to standard output.
    """
    # The pattern does not depend on the element, so build it once
    # rather than on every loop iteration.
    timestamp_pattern = re.compile(r'20\d{2}-\d+-\d+\s\d{2}:\d{2}:\d{2}')

    soup = BeautifulSoup(html)
    for authi_elem in soup.findAll('div', {'class': 'authi'}):
        date_elem = authi_elem.find('span', title=timestamp_pattern)
        if date_elem is None:
            # find() yields None when nothing matches; there is no
            # timestamp to report for this block.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(date_elem.get('title'))
if __name__ == '__main__':
    # Demo input: a forum post header.  The absolute timestamp lives in the
    # ``title`` attribute of the <span> inside <em>, while the visible text
    # is only a relative time.  Running the script should print
    # "2013-4-2 08:30:11".
    html = """
<div class="authi">
<img class="authicn vm" id="" src="static/image/common/online_member.gif" />
<em id="">发表于 <span title="2013-4-2 08:30:11">4 天前</span></em>
<span class="pipe">|</span><a href=>倒序浏览</a>
<div style="float:right;position:relative;top:-6px;"><ul>
<li style="float: left; width: 130px; height: 24px;"><wb:follow-button uid="" type="gray_2" width="136" height="24"></wb:follow-button></li>
<li style=" float:left; width: 160px; height: 24px;"></li>
</ul></div></div>
"""
    get_timestamp(html)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment