Skip to content

Instantly share code, notes, and snippets.

@lisongx
Created March 24, 2012 11:44
Show Gist options
  • Save lisongx/2181456 to your computer and use it in GitHub Desktop.
Save lisongx/2181456 to your computer and use it in GitHub Desktop.
获得 see.xidian.edu.cn 最近几日的新闻
#!/usr/bin/env python
import requests
from lxml import html
from datetime import datetime, timedelta
# Look-back window in days: only news newer than this is reported.
DAY = 3
# URL of the news page; also used as the prefix for the links found on it.
BASE_URL = "http://see.xidian.edu.cn/"
# Fetch the page content. The explicit timeout keeps the script from
# blocking forever when the server accepts the connection but never answers.
r = requests.get(BASE_URL, timeout=30)
# Override the encoding so that r.text is decoded as GBK.
r.encoding = 'GBK'
doc = html.document_fromstring(r.text)
# Reference moment the publication dates are compared against.
today = datetime.today()
def get_today_news():
    """Print the news rows published less than ``DAY`` days ago.

    Walks every ``tr`` of the ``table[width="98%"]`` element(s) in the
    module-level ``doc``. A row is considered only when it contains both an
    ``img`` and a ``font`` element; the text of the first ``font`` element is
    read as a ``[YYYY-MM-DD]`` date. For each row that is recent enough, the
    row text and ``BASE_URL`` joined with the first link's ``href`` are
    printed on one line, separated by a space.

    Rows whose date text cannot be parsed, or that carry no usable link, are
    skipped instead of aborting the whole scan.

    Returns:
        None. The results are written to standard output.
    """
    for tr in doc.cssselect('table[width="98%"] tr'):
        img_tag = tr.cssselect('img')
        date_tag = tr.cssselect('font')
        if not (img_tag and date_tag):
            continue

        # Drop surrounding whitespace as well as the enclosing brackets so
        # that "[2012-03-24] " parses just like "[2012-03-24]".
        tr_date = date_tag[0].text_content().strip().strip('[]').strip()
        try:
            published = datetime.strptime(tr_date, "%Y-%m-%d")
        except ValueError:
            # The <font> element did not hold a date; not a dated news row.
            continue

        if today - published >= timedelta(DAY):
            continue

        anchors = tr.cssselect('a')
        if not anchors:
            continue
        link = anchors[0].get('href')
        if link is None:
            continue

        # A single pre-formatted argument makes this line valid both as a
        # Python 2 print statement and as a Python 3 print() call.
        print("%s %s" % (tr.text_content(), BASE_URL + link))
# Script entry point: list the recent news only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    get_today_news()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment