Created
June 28, 2013 16:45
-
-
Save esehara/5886126 to your computer and use it in GitHub Desktop.
はてなブックマークのお気に入りユーザーで、既にアクティヴではないユーザーを調べる ref: http://qiita.com/esehara@github/items/d595c89c52a81052bf42
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import datetime
import sys
import urllib
from contextlib import closing

from bs4 import BeautifulSoup
def _init():
    """Abort with a usage message unless a user name was passed on the command line.

    Raises:
        SystemExit: with exit status 1 when ``sys.argv`` holds no argument
            after the script name.
    """
    # ``< 2`` (rather than ``== 1``) also rejects an empty ``sys.argv``,
    # which a later ``sys.argv[1]`` lookup could not survive either.
    if len(sys.argv) < 2:
        # Handing a string to sys.exit() writes it to stderr and exits with
        # status 1, so stdout stays reserved for the actual report.
        sys.exit("usage: lastbookmark.py user_name")
def get_userlist(max_pages=3, page_size=200):
    """Collect the user names listed on the follow pages of ``sys.argv[1]``.

    Requests ``http://b.hatena.ne.jp/<user>/follow?of=<offset>`` for the
    offsets ``0, page_size, 2 * page_size, ...`` and gathers the text of
    every ``<a class="username">`` element in each response.

    Args:
        max_pages: Upper bound on the number of pages requested.
        page_size: Offset step between consecutive pages.
            NOTE(review): 200 is presumably the number of users the site
            returns per page -- confirm before changing it.

    Returns:
        A list with the text of each matching anchor, in page order.
    """
    follow_users = []
    for page in range(max_pages):
        url = "http://b.hatena.ne.jp/%s/follow?of=%d" % (
            sys.argv[1], page * page_size)
        # closing() releases the connection even if read() raises; the
        # Python 2 urlopen() result is not a context manager on its own.
        with closing(urllib.urlopen(url)) as response:
            html = response.read()
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(html, "html.parser")
        page_users = [
            anchor.text
            for anchor in soup.find_all('a', {'class': 'username'})]
        if not page_users:
            # No names on this page: assume the listing is exhausted and
            # skip the remaining requests.
            break
        follow_users.extend(page_users)
    return follow_users
def get_last_bookmark(user):
    """Fetch the first timestamp shown on *user*'s bookmark fragment page.

    Requests ``http://b.hatena.ne.jp/<user>/fragment``, echoes the user name
    and the timestamp text to stdout as progress output, and returns both.

    Args:
        user: User name inserted into the request URL.

    Returns:
        A ``(user, timestamp_text)`` tuple. ``timestamp_text`` is the text
        of the first ``<span class="timestamp">`` in the response
        (presumably the most recent bookmark -- verify against the page
        layout), or ``""`` when the page contains no such element.
    """
    url = "http://b.hatena.ne.jp/%s/fragment" % user
    # closing() releases the connection even if read() raises; the
    # Python 2 urlopen() result is not a context manager on its own.
    with closing(urllib.urlopen(url)) as response:
        html = response.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(html, "html.parser")
    stamp = soup.find('span', {'class': 'timestamp'})
    # find() returns None when nothing matches; fall back to an empty
    # string so one user without a timestamp does not abort the whole scan.
    last_seen = stamp.text if stamp is not None else ""
    print("%s %s" % (user, last_seen))
    return (user, last_seen)
def target_user(analize_list, active_year=None):
    """Print every user whose last-bookmark date is not in *active_year*.

    A three-line banner is printed first, followed by one ``user date`` line
    per user whose date does not start with the active year.

    Args:
        analize_list: Iterable of ``(user, date)`` pairs, where ``date`` is a
            ``/``-separated string whose first field is the year.
        active_year: Year (int or str) that counts as "still active".
            Defaults to the current calendar year, so the report does not
            flag every user once the year the script was written in is over.
    """
    if active_year is None:
        active_year = datetime.date.today().year
    # Dates are compared as text, so normalise an int year to a string.
    active_year = str(active_year)

    print("-----------------------------------")
    print("--- You should remove user list ---")
    print("-----------------------------------")
    for user, date in analize_list:
        year = date.split('/')[0]
        if year != active_year:
            print("%s %s" % (user, date))
def command():
    """Script entry point: check arguments, scan followed users, print the report."""
    _init()
    # One (user, timestamp) pair per followed user, in listing order.
    bookmarks = [get_last_bookmark(name) for name in get_userlist()]
    target_user(bookmarks)
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    command()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment