Skip to content

Instantly share code, notes, and snippets.

@YoukouTenhouin
Created May 11, 2013 10:12
Show Gist options
  • Save YoukouTenhouin/5559498 to your computer and use it in GitHub Desktop.
Save YoukouTenhouin/5559498 to your computer and use it in GitHub Desktop.
前N头像抓取器 (Top-N avatar grabber: downloads the portraits of the top-ranked members of a Baidu Tieba forum)
#!/usr/bin/python2.7
import urllib2
import httplib
import re
import sys
import PIL.Image
# Captures the value following `username=` (optional opening quote) up to a
# closing double quote; the script entry point applies it with findall() to
# each ranking page it downloads.
# NOTE(review): `.*` is greedy, so on a line holding several `username="..."`
# attributes it would run to the last double quote on that line - confirm the
# page puts one per line.
usernamere = re.compile('username="?(.*)"')
# Captures the portrait id from a `'portrait' : '...',` fragment; used by
# GetUserImageByName on the downloaded profile page.
portraitre = re.compile(r"'portrait' : '?(.*)',")
# Module-level list that the script entry point extends with the usernames
# found on every ranking page, then slices down to the requested rank range.
users = []
def _read_url(url):
    """Return the body of *url*, always closing the HTTP response."""
    # Function-scope import keeps this block self-contained.
    import contextlib
    # urllib2 responses are not context managers on Python 2, hence closing().
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return response.read()


def GetUserImageByName(username):
    """Return the portrait (avatar) URL for the given Baidu user.

    The user's profile page is downloaded and searched with ``portraitre``.
    When that page contains no portrait id, the Tieba jump page is tried
    instead and the URL is built on the ``himg.bdimg.com`` host.

    Args:
        username: user name exactly as it should appear in the request URL
            (the caller passes it already URL-quoted).

    Returns:
        The portrait URL as a string.

    Raises:
        IndexError: neither page contains a portrait id.

    A download cut short by ``httplib.IncompleteRead`` is retried from the
    start; the number of retries is not bounded.
    """
    # NOTE(review): the original indentation was lost. The early return is
    # assumed to belong to the IndexError fallback, since otherwise the
    # trailing break/return would be unreachable - confirm against the
    # upstream script.
    while True:
        try:
            pcont = _read_url('http://www.baidu.com/p/' + username)
            try:
                imagename = portraitre.findall(pcont)[0]
            except IndexError:
                # Profile page had no portrait id: fall back to the jump page.
                pcont = _read_url(
                    'http://tieba.baidu.com/i/sys/jump?un=' + username)
                imagename = portraitre.findall(pcont)[0]
                return 'http://himg.bdimg.com/sys/portrait/item/%s' % imagename
        except httplib.IncompleteRead:
            # Truncated response: start over with a fresh request.
            continue
        break
    return 'http://tb.himg.baidu.com/sys/portrait/item/%s' % imagename
def GetArgValueByKey(key):
    """Return the command-line token that immediately follows *key*.

    Arguments are given as ``key value`` pairs in ``sys.argv``.

    Raises:
        ValueError: *key* does not occur in ``sys.argv``.
        IndexError: *key* is the last token, so no value follows it.
    """
    key_position = sys.argv.index(key)
    value_position = key_position + 1
    return sys.argv[value_position]
def GetPageNumByRank(rank):
    """Return the 1-based ranking page that contains the 1-based *rank*.

    Pages hold 20 ranks each: ranks 1-20 are on page 1, 21-40 on page 2,
    and so on.
    """
    zero_based_rank = rank - 1
    full_pages_before = int(zero_based_rank / 20)
    return full_pages_before + 1
if __name__ == '__main__':
    # Usage: tieba <forum> [from <rank>] to <rank> [countbegin <n>]
    #
    # Downloads the portraits of the members ranked from..to in the given
    # forum and stores them as <n>.jpg, <n+1>.jpg, ... in the current
    # directory.
    import contextlib  # only the script entry point needs it

    # Number of ranks per ranking page; must agree with GetPageNumByRank.
    USERS_PER_PAGE = 20

    # sys.stdin.encoding is None when stdin is piped or redirected, which
    # would make .decode() fail; fall back to a usable codec.
    arg_encoding = sys.stdin.encoding or sys.getfilesystemencoding() or 'utf-8'
    tieba_name = GetArgValueByKey('tieba')
    tieba_name = urllib2.quote(tieba_name.decode(arg_encoding).encode('gbk'))

    try:
        begin = int(GetArgValueByKey('from'))
    except ValueError:
        # 'from' missing or not a number: start at the first rank.
        begin = 1
    # Ranks are 1-based; anything lower would request page 0 and produce a
    # negative slice start below.
    begin = max(begin, 1)
    end = int(GetArgValueByKey('to'))

    page_begin = GetPageNumByRank(begin)
    page_end = GetPageNumByRank(end)
    for i in range(page_begin, page_end + 1):
        print('Getting page %d' % (i))
        url = 'http://tieba.baidu.com/f/like/furank?kw=%s&pn=%d' % (tieba_name, i)
        print('URL:' + url)
        with contextlib.closing(urllib2.urlopen(url)) as response:
            page = response.read()
        users += usernamere.findall(page)

    try:
        count = int(GetArgValueByKey('countbegin'))
    except ValueError:
        # 'countbegin' missing or not a number: number the files from 1.
        count = 1

    # `users` starts at the first rank of page_begin, not at rank 1, so both
    # slice bounds must be taken relative to that rank.
    first_fetched_rank = (page_begin - 1) * USERS_PER_PAGE + 1
    users = users[begin - first_fetched_rank:end - first_fetched_rank + 1]

    for user in users:
        print('%s -> %d.jpg' % (urllib2.unquote(user).decode('gbk'), count))
        # JPEG data is binary: text mode would corrupt it on platforms that
        # translate line endings.
        with open(str(count) + '.jpg', 'wb') as image_file:
            try:
                image_url = GetUserImageByName(user)
                with contextlib.closing(urllib2.urlopen(image_url)) as response:
                    image_file.write(response.read())
            except IndexError:
                # No portrait id was found for this user: store a black
                # placeholder so the file numbering stays contiguous.
                print('IndexError,Create Blank Image')
                blank = PIL.Image.new('RGB', (110, 110), (0, 0, 0))
                blank.save(image_file, 'jpeg')
        count += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment