Skip to content

Instantly share code, notes, and snippets.

@s1ntoneli
Last active July 30, 2017 21:15
Show Gist options
  • Select an option

  • Save s1ntoneli/f4c21ed62a0ae69560ec9094f41d557f to your computer and use it in GitHub Desktop.

Select an option

Save s1ntoneli/f4c21ed62a0ae69560ec9094f41d557f to your computer and use it in GitHub Desktop.
妹子图
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2017-07-31 03:00:45
# Project: meizitu_new
from pyspider.libs.base_handler import *
import time
class Handler(BaseHandler):
    """pyspider handler that records gallery image URLs from m.meizitu.com.

    The list page yields one category per gallery item; every image URL found
    for a category (its cover plus the images on its detail page) is appended
    to a per-category text file through the ``Deal`` helper.
    """

    crawl_config = {
    }

    def __init__(self):
        # All file-system output is delegated to Deal.
        self.deal = Deal()

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the gallery list page (re-run every 24 hours)."""
        self.crawl('http://m.meizitu.com/a/list_1_2.html', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Register each gallery item as a category and queue its detail page.

        The ``age`` above is 10 days expressed in seconds.
        """
        for each in response.doc('div[class="am-gallery-item"]').items():
            self.mkCategory(each)
            href = each('a').attr.href
            # pyquery yields None when the item has no link; nothing to queue.
            if href:
                self.crawl(href, callback=self.detail_page)
            # NOTE(review): the original indentation was lost; this pause is
            # assumed to run once per gallery item -- confirm. It blocks the
            # callback itself rather than spacing out the queued fetches.
            time.sleep(1.5)

    @config(priority=2)
    def detail_page(self, response):
        """Record every image URL on a detail page under the page's title."""
        title = response.doc('h2 a').text()
        print ("category is " + title)
        for each in response.doc('p img').items():
            src = each.attr.src
            self.saveImgSrc(src, title)
            print (src)

    def mkCategory(self, entry):
        """Create the category file for a gallery item and store its cover URL.

        ``entry`` is the pyquery selection for one gallery item; its ``h3``
        text is the category name and the ``img`` ``data-original`` attribute
        is the cover image URL.
        """
        title = entry('h3').text()
        src = entry('img').attr('data-original')
        print (title)
        print (src)
        self.deal.mkCategory(title)
        self.saveImgSrc(src, title)

    def saveImgSrc(self, src, category):
        """Append ``src`` to the file of ``category``; skip missing URLs.

        pyquery returns None for an absent attribute, and Deal.saveImgSrc
        concatenates ``src`` into strings, so a missing URL would otherwise
        abort the whole callback with a TypeError.
        """
        if not src:
            return
        self.deal.saveImgSrc(src, category)
import os
#import time
# Output directory, named after the current local date (YYYY-MM-DD).
# Evaluated once when the module is loaded, so the name does not change
# while the process keeps running. It is a relative path.
DIR_PATH = time.strftime('%Y-%m-%d')
class Deal:
    """File-system helper that stores crawl output below ``DIR_PATH``.

    ``self.path`` always ends with ``'/'`` so that names can be appended to it
    by plain string concatenation.
    """

    def __init__(self):
        """Set up the output directory, creating it when it does not exist."""
        self.path = DIR_PATH
        if not self.path.endswith('/'):
            self.path = self.path + '/'
        if not os.path.exists(self.path):
            os.makedirs(self.path)

    def _categoryFile(self, category):
        """Return the path of the file that holds the URLs of ``category``.

        Category names come from scraped page titles. Path separators in them
        are replaced by ``'_'`` so the file always lands directly inside
        ``self.path`` instead of failing on (or escaping into) another
        directory. Names without separators are used unchanged.
        """
        safe = category
        for sep in ('/', os.sep, os.altsep):
            if sep:
                safe = safe.replace(sep, '_')
        return self.path + safe

    def mkDir(self, path):
        """Create directory ``path`` (stripped) under the output directory.

        Returns the resulting directory path, whether or not it already
        existed.
        """
        dir_path = self.path + path.strip()
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        return dir_path

    def mkCategory(self, category):
        """Create the file for ``category``, truncating it if it exists."""
        with open(self._categoryFile(category), 'w'):
            pass

    def saveImgSrc(self, src, category):
        """Append the URL ``src`` as one line to the file of ``category``."""
        path = self._categoryFile(category)
        print('saving ' + category + ' ' + src)
        with open(path, 'a') as f:
            f.write(src + '\n')

    def saveImg(self, content, path):
        """Write the binary ``content`` to ``path``, replacing any old file."""
        with open(path, 'wb') as f:
            f.write(content)

    def saveBrief(self, content, dir_path, name):
        """Write the text ``content`` UTF-8 encoded to ``dir_path/name.txt``.

        The file is opened in binary mode because the data written is the
        already-encoded byte string; a text-mode handle rejects bytes on
        Python 3. The ``with`` block also closes the handle, which the
        previous implementation never did.
        """
        file_name = dir_path + "/" + name + ".txt"
        with open(file_name, "wb") as f:
            f.write(content.encode('utf-8'))

    def getExtension(self, url):
        """Return the text after the last ``'.'`` in ``url``.

        When ``url`` contains no dot the whole string is returned.
        """
        return url.split('.')[-1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment