Created
August 16, 2019 16:25
-
-
Save ipconfiger/378718c7220fb975bba1a992ad376f29 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf8 | |
import os | |
import time | |
import request | |
from lxml import html | |
def get_page_url(pid):
    """Return the listing-page URL for page number ``pid``.

    Page 1 is served from ``index.html``; any other page number is
    substituted into ``index_{pid}.html``.
    """
    if pid == 1:
        return "http://www.imeitou.com/nvsheng/mnns/index.html"
    return "http://www.imeitou.com/nvsheng/mnns/index_{}.html".format(pid)
def main():
    """Download every image referenced on the paginated listing pages.

    Reads the total page count from the first listing page, then visits
    each page in turn and saves every ``<img>`` matched by the listing
    XPath into an ``images`` directory under the current working
    directory. Each file is named after the last path segment of its URL.
    """
    page = request.get_url('http://www.imeitou.com/nvsheng/mnns/index.html')
    root = html.document_fromstring(page)
    # The text of this element is used as the total number of pages.
    ele = root.xpath('/html/body/div[5]/div[2]/ul/div/div/span[3]/strong[1]')
    page_number = int(ele[0].text)

    # open(..., 'wb') does not create missing parent directories, so make
    # sure the download directory exists before the first write.
    images_dir = os.path.join(os.getcwd(), 'images')
    if not os.path.isdir(images_dir):
        os.makedirs(images_dir)

    for pid in range(1, page_number + 1):
        page_url = get_page_url(pid)
        page = request.get_url(page_url)
        page_root = html.document_fromstring(page)
        eles = page_root.xpath('/html/body/div[5]/div[2]/ul/li/a/img')
        for img_ele in eles:
            image_url = img_ele.attrib['src']
            img = request.get_url(image_url)
            local_path = os.path.join(images_dir, image_url.split('/')[-1])
            with open(local_path, 'wb') as f:
                f.write(img)
            # Short pause between image downloads.
            time.sleep(0.1)
        # Slightly longer pause before requesting the next listing page.
        time.sleep(0.2)
# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf8 | |
import requests | |
def get_url(url, timeout=10):
    """Fetch ``url`` with browser-like request headers and return the body.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests``. Without it a stalled server
            would block the caller indefinitely.

    Returns:
        The raw response body as bytes (``Response.content``).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {
        'Origin': 'http://www.imeitou.com',
        'Referer': 'http://www.imeitou.com/nvsheng/mnns/index.html',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
        'Cookie': 'Hm_lvt_1ef6d22326fe2c0f9411d7294ca6d902=1565681953,1565682000; Hm_lpvt_1ef6d22326fe2c0f9411d7294ca6d902=1565950344'
    }
    # A fresh session is created per call; the context manager closes it
    # (and its pooled connections) as soon as the body has been read.
    with requests.Session() as session:
        session.headers.update(headers)
        return session.get(url, timeout=timeout).content
# Manual smoke test: fetch the first listing page and print its raw bytes.
if __name__ == "__main__":
    print(get_url('http://www.imeitou.com/nvsheng/mnns/index.html'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment