Last active
January 9, 2022 08:44
-
-
Save Kingson/5703956 to your computer and use it in GitHub Desktop.
下载读远网站电子书 V1.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# coding=utf-8 | |
__author__ = 'Kingson zhou' | |
import requests | |
import os | |
import time | |
from bs4 import BeautifulSoup | |
import random | |
""" | |
说明: | |
1.此脚本是用来下载“读远”(http://www.readfar.com/)上的电子书,以防备突然被关站。 | |
2.本脚本主要使用requests库、BeautifulSoup库以及random函数 | |
3.脚本中的Cookie和add_comments函数中的authenticity_token值和当前登录用户有关,需要自行替换 | |
""" | |
# HTTP headers sent with every request that must look like the logged-in user.
#
# The User-Agent is assembled from adjacent string literals. The previous
# backslash continuation lived *inside* the string, so whatever started the
# following source line (indentation, a stray "# ") became part of the header.
#
# NOTE(security): the Cookie is a session captured from a browser and is tied
# to one account. Replace it with your own before running, and avoid
# publishing a live session cookie.
header = {
    "User-Agent": (
        "Mozilla/5.0(Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1)"
        " Gecko/20090624 Firefox/3.5"
    ),
    "Cookie": (
        "angle_session=BAh7CEkiD3Nlc3Npb25faWQGOgZFRkkiJWEyOTAzODA0ZmNhNWY1NDdjZmRiMDY1ZTk1NTAyZGQ2BjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMVFheUs4cW85d1AzS1diVkIwVFM0OXVDcmZsQWVDajVDaW1nQlNkV3VtQlE9BjsARkkiDHVzZXJfaWQGOwBGVToaTW9wZWQ6OkJTT046Ok9iamVjdElkIhFRq%2FXegr6cG7QAAAI%3D--6bd72d49961e0a563077b14992ab80536cd4d98c; "
        "remember_token=51abf5de82be9c1bb4000002%24a0564e924a2ccdf23b052ecbf60e8dbbf784472a90b3888fe5d0d447ef3ad4f4; "
        "_ga=1.2.616581869.1370224027; "
        "Hm_lvt_866bd67df0812fb6fba5538f31acc23b=1370224027; "
        "Hm_lpvt_866bd67df0812fb6fba5538f31acc23b=1370236606"
    ),
}

# Site root; prefixed to the relative comment-form action and download links.
main_url = 'http://www.readfar.com'

# Canned comment texts; add_comments picks one at random for each book.
msg = ['正在看,还可以', '一本非常不错的书,值得大家阅读。', '正在找这本书,谢谢', '谢谢分享']
def save(bookename):
    """Create the directory ``bookename`` unless something already exists there.

    Returns True when the directory was created by this call, and False when
    the path already existed (in which case nothing is touched).
    """
    already_present = os.path.exists(bookename)
    if already_present:
        return False
    os.mkdir(bookename)
    return True
def add_comments(book_list_url, authenticity_token=None):
    """Post one canned comment on every book linked from a listing page.

    Per the original author's note, downloading costs karma (roughly 1 karma
    per MB) and karma has to be built up beforehand by commenting on books;
    the author acknowledges this borders on comment spam.

    Args:
        book_list_url: URL of a listing page whose books are rendered as
            ``<a class="book thumbnail">`` links.
        authenticity_token: CSRF token belonging to the logged-in session.
            When omitted, the token is looked up in a hidden
            ``authenticity_token`` input of each book's comment form
            (presumably emitted by the site's form helper -- verify against
            the live page). If no token can be found the book is reported as
            failed and skipped.

    A book is also reported as failed when its page has no
    ``form.new_comment`` element. The function sleeps 10 seconds after each
    book to avoid hammering the site.
    """
    listing_response = requests.get(book_list_url)
    listing_soup = BeautifulSoup(listing_response.content)
    book_links = listing_soup.find_all("a", attrs={"class": "book thumbnail"})
    for book_link in book_links:
        title_tag = book_link.find('h4')
        book_name = title_tag.string if title_tag is not None else None

        details_response = requests.get(book_link["href"], headers=header)
        details_soup = BeautifulSoup(details_response.content)
        comment_form = details_soup.find(
            "form", attrs={"class": "new_comment"})

        # Prefer the caller-supplied token; otherwise try the form itself.
        token = authenticity_token
        if token is None and comment_form is not None:
            token_input = comment_form.find(
                "input", attrs={"name": "authenticity_token"})
            if token_input is not None:
                token = token_input.get("value")

        posted = False
        if comment_form is not None and token:
            payload = {"utf8": "✓",
                       "authenticity_token": token,
                       "comment[content]": random.choice(msg),
                       "commit": "确定",
                       "sync": 0}
            post_response = requests.post(
                main_url + comment_form["action"],
                data=payload, headers=header)
            posted = post_response.status_code == 200

        # Single-argument print(...) produces the same line on Python 2 and 3.
        if posted:
            print(u"%s 评论添加成功" % book_name)
        else:
            print(u"%s 评论添加失败" % book_name)
        time.sleep(10)
def download_book(book_list_url):
    """Download every book reachable from a listing page, following pagination.

    For each ``<a class="book thumbnail">`` on the listing page the book's
    detail page is fetched, and every ``<a data-toggle="tooltip">`` link on it
    is treated as a download link. Files are written to
    ``<book name>/<book name>.<format>``, where the format is the first CSS
    class of the download link.

    A book whose directory already exists is skipped, so an interrupted run
    can be restarted. The directory check is made once per book: doing it per
    download link made every link after the first look "already downloaded".

    After a page is processed, the link inside ``<li class="next">`` of that
    same page is followed; when there is none, "Download Success!" is printed
    and the function returns. Network errors propagate to the caller instead
    of being reported as success.

    Args:
        book_list_url: URL of the first listing page. A falsy value makes the
            function return immediately.
    """
    while book_list_url:
        listing_response = requests.get(book_list_url)
        listing_soup = BeautifulSoup(listing_response.content)
        book_links = listing_soup.find_all(
            "a", attrs={"class": "book thumbnail"})
        for book_link in book_links:
            title_tag = book_link.find('h4')
            book_name = title_tag.string if title_tag is not None else None
            if not book_name:
                # No usable title means no directory/file name; skip the book.
                continue

            details_response = requests.get(book_link["href"])
            details_soup = BeautifulSoup(details_response.content)
            download_links = details_soup.find_all(
                "a", attrs={"data-toggle": "tooltip"})
            # Only create the directory when there is something to put in it,
            # and skip books that were already fetched by an earlier run.
            if not download_links or save(book_name) is False:
                continue

            for download_link in download_links:
                book_format = download_link["class"][0]
                book_download_url = main_url + download_link["href"]
                download_response = requests.get(
                    book_download_url, headers=header)
                target_path = os.path.join(
                    book_name, book_name + "." + book_format)
                with open(target_path, "wb") as out_file:
                    out_file.write(download_response.content)
                # Single-argument print(...) works on Python 2 and 3 alike.
                print(u"%s %s Download Done." % (book_name, book_download_url))
                time.sleep(10)

        # The pagination link lives on the page that was just parsed, so
        # there is no need to request it a second time.
        next_item = listing_soup.find("li", attrs={"class": "next"})
        next_anchor = next_item.find('a') if next_item is not None else None
        next_href = next_anchor.get('href') if next_anchor is not None else None
        if not next_href:
            print("Download Success!")
            break
        book_list_url = next_href
if __name__ == '__main__':
    # Script entry point: crawl the books/best listing (sort=update) and
    # download what it links to. To comment on the books of a listing page
    # instead, call e.g. add_comments("http://www.readfar.com/books?p=57").
    download_book('http://www.readfar.com/books/best?sort=update')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment