Skip to content

Instantly share code, notes, and snippets.

@synodriver
Created May 30, 2022 03:39
Show Gist options
  • Select an option

  • Save synodriver/b8770df33849c48604c059892ffeef83 to your computer and use it in GitHub Desktop.

Select an option

Save synodriver/b8770df33849c48604c059892ffeef83 to your computer and use it in GitHub Desktop.
import scrapy
from scrapy.http import TextResponse
import json
class BiliSpider(scrapy.Spider):
    """Walk the reply pages of one bilibili thread and print each reply.

    Crawling starts from the reply-list API URL in ``start_urls`` and follows
    the ``next=<page>`` query parameter one page at a time until the API
    reports ``data.cursor.is_end``.  For every reply, the member's ``uname``
    and the reply's ``content.ipv6`` value are printed to stdout.
    """

    name = 'bili'
    allowed_domains = ['www.bilibili.com', "api.bilibili.com"]
    start_urls = [
        'https://api.bilibili.com/x/v2/reply/main?jsonp=jsonp&next=0&type=1&oid=897003307&mode=3&plat=1&_=1653878127432',
    ]

    # Upper bound on how often a single page is re-requested when its replies
    # lack the expected fields.  Without a bound, a page that never carries
    # them would be re-fetched forever (retries bypass the duplicate filter).
    max_ip_retries = 10

    custom_settings = {
        "DEFAULT_REQUEST_HEADERS": {
            # HTTP/2 pseudo-headers (":authority", ":method", ":scheme",
            # ":path") copied from browser dev tools are intentionally not
            # listed here: they are not regular header fields, and the HTTP
            # layer derives the equivalent information from the request URL
            # and method on its own.
            "accept": "*/*",
            "referer": "https://www.bilibili.com/video/BV1RA4y1Z7qP",
            # Placeholder value ("your cookie"); replace with a real cookie.
            "cookie": "你的cookie",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39",
        }
    }

    def parse(self, response: TextResponse, tp: int = 0, retries: int = 0):
        """Handle one page of replies.

        Args:
            response: JSON response of the reply-list endpoint.
            tp: Page number that ``response`` was requested with, i.e. the
                value of the ``next=`` query parameter in ``response.url``.
            retries: How many times this same page has already been
                re-requested because its replies lacked an expected key.

        Yields:
            A request for the following page (unless the cursor reports the
            end), and/or a retry request for the current page.
        """
        data = json.loads(response.text)["data"]

        if not data["cursor"]["is_end"]:
            # The page number is advanced by rewriting it inside the URL, so
            # ``tp`` must match the ``next=`` value of the current URL.
            yield scrapy.Request(
                response.url.replace(f"next={tp}", f"next={tp + 1}"),
                cb_kwargs={"tp": tp + 1},
            )

        try:
            replies = data["replies"]
            # Extract every row before printing anything: if one reply is
            # missing a key the whole page is retried, and printing eagerly
            # would emit the leading replies a second time on the retry.
            rows = None if replies is None else [
                (rp["member"]["uname"], rp["content"]["ipv6"]) for rp in replies
            ]
        except KeyError:
            if retries >= self.max_ip_retries:
                self.logger.warning(
                    "Giving up on %s after %d retries: expected reply fields still missing",
                    response.url,
                    retries,
                )
                return
            print("没有ip 怪 retrying")
            # dont_filter=True: the URL is identical to the one just fetched,
            # so it must bypass duplicate filtering to be requested again.
            yield scrapy.Request(
                response.url,
                cb_kwargs={"tp": tp, "retries": retries + 1},
                dont_filter=True,
            )
            return

        if rows is None:
            # A null ``replies`` value marks a page without any replies.
            print("end")
            return

        for uname, ip_info in rows:
            print(uname, ip_info)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment