Created
June 28, 2024 16:05
-
-
Save mundane799699/dac6f01905fc9c358b686c9ec4ffa9ba to your computer and use it in GitHub Desktop.
抖音爬虫
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from playwright.sync_api import sync_playwright, Page | |
from openpyxl import Workbook | |
import time | |
from datetime import datetime | |
# Install dependencies:
#   pip install playwright openpyxl
#   playwright install
# Create the Excel workbook at import time; `wb` is saved by main() and `ws`
# receives one row per user from handle_response().
# Header columns: name, Douyin ID, profile link, likes received, follower count.
wb = Workbook()
ws = wb.active
ws.append(
    [
        "名称",
        "抖音号",
        "链接",
        "获赞",
        "粉丝数",
    ]
)
def main(keyword="鹿安指纹锁", login_wait_ms=60000, scroll_pause_ms=3000):
    """Search Douyin users for a keyword, scroll to the end, and save results to Excel.

    Opens a visible Chromium window, loads the user-search page for
    ``keyword``, waits so a captcha / SMS verification can be completed by
    hand, then keeps scrolling to the bottom until the page height stops
    growing. Matching API responses are recorded by ``handle_response`` into
    the module-level worksheet, which is written to
    ``抖音_<YYYYmmddHHMMSS>.xlsx`` at the end.

    Args:
        keyword: Search term; it is percent-encoded into the search URL.
        login_wait_ms: Milliseconds to wait after the first page load for
            manual verification. Shorten it when no captcha is shown.
        scroll_pause_ms: Milliseconds to wait after each scroll for new
            results to load.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import quote

    encoded_keyword = quote(keyword, safe="")
    search_url = f"https://www.douyin.com/search/{encoded_keyword}?type=user"

    with sync_playwright() as p:
        # The persistent profile directory 'douyin' is reused between runs.
        browser = p.chromium.launch_persistent_context(headless=False, user_data_dir='douyin')
        try:
            page = browser.new_page()
            page.on("response", handle_response)
            page.goto(search_url)

            # Time window for solving a captcha or SMS verification by hand.
            page.wait_for_timeout(login_wait_ms)

            last_height = page.evaluate("document.body.scrollHeight")
            while True:
                page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                # Use Playwright's own wait rather than time.sleep(): sleeping
                # blocks the sync API's dispatcher, so "response" events would
                # not be processed while we pause.
                page.wait_for_timeout(scroll_pause_ms)
                new_height = page.evaluate("document.body.scrollHeight")
                if new_height == last_height:
                    print("End of page reached")
                    break
                last_height = new_height
        finally:
            # Save whatever was collected even if navigation or scrolling
            # failed part-way, and always release the browser afterwards.
            try:
                timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                filename = f'抖音_{timestamp}.xlsx'
                wb.save(filename)
                print(f"数据已保存到 {filename}")
            finally:
                browser.close()
def handle_response(response):
    """Append users found in a Douyin user-search API response to the worksheet.

    Responses whose URL does not contain ``aweme/v1/web/discover/search`` are
    ignored. For matching responses, each entry of ``user_list`` contributes
    one row (nickname, Douyin ID, profile link, likes received, follower
    count) to the module-level worksheet ``ws`` and is echoed to stdout.

    The handler runs as a page event callback, so it must not raise on an
    unexpected payload: an unparsable body, a missing or null ``user_list``,
    or an entry without ``user_info`` is skipped instead of aborting the run.

    Args:
        response: Object exposing a ``url`` string and a ``json()`` method
            (a Playwright response when registered via ``page.on``).
    """
    if "aweme/v1/web/discover/search" not in response.url:
        return

    try:
        data = response.json()
    except Exception as exc:  # event-handler boundary: report and move on
        print(f"Skipping unparsable search response: {exc}")
        return

    if not isinstance(data, dict):
        return

    # `user_list` may be absent or null; treat both as "no users".
    for user in data.get('user_list') or []:
        user_info = user.get('user_info') if isinstance(user, dict) else None
        if not user_info:
            continue
        nickname = user_info.get('nickname')
        unique_id = user_info.get('unique_id')
        sec_uid = user_info.get('sec_uid')
        link = f"https://www.douyin.com/user/{sec_uid}"
        total_favorited = user_info.get('total_favorited')
        follower_count = user_info.get('follower_count')
        ws.append([nickname, unique_id, link, total_favorited, follower_count])
        print(f"名称:{nickname},抖音号:{unique_id},链接:{link},获赞:{total_favorited},粉丝数:{follower_count}")
# Run the scraper only when executed as a script, not when imported.
if '__main__' == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment