Created
June 7, 2025 09:11
-
-
Save X-Gorn/da4dfe297aece147c58fd1c908d8d21e to your computer and use it in GitHub Desktop.
TikTok Posts Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape post metadata (title, music, video source) for one profile from
# picuki.com, following the site's "load more" pagination until no further
# page is advertised. The collected rows end up in the module-level `data`.
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from curl_cffi import requests

BASE_URL = 'https://www.picuki.com'
PROFILE_URL = 'https://www.picuki.com/profile/linustech'
# Exact class string of the per-post <div> carrying the download attributes.
DOWNLOAD_DIV_CLASS = 'posts__video-item-story-download popup-open'


def _fetch_soup(url):
    """Download *url* while impersonating Chrome and return the parsed HTML."""
    response = requests.get(url, impersonate="chrome")
    return BeautifulSoup(response.text, 'html.parser')


def _extract_posts(soup, selector):
    """Return one dict per anchor matched by *selector* in *soup*.

    Each dict has the keys ``title`` (the ``alt`` text of the anchor's image),
    ``music`` and ``source`` (the ``data-music`` / ``data-source`` attributes
    of the download <div>). A value is ``None`` when the corresponding element
    or attribute is absent, so anchors that are not posts yield all-``None``
    rows, exactly as the original script produced.
    """
    posts = []
    for post in soup.select(selector):
        image = post.img
        item = post.find('div', {'class': DOWNLOAD_DIV_CLASS})
        posts.append({
            'title': image.get('alt') if image else None,
            'music': item.get('data-music') if item else None,
            'source': item.get('data-source') if item else None,
        })
    return posts


def _next_page_path(marker):
    """Return the next-page path advertised by *marker*, or ``None``.

    ``data-next`` is preferred; ``value`` is the fallback.
    """
    return marker.get('data-next') or marker.get('value')


def scrape_profile(profile_url=PROFILE_URL):
    """Collect post rows from *profile_url* and every follow-up page.

    Returns:
        A list of ``{'title', 'music', 'source'}`` dicts in page order.
    """
    soup = _fetch_soup(profile_url)
    posts = _extract_posts(soup, '.posts-container a')

    # The first page advertises the next page on a <div>; follow-up responses
    # advertise it on an <input> instead.
    marker = soup.find('div', {'class': 'posts-load-more'})
    visited = {profile_url}

    while marker is not None:
        path = _next_page_path(marker)
        if not path:
            # No usable attribute: stop instead of concatenating None.
            break

        # urljoin copes with both relative paths and already-absolute URLs.
        next_url = urljoin(BASE_URL, path)
        if next_url in visited:
            # A page pointing back at one already fetched would loop forever.
            break
        visited.add(next_url)

        soup = _fetch_soup(next_url)
        # NOTE(review): follow-up responses are scanned for every <a>,
        # presumably because they are bare fragments without the
        # '.posts-container' wrapper - confirm against a live response.
        posts.extend(_extract_posts(soup, 'a'))
        marker = soup.find('input', {'class': 'pagination-next-page-input'})

    return posts


data = scrape_profile()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment