import os

import cloudscraper
import requests
from bs4 import BeautifulSoup

# Directory to save videos
SAVE_DIR = os.getcwd()

# Initialize cloudscraper
scraper = cloudscraper.create_scraper()
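
# Note: the cloudscraper object behaves like a regular requests session and
# surfaces network errors as requests exceptions, which is why the functions
# below catch requests.RequestException.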


# Function to download a file
def download_file(url, save_path):
    try:
        response = scraper.get(url, stream=True)
        response.raise_for_status()
        chunk_count = 0
        with open(save_path, 'wb') as file:
            # Stream the body in 1 MiB chunks so large videos are written
            # to disk incrementally instead of being held in memory
            for chunk in response.iter_content(chunk_size=1048576):
                chunk_count += 1
                print(f"Downloaded {chunk_count} MiB to {save_path}",
                      end='\r', flush=True)
                file.write(chunk)
        print(f"\nDownload completed: {save_path}")
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
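
# Note: on a failed transfer the except block above only prints the error,
# so a partially written file may be left behind at save_path.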


# Function to get the video URL from a page
def get_video_url(page_id):
    try:
        url = f"https://tv.infowars.com/index/display/id/{page_id}"
        response = scraper.get(url)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        # Match anchor tags whose visible text is exactly "mp4"
        a_tags = soup.find_all('a', string='mp4')

        for a in a_tags:
            video_url = a.get('href')
            if video_url and video_url.endswith('.mp4'):
                return video_url
    except requests.RequestException as e:
        print(f"Error fetching page {page_id}: {e}")
    return None
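
# Note: the scrape depends on the page labeling its download link with the
# literal text "mp4"; if the site's markup changes, get_video_url() returns
# None and the page is skipped.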


# Main function to iterate through pages and download videos
def download_videos(start_id, end_id):
    for page_id in range(start_id, end_id + 1):
        video_url = get_video_url(page_id)
        if video_url:
            # Name the local file after the last segment of the video URL
            file_name = video_url.split('/')[-1]
            save_path = os.path.join(SAVE_DIR, file_name)
            download_file(video_url, save_path)
        else:
            print(f"No MP4 link found on page {page_id}")
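
# Note: range(start_id, end_id + 1) makes the end ID inclusive, so pages
# START_ID through END_ID are all visited.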

# Start and end page IDs
START_ID = 1
END_ID = 13762

# Start downloading videos
if __name__ == "__main__":
    download_videos(START_ID, END_ID)