Skip to content

Instantly share code, notes, and snippets.

@av1d
Last active January 30, 2025 04:04
Show Gist options
  • Save av1d/812b3e762749d462d09f3424876a8637 to your computer and use it in GitHub Desktop.
Save av1d/812b3e762749d462d09f3424876a8637 to your computer and use it in GitHub Desktop.
Video Sitemap Browser enables you to browse and search video sitemap XML files for titles and meta content, allowing you to conveniently download video from a user-friendly menu.
#!/usr/bin/python3
import json
import os
import readline
import requests
import subprocess
import xml.etree.ElementTree as ET
from rich.console import Console
console = Console()
version = "1.1"
# Fixed a crash that occurred when no sitemap files had been downloaded yet.
# On first run, all configured sitemaps are now downloaded automatically.
# https://gist.github.com/av1d/
# ◍◍ ╭┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅╮
# ╭┅┅┅╯ ▄ ▄▄▄▄▄ ███ │
# ┇ █ █ ▀▄ █ █ │
# ┇ █ █ ▄ ▀▀▀▀▄ █ ▀ ▄ │
# ┇ █ █ ▀▄▄▄▄▀ █ ▄▀ │
# ┇ █ █ ███ │
# ┇ █▐ │
# ┇ ▐ ╭Video Sitemap Browser╯
# ┗┅┅┅┅┅┅╯ by av1d ◍◍
#
# Enables you to browse and search
# video sitemap XML files for titles
# and meta content, allowing you to
# conveniently download them from a
# user-friendly menu.
#
#
# You can locate video-sitemap.xml files by searching like this:
# - https://www.google.com/search?q=%22video-sitemap.xml%22+filetype%3Axml
# - https://www.google.com/search?q=inurl%3A%22video-sitemap.xml%22
# Or, just try somerandomwebsite.com/video-sitemap.xml and see if they have one.
#
#
# INSTALLATION:
#
# If you are using the download feature, you must install yt-dlp:
# - https://github.com/yt-dlp/yt-dlp
#
# Install the Python modules:
# pip install gnureadline requests rich
#
# CONFIGURATION:
#
# This is where the URLs to your sitemaps will go. They're currently
# stored here so they can be updated from within the program. Filenames
# must be unique or will overwrite each other. The format is:
# URL : local filename
# Maps each remote sitemap URL to the local filename it is saved under; that
# filename is what the sitemap menu lists and what gets parsed.
urls = {
    'https://videosite.zzz/video-sitemap.xml': 'videosite-video-sitemap.xml',
    'https://anothervideosite.zzz/video-sitemap.xml': 'anothervideosite-video-sitemap.xml'
}
# That's it! You're done!
def clear_screen():
    """Clear the terminal by running the platform's screen-clearing command."""
    # os.name is 'nt' on Windows, where the command is `cls`; elsewhere use `clear`.
    if os.name == 'nt':
        command = 'cls'
    else:
        command = 'clear'
    os.system(command)
def _element_text(parent, tag, default):
    """Return the text of the first ``tag`` descendant of ``parent``, or ``default``.

    ``default`` is used both when the element is missing and when it is present
    but empty (``<video:title/>``), because ElementTree reports ``text`` as
    ``None`` for an empty element.
    """
    elem = parent.find(tag)
    if elem is None or not elem.text:
        return default
    return elem.text


def parse_sitemap(file_path):
    """Parse a video sitemap XML file into a list of video dicts sorted by title.

    Args:
        file_path: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A list of dicts with the keys ``title``, ``player_loc``,
        ``description``, ``thumbnail_loc`` (strings, with a placeholder when the
        element is missing or empty) and ``tags`` (list of non-empty tag
        strings), sorted case-insensitively by title.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    sitemap_ns = '{http://www.sitemaps.org/schemas/sitemap/0.9}'
    video_ns = '{http://www.google.com/schemas/sitemap-video/1.1}'

    root = ET.parse(file_path).getroot()
    videos = []
    for url in root.findall(f'.//{sitemap_ns}url'):
        for video in url.findall(f'.//{video_ns}video'):
            tag_elems = video.findall(f'.//{video_ns}tag')
            videos.append({
                'title': _element_text(video, f'.//{video_ns}title', "No title"),
                'player_loc': _element_text(
                    video, f'.//{video_ns}player_loc', "No player link available"),
                'description': _element_text(
                    video, f'.//{video_ns}description', "No description"),
                'thumbnail_loc': _element_text(
                    video, f'.//{video_ns}thumbnail_loc', "No thumbnail"),
                'tags': [tag.text for tag in tag_elems if tag.text],
            })
    # Every title is guaranteed to be a str here, so .lower() cannot fail.
    return sorted(videos, key=lambda entry: entry['title'].lower())
def display_titles(videos, start_index, per_page):
    """Print one page of video titles, numbered by position in ``videos``.

    Args:
        videos: List of video dicts; only the ``title`` key is read.
        start_index: Zero-based index of the first video on the page.
        per_page: Maximum number of titles to print.
    """
    # Imported here so the block carries its own dependency; rich is already
    # a requirement of this file.
    from rich.markup import escape

    page = videos[start_index:start_index + per_page]
    for number, video in enumerate(page, start=start_index + 1):
        # Titles come from the sitemap and may contain "[...]" sequences that
        # Rich would otherwise interpret as markup tags; escape them so they
        # are printed literally.
        title = escape(str(video['title']))
        console.print(f"[bold]{number}[/bold]. {title}", markup=True, highlight=False)
def search_videos(videos, query):
    """Return the videos whose title contains ``query``, ignoring case."""
    matches = []
    for video in videos:
        title = video['title']
        if query.lower() in title.lower():
            matches.append(video)
    return matches
def search_video_meta(videos, query):
    """Return the videos whose description or tags contain ``query``, ignoring case.

    The placeholder description "No description" is never treated as a match,
    and a description of ``None`` is skipped instead of raising.

    Args:
        videos: List of video dicts; the ``description`` and ``tags`` keys are read.
        query: Substring to look for.

    Returns:
        The matching video dicts, in their original order.
    """
    needle = query.lower()

    def matches(video):
        description = video['description']
        if (description is not None
                and description != "No description"
                and needle in description.lower()):
            return True
        return any(needle in tag.lower() for tag in video['tags'])

    return [video for video in videos if matches(video)]
"""
def download_sitemaps():
print("Updating sitemaps ...")
for url, filename in urls.items():
try:
response = requests.get(url)
response.raise_for_status()
with open(filename, 'wb') as file:
file.write(response.content)
print(f"Successfully downloaded and saved {filename}")
input("\nPress Enter to continue...")
return True
except requests.exceptions.RequestException as e:
print(f"Error downloading {url}: {e}")
input("\nPress Enter to continue...")
return False
"""
def download_sitemaps():
    """Download every sitemap listed in ``urls`` to its local filename.

    Each URL is attempted independently; a failure is reported and the loop
    moves on to the next one. The user is asked to press Enter once at the end.

    Returns:
        True if every sitemap was downloaded and saved, False if any failed.
    """
    print("Updating sitemaps ...")
    success = True
    for url, filename in urls.items():
        try:
            # Without a timeout requests waits indefinitely on a stalled server.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filename, 'wb') as file:
                file.write(response.content)
        # RequestException derives from IOError/OSError, so it must be caught
        # before the generic OSError handler used for local write failures.
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {url}: {e}")
            success = False
        except OSError as e:
            print(f"Error saving {filename}: {e}")
            success = False
        else:
            print(f"Successfully downloaded and saved {filename}")
    input("\nPress Enter to continue...")
    return success
def choose_sitemap():
    """Show the configured sitemaps and return the local filename the user picks.

    Keeps prompting until a valid menu number is entered.

    Returns:
        The local filename (a value of ``urls``) of the chosen sitemap.
    """
    while True:
        clear_screen()
        print("Choose sitemap:")
        filenames = list(urls.values())
        for map_num, sitemap_value in enumerate(filenames, start=1):
            console.print(f"[bold]{map_num}[/bold]. {sitemap_value}", markup=True, highlight=False)
        choice = input("Enter your choice: ").strip()
        # isdecimal() only accepts characters int() can parse; isdigit() also
        # accepts e.g. "²", for which int() raises ValueError.
        if choice.isdecimal() and 1 <= int(choice) <= len(filenames):
            return filenames[int(choice) - 1]
        print("Invalid choice. Please try again.")
        input("\nPress Enter to continue...")
def _run_yt_dlp_download(url):
    """Probe ``url`` with yt-dlp and, if it looks available, download it to the current directory."""
    print(f"Fetching video info for: {url}")
    # "--" ends option parsing, so a URL taken from a sitemap that begins with
    # "-" cannot be interpreted as a yt-dlp option.
    probe = subprocess.run(
        ['yt-dlp', '-J', '--no-playlist', '--', url],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if probe.returncode != 0:
        print(f"Error fetching video info: {probe.stderr.strip()}")
        if "404" in probe.stderr:
            print("The video is not available (404 Not Found).")
        return

    try:
        video_info = json.loads(probe.stdout)
    except json.JSONDecodeError:
        print("Error parsing video information. The video might be unavailable.")
        return

    # A usable result is expected to carry both a title and a duration.
    if not ('title' in video_info and 'duration' in video_info):
        print("The video appears to be unavailable or deleted.")
        return

    print(f"Downloading: {video_info['title']}")
    # stderr is merged into stdout: reading only stdout while stderr is a
    # separate, undrained pipe can block the child once that pipe fills up.
    with subprocess.Popen(
        ['yt-dlp', '-P', '.', '--no-playlist', '--', url],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        universal_newlines=True,
    ) as download_process:
        for output in download_process.stdout:
            print(output.strip())
    # Leaving the with-block waits for the process, so returncode is set here.
    if download_process.returncode == 0:
        print(f"Successfully downloaded video: {video_info['title']}")
    else:
        print("Download failed.")
        print(f"yt-dlp exited with status {download_process.returncode}; "
              "its error output is shown above.")


def download_video(url):
    """Download the video at ``url`` with yt-dlp, reporting progress and errors.

    The video's metadata is fetched first; the download only starts when that
    succeeds. Whatever the outcome, the user is asked to press Enter before
    the function returns, so messages stay visible until acknowledged.

    Args:
        url: Player/page URL to hand to yt-dlp.
    """
    try:
        _run_yt_dlp_download(url)
    except FileNotFoundError:
        print("yt-dlp is not installed or not in the system PATH")
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
    input("\nPress Enter to continue...")
def handle_command_history(event):
    """Step through earlier commands based on ``event.keyname`` ('up' or 'down').

    Looks up the current readline buffer in ``command_history`` and inserts the
    neighbouring entry (previous for 'up', next for 'down') when one exists.

    NOTE(review): nothing in the visible source calls this function; main()
    only mentions its name inside strings passed to readline.parse_and_bind.
    NOTE(review): ``command_history`` is not assigned at module level in the
    visible source (main() has a local of that name, which is not reachable
    from here), so this lookup looks like it would raise NameError -- confirm.
    """
    line = readline.get_line_buffer()
    if event.keyname == 'up':
        try:
            index = command_history.index(line)
            if index > 0:
                readline.insert_text(command_history[index - 1])
                readline.redisplay()
        except ValueError:
            # Current buffer is not a previously entered command; nothing to recall.
            pass
    elif event.keyname == 'down':
        try:
            index = command_history.index(line)
            if index < len(command_history) - 1:
                readline.insert_text(command_history[index + 1])
                readline.redisplay()
        except ValueError:
            # Current buffer is not a previously entered command; nothing to recall.
            pass
    # NOTE(review): parse_and_bind_default_key_bindings does not appear in the
    # documented readline module API -- verify this attribute exists.
    return readline.parse_and_bind_default_key_bindings(event)
def _load_videos(file_path, allow_download=True):
    """Parse ``file_path`` into a video list, downloading the sitemaps once if that fails.

    Returns an empty list (after reporting the problem) when the sitemap still
    cannot be read, so the menu loop keeps running instead of crashing.
    """
    try:
        return parse_sitemap(file_path)
    except (OSError, ET.ParseError) as err:
        if not allow_download:
            print(f"Could not load {file_path}: {err}")
            input("\nPress Enter to continue...")
            return []
    print("Sitemaps not found. Downloading...")
    download_sitemaps()
    try:
        return parse_sitemap(file_path)
    except (OSError, ET.ParseError) as err:
        print(f"Could not load {file_path}: {err}")
        input("\nPress Enter to continue...")
        return []


def _show_search_results(videos, results, query, kind):
    """Print ``results`` for a "title" or "metadata" search and wait for Enter.

    Each hit is numbered by its position in ``videos`` so the number can be
    used directly with the ``l <number>`` and ``d <number>`` commands.
    """
    if not results:
        print(f"No {kind} results found for '{query}'")
        input("\nPress Enter to continue...")
        return

    positions = {id(video): number for number, video in enumerate(videos, start=1)}
    needle = query.lower()
    print(f"\nSearch results for {kind} '{query}':")
    for video in results:
        print(f"{positions.get(id(video), '?')}. {video['title']}")
        if kind != "metadata":
            continue
        description = video['description']
        # Only report a description hit for a real description, never for the
        # placeholder text or a missing value.
        if (description is not None
                and description != "No description"
                and needle in description.lower()):
            print(f" Description Match: {description}")
        matching_tags = [tag for tag in video['tags'] if needle in tag.lower()]
        if matching_tags:
            print(f" Matching Tags: {', '.join(matching_tags)}")
    input("\nPress Enter to continue...")


def main():
    """Run the interactive sitemap browser until the user quits.

    Lets the user pick a sitemap, then loops over a paged title list and a
    command prompt (paging, title/meta search, show link, download, update
    sitemaps, switch sitemap, quit).
    """
    file_path = choose_sitemap()
    videos = _load_videos(file_path)
    page = 0
    per_page = 10

    # No explicit key bindings are set up here: with the readline module
    # imported, input() already offers line editing and up/down history recall.
    # The earlier 'up-history: handle_command_history' strings were not valid
    # readline init lines (parse_and_bind takes inputrc syntax, not Python
    # callables).
    while True:
        clear_screen()
        console.print(f"Video Sitemap Browser - [bold]{file_path}[/bold]")
        print("=" * 40)
        display_titles(videos, page * per_page, per_page)
        console.print("\nCommands: ([bold]↑↓[/bold] recalls command history)")
        console.print("[bold]n[/bold] - Next page")
        console.print("[bold]p[/bold] - Previous page")
        console.print("[bold]s <query>[/bold] - Search video titles", markup=True, highlight=False)
        console.print("[bold]m <query>[/bold] - Search video meta", markup=True, highlight=False)
        console.print("[bold]l <number>[/bold] - Show video player link", markup=True, highlight=False)
        console.print("[bold]d <number>[/bold] - Download video", markup=True, highlight=False)
        console.print("[bold]u[/bold] - Update sitemaps")
        console.print("[bold]c[/bold] - Switch to another sitemap")
        console.print("[bold]q[/bold] - Quit")
        command = input("\nEnter command: ").strip().lower()

        if command == 'n':
            if (page + 1) * per_page < len(videos):
                page += 1
        elif command == 'p':
            if page > 0:
                page -= 1
        elif command.startswith(('l ', 'd ')):
            try:
                index = int(command.split()[1]) - 1
            except ValueError:
                print("Invalid input. Please enter a number.")
                input("\nPress Enter to continue...")
                continue
            if not 0 <= index < len(videos):
                print("Invalid number.")
                input("\nPress Enter to continue...")
                continue
            print(f"\nVideo: '{videos[index]['title']}'")
            print(f"Link: {videos[index]['player_loc']}")
            if command.startswith('d '):
                # download_video() does its own "Press Enter" pause.
                download_video(videos[index]['player_loc'])
            else:
                input("\nPress Enter to continue...")
        elif command.startswith('s '):
            query = command[2:]
            _show_search_results(videos, search_videos(videos, query), query, "title")
        elif command.startswith('m '):
            query = command[2:]
            _show_search_results(videos, search_video_meta(videos, query), query, "metadata")
        elif command == 'u':
            download_sitemaps()
            # A download was just attempted, so do not trigger another one.
            videos = _load_videos(file_path, allow_download=False)
            page = 0
        elif command == 'c':
            file_path = choose_sitemap()
            videos = _load_videos(file_path)
            page = 0
        elif command == 'q':
            break
        else:
            print("Invalid command.")
            input("\nPress Enter to continue...")
# Start the interactive browser only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment