Created
November 29, 2023 19:42
-
-
Save amir16yp/5e54e5a3f06ac5d47e0f67dc6d1f2c78 to your computer and use it in GitHub Desktop.
YTS scraper/downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
import subprocess
from os import getcwd
from sys import argv
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
def ask_yes_no_question(question):
    """Prompt the user until a yes/no answer is given.

    Args:
        question: Text shown to the user; " (yes/no): " is appended to it.

    Returns:
        True for "yes" or "y", False for "no" or "n". Matching ignores case
        and surrounding whitespace. Also returns False when the input
        stream reaches end-of-file before an answer is given.
    """
    while True:
        try:
            user_response = input(question + " (yes/no): ").strip().lower()
        except EOFError:
            # Input is exhausted (closed or piped stdin); asking again would
            # only raise again, so fall back to the non-committal answer.
            print()
            return False
        if user_response in ("yes", "y"):
            return True
        if user_response in ("no", "n"):
            return False
        print("Please enter 'yes' or 'no'.")
def download_torrent(torrent_url, download_directory):
    """Download a torrent by running the external ``aria2c`` program.

    Args:
        torrent_url: URL of the torrent file or magnet link, passed to
            ``aria2c`` as its positional argument.
        download_directory: Directory passed to ``aria2c`` via ``--dir``.

    Returns:
        True when ``aria2c`` exits with status 0; False when the program
        could not be started or exited with a non-zero status. Failures are
        printed rather than raised.
    """
    # An argument list (no shell) keeps the URL from being shell-interpreted.
    command = [
        'aria2c',
        '--dir', download_directory,
        torrent_url,
    ]
    try:
        subprocess.run(command, check=True)
    except OSError as e:
        # Raised when the executable is missing or cannot be executed.
        print(f"Error: could not run aria2c: {e}")
        return False
    except subprocess.CalledProcessError as e:
        print(f"Error: {e}")
        return False
    print("Torrent download completed successfully!")
    return True
def get_movie_info(query, timeout=10):
    """Search yts.mx for ``query`` and map each result title to its link.

    Args:
        query: Search term. It is percent-encoded before being placed in the
            URL path, so characters such as spaces, ``/``, ``?`` and ``#``
            cannot change the structure of the request URL.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot hang the program indefinitely.

    Returns:
        A dict mapping movie title to the ``href`` of the first ``<a>`` in
        that result. Empty when the request fails, the status code is not
        200, or no usable results are found.
    """
    encoded_query = quote(query, safe='')
    url = f"https://yts.mx/browse-movies/{encoded_query}/all/all/0/latest/0/all"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return {}

    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return {}

    soup = BeautifulSoup(response.text, 'html.parser')
    # Result cards are divs with a class starting with "browse-movie-wrap".
    movie_wraps = soup.find_all('div', class_=lambda x: x and x.startswith("browse-movie-wrap"))

    movie_info_dict = {}
    for movie_wrap in movie_wraps:
        movie_link = movie_wrap.find('a')
        figure = movie_wrap.find('figure')
        img = figure.find('img') if figure is not None else None
        if movie_link is None or img is None:
            # Skip a card with unexpected markup instead of crashing.
            continue

        href = movie_link.get('href')
        img_alt = img.get('alt')
        if not href or not img_alt:
            continue

        # The alt text is the title followed by " download"; drop the suffix.
        title = img_alt.strip().rsplit(" download", 1)[0]
        movie_info_dict[title] = href

    return movie_info_dict
def get_movie_torrent_links(movie_link, timeout=10):
    """Collect the torrent download links listed on a movie page.

    Args:
        movie_link: URL of the movie page to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot hang the program indefinitely.

    Returns:
        A list of ``{'torrent': href, 'quality': link_text}`` dicts in page
        order, one per distinct href. Links whose text is exactly
        "Download" are skipped. Empty when the request fails or the status
        code is not 200.
    """
    try:
        response = requests.get(movie_link, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve data from {movie_link}. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    torrent_links = soup.find_all('a', href=lambda x: x and x.startswith("https://yts.mx/torrent/download/"))

    torrent_info_list = []
    encountered_links = set()  # hrefs already emitted, to drop repeats
    for torrent_link in torrent_links:
        href = torrent_link['href']
        # Strip so padded link text still matches the "Download" filter and
        # the quality label prints cleanly.
        text = torrent_link.get_text().strip()
        if text == "Download" or href in encountered_links:
            continue
        torrent_info_list.append({'torrent': href, 'quality': text})
        encountered_links.add(href)

    return torrent_info_list
# Example usage:
def _prompt_for_choice(prompt, option_count):
    """Ask until the user enters a whole number from 1 to ``option_count``."""
    while True:
        raw_value = input(prompt).strip()
        try:
            choice = int(raw_value)
        except ValueError:
            print(f"Please enter a number between 1 and {option_count}.")
            continue
        # Reject 0 and negatives explicitly: after subtracting 1 they would
        # index from the end of the list and silently pick the wrong entry.
        if 1 <= choice <= option_count:
            return choice
        print(f"Please enter a number between 1 and {option_count}.")


def main():
    """Run the interactive search, select and download flow.

    Asks for a search query, lists the matching titles, lets the user pick
    a movie and then a torrent quality, and optionally downloads the chosen
    torrent into the current working directory with aria2c.
    """
    query = input("Enter a movie query: ")
    movies_info = get_movie_info(query)
    if not movies_info:
        print("No movies found.")
        return

    # Display a numbered list of movie titles
    movie_titles = list(movies_info)
    for i, movie_title in enumerate(movie_titles, 1):
        print(f"{i} - {movie_title}")

    selection = _prompt_for_choice(
        "Enter the number of the movie you want to select: ",
        len(movie_titles),
    )
    selected_movie = movies_info[movie_titles[selection - 1]]

    torrent_links = get_movie_torrent_links(selected_movie)
    if not torrent_links:
        print("No torrent links found for the selected movie.")
        return

    # Display a numbered list of torrent qualities
    for i, torrent_info in enumerate(torrent_links, 1):
        print(f"{i} - Quality: {torrent_info['quality']}")

    quality_selection = _prompt_for_choice(
        "Enter the number of the torrent quality you want to download: ",
        len(torrent_links),
    )
    selected_torrent = torrent_links[quality_selection - 1]
    print(f"Selected torrent quality: {selected_torrent['quality']}")
    print(f"Download link: {selected_torrent['torrent']}")

    if ask_yes_no_question("Download this torrent using aria2c?"):
        print("Downloading torrent using aria2c...")
        download_torrent(selected_torrent['torrent'], getcwd())


# Guarded so that importing this module for its functions does not start
# the interactive prompts.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment