Last active
May 2, 2021 18:05
-
-
Save KokoseiJ/1172a8129d22b7074b56f4b9f76abc56 to your computer and use it in GitHub Desktop.
Clone OneIndex recursively
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Requires the requests and bs4 modules (bs4 is used with the lxml parser).
# You also need the wget binary on your system.
# The first argument is the URL of the target OneIndex site; it is required.
# The second argument is the name of the folder where the contents will be
# stored; it defaults to 'oneindex_download' if not provided.
# Any other arguments are ignored.
#
import os
import subprocess
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup as bs
# Command-line handling: argv[1] is the mandatory site URL, argv[2] the
# optional destination folder. Anything after that is ignored.
if len(sys.argv) <= 1:
    print("Error: Site URL not provided")
    raise SystemExit(1)

grab_url = sys.argv[1]
try:
    name = sys.argv[2]
except IndexError:
    # No destination given: fall back to the default folder name.
    name = "oneindex_download"
def request(url, method, stream=False):
    """Send an HTTP request, retrying once per second until the status is 200.

    Args:
        url: Address to request.
        method: HTTP verb handed to ``requests.request`` (e.g. ``"GET"``).
        stream: Forwarded unchanged as the ``stream`` argument.

    Returns:
        The first response whose ``status_code`` is 200.

    Exceptions raised by ``requests.request`` are not caught here and
    propagate to the caller.
    """
    # Loop rather than recurse: a long run of failures must not end in a
    # RecursionError.
    while True:
        r = requests.request(method, url, stream=stream)
        if r.status_code == 200:
            return r
        print(f"Error: {r.status_code} while trying {url}. Retrying...")
        # Release the failed response's connection before trying again.
        r.close()
        time.sleep(1)
def get_lists(html):
    """Collect the entry links of a directory listing page.

    Args:
        html: Markup of the listing page.

    Returns:
        A list with the ``href`` value of every ``<a class="item">`` inside
        the first ``<div class="items">``. The list is empty when the page
        has no such container.
    """
    soup = bs(html, features='lxml')
    container = soup.find("div", {'class': 'items'})
    if container is None:
        # The page is not a listing (for example an error page served with
        # status 200): report it instead of failing with AttributeError.
        print("Warning: no item list found in page")
        return []
    # href=True leaves out anchors without a target, which would otherwise
    # raise KeyError in the lookup below.
    links = container.find_all("a", {'class': 'item'}, href=True)
    return [link['href'] for link in links]
def download_dir(url, name):
    """Recursively download one remote folder into a local directory.

    The local directory ``name`` is created if needed and becomes the
    working directory while its contents are fetched. The previous working
    directory is restored afterwards, even when an error occurs.

    Args:
        url: Address of the remote folder listing.
        name: Local directory (a plain name or a path) to store the
            contents in.
    """
    print(f"Downloading folder '{name}'...")
    # makedirs accepts both an already existing folder and a nested path.
    os.makedirs(name, exist_ok=True)
    # Remember the starting point: chdir("..") would end up in the wrong
    # place when `name` consists of several path components.
    previous_dir = os.getcwd()
    os.chdir(name)
    try:
        url = url[:-1] if url.endswith("/") else url
        r = request(url, "GET")
        for file in get_lists(r.text):
            target = urljoin(url, file)
            if not file.endswith("/"):
                download_file(target)
                continue
            # A trailing slash marks a sub-folder; its last path segment is
            # used as the local folder name.
            subdir = file.rstrip("/").rsplit("/", 1)[-1]
            if subdir in ("", ".", ".."):
                # Links to the root, current or parent folder would leave
                # the download directory or recurse without end.
                print(f"Skipping folder link '{file}'...")
                continue
            download_dir(target, subdir)
    finally:
        os.chdir(previous_dir)
def download_file(url):
    """Download one file into the current working directory using wget.

    Failures (wget missing or exiting with a non-zero status) are reported
    on standard output and do not raise, so the caller can continue with
    the next file.

    Args:
        url: Address of the file to fetch.
    """
    print(f"Downloading file '{url.rsplit('/')[-1]}'...")
    try:
        # Pass an argument list instead of a shell string, so quotes or
        # shell metacharacters in a scraped URL cannot run commands.
        # "--" keeps a URL starting with "-" from being read as an option.
        result = subprocess.run(["wget", "--", url], check=False)
    except OSError as err:
        print(f"Error: could not run wget: {err}")
        return
    if result.returncode != 0:
        print(f"Error: wget exited with status {result.returncode} for {url}")
        return
    print("Finished!")
# Entry point: download the site given on the command line into the
# chosen folder.
download_dir(url=grab_url, name=name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment