Last active
September 20, 2019 03:24
-
-
Save warecrash/8de832ade7f2648d9e0221b0c7d9fd50 to your computer and use it in GitHub Desktop.
Download every file linked on a page
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Snatch: download every file linked on a web page.

Scrapes the page at the URL given on the command line for <a href="..."> links
and downloads each link target into the current working directory.
"""
from bs4 import BeautifulSoup  # third-party HTML parser
import urllib.parse            # explicit: quote() is used below
import urllib.request
import sys
import argparse

parser = argparse.ArgumentParser(
    description='Scrape a page for all the files and download them to the current directory.')
parser.add_argument('root', metavar='url', type=str,
                    help='URL you want to download files from')
args = parser.parse_args()

# Normalize the root URL: a trailing slash and an explicit scheme are needed
# so that relative hrefs can simply be concatenated onto it.
if not args.root.endswith('/'):
    print("[+] You forgot to add a / to the end of your url. Snatch is adding it for you.")
    args.root = args.root + "/"
if not args.root.startswith('http'):
    print("[+] You forgot to add http:// to the beginning of your url. Snatch is adding it for you.")
    args.root = "http://" + args.root

print()
print("[+] URL: " + args.root)

request = urllib.request.urlopen(args.root)
# Honor the charset the server declares (None falls back to bs4's detection).
website = BeautifulSoup(request, features="html.parser",
                        from_encoding=request.info().get_param('charset'))

for link in website.find_all('a', href=True):
    # Percent-encode the href so it forms a valid URL when appended to root.
    # (renamed from `file`, which shadowed the builtin)
    target = urllib.parse.quote(link['href'])
    file_path = args.root + target
    try:
        print("[+] Downloading " + file_path)
        urllib.request.urlretrieve(file_path, target)
    # urllib.error.URLError/HTTPError subclass OSError; ValueError covers
    # malformed URLs. A bare except here would also swallow KeyboardInterrupt.
    except (OSError, ValueError) as err:
        print("[-] Failed to download file at " + file_path + " (" + str(err) + ")")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.