Last active
March 6, 2021 00:43
-
-
Save pandada8/ae212014b18bb4620bb0f684921147d6 to your computer and use it in GitHub Desktop.
dumpfolder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from requests_html import HTMLSession | |
import os | |
import sys | |
import urllib | |
from xmlrpc.client import ServerProxy | |
# Module-level HTMLSession; getLinkOfPage uses it for every page fetch.
session = HTMLSession()
def addLink(link, name):
    """Submit ``link`` to the aria2 XML-RPC endpoint on localhost.

    Calls ``aria2.addUri`` at ``http://localhost:6800/rpc`` with ``name``
    passed as the ``out`` option, then prints whatever the call returns.
    """
    options = {"out": name}
    with ServerProxy("http://localhost:6800/rpc") as rpc:
        result = rpc.aria2.addUri([link], options)
        print(result)
def getLinkOfPage(link):
    """Fetch the page at ``link`` and return the URLs of its child entries.

    The page is presumably a Caddy file-browser listing (links mentioning
    ``caddyserver.com`` are filtered out) -- confirm against the server used.

    Args:
        link: URL of the page to fetch.

    Returns:
        A list of URLs, each formed by appending one relative href found on
        the page to ``link``. Hrefs are skipped when they contain
        ``caddyserver.com``, start with ``/``, ``?`` or ``..``, or refer to
        the page itself (empty or a bare ``./``).
    """
    print(f'access {urllib.parse.unquote(link)}')
    r = session.get(link)
    # URLs always use '/', so join by hand instead of os.path.join, whose
    # separator depends on the host operating system.
    base = link if link.endswith('/') else link + '/'
    links = []
    for href in r.html.links:
        if 'caddyserver.com' in href:
            continue
        # Absolute paths, query-only links and parent links are not children.
        if href.startswith(('/', '?', '..')):
            continue
        if href.startswith('./'):
            href = href[2:]
        if not href:
            # A self-reference would make the caller crawl this page again.
            continue
        links.append(base + href)
    return links
def walkUrl(url):
    """Collect every file URL reachable from the listing at ``url``.

    Traverses iteratively with an explicit stack. A link ending in ``/`` is
    treated as a sub-directory and crawled in turn; any other link is
    recorded as a file.

    Args:
        url: URL of the starting page.

    Returns:
        A set of file URLs found under ``url``.
    """
    files = set()
    # Remember crawled pages so a listing that links back to an already
    # visited directory cannot cause an endless loop.
    visited = set()
    stack = [url]
    while stack:
        current = stack.pop()
        if current in visited:
            continue
        visited.add(current)
        for child in getLinkOfPage(current):
            if child.endswith('/'):
                stack.append(child)
            else:
                files.add(child)
    return files
def getFolderName(url):
    """Return the last non-empty ``/``-separated segment of ``url``, unquoted.

    Returns ``None`` when ``url`` has no non-empty segment.
    """
    segments = (part for part in reversed(url.split('/')) if part != "")
    last = next(segments, None)
    if last is None:
        return None
    return urllib.parse.unquote(last)
def main():
    """Queue every file under each folder URL given on the command line.

    For each argument, the folder is crawled with ``walkUrl`` and each file
    found is passed to ``addLink``. The output name is the folder's own name
    (from ``getFolderName``) joined with the file's unquoted path relative
    to the folder URL. Does nothing when no arguments are given.
    """
    # Slicing an argv that holds only the script name yields an empty list,
    # so no separate argument-count check is needed.
    for folder in sys.argv[1:]:
        files = walkUrl(folder)
        base = getFolderName(folder)
        for file_url in files:
            relative = urllib.parse.unquote(os.path.relpath(file_url, folder))
            name = os.path.join(base, relative)
            print('add', urllib.parse.unquote(file_url))
            print(name)
            addLink(file_url, name)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment