Skip to content

Instantly share code, notes, and snippets.

@pandada8
Last active March 6, 2021 00:43
Show Gist options
  • Save pandada8/ae212014b18bb4620bb0f684921147d6 to your computer and use it in GitHub Desktop.
Save pandada8/ae212014b18bb4620bb0f684921147d6 to your computer and use it in GitHub Desktop.
dumpfolder
import requests
from requests_html import HTMLSession
import os
import sys
import urllib
from xmlrpc.client import ServerProxy
# Module-level HTTP session; getLinkOfPage() issues its requests through it.
session = HTMLSession()
def addLink(link, name):
    """Submit ``link`` to the aria2 XML-RPC endpoint on localhost:6800.

    ``name`` is forwarded as the ``out`` option of ``aria2.addUri``.
    The value returned by the RPC call is printed to stdout.
    """
    options = {"out": name}
    with ServerProxy("http://localhost:6800/rpc") as rpc:
        result = rpc.aria2.addUri([link], options)
    print(result)
def getLinkOfPage(link):
    """Fetch the page at ``link`` and return the URLs of its child entries.

    Every href found on the page is examined. Hrefs that contain
    ``caddyserver.com`` (presumably the server's footer link -- confirm),
    or that start with ``/``, ``?`` or ``..`` are skipped, so only entries
    relative to the current page are kept. A leading ``./`` is dropped.

    Args:
        link: URL of the page to fetch.

    Returns:
        A list of URLs, each formed by appending a kept href to ``link``.
    """
    print(f'access {urllib.parse.unquote(link)}')
    r = session.get(link)
    # URLs always use '/' as separator; os.path.join would emit '\\' on
    # Windows, so the prefix is built explicitly instead.
    base = link if link.endswith('/') else link + '/'
    links = []
    for href in r.html.links:
        if 'caddyserver.com' in href:
            continue
        # Absolute paths, query-only links and parent references would
        # lead outside of (or back to) the current listing.
        if href.startswith(('/', '?', '..')):
            continue
        if href.startswith('./'):
            href = href[2:]
        links.append(base + href)
    return links
def walkUrl(url):
    """Collect every file URL reachable below ``url``.

    Pages are explored depth-first with an explicit stack. A link ending
    in ``/`` is treated as a sub-directory and explored in turn; any other
    link is recorded as a file.

    Args:
        url: URL of the directory listing to start from.

    Returns:
        A set of file URLs.
    """
    files = set()
    # A page may link back to itself (e.g. an href of './'); remembering
    # what was already fetched keeps the walk from looping forever.
    visited = set()
    stack = [url]
    while stack:
        current = stack.pop()
        if current in visited:
            continue
        visited.add(current)
        for child in getLinkOfPage(current):
            if child.endswith('/'):
                stack.append(child)
            else:
                files.add(child)
    return files
def getFolderName(url):
    """Return the last non-empty ``/``-separated segment of ``url``.

    The segment is percent-decoded before being returned. When ``url``
    has no non-empty segment at all, ``None`` is returned.
    """
    segments = (part for part in reversed(url.split('/')) if part)
    last = next(segments, None)
    if last is None:
        return None
    return urllib.parse.unquote(last)
def main():
    """Queue every file below each URL given on the command line.

    For each command-line argument, the directory tree is walked with
    ``walkUrl`` and every file found is handed to ``addLink``. The output
    name is the folder name of the argument joined with the file's
    percent-decoded path relative to that argument.
    """
    if len(sys.argv) <= 1:
        # Nothing to do; tell the user how to call the script instead of
        # exiting silently.
        print(f'usage: {sys.argv[0]} URL [URL ...]', file=sys.stderr)
        return
    for folder in sys.argv[1:]:
        files = walkUrl(folder)
        base = getFolderName(folder)
        for i in files:
            name = os.path.join(base, urllib.parse.unquote(os.path.relpath(i, folder)))
            print('add', urllib.parse.unquote(i))
            print(name)
            addLink(i, name)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment