|
#!/usr/bin/env python |
|
# -*- coding: UTF-8 -*- |
|
from pybots import HTTPBot |
|
from tinyscript import * |
|
|
|
|
|
# Script metadata and display settings.
# NOTE(review): these names are presumably read by tinyscript's initialize() to build the help
#  message and banner - confirm against the tinyscript documentation.
__author__ = "Alexandre D'Hondt"
__version__ = "1.0"
__copyright__ = "A. D'Hondt", 2020
__license__ = "gpl-3.0"
__doc__ = """
This tool helps download videos of the courses from PentesterAcademy. It can also compress the videos.
"""
# each example is an argument string: quoted session cookie, course ID(s), then options
__examples__ = [
    '"__strip_mid=...; SACSID=~..." 30 --compress',
    '"__strip_mid=...; SACSID=~..." 10 11 12 --crf 20',
]

BANNER_FONT = "standard"
BANNER_STYLE = dict(fgcolor="lolcat")
SCRIPTNAME_FORMAT = "none"
|
|
|
|
|
class PTADownloader(HTTPBot):
    """ Bot that downloads the material (videos, archives, ...) of PentesterAcademy courses.

    The bot is bound to https://www.pentesteracademy.com and relies on a session cookie for authentication.
    NOTE(review): `get`, `soup`, `response`, `logger` and `_set_cookie` are assumed to be provided by HTTPBot,
     with `get` refreshing `soup` and `response` - confirm against pybots.

    :ivar total:      number of plays allowed for the account (None until count() succeeds)
    :ivar downloaded: number of plays already consumed (None until count() succeeds)
    """

    def __init__(self, cookie, *args, **kwargs):
        """ Create the bot and register the session cookie.

        :param cookie: session cookie string
        :param args:   extra positional arguments forwarded to HTTPBot
        :param kwargs: extra keyword arguments forwarded to HTTPBot
        """
        super(PTADownloader, self).__init__("https://www.pentesteracademy.com", *args, **kwargs)
        self._set_cookie(cookie)
        # play quota ; stays None until count() manages to parse it, so that download() can be called safely
        #  even when count() was not called or could not find the figures
        self.total = None
        self.downloaded = None

    def count(self):
        """ Parse the account logs page to refresh `total` and `downloaded`, then log the remaining quota. """
        self.get("/members?options=accountlogs")
        for p in self.soup.find("div", {'id': "content"}).find_all("p"):
            text = p.text
            if text.startswith("Total plays allowed based on subscription date "):
                # the allowed total is the second bold element of this paragraph
                self.total = int(p.find_all("b")[1].text)
            elif text.startswith("Total plays till date = "):
                self.downloaded = int(p.find("b").text)
        if self.total is None or self.downloaded is None:
            self.logger.warning("Could not determine the number of downloads remaining")
            return
        self.logger.warning("%d of %d downloads remaining" % (self.total - self.downloaded, self.total))

    @staticmethod
    def _hrefs(containers):
        """ Yield the href of every anchor found in the given tags, skipping anchors that have none. """
        for container in containers:
            for a in container.find_all("a"):
                href = a.attrs.get('href')
                if href:
                    yield href

    def download(self, course_id, crf=0):
        """ Download every file linked from the video pages of a course into "<course_id>_<slugified title>".

        Files already present in the target folder are skipped.

        :param course_id: integer identifier of the course
        :param crf:       compression factor handed to process() ; 0 disables compression
        :raises IndexError: if the course page holds no "static-banner" section to read the title from
        """
        self.get("/course?id=%d" % course_id)
        title = self.soup.find_all("section", {'class': "static-banner"})[0].text.strip()
        self.logger.info("Course: " + title)
        root = ts.Path("%d_%s" % (course_id, ts.slugify(title)), create=True)
        # first, collect links from the page of the selected course
        video_pages = list(self._hrefs(self.soup.find_all("div", {'class': "media"})))
        # the quota is only known once count() has succeeded
        if self.total is not None and self.downloaded is not None:
            needed, remaining = len(video_pages), self.total - self.downloaded
            if needed > remaining:
                self.logger.warning("You may have too few downloads remaining to get the full course (remaining: %d ; "
                                    "needed: %d)" % (remaining, needed))
        # then, process download links
        processed, clashes = set(), {}
        for video_page in video_pages:
            self.get(video_page)
            # collect unique download links (MP4, ZIP archives with code, examples, etc), keeping page order
            dlinks = []
            for href in self._hrefs(self.soup.find_all("div", {'class': "active"})):
                if href not in dlinks:
                    dlinks.append(href)
            # now, process them, resolving clash names (e.g. code.zip for multiple modules)
            for dlink in dlinks:
                self.get(dlink, allow_redirects=False)
                try:
                    target = self.response.headers['Location']
                except KeyError:
                    continue  # this happens with non-download links
                url = ts.urlparse(target)
                # the query string is left out so that a same file is only fetched once
                u = "%s://%s%s" % (url.scheme, url.netloc, url.path)
                if u in processed:
                    continue
                processed.add(u)
                basename = url.path.split("/")[-1]
                # the counter is keyed on the ORIGINAL name ; keying it on the suffixed name raised a KeyError
                #  as soon as a second file with the same name was met
                seen = clashes.get(basename, 0)
                clashes[basename] = seen + 1
                filename = basename
                if seen > 0:
                    stem, ext = os.path.splitext(basename)
                    filename = "%s-%i%s" % (stem, seen, ext)
                fp = root.joinpath(filename)
                if not fp.exists():
                    PTADownloader.process(target, fp, crf)
                else:
                    self.logger.warning("Skipping %s..." % filename)

    @staticmethod
    def execute(cmd):
        """ Log the command at debug level, then run it through tinyscript.

        :param cmd: command as a list of string arguments
        """
        logger.debug(" ".join(cmd))
        ts.execute(cmd)

    @staticmethod
    def process(url, path, crf=0):
        """ Download a file with wget and, for MP4 videos, optionally re-encode it with ffmpeg (libx265).

        :param url:  direct download URL
        :param path: destination path object (tinyscript Path)
        :param crf:  compression factor passed to ffmpeg's -crf option ; 0 disables compression
        """
        logger.info("Getting '%s'..." % path.filename)
        PTADownloader.execute(["wget", "-O", str(path), url])
        if crf > 0 and path.suffix == ".mp4":
            logger.info("Compressing '%s'..." % path.filename)
            tmp = ts.Path(path.dirname, ".tmp.mp4")
            # drop any leftover from an interrupted run so that it cannot be mistaken for fresh output
            if tmp.exists():
                tmp.remove()
            PTADownloader.execute(["ffmpeg", "-i", str(path), "-vcodec", "libx265", "-crf", str(crf), str(tmp)])
            # never delete the downloaded video when ffmpeg did not produce anything
            if not tmp.exists():
                logger.error("Compression of '%s' failed, keeping the original file" % path.filename)
                return
            path.remove()
            tmp.rename(path.absolute())
|
|
|
|
|
if __name__ == '__main__':
    # command-line interface: a session cookie followed by one or more course identifiers
    parser.add_argument("cookie", help="session cookie")
    parser.add_argument("cid", nargs="+", type=ts.pos_int, help="course ID's")
    parser.add_argument("-c", "--compress", action="store_true", help="compress videos after download")
    parser.add_argument("--crf", type=ts.pos_int, default=30, help="compression factor")
    initialize()
    # compression is disabled (factor 0) only when --compress is absent AND --crf was left at its default of 30 ;
    #  in other words, giving a non-default --crf enables compression on its own
    compression = 0 if not args.compress and args.crf == 30 else args.crf
    downloader = PTADownloader(args.cookie, args.verbose)
    for course_id in args.cid:
        # refresh the remaining-downloads figures before each course
        downloader.count()
        downloader.download(course_id, compression)