dhondta · April 7, 2024 18:40
diff --git a/README.md b/README.md
diff --git a/pta-downloader.py b/pta-downloader.py
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 from pybots import HTTPBot
 from tinyscript import *


 # Script metadata.
 # NOTE(review): presumably read by tinyscript's initialize() to build the help message - confirm against the
 #  tinyscript documentation.
 __author__ = "Alexandre D'Hondt"
 __version__ = "1.0"
 __copyright__ = ("A. D'Hondt", 2020)
 __license__ = "gpl-3.0"
 __doc__ = """
 This tool helps download videos of the courses from PentesterAcademy. It can also compress the videos.
 """
 # Example argument lines: the quoted session cookie string, then one or more course IDs, then the options.
 __examples__ = [
    "\"__strip_mid=...; SACSID=~...\" 30 --compress",
    "\"__strip_mid=...; SACSID=~...\" 10 11 12 --crf 20",
 ]

 # NOTE(review): these look like tinyscript display settings (banner font/style, script name rendering) - confirm.
 BANNER_FONT  = "standard"
 BANNER_STYLE = {'fgcolor': "lolcat"}
 SCRIPTNAME_FORMAT = "none"


 class PTADownloader(HTTPBot):
    """ Bot that downloads the files attached to PentesterAcademy courses, optionally compressing the MP4 videos.

    The get() method and the soup, response, logger and _set_cookie members used below are not defined in this class;
     they are expected to be provided by HTTPBot.
    """
    def __init__(self, cookie, *args, **kwargs):
        """ Bind the bot to the PentesterAcademy website and install the session cookie.

        :param cookie: session cookie string, handed over to _set_cookie()
        :param args:   extra positional arguments forwarded to HTTPBot
        :param kwargs: extra keyword arguments forwarded to HTTPBot
        """
        super(PTADownloader, self).__init__("https://www.pentesteracademy.com", *args, **kwargs)
        self._set_cookie(cookie)

    def count(self):
        """ Read the account logs page and log how many downloads remain.

        Sets self.total (plays allowed) and self.downloaded (plays consumed) from the bold figures of the matching
         paragraphs. NOTE: each attribute is only assigned when its paragraph is present on the page.
        """
        self.get("/members?options=accountlogs")
        for p in self.soup.find("div", {'id': "content"}).find_all("p"):
            if p.text.startswith("Total plays allowed based on subscription date "):
                # the figure of interest is the second bold element of this paragraph
                self.total = int(p.find_all("b")[1].text)
            if p.text.startswith("Total plays till date = "):
                self.downloaded = int(p.find("b").text)
        self.logger.warning("%d of %d downloads remaining" % (self.total - self.downloaded, self.total))

    def download(self, course_id, crf=0):
        """ Download every file linked from the pages of a course into a "<course_id>_<slugified title>" folder.

        count() must have been called before, as self.total and self.downloaded are read here.

        :param course_id: numerical identifier of the course
        :param crf:       compression factor passed to process(); 0 disables the compression
        """
        self.get("/course?id=%d" % course_id)
        title = self.soup.find_all("section", {'class': "static-banner"})[0].text.strip()
        self.logger.info("Course: " + title)
        root = ts.Path("%d_%s" % (course_id, ts.slugify(title)), create=True)
        # first, collect links from the page of the selected course
        video_pages = []
        for media in self.soup.find_all("div", {'class': "media"}):
            for a in media.find_all("a"):
                video_pages.append(a.attrs['href'])
        needed, remaining = len(video_pages), self.total - self.downloaded
        if needed > remaining:
            self.logger.warning("You may have too few downloads remaining to get the full course (remaining: %d ; "
                                "needed: %d)" % (remaining, needed))
        # then, process download links
        processed, counts = set(), {}
        for video_page in video_pages:
            self.get(video_page)
            # collect unique download links (MP4, ZIP archives with code, examples, etc), keeping the page order
            dlinks = []
            for div in self.soup.find_all("div", {'class': "active"}):
                for a in div.find_all("a"):
                    href = a.attrs.get('href')
                    # anchors without a target cannot be downloaded ; skip them instead of crashing
                    if href and href not in dlinks:
                        dlinks.append(href)
            # now, process them, resolving clash names (e.g. code.zip for multiple modules)
            for dlink in dlinks:
                self.get(dlink, allow_redirects=False)
                try:
                    dlink = self.response.headers['Location']
                except KeyError:
                    continue  # this happens with non-download links
                url = ts.urlparse(dlink)
                # the query string is dropped so that the same file is recognized across different links
                u = "%s://%s%s" % (url.scheme, url.netloc, url.path)
                if u in processed:
                    continue
                processed.add(u)
                filename = PTADownloader._unique_filename(url.path.split("/")[-1], counts)
                fp = root.joinpath(filename)
                if not fp.exists():
                    PTADownloader.process(dlink, fp, crf)
                else:
                    self.logger.warning("Skipping %s..." % filename)

    @staticmethod
    def _unique_filename(filename, counts):
        """ Return `filename` for its first occurrence, then "<name>-<n><ext>" for the n-th repetition.

        The counter is always kept under the ORIGINAL name ; indexing it with the renamed file (which was never
         registered) is what raised a KeyError on the second clash-prone file.

        :param filename: base name extracted from the download URL
        :param counts:   dictionary mapping original base names to their number of occurrences (updated in place)
        """
        seen = counts.get(filename, 0)
        counts[filename] = seen + 1
        if seen == 0:
            return filename
        n, e = os.path.splitext(filename)
        return "%s-%i%s" % (n, seen, e)

    @staticmethod
    def execute(cmd):
        """ Log and run a command given as a list of tokens.

        :param cmd: list of command tokens
        :return:    return code of the command
        """
        logger.debug(" ".join(cmd))
        _, err, rc = ts.execute(cmd, returncode=True)
        if rc != 0:
            logger.debug(ensure_str(err))
        return rc

    @staticmethod
    def process(url, path, crf=0):
        """ Download `url` to `path` with wget and, for MP4 files when crf > 0, re-encode it in place with ffmpeg.

        On failure of either tool, an error is logged and the function returns without raising.

        :param url:  resolved download URL
        :param path: destination ts.Path
        :param crf:  compression factor given to ffmpeg's -crf option ; 0 disables the compression
        """
        logger.info("Getting '%s'..." % path.filename)
        if PTADownloader.execute(["wget", "-O", str(path), url]) != 0:
            logger.error("Failed to download '%s'" % path.filename)
            # wget -O creates the output file even when the transfer fails ; drop it, otherwise download() would
            #  skip this file on the next run
            path.remove(False)
            return
        if crf > 0 and path.suffix == ".mp4":
            logger.info("Compressing '%s'..." % path.filename)
            TMP = ts.Path(path.dirname, ".tmp.mp4")
            # a leftover from an interrupted run would make ffmpeg stop on its overwrite confirmation
            TMP.remove(False)
            if PTADownloader.execute(["ffmpeg", "-i", str(path), "-vcodec", "libx265", "-crf", str(crf),
                                      str(TMP)]) != 0:
                # keep the uncompressed download rather than deleting it and failing on the rename
                logger.error("Failed to compress '%s'" % path.filename)
                TMP.remove(False)
                return
            path.remove()
            TMP.rename(path.absolute())


 if __name__ == '__main__':
    # command line: the session cookie, one or more course identifiers and the compression options
    parser.add_argument("cookie", help="session cookie")
    parser.add_argument("cid", nargs="+", type=ts.pos_int, help="course ID's")
    parser.add_argument("-c", "--compress", action="store_true", help="compress videos after download")
    parser.add_argument("--crf", type=ts.pos_int, default=30, help="compression factor")
    initialize()
    # compression is requested either explicitly (--compress) or implicitly by a non-default --crf value
    if args.compress or args.crf != 30:
        crf = args.crf
    else:
        crf = 0
    ptad = PTADownloader(args.cookie, args.verbose)
    for cid in args.cid:
        # the counters are refreshed before each course as the previous one consumed downloads
        ptad.count()
        ptad.download(cid, crf)
diff --git a/requirements.txt b/requirements.txt
 tinyscript>=1.23.15
 pybots
diff --git a/video-compressor.py b/video-compressor.py
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 from tinyscript import *


 # Script metadata.
 # NOTE(review): presumably read by tinyscript's initialize() to build the help message - confirm against the
 #  tinyscript documentation.
 __author__ = "Alexandre D'Hondt"
 __version__ = "1.6"
 __copyright__ = ("A. D'Hondt", 2020)
 __license__ = "gpl-3.0"
 __doc__ = """
 This tool is a simple wrapper for taking a folder and running ffmpeg on the videos found in order to compress them.
 """
 # Example argument lines: the folder to be processed, optionally followed by the --crf option.
 __examples__ = ["my-folder", "my-folder --crf 20"]


 # NOTE(review): these look like tinyscript display settings (banner font/style, script name rendering) - confirm.
 BANNER_FONT  = "standard"
 BANNER_STYLE = {'fgcolor': "lolcat"}
 SCRIPTNAME_FORMAT = "none"

 # extensions of the files considered as videos
 VIDEO_EXT    = [".avi", ".m4v", ".mkv", ".mov", ".mp4", ".wmv"]


 def _q(path):
    """ Return `path` between double quotes, escaped so that shlex.split() gives it back as one single token. """
    # inside double quotes, shlex only treats the backslash and the double quote specially
    return "\"%s\"" % str(path).replace("\\", "\\\\").replace("\"", "\\\"")


 def _m4v_commands(v1, v2, crf):
    """ Build the commands for .m4v files: encode to a temporary MP4, convert it to the target, then clean up.

    :param v1:  path of the source video (must provide .dirname.joinpath())
    :param v2:  path of the output video
    :param crf: compression factor (integer) for ffmpeg's -crf option
    """
    # every occurrence of the intermediate file is quoted, so that folders with spaces do not split it in pieces
    tmp = _q(v1.dirname.joinpath(".tmp.mp4"))
    return ["ffmpeg -i %s -vcodec libx265 -vf scale=720:-1 -crf %d %s" % (_q(v1), crf, tmp),
            "ffmpeg -i %s %s" % (tmp, _q(v2)),
            "rm -f %s" % tmp]


 # compression recipes per file extension ; each entry takes (source, output, crf) and returns the list of command
 #  lines (as strings, to be tokenized with shlex.split) to be run in sequence
 CMD_COMPR    = {
    'default': lambda v1, v2, crf: ["ffmpeg -i %s -vcodec libx265 -crf %d %s" % (_q(v1), crf, _q(v2))],
    '.m4v':    _m4v_commands,
    '.wmv':    lambda v1, v2, crf: ["ffmpeg -i %s -vcodec wmv2 -crf %d %s" % (_q(v1), crf, _q(v2))],
 }


 def compress(path, crf=0):
    """ Compress a video in place by running the commands of CMD_COMPR matching its extension.

    The output is written to a hidden ".tmp<ext>" file in the same folder, which replaces the original only when
     every command succeeded. On failure, an error is logged and the original video is left untouched.

    :param path: ts.Path of the video to be compressed
    :param crf:  compression factor given to the command builders
    """
    logger.info("Compressing '%s'..." % path)
    TMP = ts.Path(path.dirname, ".tmp" + path.suffix)
    TMP_MP4 = ts.Path(path.dirname, ".tmp.mp4")  # intermediate file of the .m4v recipe
    # start from a clean state, leftovers of an interrupted run would get in the way of ffmpeg
    TMP.remove(False)
    TMP_MP4.remove(False)
    for cmd in CMD_COMPR.get(path.suffix, CMD_COMPR['default'])(path, TMP, crf):
        logger.debug(cmd)
        _, err, rc = ts.execute(shlex.split(cmd), returncode=True)
        # any non-zero code is a failure ; NOTE(review): a process killed by a signal is presumably reported with a
        #  negative code (subprocess convention), which "rc > 0" let through and caused the original to be deleted
        if rc != 0:
            logger.debug(ensure_str(err))
            logger.error("Failed to compress %s" % path)
            # do not leave partial outputs behind
            TMP.remove(False)
            TMP_MP4.remove(False)
            return
    path.remove()
    TMP.rename(path.absolute())


 if __name__ == '__main__':
    # command line: the folder to be walked and the compression factor
    parser.add_argument("folder", help="folder where the videos to be handled are")
    parser.add_argument("--crf", type=ts.pos_int, default=30, help="compression factor")
    initialize()

    def filter_videos(p):
        """ Keep files with a video extension, leaving out the ".tmp.*" work files and the "._*" entries. """
        if p.suffix not in VIDEO_EXT:
            return False
        return not (p.stem == ".tmp" or p.stem.startswith("._"))

    videos_root = ts.Path(args.folder)
    for fp in videos_root.walk(filter_func=filter_videos):
        compress(fp, args.crf)
	#!/usr/bin/env python
	# -*- coding: UTF-8 -*-
	from pybots import HTTPBot
	from tinyscript import *


	# Script metadata.
	# NOTE(review): presumably read by tinyscript's initialize() to build the help message - confirm against the
	#  tinyscript documentation.
	__author__ = "Alexandre D'Hondt"
	__version__ = "1.0"
	__copyright__ = ("A. D'Hondt", 2020)
	__license__ = "gpl-3.0"
	__doc__ = """
	This tool helps download videos of the courses from PentesterAcademy. It can also compress the videos.
	"""
	# Example argument lines: the quoted session cookie string, then one or more course IDs, then the options.
	__examples__ = [
	"\"__strip_mid=...; SACSID=~...\" 30 --compress",
	"\"__strip_mid=...; SACSID=~...\" 10 11 12 --crf 20",
	]

	# NOTE(review): these look like tinyscript display settings (banner font/style, script name rendering) - confirm.
	BANNER_FONT = "standard"
	BANNER_STYLE = {'fgcolor': "lolcat"}
	SCRIPTNAME_FORMAT = "none"


	class PTADownloader(HTTPBot):
	    """ Bot that downloads the files attached to PentesterAcademy courses, optionally compressing the MP4 videos.

	    The get() method and the soup, response, logger and _set_cookie members used below are not defined in this
	     class; they are expected to be provided by HTTPBot.
	    """
	    def __init__(self, cookie, *args, **kwargs):
	        """ Bind the bot to the PentesterAcademy website and install the session cookie.

	        :param cookie: session cookie string, handed over to _set_cookie()
	        :param args:   extra positional arguments forwarded to HTTPBot
	        :param kwargs: extra keyword arguments forwarded to HTTPBot
	        """
	        # the star-arguments are restored here ; the paste had lost asterisks ("args, *kwargs")
	        super(PTADownloader, self).__init__("https://www.pentesteracademy.com", *args, **kwargs)
	        self._set_cookie(cookie)

	    def count(self):
	        """ Read the account logs page and log how many downloads remain.

	        Sets self.total (plays allowed) and self.downloaded (plays consumed) from the bold figures of the matching
	         paragraphs. NOTE: each attribute is only assigned when its paragraph is present on the page.
	        """
	        self.get("/members?options=accountlogs")
	        for p in self.soup.find("div", {'id': "content"}).find_all("p"):
	            if p.text.startswith("Total plays allowed based on subscription date "):
	                # the figure of interest is the second bold element of this paragraph
	                self.total = int(p.find_all("b")[1].text)
	            if p.text.startswith("Total plays till date = "):
	                self.downloaded = int(p.find("b").text)
	        self.logger.warning("%d of %d downloads remaining" % (self.total - self.downloaded, self.total))

	    def download(self, course_id, crf=0):
	        """ Download every file linked from the pages of a course into a "<course_id>_<slugified title>" folder.

	        count() must have been called before, as self.total and self.downloaded are read here.

	        :param course_id: numerical identifier of the course
	        :param crf:       compression factor passed to process(); 0 disables the compression
	        """
	        self.get("/course?id=%d" % course_id)
	        title = self.soup.find_all("section", {'class': "static-banner"})[0].text.strip()
	        self.logger.info("Course: " + title)
	        root = ts.Path("%d_%s" % (course_id, ts.slugify(title)), create=True)
	        # first, collect links from the page of the selected course
	        video_pages = []
	        for media in self.soup.find_all("div", {'class': "media"}):
	            for a in media.find_all("a"):
	                video_pages.append(a.attrs['href'])
	        needed, remaining = len(video_pages), self.total - self.downloaded
	        if needed > remaining:
	            self.logger.warning("You may have too few downloads remaining to get the full course (remaining: %d ; "
	                                "needed: %d)" % (remaining, needed))
	        # then, process download links
	        processed, counts = set(), {}
	        for video_page in video_pages:
	            self.get(video_page)
	            # collect unique download links (MP4, ZIP archives with code, examples, etc), keeping the page order
	            dlinks = []
	            for div in self.soup.find_all("div", {'class': "active"}):
	                for a in div.find_all("a"):
	                    href = a.attrs.get('href')
	                    # anchors without a target cannot be downloaded ; skip them instead of crashing
	                    if href and href not in dlinks:
	                        dlinks.append(href)
	            # now, process them, resolving clash names (e.g. code.zip for multiple modules)
	            for dlink in dlinks:
	                self.get(dlink, allow_redirects=False)
	                try:
	                    dlink = self.response.headers['Location']
	                except KeyError:
	                    continue  # this happens with non-download links
	                url = ts.urlparse(dlink)
	                # the query string is dropped so that the same file is recognized across different links
	                u = "%s://%s%s" % (url.scheme, url.netloc, url.path)
	                if u in processed:
	                    continue
	                processed.add(u)
	                filename = PTADownloader._unique_filename(url.path.split("/")[-1], counts)
	                fp = root.joinpath(filename)
	                if not fp.exists():
	                    PTADownloader.process(dlink, fp, crf)
	                else:
	                    self.logger.warning("Skipping %s..." % filename)

	    @staticmethod
	    def _unique_filename(filename, counts):
	        """ Return `filename` for its first occurrence, then "<name>-<n><ext>" for the n-th repetition.

	        The counter is always kept under the ORIGINAL name ; indexing it with the renamed file (which was never
	         registered) is what raised a KeyError on the second clash-prone file.

	        :param filename: base name extracted from the download URL
	        :param counts:   dictionary mapping original base names to their number of occurrences (updated in place)
	        """
	        seen = counts.get(filename, 0)
	        counts[filename] = seen + 1
	        if seen == 0:
	            return filename
	        n, e = os.path.splitext(filename)
	        return "%s-%i%s" % (n, seen, e)

	    @staticmethod
	    def execute(cmd):
	        """ Log and run a command given as a list of tokens.

	        :param cmd: list of command tokens
	        :return:    return code of the command
	        """
	        logger.debug(" ".join(cmd))
	        _, err, rc = ts.execute(cmd, returncode=True)
	        if rc != 0:
	            logger.debug(ensure_str(err))
	        return rc

	    @staticmethod
	    def process(url, path, crf=0):
	        """ Download `url` to `path` with wget and, for MP4 files when crf > 0, re-encode it in place with ffmpeg.

	        On failure of either tool, an error is logged and the function returns without raising.

	        :param url:  resolved download URL
	        :param path: destination ts.Path
	        :param crf:  compression factor given to ffmpeg's -crf option ; 0 disables the compression
	        """
	        logger.info("Getting '%s'..." % path.filename)
	        if PTADownloader.execute(["wget", "-O", str(path), url]) != 0:
	            logger.error("Failed to download '%s'" % path.filename)
	            # wget -O creates the output file even when the transfer fails ; drop it, otherwise download() would
	            #  skip this file on the next run
	            path.remove(False)
	            return
	        if crf > 0 and path.suffix == ".mp4":
	            logger.info("Compressing '%s'..." % path.filename)
	            TMP = ts.Path(path.dirname, ".tmp.mp4")
	            # a leftover from an interrupted run would make ffmpeg stop on its overwrite confirmation
	            TMP.remove(False)
	            if PTADownloader.execute(["ffmpeg", "-i", str(path), "-vcodec", "libx265", "-crf", str(crf),
	                                      str(TMP)]) != 0:
	                # keep the uncompressed download rather than deleting it and failing on the rename
	                logger.error("Failed to compress '%s'" % path.filename)
	                TMP.remove(False)
	                return
	            path.remove()
	            TMP.rename(path.absolute())


	if __name__ == '__main__':
	    # command line: the session cookie, one or more course identifiers and the compression options
	    parser.add_argument("cookie", help="session cookie")
	    parser.add_argument("cid", nargs="+", type=ts.pos_int, help="course ID's")
	    parser.add_argument("-c", "--compress", action="store_true", help="compress videos after download")
	    parser.add_argument("--crf", type=ts.pos_int, default=30, help="compression factor")
	    initialize()
	    ptad = PTADownloader(args.cookie, args.verbose)
	    for cid in args.cid:
	        # the counters are refreshed before each course as the previous one consumed downloads
	        ptad.count()
	        # compression is requested either explicitly (--compress) or implicitly by a non-default --crf value
	        ptad.download(cid, args.crf if args.compress or args.crf != 30 else 0)