Friday Night Funkin' / HaxeFlixel HTML5 game downloader script
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Friday Night Funkin' downloader
I made this script to automate downloading of the FNF HTML5 version hosted on
Newgrounds, as well as bootlegs and mods hosted by other people. Given an empty
directory and the URL to index.html, this script downloads all assets and files
required to play the game fully offline (no patching is done so FNF will retain
Newgrounds functionality). Running downloaded games in a browser still requires
a local web server, but you can easily use Python itself as a server by
executing "python3 -m http.server 8000" from the same folder as index.html.
This script is standalone and only depends on the requests library you can
download using pip ("pip3 install requests"). Feel free to copy and use it in
your mod's build pipeline or anywhere else (but please respect the license of
anything you download). It should also work with other HTML5 games built with
Lime/OpenFL/HaxeFlixel as long as they use the default asset loader.
"""
__version__ = "0.1.0"
__author__ = "spicyjpeg"
import sys, os, re, json, math, logging
from time import perf_counter
from binascii import a2b_base64
from ast import literal_eval
from itertools import accumulate
from argparse import ArgumentParser, FileType
from requests import Session
from urllib.parse import unquote, urlparse, urlunparse
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.7113.93 Safari/537.36"
DEFAULT_URLS = {
"fnf": "https://uploads.ungrounded.net/alternate/1528000/1528775_alternate_113347_r88.zip",
"fnf_prototype": "https://v6p9d9t4.ssl.hwcdn.net/html/2876359-359162",
"flappybalt": "https://demos.haxeflixel.com/html5/Flappybalt"
}
## Utilities
def resolvePath(path):
"""
Resolves a path, removing "." and ".." as well as trailing slashes. This
always uses slashes, does not reference the filesystem at all and is safe
to use for URL paths.
"""
_path = path.split("/")
output = []
for component in _path:
if (not component) or (component == "."):
continue
elif component == "..":
output.pop()
else:
output.append(component)
return "/".join(output)
def ensureParentDir(path):
"""
Ensures a file with the given path can be saved by creating all parent
directories. Returns the OS-specific normalized (i.e. using backslashes on
Windows) path.
"""
_path = path.split("/")
for parent in accumulate(_path[:-1], os.path.join):
if parent and not os.path.isdir(parent):
os.mkdir(parent)
return os.path.join(*_path)
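# For example, ensureParentDir("assets/images/icon.png") (a hypothetical path)
# creates "assets" and "assets/images" if they are missing, then returns
# "assets/images/icon.png" ("assets\\images\\icon.png" on Windows).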
def cleanURL(url):
"""
Removes query parameters, hashes, trailing slashes and "index.html" from
any URL.
"""
scheme, hostname, path, *_ = urlparse(url)
if path.endswith(( "/", "/index.html" )):
path, _ = path.rsplit("/", 1)
return urlunparse((
scheme,
hostname,
path,
"",
"",
""
))
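# For example (made-up URL):
#
#   cleanURL("https://example.com/game/index.html?foo=1#bar")
#
# returns "https://example.com/game".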
## Haxe parser
HAXE_INT_REGEX = re.compile(r"[+-]?[0-9]+")
HAXE_FLOAT_REGEX = re.compile(r"[+-]?[0-9]*(?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?")
class HaxeParser:
"""
Simple parser class for Haxe's built-in serialization format. Almost all
basic data types are supported, with the exception of classes and dates.
https://haxe.org/manual/std-serialization-format.html
"""
    def _parseInt(self, payload, positive = False):
        """
        Parses an integer and returns a ( value, length ) tuple. Used
        internally, do not call.
        """
        match = HAXE_INT_REGEX.match(payload)
        if match is None:
            raise ValueError("expected integer value")
        digits = match.group()
        value = int(digits)
        if positive and value < 0:
            raise ValueError("expected positive integer")
        return value, len(digits)
def _parseFloat(self, payload):
"""
Parses a floating-point value and returns a ( value, length ) tuple.
Used internally, do not call.
"""
match = HAXE_FLOAT_REGEX.match(payload)
match = match.group()
return float(match), len(match)
def _parseString(self, payload, isBytes = False):
"""
Parses a string (or byte string) and returns a ( value, length ) tuple.
Used internally, do not call.
"""
length, data = payload.split(":", 1)
data = data[0:int(length)]
totalLength = len(length) + int(length) + 1
string = unquote(data)
if isBytes:
            # Pad the base64 string so its length is a multiple of 4
            # characters (required since base64 maps groups of 4 characters to
            # exactly 3 bytes); -len % 4 yields 0 when no padding is needed.
            string += "=" * (-len(data) % 4)
return a2b_base64(string), totalLength
else:
return string, totalLength
def _parseObject(self, payload):
"""
Parses an object or map and returns a ( value, length ) tuple. Used
internally, do not call.
"""
obj = {}
offset = 0
# "g" terminates generic objects, while "h" is for specialized built-in
# types (maps in this case).
while payload[offset] not in "gh":
key, keyLength = self._parse(payload[offset:])
value, valueLength = self._parse(payload[offset + keyLength:])
obj[key] = value
offset += keyLength + valueLength
# Add 1 to the length to account for the terminator.
return obj, offset + 1
def _parseList(self, payload):
"""
Parses a list or array and returns a ( value, length ) tuple. Used
internally, do not call.
"""
obj = []
offset = 0
while payload[offset] != "h":
item, length = self._parse(payload[offset:])
# Unpack any tuple (which is currently only used for consecutive
# nulls) into the array.
if type(item) is tuple:
obj.extend(item)
else:
obj.append(item)
offset += length
# Add 1 to the length to account for the terminator.
return obj, offset + 1
def _parse(self, data):
"""
Parses a type identifier followed by the respective payload and returns
a ( value, length ) tuple. This is the internal implementation of
parse().
"""
_type = data[0]
payload = data[1:]
if _type in "Rr": # Cache back-reference
# Haxe "compresses" serialized data by saving all strings and
# objects into a cache, then inserting pointers to cache entries
# instead of any duplicate value.
value, length = self._parseInt(payload, True)
value = {
"R": self.stringCache,
"r": self.objectCache
}[_type][value]
elif _type in "nzkmptf": # Constant
value = {
"n": None,
"z": 0,
#"k": math.nan,
#"m": -math.inf,
#"p": math.inf,
"t": True,
"f": False
}[_type]
length = 0
elif _type in "i:": # Integer (":" is used in integer maps)
value, length = self._parseInt(payload)
elif _type == "d": # Float
value, length = self._parseFloat(payload)
elif _type == "y": # String (same encoding as serialized string)
value, length = self._parseString(payload)
self.stringCache.append(value)
elif _type == "s": # Byte string
value, length = self._parseString(payload, True)
elif _type == "u": # Consecutive nulls (used in arrays)
value, length = self._parseInt(payload, True)
value = ( None, ) * value
elif _type in "obqM": # Object/struct/string map/integer map
value, length = self._parseObject(payload)
# TODO: is cache only used for "o" as stated in docs?
self.objectCache.append(value)
elif _type in "la": # List or array
value, length = self._parseList(payload)
#elif _type == "v": # Date (???)
#value = strptime(payload[0:19], "%Y-%m-%d %H:%M:%S")
#length = 19
else:
raise NotImplementedError(f"unsupported Haxe type '{_type}'")
# Add 1 to the length to account for the type identifier.
return value, length + 1
def parse(self, data):
"""
Deserializes a serialized string, i.e. a type identifier followed by
the respective payload. Unlike _parse(), this function also clears the
cache and performs length checking to make sure the whole string gets
parsed.
"""
self.stringCache = []
self.objectCache = []
value, length = self._parse(data)
        if length != len(data):
            raise RuntimeError("extra data at end of serialized string")
return value
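# A few hand-assembled examples based on the format documentation (not taken
# from an actual manifest) showing what the parser accepts:
#
#   parser = HaxeParser()
#   parser.parse("oy1:ai5g")    # object { a: 5 }      -> { "a": 5 }
#   parser.parse("ai1i2i3h")    # array [ 1, 2, 3 ]    -> [ 1, 2, 3 ]
#   parser.parse("ay3:fooR0h")  # cache back-reference -> [ "foo", "foo" ]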
## Downloader class
# I know I shouldn't parse HTML using regex if I don't want some demon from a
# parallel universe to kill me. But this works well enough.
HTML_SCRIPT_REGEX = re.compile(r"<\s*?script.+?src=\"(.+?)\"\s*?>", re.IGNORECASE)
HTML_FONT_REGEX = re.compile(r"url\s*\(\s*(?P<q>['\"]?)(.+?\.(?:ttf|otf|woff2?))(?P=q)\s*\)")
# OpenFL/HaxeFlixel knows which assets to load by using manifest files. FNF
# takes advantage of this by having different manifests for each week (hosted
# as separate JSON files) plus a "preload" manifest that lists the assets to
# load on startup, which is embedded in the main JS file along with paths to
# all other manifests. These regexes take care of extracting everything we need
# from that script -- I hope they don't break with updates (even though we all
# know ninjamuffin is likely never going to update anything other than the
# Full-A$$ Game).
MANIFEST_REGEX = re.compile(r"'\{\s*?\"name\"\s*?:\s*?null\s*?,.*\}'|\"\{\s*?\\\"name\\\"\s*?:\s*?null\s*?,.*\}\"")
EXT_MANIFEST_REGEX = re.compile(r"(?P<q>['\"])(manifest/.+?\.json)(?P=q)", re.IGNORECASE)
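# For reference, once the string literal is unwrapped, an embedded manifest
# blob is a JSON object along these lines (simplified and with made-up values;
# parseManifest() below only relies on the fields shown here):
#
#   {
#       "name": null,
#       "version": 2,
#       "rootPath": null,
#       "assets": "<Haxe-serialized array of asset objects>"
#   }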
class Downloader:
"""
This class handles pretty much everything.
"""
def __init__(
self,
root,
assetsOnly = False,
redownload = False,
whitelist = None,
blacklist = None
):
"""
Initializes the downloader with the given options.
"""
self.root = cleanURL(root)
self.assetsOnly = assetsOnly
self.redownload = redownload
self.whitelist = whitelist
self.blacklist = blacklist
self.session = Session()
self.parser = HaxeParser()
self.numAssets = 0
self.totalSize = 0
logging.debug(f"Root URL: {self.root}")
def _get(self, path):
"""
Performs a GET request and returns the response object. If assetsOnly
is disabled, the response's contents are also saved to a file.
"""
response = self.session.get(f"{self.root}/{path}")
        if not response.ok:
            logging.fatal(f"{path} request failed (HTTP {response.status_code})")
            sys.exit(1)
if not self.assetsOnly:
savePath = ensureParentDir(path)
with open(savePath, "wb") as outputFile:
outputFile.write(response.content)
logging.debug(f"{path} saved")
return response
def _download(self, path, expectedSize = None):
"""
        Downloads a file. Unlike _get(), this function streams the response
        body and is thus suitable for downloading large assets.
"""
savePath = ensureParentDir(path)
size = 0
# Skip this file if it has already been downloaded before (and its size
# matches the expected size).
if not self.redownload and os.path.isfile(savePath):
if os.stat(savePath).st_size == expectedSize:
logging.info(f"{path} skipped")
return
elif expectedSize is not None:
logging.warning(f"{path} found with invalid size, redownloading")
response = self.session.get(f"{self.root}/{path}", stream = True)
        if not response.ok:
            logging.error(f"{path} request failed (HTTP {response.status_code}), skipping")
            return
with open(savePath, "wb") as outputFile:
for chunk in response.iter_content(1024):
outputFile.write(chunk)
size += len(chunk)
if expectedSize is not None and size != expectedSize:
logging.warning(f"{path} size does not match expected size")
self.numAssets += 1
self.totalSize += size
logging.info(f"{path} downloaded ({size // 1024} KB)")
def _manifestAllowed(self, name):
"""
Returns whether the manifest with the specified name should be
downloaded or skipped, based on the whitelist and blacklist.
"""
*_, _name = name.rsplit("/", 1)
if self.whitelist is not None:
if _name not in self.whitelist:
return False
if self.blacklist is not None:
if _name in self.blacklist:
return False
return True
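    # For instance, running the script with -M preload (whitelist) downloads
    # only manifests whose base name is "preload", such as "manifest/preload",
    # while -E week7 (blacklist) skips "manifest/week7" and nothing else.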
def parseManifest(self, name, manifest):
"""
Downloads all assets listed in the given manifest object. Returns a
decoded copy of the manifest object.
"""
_version = manifest["version"]
if _version != 2:
logging.warning(f"({name}) Manifest version {_version} is not officially supported")
        # The root path specified in the manifest is relative to the
        # manifest's own URL. Note that this prefix only applies to paths, not
        # asset IDs (which are identical to paths in FNF, but might be
        # different in other games).
prefix = manifest.get("rootPath", None) or "."
prefix = resolvePath(f"{name}/../{prefix}")
if prefix:
prefix += "/"
# Each manifest is a JSON wrapper over a weird URL-encoded string...
# well, after digging through OpenFL/Lime sources it turns out it's
# actually the output of Haxe's built-in serializer. So I wrote a
# parser for that too.
assets = self.parser.parse(manifest["assets"])
logging.info(f"({name}) Downloading {len(assets)} assets")
for asset in assets:
_id = asset.get("id", "")
_size = asset.get("size", 0)
# Some assets seem to use "path groups", i.e. lists of multiple
# paths (even though all files in FNF which use this feature have a
# single entry in the group). Other assets (fonts) are loaded via
# CSS instead and don't have an associated path, only a CSS class.
if "className" in asset:
logging.debug(f"Skipping font asset: {_id}")
continue
if "pathGroup" in asset:
paths = asset["pathGroup"]
else:
paths = asset["path"],
for path in paths:
self._download(prefix + path, _size)
# Return a shallow copy of the manifest, with the Haxe blob replaced
# with the deserialized object.
return { **manifest, "assets": assets }
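    # For reference, a decoded asset entry (as produced by HaxeParser and
    # consumed above) looks roughly like this -- the values here are made up:
    #
    #   {
    #       "id": "assets/images/logo.png",
    #       "path": "assets/images/logo.png",
    #       "size": 12345,
    #       "type": "IMAGE"
    #   }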
def downloadJS(self, url):
"""
Downloads a JavaScript file from the given URL and searches it for
embedded and linked manifests, then calls downloadManifest() for each
manifest found. Yields ( manifestName, manifestObject ) tuples.
"""
name, _ = os.path.splitext(url)
script = self._get(url)
# Extract the manifest(s) embedded within the script itself. These
# are JSON blobs wrapped into string literals; thankfully Python
# and JS literals have almost the same syntax.
matches = MANIFEST_REGEX.findall(script.text)
for _id, literal in enumerate(matches):
# Add a suffix if multiple manifest blobs are present.
manifestName = name
if _id:
manifestName += f"_{_id}"
if not self._manifestAllowed(manifestName):
continue
logging.info(f"({name}) Found embedded manifest: {manifestName}")
manifest = literal_eval(literal)
yield manifestName, self.parseManifest(
manifestName,
json.loads(manifest)
)
# Download any external manifests whose paths are referenced in the
# script.
matches = EXT_MANIFEST_REGEX.findall(script.text)
for _, path in matches:
manifestName, _ = os.path.splitext(path)
if not self._manifestAllowed(manifestName):
continue
logging.info(f"({name}) Found linked manifest: {manifestName}")
manifest = self._get(path)
yield manifestName, self.parseManifest(
manifestName,
manifest.json()
)
def downloadHTML(self, url):
"""
        Downloads an HTML file from the given URL and searches it for fonts and
JS files, then calls downloadJS() for each script found. Yields
( manifestName, manifestObject ) tuples.
"""
logging.info("Downloading HTML, fonts and JS")
html = self._get(url)
# Download fonts. Due to "limitations" of HTML5, these are loaded
# directly from the main HTML page through inline CSS rather than via
# manifests.
if self._manifestAllowed("_fonts"):
for _, _path in HTML_FONT_REGEX.findall(html.text):
path = resolvePath(_path)
self._download(path)
# Obtain all manifest files from each script.
for _path in HTML_SCRIPT_REGEX.findall(html.text):
path = resolvePath(_path)
logging.info(f"Found linked JS: {path}")
yield from self.downloadJS(path)
## Command line crap
def parserSetup():
"""
Creates a command line parser. This is only used internally by main().
"""
names = ", ".join(DEFAULT_URLS)
parser = ArgumentParser(
description = f"Downloads a copy of any Lime/OpenFL/HaxeFlixel-based web game. The following games can be specified by name: {names}",
epilog = "Use '-E _fonts' to skip downloading font files referenced by CSS.",
add_help = False
)
toolsGroup = parser.add_argument_group("Tools")
toolsGroup.add_argument(
"-h", "--help",
action = "help",
help = "Shows this help message and exits"
)
toolsGroup.add_argument(
"-v", "--verbose",
action = "count",
help = "Shows additional debug information (2 levels)"
)
manifestGroup = parser.add_argument_group("Manifest options")
manifestGroup.add_argument(
"-M", "--manifests",
action = "append",
type = str,
help = "Only downloads assets from the specified manifest(s)",
metavar = "name"
)
manifestGroup.add_argument(
"-E", "--exclude",
action = "append",
type = str,
help = "Skips downloading assets from the specified manifest(s)",
metavar = "name"
)
manifestGroup.add_argument(
"-S", "--save",
type = FileType("wt"),
help = "Outputs a JSON file containing all decoded manifests",
metavar = "outputFile"
)
downloadGroup = parser.add_argument_group("Download options")
downloadGroup.add_argument(
"-a", "--assets-only",
action = "store_true",
help = "Disables saving parsed HTML, JS and manifest files"
)
downloadGroup.add_argument(
"-r", "--redownload",
action = "store_true",
help = "Forces redownloading existing assets"
)
downloadGroup.add_argument(
"-u", "--user-agent",
type = str,
default = USER_AGENT,
help = "User agent (browser ID) to use when downloading",
metavar = "string"
)
fileGroup = parser.add_argument_group("File paths")
fileGroup.add_argument(
"game",
type = str,
nargs = "?",
default = "fnf",
help = "Game name or URL to the game's root or index.html"
)
fileGroup.add_argument(
"outputPath",
type = str,
nargs = "?",
default = ".",
help = "Where to store all downloaded files"
)
return parser
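# Example invocations (the script file name and output directories below are
# placeholders; use whatever you saved this file as):
#
#   python3 fnf_downloader.py -v fnf ./fnf
#   python3 fnf_downloader.py -a -M preload -S manifests.json flappybalt ./out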
def loggerSetup(verbose = None):
"""
Configures logging. This is only used internally by main().
"""
if verbose:
_level = min(verbose, 2)
else:
_level = 0
logging.basicConfig(
format = "[%(funcName)-13s %(levelname)-7s] %(message)s",
level = (
logging.WARNING,
logging.INFO, # -v
logging.DEBUG # -vv
)[_level]
)
## Main
def main():
"""
Main entry point of the script.
"""
parser = parserSetup()
args = parser.parse_args()
loggerSetup(args.verbose)
logging.info(f"FNF downloader v{__version__}")
url = DEFAULT_URLS.get(
args.game.lower(),
args.game
)
downloader = Downloader(
url,
args.assets_only,
args.redownload,
args.manifests,
args.exclude
)
downloader.session.headers["User-Agent"] = args.user_agent
startTime = perf_counter()
manifests = {}
outputPath = ensureParentDir(f"{args.outputPath}/.") # lol
os.chdir(outputPath)
for name, manifest in downloader.downloadHTML("index.html"):
manifests[name] = manifest
    # Save all manifests' contents to a single JSON file if -S was passed.
if args.save:
with args.save as outputFile:
json.dump(
manifests,
outputFile,
indent = "\t"
)
downloadTime = round(perf_counter() - startTime)
logging.info(f"Files downloaded: {downloader.numAssets}, {downloader.totalSize // 1048576} MB")
logging.info(f"Done ({downloadTime // 60}m {downloadTime % 60}s)")
if __name__ == "__main__":
main()