This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <[email protected]> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft | |
import math | |
from pathlib import Path | |
import PIL.Image | |
import cairo | |
import gi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Four lines intentionally left blank | |
# SPDX-FileCopyrightText: 2025 geisserml <[email protected]> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Sophisticated parser for a page number mini-language | |
# Technically, this might be a use case for some parser generator like pyparsing or PLY, but this is a manual implementation based on common string operations. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <[email protected]> | |
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause | |
import sys | |
import argparse | |
if sys.version_info >= (3, 9): | |
from argparse import BooleanOptionalAction | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Four lines intentionally left blank | |
# SPDX-FileCopyrightText: 2025 geisserml <[email protected]> | |
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause | |
import argparse | |
from pathlib import Path |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2023 geisserml <[email protected]> | |
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0 OR BSD-3-Clause | |
# Unlike repository files, there is no "raw view" for GH releases, but we can extract the plain markdown content using GH web API | |
# See also https://stackoverflow.com/q/76995969/15547292 | |
# The following code snippet shows how to get a release title from pdfium-binaries to extract the full version | |
import re | |
import json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2023 geisserml <[email protected]> | |
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause OR MPL-2.0 | |
# Safer tar extraction (hopefully) preventing CVE-2007-4559 etc. | |
# Tries to use the most elegant strategy available in the caller's python version (>= 3.6) | |
__all__ = ["safer_tar_unpack"] | |
import sys | |
if sys.version_info >= (3, 11, 4): # PEP 706 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Four lines intentionally left blank | |
# SPDX-FileCopyrightText: 2025 geisserml <[email protected]> | |
# SPDX-License-Identifier: Apache-2.0 OR MPL-2.0 | |
# See also https://github.com/extremeheat/JSPyBridge/blob/master/examples/python/pdfjs.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <[email protected]> | |
# SPDX-License-Identifier: Apache-2.0 | |
import re | |
from datetime import datetime | |
from urllib.request import urlopen | |
from packaging.version import Version as PypaVersion | |
PB_RELEASE_URL = "https://archive.apache.org/dist/pdfbox/" | |
PB_DISTS_RE = r'<a href="([\d\.]+.+?)/">.+</a>\s+([\d\-]+ [\d:]+)' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Four lines intentionally left blank | |
# SPDX-FileCopyrightText: 2025 geisserml <[email protected]> | |
# SPDX-License-Identifier: Apache-2.0 OR MPL-2.0 | |
# Assuming you have an Apache PDFBox 3 jar in the same directory |
NewerOlder