internetimagery · April 21, 2023 21:43
diff --git a/img_sync.py b/img_sync.py

 import re
 import os
 import html
 import shutil
 import logging
 import tempfile
 from functools import partial
 from threading import Lock
 from urllib.request import urlopen, Request
 from urllib.error import HTTPError, URLError
 from concurrent.futures import ThreadPoolExecutor, Future

 from PIL import Image

 # Module logger; handlers and levels are left to the application.
 LOG = logging.getLogger(__name__)

 # Matches an (HTML-escaped) ``&lt;img src="...">`` tag with an http(s) source.
 # Group 1 is the opening quote character, group 2 is the URL.
 #
 # A backreference cannot be used inside a character class (``[^\1]`` means
 # "anything but chr(1)"), so the URL body is expressed as "any character that
 # is not the opening quote" via a negative lookahead instead.  DOTALL keeps
 # the URL body able to span newlines, as the negated class did.
 URL = re.compile(
    r"""&lt;\s*img\s+src=(['"])(https?://(?:(?!\1).)+)\1""",
    re.DOTALL,
 )

 # Supported response content types and the file extension used when saving.
 EXT = {
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
 }


 def generate_replacement(size, store_dir, identifier, cache, lock, item):
    """Download one matched image, save a thumbnail, and describe the edit.

    Args:
        size: ``(width, height)`` bounding box handed to ``Image.thumbnail``.
        store_dir: Directory the thumbnail is written into.
        identifier: Prefix of the generated file name.
        cache: Dict shared by all workers, mapping a URL to a ``Future`` whose
            result is the saved path, or ``""`` when nothing was saved.
        lock: Lock guarding ``cache``.
        item: ``(index, match)`` pair; group 2 of ``match`` is the URL and
            ``index + 1`` becomes the numeric part of the file name.

    Returns:
        ``(start, stop, path)`` where ``start``/``stop`` delimit the URL in
        the searched text and ``path`` is the file to link instead, or
        ``(0, 0, "")`` when no replacement is available.
    """
    index, match = item[0], item[1]
    url = match.group(2)

    # Only the cache lookup/insert happens under the lock.
    with lock:
        future = cache.get(url)
        owner = future is None
        if owner:
            future = cache[url] = Future()

    if not owner:
        # Another worker owns this URL.  Wait for it *outside* the lock so
        # unrelated downloads are not stalled behind this one.
        path = future.result()
        if path:
            return (match.start(2), match.end(2), path)
        return (0, 0, "")

    result = ""
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'})
        with urlopen(req) as resp:
            ext = EXT.get(resp.headers.get_content_type())
            # Responses with an unsupported content type are skipped silently.
            if ext:
                with tempfile.SpooledTemporaryFile() as tmp:
                    shutil.copyfileobj(resp, tmp)

                    output = os.path.join(
                        store_dir,
                        "{}_{:03d}.{}".format(identifier, index + 1, ext),
                    )
                    try:
                        # Decoding is kept inside the handler too: an
                        # unreadable image should skip this URL, not abort
                        # every other download.
                        img = Image.open(tmp)
                        img.thumbnail(size)
                        img.save(output)
                    except OSError as err:
                        LOG.warning('Failed: %s "%s"', err, url)
                    else:
                        LOG.info('Downloaded: "%s"', url)
                        result = output
    except (HTTPError, URLError) as err:
        LOG.warning('Failed: %s "%s"', err, url)
    finally:
        # Always resolve the future so workers waiting on this URL wake up.
        future.set_result(result)

    if result:
        return (match.start(2), match.end(2), result)
    return (0, 0, "")


 def main(path, size):
    """Localize the remote images referenced by a .gpx file.

    Every URL matched by ``URL`` is handed to ``generate_replacement`` on a
    thread pool.  Thumbnails are stored under ``images/<name>`` next to the
    input file, and a copy of the file with the rewritten links is written
    as ``<name>-updated.gpx`` in the input file's directory.

    Args:
        path: Path to the source .gpx file.
        size: Edge length used for both sides of the thumbnail bounding box.
    """
    identifier, _ = os.path.splitext(os.path.basename(path))
    path_dir = os.path.dirname(path)
    img_path = os.path.abspath(os.path.join(path_dir, "images", identifier))
    os.makedirs(img_path, exist_ok=True)

    # Read and write with an explicit encoding instead of the platform
    # default, so the result does not depend on the machine's locale.
    with open(path, encoding="utf-8") as h:
        data = h.read()

    cache = {}
    lock = Lock()
    worker = partial(
        generate_replacement, (size, size), img_path, identifier, cache, lock
    )

    with ThreadPoolExecutor() as pool:
        replacements = sorted(
            pool.map(worker, enumerate(URL.finditer(data))),
            reverse=True,
        )

    # Apply edits from the end of the text backwards so the offsets of the
    # replacements still to come are not shifted by earlier ones.
    for start, stop, replacement_path in replacements:
        if not replacement_path:
            continue
        # NOTE(review): links are made relative to the image directory, not
        # to the directory of the written .gpx file -- confirm this is what
        # the consumer of the file expects.
        relative_path = os.path.relpath(replacement_path, img_path)
        data = data[:start] + html.escape(relative_path) + data[stop:]

    output = os.path.join(path_dir, "{}-updated.gpx".format(identifier))
    with open(output, "w", encoding="utf-8") as h:
        h.write(data)


 if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run ``main``.
    import argparse

    # The first positional parameter of ArgumentParser is ``prog``; the help
    # text has to be passed as ``description`` to show up as a description
    # rather than as the program name in the usage line.
    parser = argparse.ArgumentParser(
        description="Collect linked images from gpx file and localize them."
    )
    parser.add_argument("path", help="Path to the .gpx file")
    parser.add_argument("-s", "--size", type=int, default=200, help="Size for thumbnails")
    args = parser.parse_args()

    main(args.path, args.size)

	import re
	import os
	import html
	import shutil
	import logging
	import tempfile
	from functools import partial
	from threading import Lock
	from urllib.request import urlopen, Request
	from urllib.error import HTTPError, URLError
	from concurrent.futures import ThreadPoolExecutor, Future

	from PIL import Image

	# Module logger; handlers and levels are left to the application.
	LOG = logging.getLogger(__name__)

	# Matches an ``<img src="...">`` tag with an http(s) source.
	# Group 1 is the opening quote character, group 2 is the URL.
	#
	# A backreference cannot be used inside a character class (``[^\1]`` means
	# "anything but chr(1)"), so the URL body is expressed as "any character
	# that is not the opening quote" via a negative lookahead instead.  DOTALL
	# keeps the URL body able to span newlines, as the negated class did.
	URL = re.compile(
	    r"""<\s*img\s+src=(['"])(https?://(?:(?!\1).)+)\1""",
	    re.DOTALL,
	)

	# Supported response content types and the file extension used when saving.
	EXT = {
	    "image/jpeg": "jpg",
	    "image/png": "png",
	    "image/gif": "gif",
	}


	def generate_replacement(size, store_dir, identifier, cache, lock, item):
	    """Download one matched image, save a thumbnail, and describe the edit.

	    Args:
	        size: ``(width, height)`` bounding box handed to ``Image.thumbnail``.
	        store_dir: Directory the thumbnail is written into.
	        identifier: Prefix of the generated file name.
	        cache: Dict shared by all workers, mapping a URL to a ``Future``
	            whose result is the saved path, or ``""`` when nothing was
	            saved.
	        lock: Lock guarding ``cache``.
	        item: ``(index, match)`` pair; group 2 of ``match`` is the URL and
	            ``index + 1`` becomes the numeric part of the file name.

	    Returns:
	        ``(start, stop, path)`` where ``start``/``stop`` delimit the URL in
	        the searched text and ``path`` is the file to link instead, or
	        ``(0, 0, "")`` when no replacement is available.
	    """
	    index, match = item[0], item[1]
	    url = match.group(2)

	    # Only the cache lookup/insert happens under the lock.
	    with lock:
	        future = cache.get(url)
	        owner = future is None
	        if owner:
	            future = cache[url] = Future()

	    if not owner:
	        # Another worker owns this URL.  Wait for it *outside* the lock so
	        # unrelated downloads are not stalled behind this one.
	        path = future.result()
	        if path:
	            return (match.start(2), match.end(2), path)
	        return (0, 0, "")

	    result = ""
	    try:
	        req = Request(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'})
	        with urlopen(req) as resp:
	            ext = EXT.get(resp.headers.get_content_type())
	            # Responses with an unsupported content type are skipped silently.
	            if ext:
	                with tempfile.SpooledTemporaryFile() as tmp:
	                    shutil.copyfileobj(resp, tmp)

	                    output = os.path.join(
	                        store_dir,
	                        "{}_{:03d}.{}".format(identifier, index + 1, ext),
	                    )
	                    try:
	                        # Decoding is kept inside the handler too: an
	                        # unreadable image should skip this URL, not abort
	                        # every other download.
	                        img = Image.open(tmp)
	                        img.thumbnail(size)
	                        img.save(output)
	                    except OSError as err:
	                        LOG.warning('Failed: %s "%s"', err, url)
	                    else:
	                        LOG.info('Downloaded: "%s"', url)
	                        result = output
	    except (HTTPError, URLError) as err:
	        LOG.warning('Failed: %s "%s"', err, url)
	    finally:
	        # Always resolve the future so workers waiting on this URL wake up.
	        future.set_result(result)

	    if result:
	        return (match.start(2), match.end(2), result)
	    return (0, 0, "")


	def main(path, size):
	    """Localize the remote images referenced by a .gpx file.

	    Every URL matched by ``URL`` is handed to ``generate_replacement`` on a
	    thread pool.  Thumbnails are stored under ``images/<name>`` next to the
	    input file, and a copy of the file with the rewritten links is written
	    as ``<name>-updated.gpx`` in the input file's directory.

	    Args:
	        path: Path to the source .gpx file.
	        size: Edge length used for both sides of the thumbnail bounding box.
	    """
	    identifier, _ = os.path.splitext(os.path.basename(path))
	    path_dir = os.path.dirname(path)
	    img_path = os.path.abspath(os.path.join(path_dir, "images", identifier))
	    os.makedirs(img_path, exist_ok=True)

	    # Read and write with an explicit encoding instead of the platform
	    # default, so the result does not depend on the machine's locale.
	    with open(path, encoding="utf-8") as h:
	        data = h.read()

	    cache = {}
	    lock = Lock()
	    worker = partial(
	        generate_replacement, (size, size), img_path, identifier, cache, lock
	    )

	    with ThreadPoolExecutor() as pool:
	        replacements = sorted(
	            pool.map(worker, enumerate(URL.finditer(data))),
	            reverse=True,
	        )

	    # Apply edits from the end of the text backwards so the offsets of the
	    # replacements still to come are not shifted by earlier ones.
	    for start, stop, replacement_path in replacements:
	        if not replacement_path:
	            continue
	        # NOTE(review): links are made relative to the image directory, not
	        # to the directory of the written .gpx file -- confirm this is what
	        # the consumer of the file expects.
	        relative_path = os.path.relpath(replacement_path, img_path)
	        data = data[:start] + html.escape(relative_path) + data[stop:]

	    output = os.path.join(path_dir, "{}-updated.gpx".format(identifier))
	    with open(output, "w", encoding="utf-8") as h:
	        h.write(data)


	if __name__ == "__main__":
	    # Command-line entry point: parse the arguments and run ``main``.
	    import argparse

	    # The first positional parameter of ArgumentParser is ``prog``; the help
	    # text has to be passed as ``description`` to show up as a description
	    # rather than as the program name in the usage line.
	    parser = argparse.ArgumentParser(
	        description="Collect linked images from gpx file and localize them."
	    )
	    parser.add_argument("path", help="Path to the .gpx file")
	    parser.add_argument("-s", "--size", type=int, default=200, help="Size for thumbnails")
	    args = parser.parse_args()

	    main(args.path, args.size)