dartt0n · March 19, 2025 16:53 · dartt0n · Nov 23, 2024 · dartt0n · Nov 23, 2024
diff --git a/disk-caching-for-ir2024-innopolis-university.md b/disk-caching-for-ir2024-innopolis-university.md
diff --git a/innopolis-university-information-retrieval-file-cache.py b/innopolis-university-information-retrieval-file-cache.py
 import hashlib
 import numpy as np
 from typing import Callable
 from tempfile import TemporaryDirectory
 from pathlib import Path


 class FileCache:
    def __init__(
        self, cache_dir: Path | None = None, key_fn: Callable[[str], str] | None = None
    ):
        # use specifed cache directory or create a temporary directory
        self._cache_dir = cache_dir or Path(TemporaryDirectory().name)
        self._cache_dir.mkdir(parents=True, exist_ok=True)

        self._cache_keys = set()
        self._key_fn = key_fn or self.__default_key_fn
        
        # load existing caches from disk
        for file in self._cache_dir.glob("*.npy"):
            self._cache_keys.add(file.stem)

    @staticmethod
    def __default_key_fn(url: str) -> str:
        # sha256 hash of the URL
        return hashlib.sha256(url.encode()).hexdigest()

    def __call__(
        self, download_func: Callable[[str], np.ndarray]
    ) -> Callable[[str], np.ndarray]:
        def wrapper(url: str) -> np.ndarray:
            key = self._key_fn(url)
            # if key exists
            if key in self._cache_keys:
                # load numpy array from file
                return np.load(self._cache_dir / (key + ".npy"))

            # otherwise download and save
            np_data = download_func(url)
            self._cache_keys.add(key)
            np.save(self._cache_dir / (key + ".npy"), np_data)
            return np_data

        return wrapper

    def clean_cache(self):
        # delete each known cache entry
        for key in self._cache_keys:
            file_path = self._cache_dir / (key + ".npy")
            if file_path.exists():
                file_path.unlink()
        self._cache_keys.clear()


 # shared cache instance; entries are stored in the relative directory "image_cache"
 cache = FileCache(Path("image_cache"))

 @cache
 def load_image_from_url(url: str) -> np.ndarray:
    """Return the image at ``url`` as a NumPy array (placeholder, not implemented).

    The ``cache`` decorator stores each returned array as a ``.npy`` file and
    reuses it on later calls with a URL that maps to the same key.
    """
    ... # todo: your implementation of load_image_from_url
	import hashlib
	import numpy as np
	from typing import Callable
	from tempfile import TemporaryDirectory
	from pathlib import Path


	class FileCache:
	def __init__(
	self, cache_dir: Path \| None = None, key_fn: Callable[[str], str] \| None = None
	):
	# use specifed cache directory or create a temporary directory
	self._cache_dir = cache_dir or Path(TemporaryDirectory().name)
	self._cache_dir.mkdir(parents=True, exist_ok=True)

	self._cache_keys = set()
	self._key_fn = key_fn or self.__default_key_fn

	# load existing caches from disk
	for file in self._cache_dir.glob("*.npy"):
	self._cache_keys.add(file.stem)

	@staticmethod
	def __default_key_fn(url: str) -> str:
	# sha256 hash of the URL
	return hashlib.sha256(url.encode()).hexdigest()

	def __call__(
	self, download_func: Callable[[str], np.ndarray]
	) -> Callable[[str], np.ndarray]:
	def wrapper(url: str) -> np.ndarray:
	key = self._key_fn(url)
	# if key exists
	if key in self._cache_keys:
	# load numpy array from file
	return np.load(self._cache_dir / (key + ".npy"))

	# otherwise download and save
	np_data = download_func(url)
	self._cache_keys.add(key)
	np.save(self._cache_dir / (key + ".npy"), np_data)
	return np_data

	return wrapper

	def clean_cache(self):
	# delete each known cache entry
	for key in self._cache_keys:
	file_path = self._cache_dir / (key + ".npy")
	if file_path.exists():
	file_path.unlink()
	self._cache_keys.clear()


	# shared cache instance; entries are stored in the relative directory "image_cache"
	cache = FileCache(Path("image_cache"))

	@cache
	def load_image_from_url(url: str) -> np.ndarray:
	    """Return the image at ``url`` as a NumPy array (placeholder, not implemented).

	    The ``cache`` decorator stores each returned array as a ``.npy`` file and
	    reuses it on later calls with a URL that maps to the same key.
	    """
	    ...  # todo: your implementation of load_image_from_url