kogens · November 23, 2023 07:28
diff --git a/spmloader.md b/spmloader.md
diff --git a/spmloader.py b/spmloader.py
 from __future__ import annotations

 import re
 import struct
 import warnings
 from dataclasses import dataclass
 from datetime import datetime
 from os import PathLike
 from pathlib import Path

 import numpy as np
 import pint
 from pint import UnitRegistry, Quantity

 # Regex for CIAO parameters (lines starting with \@ )
 RE_CIAO_PARAM = re.compile(
    r'^\\?@?(?:(?P<group>\d?):)?(?P<param>.*): (?P<type>\w)\s?(?:\[(?P<softscale>.*)\])?\s?(?:\((?P<hardscale>.*)\))?\s(?P<hardval>.*)$')

 # Define regex to identify numerical values
 RE_NUMERICAL = re.compile(r'^([+-]?\d+\.?\d*(?:[eE][+-]\d+)?)( 1?[^\d:]+)?$')
 RE_MULTIPLE_NUMERICAL = re.compile(r'^((?:(?<!\d)[+-]?\d+\.?\d*(?:e[+-]\d+)? ?)+)([^\d:]+)?$')

 # Integer size used when decoding data from raw bytestrings
 INTEGER_SIZE = 2 ** 32

 # Pint UnitRegistry handles physical quantities
 ureg = UnitRegistry()
 ureg.define(f'least_significant_bit  = {INTEGER_SIZE} = LSB')
 ureg.define('arbitrary_units = [] = Arb')
 ureg.define('log_arbitrary_units = [] = log_Arb')
 ureg.define('log_volt = [] = log_V')
 ureg.define('log_pascal = [] = log_Pa')
 ureg.define('@alias degree = º')  # The files use "Ordinal indicator": º instead of the actual degree symbol: °
 ureg.default_format = '~C'


 class SPMFile:
    """ Representation of an entire SPM file with images and metadata """

    def __init__(self, path: str | PathLike):
        self.path = Path(path)
        self.metadata = {}
        self.images = {}
        self._flat_metadata = {}
        self._integer_size = INTEGER_SIZE

        self.load_spm()

    def __repr__(self) -> str:
        titles = [x for x in self.images.keys()]
        return f'SPM file: "{self.path.name}", {self["Date"]}. Images: {titles}'

    def __getitem__(self, item) -> tuple[int, float, str, Quantity]:
        """ Fetches values from the metadata when class is called like a dict """
        return self._flat_metadata[item]

    def load_spm(self):
        """ Load an SPM file and extract images and metadata """
        with open(self.path, 'rb') as f:
            file_bytes = f.read()

        # Extract lines and interpret metadata and images
        metadata_lines = extract_metadata_lines(file_bytes)
        metadata = interpret_metadata(metadata_lines)
        self.metadata = metadata

        # Construct a "flat metadata" for accessing with __getitem__.
        self._flat_metadata = flatten_metadata(metadata)

        # Load CIAO images
        self.images = extract_ciao_images(metadata, file_bytes)
        for key in self.images:
            setattr(self, key, self.images[key])  # Make CIAO images accessible as attributes

    @property
    def groups(self) -> dict[int | None, dict[str]]:
        """ CIAO parameters ordered by group number """
        groups = {}
        for key, value in sorted(self._flat_metadata.items()):
            if isinstance(value, CIAOParameter):
                if value.group in groups.keys():
                    groups[value.group].update({key.split(':', 1)[-1]: value})
                else:
                    groups[value.group] = {}
                    groups[value.group].update({key.split(':', 1)[-1]: value})

        return groups


 class CIAOImage:
    """ A CIAO image with metadata"""

    def __init__(self, image_metadata: dict, full_metadata: dict, file_bytes: bytes):
        self.width = None
        self.height = None
        self.data = None
        self.px_size_x, self.px_size_y = None, None
        self.x, self.y = None, None

        self.metadata = image_metadata | flatten_metadata(full_metadata)

        # Data offset and length refer to the bytes of the original file including metadata
        data_start = image_metadata['Data offset']
        data_length = image_metadata['Data length']

        # Calculate the number of pixels in order to decode the bytestring.
        # Note: "Bytes/pixel" is defined in the metadata but byte lengths don't seem to follow the bytes/pixel it.
        n_rows, n_cols = image_metadata['Number of lines'], image_metadata['Samps/line']
        n_pixels = n_cols * n_rows

        # Extract relevant image data from the raw bytestring of the full file and decode the byte values
        # as signed 32-bit integers in little-endian (same as "least significant bit").
        # Note that this is despite documentation saying 16-bit signed int.
        # https://docs.python.org/3/library/struct.html#format-characters
        bytestring = file_bytes[data_start: data_start + data_length]
        pixel_values = struct.unpack(f'<{n_pixels}i', bytestring)

        # Save Z scale parameter from full metadata
        zscale_soft_scale = self.fetch_soft_scale_from_full_metadata('2:Z scale')
        self.metadata.update(zscale_soft_scale)
        self.scansize = full_metadata['Ciao scan list']['Scan Size']

        # Reorder image into a numpy array and calculate the physical value of each pixel.
        # Row order is reversed in stored data, so we flip up/down.
        self._raw_image = np.flipud(np.array(pixel_values).reshape(n_rows, n_cols))
        self.calculate_physical_units()
        self.title = self.metadata['2:Image Data'].internal_designation

    def fetch_soft_scale_from_full_metadata(self, key='2:Z scale') -> dict:
        soft_scale_key = self.metadata[key].sscale
        soft_scale_value = self.metadata[soft_scale_key].value

        return {soft_scale_key: soft_scale_value}

    @property
    def corrected_zscale(self):
        """ Returns the z-scale correction used to translate from "pixel value" to physical units in the image"""
        z_scale = self.metadata['2:Z scale']
        hard_value = z_scale.value
        soft_scale_key = z_scale.sscale
        soft_scale_value = self.metadata[soft_scale_key]

        # The "hard scale" is used to calculate the physical value. The hard scale given in the line must be ignored,
        # and a corrected one obtained by dividing the "Hard value" by the max range of the integer, it seems.
        # NOTE: Documentation says divide by 2^16, but 2^32 gives proper results...?
        corrected_hard_scale = hard_value / INTEGER_SIZE

        return corrected_hard_scale * soft_scale_value

    def calculate_physical_units(self):
        """ Calculate physical scale of image values """
        self.data = self._raw_image * self.corrected_zscale

        # Calculate pixel sizes in physical units
        # NOTE: This assumes "Scan Size" always represents the longest dimension of the image.
        n_rows, n_cols = self._raw_image.shape
        aspect_ratio = (max(self._raw_image.shape) / n_rows, max(self._raw_image.shape) / n_cols)

        # Calculate pixel sizes and
        self.px_size_y = self.scansize / (n_rows - 1) / aspect_ratio[0]
        self.px_size_x = self.scansize / (n_cols - 1) / aspect_ratio[1]

        self.height = (n_rows - 1) * self.px_size_y
        self.width = (n_cols - 1) * self.px_size_x

        self.y = np.linspace(0, self.height, n_rows)
        self.x = np.linspace(0, self.width, n_cols)

    def __array__(self) -> np.ndarray:
        warnings.filterwarnings("ignore", category=pint.UnitStrippedWarning)
        return self.data.__array__()

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        return getattr(self.data, '__array_ufunc__')(ufunc, method, *inputs, **kwargs)

    def __getitem__(self, key) -> str | int | float | Quantity:
        # Return metadata by exact key
        if key in self.metadata:
            return self.metadata[key]

        # If key is not found, try without group numbers in metadata keys
        merged_keys = {k.split(':', 1)[-1]: k for k in self.metadata.keys()}
        if key in merged_keys:
            return self.metadata[merged_keys[key]]
        else:
            raise KeyError(f'Key not found: {key}')

    def __repr__(self) -> str:
        reprstr = (f'{self.metadata["Data type"]} image "{self.title}" [{self.data.units}], '
                   f'{self.data.shape} px = ({self.height.m:.1f}, {self.width.m:.1f}) {self.px_size_x.u}')
        return reprstr

    def __str__(self):
        return self.__repr__()

    def __getattr__(self, name):
        return getattr(self.data, name)

    def __add__(self, other):
        if isinstance(other, CIAOImage):
            return self.data + other.data
        else:
            return self.data + other

    def __radd__(self, other):
        return self.__add__(other)

    def __sub__(self, other):
        if isinstance(other, CIAOImage):
            return self.data - other.data
        else:
            return self.data - other

    def __rsub__(self, other):
        return self.__sub__(other)

    def __mul__(self, other):
        if isinstance(other, CIAOImage):
            return self.data * other.data
        else:
            return self.data * other

    def __rmul__(self, other):
        return self.__mul__(other)

    def __truediv__(self, other):
        if isinstance(other, CIAOImage):
            return self.data / other.data
        else:
            return self.data / other

    def __rtruediv__(self, other):
        return self.__truediv__(other)

    @property
    def extent(self) -> list[float]:
        ext = [0, self.width.magnitude, 0, self.height.magnitude]
        return ext

    @property
    def meshgrid(self) -> np.meshgrid:
        return np.meshgrid(self.x, self.y)


 @dataclass
 class CIAOParameter:
    r""" CIAO parameters are lines in the SPM file starting with \@ and have several "sub" parameters """
    group: int | None
    parameter: str
    ptype: str
    value: float | str | Quantity | None

    sscale: str
    internal_designation: str

    hscale: float | None = None
    external_designation: str | None = None

    def __init__(self, parameter_string: str):
        self._raw_string = parameter_string.lstrip('\\@')
        match = RE_CIAO_PARAM.match(parameter_string)
        if match:
            self.group = int(match.group('group')) if match.group('group') else None
            self.parameter = match.group('param')
            self.ptype = match.group('type')

            # "Soft scale" and "Internal designation" seem to be interchangeable
            self.sscale = parse_parameter_value(match.group('softscale'))
            self.internal_designation = self.sscale

            self.value = parse_parameter_value(match.group('hardval')) if match.group('hardval') else None

            if self.ptype in ['V', 'C']:
                # "Value" or "Scale" parameter
                self.hscale = parse_parameter_value(match.group('hardscale')) if match.group('hardscale') else None
            elif self.ptype == 'S':
                # "Select" parameter
                self.external_designation = parse_parameter_value(match.group('hardval'))
        else:
            raise ValueError(f'Not a recognized CIAO parameter object: {parameter_string}')

    def __getattr__(self, name):
        return getattr(self.value, name)

    def __repr__(self) -> str:
        return self.ciao_string

    def __str__(self) -> str:
        return f'{self.value}'

    def __mul__(self, other) -> int | float | Quantity:
        if isinstance(other, CIAOParameter):
            return self.value * other.value
        else:
            return self.value * other

    def __truediv__(self, other) -> int | float | Quantity:
        if isinstance(other, CIAOParameter):
            return self.value / other.value
        else:
            return self.value / other

    def __add__(self, other) -> int | float | Quantity:
        if isinstance(other, CIAOParameter):
            return self.value + other.value
        else:
            return self.value + other

    def __sub__(self, other) -> int | float | Quantity:
        if isinstance(other, CIAOParameter):
            return self.value - other.value
        else:
            return self.value - other

    @property
    def ciao_string(self, backslash: bool = False):
        start = '\\@' if backslash else ''
        group_string = f'{self.group}:' if self.group else ''
        hscale_string = f' ({self.hscale})' if self.hscale else ''
        sscale_string = f' [{self.sscale}]' if self.sscale or self.ptype == 'S' else ''
        if self.ptype and self.ptype == 'S':
            value_string = f' "{self.value}"'
        elif self.ptype and self.ptype in ['V', 'C']:
            value_string = f' {self.value}'
        else:
            value_string = ''
        ciao_string = f'{start}{group_string}{self.parameter}: {self.ptype}{sscale_string}{hscale_string}{value_string}'

        return ciao_string


 def extract_metadata_lines(spm_bytestring: bytes) -> list[str]:
    """ Extract the metadata section between "*File list" and "*File list end" and decode and cleanup the lines """
    # Extract lines as list of bytestrings
    file_lines = spm_bytestring.splitlines()

    start_index, end_index = None, None
    for i, line in enumerate(file_lines):
        if line.strip() == b'\\*File list':
            start_index = i
        elif line.strip() == b'\\*File list end':
            end_index = i
            break

    if start_index is not None and end_index is not None:
        # Extract the identified lines between start and end. Decode strings and strip unwanted characters.
        metadata_lines = [x.decode('latin-1').lstrip('\\') for x in file_lines[start_index:end_index]]
        return metadata_lines
    else:
        raise ValueError('Beginning or end of "\\*File list" missing, cannot extract metadata')


 def interpret_metadata(metadata_lines: list[str], sort=False) -> dict[str, dict[str, int, float, str, Quantity]]:
    """ Walk through all lines in metadata and interpret sections beginning with * """
    metadata = {}
    current_section = None
    n_image = 0

    # Walk through each line of metadata and extract sections and parameters
    for line in metadata_lines:
        if line.startswith('*'):
            # Lines starting with * indicate a new section
            current_section = line.strip('*')

            # "Ciao image list" appears multiple times, so we give them a number
            if current_section == 'Ciao image list':
                current_section = f'Ciao image list {n_image}'
                n_image += 1

            # Initialize an empty dict to contain metadata for each section
            metadata[current_section] = {}

        elif line.startswith('@'):
            # Line is CIAO parameter, interpret and add to current section
            ciaoparam = CIAOParameter(line)

            # Note: The "parameter" used as key is not always unique and can appear multiple times with different
            # group number. Usually not an issue for CIAO images, however.
            key = ciaoparam.parameter if not ciaoparam.group else f'{ciaoparam.group}:{ciaoparam.parameter}'
            metadata[current_section][key] = ciaoparam
        else:
            # Line is regular parameter, add to metadata of current section
            key, value = line.split(':', 1)
            metadata[current_section][key] = parse_parameter_value(value)

    if sort:
        for key, value in metadata.items():
            metadata[key] = dict(sorted(metadata[key].items()))

    return metadata


 def flatten_metadata(metadata: dict) -> dict:
    """ Construct a "flat metadata" for accessing with __getitem__.
    Avoid "Ciao image list" as it appears multiple times """
    non_repeating_keys = [key for key in metadata.keys() if 'Ciao image list' not in key]
    flat_metadata = {k: v for key in non_repeating_keys for k, v in metadata[key].items()}

    return flat_metadata


 def extract_ciao_images(metadata: dict, file_bytes: bytes) -> dict[str, CIAOImage]:
    """ Data for CIAO images are found using the metadata from the Ciao image sections in the metadata """
    images = {}
    image_sections = {k: v for k, v in metadata.items() if k.startswith('Ciao image')}
    for i, image_metadata in enumerate(image_sections.values()):
        image = CIAOImage(image_metadata, metadata, file_bytes)
        key = image_metadata['2:Image Data'].internal_designation
        images[key] = image

    return images


 def parse_parameter_value(value_str: str) -> str | int | float | Quantity | datetime | list[float, Quantity] | None:
    """ Parse parameters into number, string or physical quantity """

    # Value is None, return it
    if not value_str:
        return value_str

    # Strip whitespace and check for matches
    value_str = value_str.strip()
    match_numerical = RE_NUMERICAL.match(value_str)
    if match_numerical and match_numerical.group(2):
        # Value  is a quantity with unit
        unit = match_numerical.group(2)

        # A few units appear as e.g. log(Pa), replace with log_Pa etc.
        if '(' in unit:
            unit = unit.replace('(', '_').replace(')', '')
        try:
            return ureg.Quantity(float(match_numerical.group(1)), unit)
        except pint.UndefinedUnitError:
            print(f'Unit not recognized: {match_numerical.group(2)}, parameter not converted: {value_str}')
            return value_str

    elif match_numerical:
        # No unit detected, value is just number
        if '.' not in value_str:
            # No decimal, convert to integer
            return int(value_str)
        else:
            # Decimal present, convert to float
            return float(value_str)

    # Check if value is a list of numbers with possible unit
    match_multiple_numerical = RE_MULTIPLE_NUMERICAL.match(value_str)
    if match_multiple_numerical:
        # List of values separated by space, possibly with a unit
        values = match_multiple_numerical.group(1).split()
        values = [float(x) for x in values]

        if match_multiple_numerical.group(2):
            unit = match_multiple_numerical.group(2)
            unit = unit.replace('~m', 'µm') if unit else None
            values = Quantity(values, unit)

        return values

    # Try if value is a date
    try:
        value = datetime.strptime(value_str, '%I:%M:%S %p %a %b %d %Y')
        return value
    except ValueError:
        pass

    # No other matches, strip " and return value
    return value_str.strip('"')
	from __future__ import annotations

	import re
	import struct
	import warnings
	from dataclasses import dataclass
	from datetime import datetime
	from os import PathLike
	from pathlib import Path

	import numpy as np
	import pint
	from pint import UnitRegistry, Quantity

	# Regex for CIAO parameters (lines starting with \@ )
	RE_CIAO_PARAM = re.compile(
	r'^\\?@?(?:(?P<group>\d?):)?(?P<param>.): (?P<type>\w)\s?(?:\[(?P<softscale>.)\])?\s?(?:\((?P<hardscale>.)\))?\s(?P<hardval>.)$')

	# Define regex to identify numerical values
	RE_NUMERICAL = re.compile(r'^([+-]?\d+\.?\d*(?:[eE][+-]\d+)?)( 1?[^\d:]+)?$')
	RE_MULTIPLE_NUMERICAL = re.compile(r'^((?:(?<!\d)[+-]?\d+\.?\d*(?:e[+-]\d+)? ?)+)([^\d:]+)?$')

	# Integer size used when decoding data from raw bytestrings
	INTEGER_SIZE = 2 ** 32

	# Pint UnitRegistry handles physical quantities
	ureg = UnitRegistry()
	ureg.define(f'least_significant_bit = {INTEGER_SIZE} = LSB')
	ureg.define('arbitrary_units = [] = Arb')
	ureg.define('log_arbitrary_units = [] = log_Arb')
	ureg.define('log_volt = [] = log_V')
	ureg.define('log_pascal = [] = log_Pa')
	ureg.define('@alias degree = º') # The files use "Ordinal indicator": º instead of the actual degree symbol: °
	ureg.default_format = '~C'


	class SPMFile:
	""" Representation of an entire SPM file with images and metadata """

	def __init__(self, path: str \| PathLike):
	self.path = Path(path)
	self.metadata = {}
	self.images = {}
	self._flat_metadata = {}
	self._integer_size = INTEGER_SIZE

	self.load_spm()

	def __repr__(self) -> str:
	titles = [x for x in self.images.keys()]
	return f'SPM file: "{self.path.name}", {self["Date"]}. Images: {titles}'

	def __getitem__(self, item) -> tuple[int, float, str, Quantity]:
	""" Fetches values from the metadata when class is called like a dict """
	return self._flat_metadata[item]

	def load_spm(self):
	""" Load an SPM file and extract images and metadata """
	with open(self.path, 'rb') as f:
	file_bytes = f.read()

	# Extract lines and interpret metadata and images
	metadata_lines = extract_metadata_lines(file_bytes)
	metadata = interpret_metadata(metadata_lines)
	self.metadata = metadata

	# Construct a "flat metadata" for accessing with __getitem__.
	self._flat_metadata = flatten_metadata(metadata)

	# Load CIAO images
	self.images = extract_ciao_images(metadata, file_bytes)
	for key in self.images:
	setattr(self, key, self.images[key]) # Make CIAO images accessible as attributes

	@property
	def groups(self) -> dict[int \| None, dict[str]]:
	""" CIAO parameters ordered by group number """
	groups = {}
	for key, value in sorted(self._flat_metadata.items()):
	if isinstance(value, CIAOParameter):
	if value.group in groups.keys():
	groups[value.group].update({key.split(':', 1)[-1]: value})
	else:
	groups[value.group] = {}
	groups[value.group].update({key.split(':', 1)[-1]: value})

	return groups


	class CIAOImage:
	""" A CIAO image with metadata"""

	def __init__(self, image_metadata: dict, full_metadata: dict, file_bytes: bytes):
	self.width = None
	self.height = None
	self.data = None
	self.px_size_x, self.px_size_y = None, None
	self.x, self.y = None, None

	self.metadata = image_metadata \| flatten_metadata(full_metadata)

	# Data offset and length refer to the bytes of the original file including metadata
	data_start = image_metadata['Data offset']
	data_length = image_metadata['Data length']

	# Calculate the number of pixels in order to decode the bytestring.
	# Note: "Bytes/pixel" is defined in the metadata but byte lengths don't seem to follow the bytes/pixel it.
	n_rows, n_cols = image_metadata['Number of lines'], image_metadata['Samps/line']
	n_pixels = n_cols * n_rows

	# Extract relevant image data from the raw bytestring of the full file and decode the byte values
	# as signed 32-bit integers in little-endian (same as "least significant bit").
	# Note that this is despite documentation saying 16-bit signed int.
	# https://docs.python.org/3/library/struct.html#format-characters
	bytestring = file_bytes[data_start: data_start + data_length]
	pixel_values = struct.unpack(f'<{n_pixels}i', bytestring)

	# Save Z scale parameter from full metadata
	zscale_soft_scale = self.fetch_soft_scale_from_full_metadata('2:Z scale')
	self.metadata.update(zscale_soft_scale)
	self.scansize = full_metadata['Ciao scan list']['Scan Size']

	# Reorder image into a numpy array and calculate the physical value of each pixel.
	# Row order is reversed in stored data, so we flip up/down.
	self._raw_image = np.flipud(np.array(pixel_values).reshape(n_rows, n_cols))
	self.calculate_physical_units()
	self.title = self.metadata['2:Image Data'].internal_designation

	def fetch_soft_scale_from_full_metadata(self, key='2:Z scale') -> dict:
	soft_scale_key = self.metadata[key].sscale
	soft_scale_value = self.metadata[soft_scale_key].value

	return {soft_scale_key: soft_scale_value}

	@property
	def corrected_zscale(self):
	""" Returns the z-scale correction used to translate from "pixel value" to physical units in the image"""
	z_scale = self.metadata['2:Z scale']
	hard_value = z_scale.value
	soft_scale_key = z_scale.sscale
	soft_scale_value = self.metadata[soft_scale_key]

	# The "hard scale" is used to calculate the physical value. The hard scale given in the line must be ignored,
	# and a corrected one obtained by dividing the "Hard value" by the max range of the integer, it seems.
	# NOTE: Documentation says divide by 2^16, but 2^32 gives proper results...?
	corrected_hard_scale = hard_value / INTEGER_SIZE

	return corrected_hard_scale * soft_scale_value

	def calculate_physical_units(self):
	""" Calculate physical scale of image values """
	self.data = self._raw_image * self.corrected_zscale

	# Calculate pixel sizes in physical units
	# NOTE: This assumes "Scan Size" always represents the longest dimension of the image.
	n_rows, n_cols = self._raw_image.shape
	aspect_ratio = (max(self._raw_image.shape) / n_rows, max(self._raw_image.shape) / n_cols)

	# Calculate pixel sizes and
	self.px_size_y = self.scansize / (n_rows - 1) / aspect_ratio[0]
	self.px_size_x = self.scansize / (n_cols - 1) / aspect_ratio[1]

	self.height = (n_rows - 1) * self.px_size_y
	self.width = (n_cols - 1) * self.px_size_x

	self.y = np.linspace(0, self.height, n_rows)
	self.x = np.linspace(0, self.width, n_cols)

	def __array__(self) -> np.ndarray:
	warnings.filterwarnings("ignore", category=pint.UnitStrippedWarning)
	return self.data.__array__()

	def __array_ufunc__(self, ufunc, method, inputs, *kwargs):
	return getattr(self.data, '__array_ufunc__')(ufunc, method, inputs, *kwargs)

	def __getitem__(self, key) -> str \| int \| float \| Quantity:
	# Return metadata by exact key
	if key in self.metadata:
	return self.metadata[key]

	# If key is not found, try without group numbers in metadata keys
	merged_keys = {k.split(':', 1)[-1]: k for k in self.metadata.keys()}
	if key in merged_keys:
	return self.metadata[merged_keys[key]]
	else:
	raise KeyError(f'Key not found: {key}')

	def __repr__(self) -> str:
	reprstr = (f'{self.metadata["Data type"]} image "{self.title}" [{self.data.units}], '
	f'{self.data.shape} px = ({self.height.m:.1f}, {self.width.m:.1f}) {self.px_size_x.u}')
	return reprstr

	def __str__(self):
	return self.__repr__()

	def __getattr__(self, name):
	return getattr(self.data, name)

	def __add__(self, other):
	if isinstance(other, CIAOImage):
	return self.data + other.data
	else:
	return self.data + other

	def __radd__(self, other):
	return self.__add__(other)

	def __sub__(self, other):
	if isinstance(other, CIAOImage):
	return self.data - other.data
	else:
	return self.data - other

	def __rsub__(self, other):
	return self.__sub__(other)

	def __mul__(self, other):
	if isinstance(other, CIAOImage):
	return self.data * other.data
	else:
	return self.data * other

	def __rmul__(self, other):
	return self.__mul__(other)

	def __truediv__(self, other):
	if isinstance(other, CIAOImage):
	return self.data / other.data
	else:
	return self.data / other

	def __rtruediv__(self, other):
	return self.__truediv__(other)

	@property
	def extent(self) -> list[float]:
	ext = [0, self.width.magnitude, 0, self.height.magnitude]
	return ext

	@property
	def meshgrid(self) -> np.meshgrid:
	return np.meshgrid(self.x, self.y)


	@dataclass
	class CIAOParameter:
	r""" CIAO parameters are lines in the SPM file starting with \@ and have several "sub" parameters """
	group: int \| None
	parameter: str
	ptype: str
	value: float \| str \| Quantity \| None

	sscale: str
	internal_designation: str

	hscale: float \| None = None
	external_designation: str \| None = None

	def __init__(self, parameter_string: str):
	self._raw_string = parameter_string.lstrip('\\@')
	match = RE_CIAO_PARAM.match(parameter_string)
	if match:
	self.group = int(match.group('group')) if match.group('group') else None
	self.parameter = match.group('param')
	self.ptype = match.group('type')

	# "Soft scale" and "Internal designation" seem to be interchangeable
	self.sscale = parse_parameter_value(match.group('softscale'))
	self.internal_designation = self.sscale

	self.value = parse_parameter_value(match.group('hardval')) if match.group('hardval') else None

	if self.ptype in ['V', 'C']:
	# "Value" or "Scale" parameter
	self.hscale = parse_parameter_value(match.group('hardscale')) if match.group('hardscale') else None
	elif self.ptype == 'S':
	# "Select" parameter
	self.external_designation = parse_parameter_value(match.group('hardval'))
	else:
	raise ValueError(f'Not a recognized CIAO parameter object: {parameter_string}')

	def __getattr__(self, name):
	return getattr(self.value, name)

	def __repr__(self) -> str:
	return self.ciao_string

	def __str__(self) -> str:
	return f'{self.value}'

	def __mul__(self, other) -> int \| float \| Quantity:
	if isinstance(other, CIAOParameter):
	return self.value * other.value
	else:
	return self.value * other

	def __truediv__(self, other) -> int \| float \| Quantity:
	if isinstance(other, CIAOParameter):
	return self.value / other.value
	else:
	return self.value / other

	def __add__(self, other) -> int \| float \| Quantity:
	if isinstance(other, CIAOParameter):
	return self.value + other.value
	else:
	return self.value + other

	def __sub__(self, other) -> int \| float \| Quantity:
	if isinstance(other, CIAOParameter):
	return self.value - other.value
	else:
	return self.value - other

	@property
	def ciao_string(self, backslash: bool = False):
	start = '\\@' if backslash else ''
	group_string = f'{self.group}:' if self.group else ''
	hscale_string = f' ({self.hscale})' if self.hscale else ''
	sscale_string = f' [{self.sscale}]' if self.sscale or self.ptype == 'S' else ''
	if self.ptype and self.ptype == 'S':
	value_string = f' "{self.value}"'
	elif self.ptype and self.ptype in ['V', 'C']:
	value_string = f' {self.value}'
	else:
	value_string = ''
	ciao_string = f'{start}{group_string}{self.parameter}: {self.ptype}{sscale_string}{hscale_string}{value_string}'

	return ciao_string


	def extract_metadata_lines(spm_bytestring: bytes) -> list[str]:
	""" Extract the metadata section between "File list" and "File list end" and decode and cleanup the lines """
	# Extract lines as list of bytestrings
	file_lines = spm_bytestring.splitlines()

	start_index, end_index = None, None
	for i, line in enumerate(file_lines):
	if line.strip() == b'\\*File list':
	start_index = i
	elif line.strip() == b'\\*File list end':
	end_index = i
	break

	if start_index is not None and end_index is not None:
	# Extract the identified lines between start and end. Decode strings and strip unwanted characters.
	metadata_lines = [x.decode('latin-1').lstrip('\\') for x in file_lines[start_index:end_index]]
	return metadata_lines
	else:
	raise ValueError('Beginning or end of "\\*File list" missing, cannot extract metadata')


	def interpret_metadata(metadata_lines: list[str], sort=False) -> dict[str, dict[str, int, float, str, Quantity]]:
	""" Walk through all lines in metadata and interpret sections beginning with * """
	metadata = {}
	current_section = None
	n_image = 0

	# Walk through each line of metadata and extract sections and parameters
	for line in metadata_lines:
	if line.startswith('*'):
	# Lines starting with * indicate a new section
	current_section = line.strip('*')

	# "Ciao image list" appears multiple times, so we give them a number
	if current_section == 'Ciao image list':
	current_section = f'Ciao image list {n_image}'
	n_image += 1

	# Initialize an empty dict to contain metadata for each section
	metadata[current_section] = {}

	elif line.startswith('@'):
	# Line is CIAO parameter, interpret and add to current section
	ciaoparam = CIAOParameter(line)

	# Note: The "parameter" used as key is not always unique and can appear multiple times with different
	# group number. Usually not an issue for CIAO images, however.
	key = ciaoparam.parameter if not ciaoparam.group else f'{ciaoparam.group}:{ciaoparam.parameter}'
	metadata[current_section][key] = ciaoparam
	else:
	# Line is regular parameter, add to metadata of current section
	key, value = line.split(':', 1)
	metadata[current_section][key] = parse_parameter_value(value)

	if sort:
	for key, value in metadata.items():
	metadata[key] = dict(sorted(metadata[key].items()))

	return metadata


	def flatten_metadata(metadata: dict) -> dict:
	""" Construct a "flat metadata" for accessing with __getitem__.
	Avoid "Ciao image list" as it appears multiple times """
	non_repeating_keys = [key for key in metadata.keys() if 'Ciao image list' not in key]
	flat_metadata = {k: v for key in non_repeating_keys for k, v in metadata[key].items()}

	return flat_metadata


	def extract_ciao_images(metadata: dict, file_bytes: bytes) -> dict[str, CIAOImage]:
	""" Data for CIAO images are found using the metadata from the Ciao image sections in the metadata """
	images = {}
	image_sections = {k: v for k, v in metadata.items() if k.startswith('Ciao image')}
	for i, image_metadata in enumerate(image_sections.values()):
	image = CIAOImage(image_metadata, metadata, file_bytes)
	key = image_metadata['2:Image Data'].internal_designation
	images[key] = image

	return images


	def parse_parameter_value(value_str: str) -> str \| int \| float \| Quantity \| datetime \| list[float, Quantity] \| None:
	""" Parse parameters into number, string or physical quantity """

	# Value is None, return it
	if not value_str:
	return value_str

	# Strip whitespace and check for matches
	value_str = value_str.strip()
	match_numerical = RE_NUMERICAL.match(value_str)
	if match_numerical and match_numerical.group(2):
	# Value is a quantity with unit
	unit = match_numerical.group(2)

	# A few units appear as e.g. log(Pa), replace with log_Pa etc.
	if '(' in unit:
	unit = unit.replace('(', '_').replace(')', '')
	try:
	return ureg.Quantity(float(match_numerical.group(1)), unit)
	except pint.UndefinedUnitError:
	print(f'Unit not recognized: {match_numerical.group(2)}, parameter not converted: {value_str}')
	return value_str

	elif match_numerical:
	# No unit detected, value is just number
	if '.' not in value_str:
	# No decimal, convert to integer
	return int(value_str)
	else:
	# Decimal present, convert to float
	return float(value_str)

	# Check if value is a list of numbers with possible unit
	match_multiple_numerical = RE_MULTIPLE_NUMERICAL.match(value_str)
	if match_multiple_numerical:
	# List of values separated by space, possibly with a unit
	values = match_multiple_numerical.group(1).split()
	values = [float(x) for x in values]

	if match_multiple_numerical.group(2):
	unit = match_multiple_numerical.group(2)
	unit = unit.replace('~m', 'µm') if unit else None
	values = Quantity(values, unit)

	return values

	# Try if value is a date
	try:
	value = datetime.strptime(value_str, '%I:%M:%S %p %a %b %d %Y')
	return value
	except ValueError:
	pass

	# No other matches, strip " and return value
	return value_str.strip('"')