jthielen · September 4, 2019 18:46
diff --git a/StringPreprocessor.py b/StringPreprocessor.py
 class StringPreprocessor(object):
    """Extensible preprocessor for unit expression strings.

    Instances are callable: ``preprocessor(text)`` returns ``text`` rewritten
    by, in order, plain string replacements, regex substitutions, and the
    conversion of "pretty" characters (superscript digits, ``·`` and ``⁻``).
    Extra rules can be appended per instance with `add_replacement()` and
    `add_regex_sub()`; the ``reset_*`` methods restore the class defaults.
    """

    # Default (current, replacement) pairs, applied in order with str.replace().
    _default_replacements = [(",", ""),
                             (" per ", "/"),
                             ("^", "**")]

    # Default (pattern, replacement) regex pairs, applied in order.
    # Every default pattern is passed through str.format() in
    # reset_regex_subs(), which fills "{}" with an identifier pattern.
    _default_subs_re = [('\N{DEGREE SIGN}', " degree"),
                        (r"([\w\.\-\+\*\\\^])\s+", r"\1 "), # merge multiple spaces
                        (r"({}) squared", r"\1**2"),  # Handle square and cube
                        (r"({}) cubed", r"\1**3"),
                        (r"cubic ({})", r"\1**3"),
                        (r"square ({})", r"\1**2"),
                        (r"sq ({})", r"\1**2"),
                        # NOTE: the "|" inside "[e|E]" is a literal pipe character.
                        (r"\b([0-9]+\.?[0-9]*)(?=[e|E][a-zA-Z]|[a-df-zA-DF-Z])", r"\1*"),  # Handle numberLetter for multiplication
                        (r"([\w\.\-])\s+(?=\w)", r"\1*"),  # Handle space for multiplication
                ]

    # Translation table mapping pretty characters to their plain equivalents,
    # and the regex matching a (possibly negative) superscript exponent.
    _pretty_table = maketrans('⁰¹²³⁴⁵⁶⁷⁸⁹·⁻', '0123456789*-')
    _pretty_exp_re = re.compile(r"⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+(?:\.[⁰¹²³⁴⁵⁶⁷⁸⁹]*)?")

    def __init__(self):
        """Set up the regex substitution and replacement lists from the defaults."""
        self.reset_regex_subs()
        self.reset_replacements()

    def __call__(self, input_string):
        """Preprocess input string according to defined replacements and regexes.

        Processing occurs in the following order:

        1) String replacements as defined by tuples in the replacements
        2) Regex substitutions (both default and those added by `add_regex_sub()`)
        3) Pretty text format character handling

        Returns the processed string.
        """
        # String replacements
        for current, replacement in self._replacements:
            input_string = input_string.replace(current, replacement)

        # Regex substitutions
        for pattern, replacement in self._compiled_subs_re:
            input_string = pattern.sub(replacement, input_string)

        # Replace pretty format exponents.  Each match is rewritten where it
        # was found, so an exponent that is a substring of another one
        # (e.g. "²" and "³²") cannot corrupt the longer exponent.
        table = self._pretty_table
        input_string = self._pretty_exp_re.sub(
            lambda match: '**' + match.group(0).translate(table), input_string)

        # Translate any remaining pretty characters (e.g. "·" -> "*").
        return input_string.translate(table)

    def add_replacement(self, current, replacement):
        """Add given replacement pair to this instance's replacement list."""
        self._replacements.append((current, replacement))

    def reset_replacements(self):
        """Reset replacement list to default."""
        # Copy the list so add_replacement() never mutates the class-level
        # defaults shared by every instance.
        self._replacements = list(self._default_replacements)

    def add_regex_sub(self, regex_string, replacement):
        """Compile the given regex_string and append it to the regex sub list.

        Unlike the default patterns, regex_string is compiled as given; it is
        not passed through str.format().
        """
        self._compiled_subs_re.append((re.compile(regex_string), replacement))

    def reset_regex_subs(self):
        """Reset regex substitution list to default."""
        # "{}" placeholders in the default patterns stand for an identifier.
        identifier = r"[_a-zA-Z][_a-zA-Z0-9]*"
        self._compiled_subs_re = [(re.compile(pattern.format(identifier)), replacement)
                                  for pattern, replacement in self._default_subs_re]
	class StringPreprocessor(object):
	    """Extensible preprocessor for unit expression strings.

	    Instances are callable: ``preprocessor(text)`` returns ``text`` rewritten
	    by, in order, plain string replacements, regex substitutions, and the
	    conversion of "pretty" characters (superscript digits, ``·`` and ``⁻``).
	    Extra rules can be appended per instance with `add_replacement()` and
	    `add_regex_sub()`; the ``reset_*`` methods restore the class defaults.
	    """

	    # Default (current, replacement) pairs, applied in order with str.replace().
	    _default_replacements = [(",", ""),
	                             (" per ", "/"),
	                             ("^", "**")]

	    # Default (pattern, replacement) regex pairs, applied in order.
	    # Every default pattern is passed through str.format() in
	    # reset_regex_subs(), which fills "{}" with an identifier pattern.
	    _default_subs_re = [('\N{DEGREE SIGN}', " degree"),
	                        (r"([\w\.\-\+\*\\\^])\s+", r"\1 "), # merge multiple spaces
	                        (r"({}) squared", r"\1**2"),  # Handle square and cube
	                        (r"({}) cubed", r"\1**3"),
	                        (r"cubic ({})", r"\1**3"),
	                        (r"square ({})", r"\1**2"),
	                        (r"sq ({})", r"\1**2"),
	                        # NOTE: the "|" inside "[e|E]" is a literal pipe character.
	                        (r"\b([0-9]+\.?[0-9]*)(?=[e|E][a-zA-Z]|[a-df-zA-DF-Z])", r"\1*"),  # Handle numberLetter for multiplication
	                        (r"([\w\.\-])\s+(?=\w)", r"\1*"),  # Handle space for multiplication
	                        ]

	    # Translation table mapping pretty characters to their plain equivalents,
	    # and the regex matching a (possibly negative) superscript exponent.
	    _pretty_table = maketrans('⁰¹²³⁴⁵⁶⁷⁸⁹·⁻', '0123456789*-')
	    _pretty_exp_re = re.compile(r"⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+(?:\.[⁰¹²³⁴⁵⁶⁷⁸⁹]*)?")

	    def __init__(self):
	        """Set up the regex substitution and replacement lists from the defaults."""
	        self.reset_regex_subs()
	        self.reset_replacements()

	    def __call__(self, input_string):
	        """Preprocess input string according to defined replacements and regexes.

	        Processing occurs in the following order:

	        1) String replacements as defined by tuples in the replacements
	        2) Regex substitutions (both default and those added by `add_regex_sub()`)
	        3) Pretty text format character handling

	        Returns the processed string.
	        """
	        # String replacements
	        for current, replacement in self._replacements:
	            input_string = input_string.replace(current, replacement)

	        # Regex substitutions
	        for pattern, replacement in self._compiled_subs_re:
	            input_string = pattern.sub(replacement, input_string)

	        # Replace pretty format exponents.  Each match is rewritten where it
	        # was found, so an exponent that is a substring of another one
	        # (e.g. "²" and "³²") cannot corrupt the longer exponent.
	        table = self._pretty_table
	        input_string = self._pretty_exp_re.sub(
	            lambda match: '**' + match.group(0).translate(table), input_string)

	        # Translate any remaining pretty characters (e.g. "·" -> "*").
	        return input_string.translate(table)

	    def add_replacement(self, current, replacement):
	        """Add given replacement pair to this instance's replacement list."""
	        self._replacements.append((current, replacement))

	    def reset_replacements(self):
	        """Reset replacement list to default."""
	        # Copy the list so add_replacement() never mutates the class-level
	        # defaults shared by every instance.
	        self._replacements = list(self._default_replacements)

	    def add_regex_sub(self, regex_string, replacement):
	        """Compile the given regex_string and append it to the regex sub list.

	        Unlike the default patterns, regex_string is compiled as given; it is
	        not passed through str.format().
	        """
	        self._compiled_subs_re.append((re.compile(regex_string), replacement))

	    def reset_regex_subs(self):
	        """Reset regex substitution list to default."""
	        # "{}" placeholders in the default patterns stand for an identifier.
	        identifier = r"[_a-zA-Z][_a-zA-Z0-9]*"
	        self._compiled_subs_re = [(re.compile(pattern.format(identifier)), replacement)
	                                  for pattern, replacement in self._default_subs_re]