Last active
September 4, 2019 18:46
-
-
Save jthielen/57026fee4dc6599a3fb646000b8ca748 to your computer and use it in GitHub Desktop.
Idea for an extensible unit string preprocessor for pint
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class StringPreprocessor(object):
    """Extensible preprocessor for unit expression strings.

    Calling an instance rewrites a string in three stages, in this order:

    1. literal string replacements (``str.replace``),
    2. regex substitutions,
    3. conversion of "pretty" characters (superscript digits, superscript
       minus and the middle dot) to their ASCII spelling.

    Every instance starts with its own copy of the class-level defaults and
    can be extended with `add_replacement()` and `add_regex_sub()`.
    """

    # Default (current, replacement) pairs applied with ``str.replace``.
    _default_replacements = [
        (",", ""),
        (" per ", "/"),
        ("^", "**"),
    ]

    # Default (pattern, replacement) regex pairs.  Each ``{}`` in a pattern is
    # filled with the identifier regex when compiled in `reset_regex_subs()`.
    _default_subs_re = [
        ('\N{DEGREE SIGN}', " degree"),
        (r"([\w\.\-\+\*\\\^])\s+", r"\1 "),  # merge multiple spaces
        (r"({}) squared", r"\1**2"),  # Handle square and cube
        (r"({}) cubed", r"\1**3"),
        (r"cubic ({})", r"\1**3"),
        (r"square ({})", r"\1**2"),
        (r"sq ({})", r"\1**2"),
        # Handle numberLetter for multiplication.
        # NOTE(review): the class ``[e|E]`` also matches a literal ``|``;
        # left unchanged to preserve existing behavior.
        (r"\b([0-9]+\.?[0-9]*)(?=[e|E][a-zA-Z]|[a-df-zA-DF-Z])", r"\1*"),
        (r"([\w\.\-])\s+(?=\w)", r"\1*"),  # Handle space for multiplication
    ]

    # Translation table (code point -> ASCII text) for pretty-format
    # characters, in the mapping form accepted by ``str.translate``.
    _pretty_table = {
        ord(pretty): plain
        for pretty, plain in zip('⁰¹²³⁴⁵⁶⁷⁸⁹·⁻', '0123456789*-')
    }
    # A run of superscript digits with an optional leading superscript minus.
    _pretty_exp_re = re.compile(r"⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+(?:\.[⁰¹²³⁴⁵⁶⁷⁸⁹]*)?")

    def __init__(self):
        """Compile the default regexes and load the default replacements."""
        self.reset_regex_subs()
        self.reset_replacements()

    def __call__(self, input_string):
        """Preprocess input string according to defined replacements and regexes.

        Processing occurs in the following order:

        1) String replacements as defined by tuples in the replacements
        2) Regex substitutions (both default and those added by
           `add_regex_sub()`)
        3) Pretty text format character handling

        Returns the rewritten string.
        """
        # String replacements
        for current, replacement in self._replacements:
            input_string = input_string.replace(current, replacement)

        # Regex substitutions
        for pattern, replacement in self._compiled_subs_re:
            input_string = pattern.sub(replacement, input_string)

        # Pretty exponents are rewritten in one pass, match by match.  A
        # global ``str.replace`` per match would corrupt longer exponents that
        # contain a shorter one (e.g. the "²" inside "⁻²").
        input_string = self._pretty_exp_re.sub(
            self._expand_pretty_exponent, input_string
        )

        # Remaining pretty characters (e.g. the middle dot) map one-to-one.
        return input_string.translate(self._pretty_table)

    def _expand_pretty_exponent(self, match):
        """Return the ``**<exponent>`` spelling of one pretty-exponent match."""
        return '**' + match.group(0).translate(self._pretty_table)

    def add_replacement(self, current, replacement):
        """Add given replacement pair to this instance's replacement list."""
        self._replacements.append((current, replacement))

    def reset_replacements(self):
        """Reset replacement list to default."""
        # Copy so that `add_replacement()` never mutates the class-level
        # defaults shared by all instances.
        self._replacements = list(self._default_replacements)

    def add_regex_sub(self, regex_string, replacement):
        """Compile the given regex_string and append it to the regex sub list.

        The pattern is compiled exactly as given; unlike the defaults, a
        ``{}`` placeholder is not expanded.
        """
        self._compiled_subs_re.append((re.compile(regex_string), replacement))

    def reset_regex_subs(self):
        """Reset regex substitution list to default."""
        identifier = r"[_a-zA-Z][_a-zA-Z0-9]*"
        self._compiled_subs_re = [
            (re.compile(pattern.format(identifier)), replacement)
            for pattern, replacement in self._default_subs_re
        ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment