Created
December 31, 2022 17:17
-
-
Save Stefan-Code/f44e78b57a1ba93cbc3b095205197906 to your computer and use it in GitHub Desktop.
Convert strings with SI prefixes to a floating point number
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Power-of-ten exponent for every supported SI prefix. ``None`` stands for
# "no prefix" (scaling of 1). Both Unicode spellings of micro are accepted:
# U+00B5 (MICRO SIGN) and U+03BC (GREEK SMALL LETTER MU), plus ASCII 'u'.
_SI_EXPONENTS = {
    'y': -24, 'z': -21, 'a': -18, 'f': -15, 'p': -12, 'n': -9,
    'u': -6, '\u00b5': -6, '\u03bc': -6,
    'm': -3,
    None: 0,
    'k': 3, 'M': 6, 'G': 9, 'T': 12, 'P': 15, 'E': 18, 'Z': 21, 'Y': 24,
}

# <value>  : optionally signed decimal number ("1", "1.5", ".5", "1.")
# <prefix> : a single SI prefix character, optionally preceded by whitespace
# <unit>   : any ASCII letters following the prefix (parsed but not used)
# The pattern is intentionally not anchored at the end, so trailing text
# after the number/prefix/unit is ignored.
_SI_REGEX = re.compile(
    r'\s*(?P<value>[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+))'
    r'(?:\s*(?P<si_prefix>[yzafpnu\u00b5\u03bcmkMGTPEZY])(?P<unit>[A-Za-z]*))?'
)


def si_parse(s):
    """
    Convert a string `s` with SI prefix scaling to a float.

    The string must start (after optional whitespace) with a decimal
    number, optionally followed by an SI prefix and a unit. The first
    letter after the number is always interpreted as the prefix, so a
    lone ``m`` means "milli", not "metre".

    Example:
    >>> si_parse('1k')
    1000.0
    >>> si_parse('1000 mm')
    1.0
    >>> si_parse('1000 m')  # m is interpreted as prefix, not unit!
    1.0
    >>> si_parse('2TB')
    2000000000000.0
    >>> si_parse('-1.5μm')
    -1.5e-06

    :param s: text such as ``'4.7 kOhm'`` or ``'10n'``.
    :return: the numeric value scaled by the SI prefix, as a float.
    :raises ValueError: if `s` does not start with a parsable number.
    """
    match = _SI_REGEX.match(s)
    if match is None:
        raise ValueError(
            'could not parse {!r} as a number with SI prefix'.format(s))
    value = float(match.group('value'))
    # When no prefix is present the group is None, which maps to exponent 0
    # (scaling == 1). Prefixes step by factors of 1000, hence multiples of 3.
    scaling = 10 ** _SI_EXPONENTS[match.group('si_prefix')]
    return scaling * value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment