Last active
September 26, 2019 13:19
-
-
Save wellic/a679da2858bda241f5a97b223b0fe57e to your computer and use it in GitHub Desktop.
Convert float and Normalize string for converting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import unicodedata | |
import chardet | |
# (old, new) pairs applied in order by replace_some_unicode_chars().
# Fractions expand with a leading space so that "1½" becomes "1 1/2" instead
# of "11/2". None of the replacement texts contains a character that is itself
# replaced, so the result does not depend on the order of the pairs.
_UNICODE_CHAR_REPLACEMENTS = (
    ("⁄", "/"),    # U+2044 FRACTION SLASH
    ("×", "x"),    # U+00D7 MULTIPLICATION SIGN
    ("�", " "),    # U+FFFD REPLACEMENT CHARACTER
    ("”", '"'),    # U+201D RIGHT DOUBLE QUOTATION MARK
    ("′′", '"'),   # two U+2032 PRIME characters
    ("½", " 1/2"),
    ("⅓", " 1/3"),
    ("¼", " 1/4"),
    ("¾", " 3/4"),
    ("⅔", " 2/3"),
    ("⅕", " 1/5"),
    ("⅖", " 2/5"),
    ("⅗", " 3/5"),
    ("⅘", " 4/5"),
    ("⅙", " 1/6"),
    ("⅚", " 5/6"),
    ("⅐", " 1/7"),
    ("⅛", " 1/8"),
    ("⅜", " 3/8"),
    ("⅝", " 5/8"),
    ("⅞", " 7/8"),
    ("⅑", " 1/9"),
    ("⅒", " 1/10"),
)


def replace_some_unicode_chars(frac_str):
    '''
    Replace selected Unicode characters with plain ASCII equivalents.

    Vulgar fraction characters become " n/d" (with a leading space), the
    fraction slash becomes "/", the multiplication sign becomes "x", and
    the typographic inch marks become a plain double quote.

    :param frac_str: value to clean up; only ``str`` instances are processed.
    :return: the cleaned string, or ``frac_str`` unchanged when it is not a
        ``str`` (bytes, numbers, None, ...).
    '''
    if not isinstance(frac_str, str):
        return frac_str
    for old, new in _UNICODE_CHAR_REPLACEMENTS:
        frac_str = frac_str.replace(old, new)
    return frac_str
def normalize_frac_str(frac_str, c1='latin-1', c2='utf-8'):
    '''
    Normalize a raw text or bytes value before a number is parsed from it.

    Example: https://repl.it/NH2p/14

    Text input is passed through replace_some_unicode_chars() and re-encoded
    with ``c1``. When chardet does not report those bytes as Latin-1/ASCII
    they are decoded again with ``c2`` (presumably to repair UTF-8 text that
    was mis-decoded as Latin-1 -- TODO confirm). The text is then
    NFKC-normalized, every whitespace run is collapsed to one space, and
    replace_some_unicode_chars() is applied a second time to clean up what
    NFKC produced.

    :param frac_str: ``str`` or ``bytes`` to normalize.
    :param c1: codec used to turn text into bytes for encoding detection.
    :param c2: codec used to decode bytes into text.
    :return: the normalized ``str``, or ``None`` when ``frac_str`` is falsy.
    :raises TypeError: when ``frac_str`` is truthy but neither ``str`` nor
        ``bytes`` (raised by ``bytes(frac_str, c1)``).
    '''
    if not frac_str:
        return None

    text = frac_str
    # Text to fall back to when the c1 -> c2 round-trip turns out to be wrong.
    fallback = None

    if not isinstance(text, bytes):
        text = replace_some_unicode_chars(text)
        try:
            encoded = bytes(text, c1)
            detected = chardet.detect(encoded)
        except ValueError:
            # UnicodeEncodeError: the text has characters outside c1, so it
            # cannot be round-tripped; keep it as text.
            pass
        else:
            if detected['encoding'] not in ('latin-1', 'ISO-8859-1', 'ascii'):
                fallback = text
                text = encoded

    if not isinstance(text, str):
        try:
            text = text.decode(c2)
        except ValueError:
            # UnicodeDecodeError. Never return str(bytes): that yields the
            # literal "b'...'" repr, which is useless for number parsing.
            if fallback is not None:
                # The detection guess was wrong; keep the original text.
                text = fallback
            else:
                # Caller-supplied bytes: undecodable bytes become U+FFFD,
                # which replace_some_unicode_chars() turns into spaces.
                text = text.decode(c2, errors='replace')

    text = unicodedata.normalize("NFKC", text)
    text = re.sub(r'\s+', ' ', text)
    return replace_some_unicode_chars(text)
def get_first_number_from_text(frac_str):
    '''
    Parse a number (plain, decimal, or "whole num/denom" fraction) from text.

    Example: https://repl.it/NH2p/14

    The value is normalized with normalize_frac_str() and converted with
    ``float()``. If that fails, every character other than whitespace,
    digits, "/", "." and "," is replaced by a space, commas become dots and
    repeated dots collapse, and the rest is read either as a fraction with an
    optional leading whole part ("1 1/2" -> 1.5) or as a single number.

    Note that in the fallback path a "-" sign is stripped with the other
    characters, and that without a "/" all whitespace is removed before
    conversion, so separate digit groups are joined ("1 000" -> 1000.0).

    :param frac_str: ``str`` or ``bytes`` to parse.
    :return: the parsed ``float``; ``None`` when ``frac_str`` is falsy or no
        number can be parsed (including a zero denominator); ``frac_str``
        itself, unchanged, when it is a truthy non-text value such as a
        number that normalize_frac_str() cannot process.
    '''
    try:
        text = normalize_frac_str(frac_str)
    except (TypeError, AttributeError):
        # Non-text input (e.g. an int or float) is passed through untouched.
        return frac_str
    if text is None:
        return None

    try:
        return float(text)
    except ValueError:
        pass  # not a plain number; fall through to the tolerant parser

    cleaned = re.sub(r'[^\s\d\/.,]+', ' ', text)
    cleaned = re.sub(r'[,]+', '.', cleaned)
    cleaned = re.sub(r'[.]+', '.', cleaned)
    cleaned = cleaned.strip()

    try:
        if '/' not in cleaned:
            return float(re.sub(r'\s+', '', cleaned))

        # More than one "/" fails the unpacking with ValueError -> None.
        num, denom = cleaned.split('/')
        num = num.strip()
        denom = denom.strip()

        whole = 0
        parts = re.split(r'\s+', num)
        if len(parts) == 2:
            # "<whole> <numerator>": the numerator is the second token even
            # when the first one turns out not to be a number.
            leading, num = parts
            try:
                whole = float(leading)
            except ValueError:
                whole = 0

        # The whole part can never be negative here because "-" was stripped
        # above, so the fraction is always added.
        return whole + float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        return None
def get_unit_str_from_raw(raw_str):
    '''
    Convert a raw value to a stripped unit string.

    :param raw_str: any value; it is converted with ``str()`` unless None.
    :return: the stripped text, or ``None`` when ``raw_str`` is None or the
        stripped text is empty.
    '''
    if raw_str is None:
        return None
    stripped = str(raw_str).strip()
    return stripped if stripped else None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Result: