Skip to content

Instantly share code, notes, and snippets.

@wellic
Last active September 26, 2019 13:19
Show Gist options
  • Save wellic/a679da2858bda241f5a97b223b0fe57e to your computer and use it in GitHub Desktop.
Save wellic/a679da2858bda241f5a97b223b0fe57e to your computer and use it in GitHub Desktop.
Convert text to float and normalize strings before conversion
import re
import unicodedata
import chardet
# Single-character substitutions, applied in one pass by str.translate.
# Every fraction replacement starts with a space so that a glyph glued to a
# whole number ("13⅝") becomes a mixed number ("13 5/8") and not "135/8".
_UNICODE_CHAR_TABLE = str.maketrans({
    "⁄": "/",  # U+2044 FRACTION SLASH
    "×": "x",  # U+00D7 MULTIPLICATION SIGN
    "\ufffd": " ",  # U+FFFD REPLACEMENT CHARACTER
    "”": '"',  # U+201D RIGHT DOUBLE QUOTATION MARK
    "½": " 1/2",
    "¼": " 1/4",
    "¾": " 3/4",
    "⅐": " 1/7",
    "⅑": " 1/9",
    "⅒": " 1/10",
    "⅓": " 1/3",
    "⅔": " 2/3",
    "⅕": " 1/5",
    "⅖": " 2/5",
    "⅗": " 3/5",
    "⅘": " 4/5",
    "⅙": " 1/6",
    "⅚": " 5/6",
    "⅛": " 1/8",
    "⅜": " 3/8",
    "⅝": " 5/8",
    "⅞": " 7/8",
})


def replace_some_unicode_chars(frac_str):
    '''
    Replace fraction glyphs and look-alike symbols with plain ASCII text.

    Vulgar-fraction characters become " n/d" (with a leading space), the
    fraction slash becomes "/", the multiplication sign becomes "x", the
    Unicode replacement character becomes a space, and a right double
    quotation mark or a pair of prime characters becomes '"'.

    :param frac_str: value to clean up
    :return: the converted string; any value that is not a ``str`` is
        returned unchanged
    '''
    if not isinstance(frac_str, str):
        return frac_str
    # The double prime is spelled as two U+2032 characters, so it cannot go
    # through the one-character translation table.
    return frac_str.translate(_UNICODE_CHAR_TABLE).replace("′′", '"')
def normalize_frac_str(frac_str, c1='latin-1', c2='utf-8'):
    '''
    Normalize text holding a measurement or fraction into plain text.

    Example: https://repl.it/NH2p/14

    Fraction glyphs and look-alike symbols are replaced through
    ``replace_some_unicode_chars``, the text is NFKC-normalized and every run
    of whitespace is collapsed to one space (the result is not stripped).
    A ``str`` whose ``c1`` encoding is not detected as Latin-1/ASCII is
    treated as wrongly decoded text and re-decoded with ``c2``.

    :param frac_str: text to normalize, as ``str`` or ``bytes``
    :param c1: codec used to turn a ``str`` back into bytes for detection
    :param c2: codec used to decode bytes
    :return: the normalized ``str``, or ``None`` when ``frac_str`` is falsy
    :raises TypeError: for a truthy value that is neither ``str`` nor
        ``bytes`` (``bytes()`` rejects it)
    '''
    if not frac_str:
        return None

    if isinstance(frac_str, bytes):
        # Undecodable bytes turn into U+FFFD rather than aborting; returning
        # str(bytes) here would leak a "b'\\x..'" repr whose hex digits get
        # mistaken for part of the number further down the pipeline.
        text = frac_str.decode(c2, errors='replace')
    else:
        text = replace_some_unicode_chars(frac_str)
        try:
            raw = bytes(text, c1)
            if chardet.detect(raw)['encoding'] not in (
                    'latin-1', 'ISO-8859-1', 'ascii'):
                text = raw.decode(c2)
        except ValueError:
            # UnicodeEncodeError: the text has characters outside c1, so it
            # is genuine Unicode.  UnicodeDecodeError: the bytes are not
            # valid c2.  Either way keep the text as it is.
            pass

    # Swap fraction glyphs BEFORE NFKC: NFKC alone turns "13⅝" into "135⁄8",
    # silently fusing the whole part with the numerator.
    text = replace_some_unicode_chars(text)
    text = unicodedata.normalize("NFKC", text)
    text = re.sub(r'\s+', ' ', text)
    # NFKC can itself emit a fraction slash or paired primes; clean those too.
    return replace_some_unicode_chars(text)
def get_first_number_from_text(frac_str):
    '''
    Parse a number, optionally a mixed number or fraction, out of text.

    Example: https://repl.it/NH2p/14

    The text is normalized with ``normalize_frac_str`` and passed to
    ``float``.  If that fails, everything except digits, whitespace, "/",
    "." and "," is blanked out, commas become decimal points, and the rest
    is read either as ``[whole] numerator/denominator`` or, with the
    remaining whitespace removed, as a single decimal number.

    Limitations: the fallback drops any sign, and text containing more than
    one "/" or extra digits after the denominator yields ``None``.

    :param frac_str: text (``str`` or ``bytes``) or an ``int``/``float``
    :return: the parsed value as ``float``, or ``None`` when nothing could
        be parsed (including empty input and a zero denominator)
    '''
    # Numbers are already parsed.  Handling them up front also keeps 0 from
    # being mistaken for "empty input".
    if isinstance(frac_str, (int, float)) and not isinstance(frac_str, bool):
        return float(frac_str)

    try:
        text = normalize_frac_str(frac_str)
    except TypeError:
        # Raised for input that is neither str nor bytes.
        return None
    if text is None:
        return None

    try:
        return float(text)
    except ValueError:
        pass  # not a bare number; fall through to the tolerant parser

    cleaned = re.sub(r'[^\s\d/.,]+', ' ', text)
    cleaned = re.sub(r',+', '.', cleaned)
    cleaned = re.sub(r'\.+', '.', cleaned)
    cleaned = cleaned.strip()

    try:
        if '/' not in cleaned:
            return float(re.sub(r'\s+', '', cleaned))

        # Unpacking raises ValueError when there is more than one "/".
        num, denom = cleaned.split('/')
        num = num.strip()
        denom = denom.strip()
        try:
            # "13 5" -> whole part "13", numerator "5".
            leading, num = re.split(r'\s+', num)
            whole = float(leading)
        except ValueError:
            # No (usable) whole part: plain fraction.
            whole = 0
        return whole + float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        return None
def get_unit_str_from_raw(raw_str):
    '''
    Return the unit text with surrounding whitespace removed.

    :param raw_str: raw value; anything other than ``None`` is converted
        with ``str()`` first
    :return: the stripped text, or ``None`` when ``raw_str`` is ``None`` or
        the stripped text is empty
    '''
    if raw_str is None:
        return None
    # An empty string is falsy, so blank input collapses to None.
    return str(raw_str).strip() or None
@wellic
Copy link
Author

wellic commented Oct 24, 2017

Result:

3 1/4'' x 13 5/8 | 3 1/4'' x 13 5/8
3 1/4'' x 13 5/8 | 3 1/4'' � 13 5/8
3 1/4'' x 13 5/8 | b"3 1/4'' \xc3\x97 13 \xe2\x85\x9d"
3 1/4'' x 13 5/8 | 3 1/4'' x 13 �
3 1/4'' x 13 5/8 | 3 1/4'' × 13 ⅝

Convert to float
13.625 | � 13 5/8 incehs
13.625 | x 13 5/8 incehs
13.625 | b' \xc3\x97 13 \xe2\x85\x9d incehs'
13.625 | x 13 � incehs
13.625 | × 13 ⅝ incehs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment