jeremy-rutman · October 9, 2019 10:37
diff --git a/read_csv_multiformat.py b/read_csv_multiformat.py
 # Uses chardet to detect the encoding when none is specified, sampling the first kB of the file.
 # This avoids the unfortunate situation where reading a UTF-16 file as UTF-8 generally does not raise an error
 # but silently garbles the data.

 import chardet
 import pandas as pd

 def read_csv_multiformat(f, **kwargs):
     """Read a CSV with ``pd.read_csv``, guessing the encoding when none is given.

     If the caller does not pass ``encoding``, the first 1000 bytes of ``f``
     are handed to ``chardet.detect`` and, when the reported confidence is
     above 0.6, the detected encoding is forwarded to ``pd.read_csv``.

     Args:
         f: Path of the CSV file. Inputs that cannot be opened as a local
             file (for example an already-open buffer) skip detection and
             are passed to ``pd.read_csv`` unchanged.
         **kwargs: Forwarded verbatim to ``pd.read_csv``. An explicit
             ``encoding`` entry disables detection.

     Returns:
         The DataFrame produced by ``pd.read_csv``.
     """
     if 'encoding' not in kwargs:
         try:
             with open(f, 'rb') as fp:
                 bin_dat = fp.read(1000)
         except (TypeError, OSError):
             # ``f`` is not a readable local path (e.g. a buffer or a URL):
             # skip detection and let pandas handle or reject the input.
             bin_dat = None

         if bin_dat is not None:
             char_info = chardet.detect(bin_dat)
             print(char_info)  # diagnostic output of the detection result
             encoding = char_info.get('encoding')
             confidence = char_info.get('confidence')
             # Only trust a guess that names an encoding with enough confidence;
             # otherwise fall back to the pandas default.
             if encoding is not None and confidence is not None and confidence > 0.6:
                 kwargs['encoding'] = encoding
     return pd.read_csv(f, **kwargs)

 # Demo: load a pipe-delimited sample file and preview its first rows.
 f = 'utf16.csv'
 df = read_csv_multiformat(f, sep='|')
 print(df.head())
	# Uses chardet to detect the encoding when none is specified, sampling the first kB of the file.
	# This avoids the unfortunate situation where reading a UTF-16 file as UTF-8 generally does not raise an error
	# but silently garbles the data.

	import chardet
	import pandas as pd

	def read_csv_multiformat(f, **kwargs):
		"""Read a CSV with ``pd.read_csv``, guessing the encoding when none is given.

		If the caller does not pass ``encoding``, the first 1000 bytes of ``f``
		are handed to ``chardet.detect`` and, when the reported confidence is
		above 0.6, the detected encoding is forwarded to ``pd.read_csv``.

		Args:
			f: Path of the CSV file. Inputs that cannot be opened as a local
				file (for example an already-open buffer) skip detection and
				are passed to ``pd.read_csv`` unchanged.
			**kwargs: Forwarded verbatim to ``pd.read_csv``. An explicit
				``encoding`` entry disables detection.

		Returns:
			The DataFrame produced by ``pd.read_csv``.
		"""
		if 'encoding' not in kwargs:
			try:
				with open(f, 'rb') as fp:
					bin_dat = fp.read(1000)
			except (TypeError, OSError):
				# ``f`` is not a readable local path (e.g. a buffer or a URL):
				# skip detection and let pandas handle or reject the input.
				bin_dat = None

			if bin_dat is not None:
				char_info = chardet.detect(bin_dat)
				print(char_info)  # diagnostic output of the detection result
				encoding = char_info.get('encoding')
				confidence = char_info.get('confidence')
				# Only trust a guess that names an encoding with enough confidence;
				# otherwise fall back to the pandas default.
				if encoding is not None and confidence is not None and confidence > 0.6:
					kwargs['encoding'] = encoding
		return pd.read_csv(f, **kwargs)

	# Demo: load a pipe-delimited sample file and preview its first rows.
	f = 'utf16.csv'
	# A plain '|' is a literal one-character separator. The previous '\|' was an
	# invalid string escape and a two-character separator, which pandas
	# interprets as a regular expression and parses with the slower Python engine.
	df = read_csv_multiformat(f, sep='|')
	print(df.head())
No results found