jbn · October 16, 2015 20:52
diff --git a/nan_to_na.py b/nan_to_na.py
 import os
 import numpy as np
 import pandas as pd
 import rpy2

 from rpy2 import robjects
 from rpy2.robjects.packages import importr


 # Handle to the R package "Amelia", loaded through rpy2 when this module is
 # imported.
 AMELIA = importr("Amelia")

 # numpy dtype -> the R "NA" constant used for a missing value of that type.
 NAN_TO_NA_MAP = {
     np.dtype('float'): robjects.NA_Real,
     np.dtype('int'): robjects.NA_Integer,
     np.dtype('O'): robjects.NA_Character,
 }

 # numpy dtype -> the rpy2 vector constructor used to build the R column.
 DTYPE_TO_F = {
     np.dtype('float'): robjects.FloatVector,
     np.dtype('int'): robjects.IntVector,
     np.dtype('O'): robjects.StrVector,
 }


 def can_be_int(xs):
     """Return True if every non-NaN element of ``xs`` is a whole number.

     NaN entries are skipped, so an empty or all-NaN input yields True.
     A value that cannot be tested or converted (for example a string,
     ``None`` or an infinity) makes the result False.

     Args:
         xs: Iterable of scalar values to inspect.

     Returns:
         bool: True when each non-NaN value ``x`` satisfies ``int(x) == x``.
     """
     for x in xs:
         try:
             if np.isnan(x):
                 continue
             # Compare explicitly instead of using ``assert``: assertions are
             # removed under ``python -O``, which would make every value look
             # integral.
             if int(x) != x:
                 return False
         except (TypeError, ValueError, OverflowError):
             # np.isnan() rejects non-numeric input (TypeError); int() rejects
             # infinities (OverflowError) and unconvertible values.
             return False
     return True


 # The conversion from a pandas DataFrame to an R data.frame doesn't work well
 # for Amelia imputation. Amelia imputes given an NA, not a NaN. In R, the
 # former indicates a missing value, the latter, a bad operation. Unfortunately,
 # pandas doesn't distinguish between the two. So, this function builds an R
 # data.frame from a pandas one, assuming you want NA, not NaN.
 def df_py_nan_to_r_na(df):
     """Build an R data.frame from a pandas DataFrame, writing NA for missing.

     Every entry that ``isnull()`` flags in a column is replaced by the R NA
     constant registered for that column's dtype in ``NAN_TO_NA_MAP``; the
     column is then wrapped in the matching rpy2 vector from ``DTYPE_TO_F``.

     Args:
         df: pandas DataFrame to convert.

     Returns:
         robjects.DataFrame holding one R vector per column of ``df``.

     Raises:
         KeyError: if a column's dtype has no entry in the lookup tables.
     """
     float_dtype = np.dtype('float')
     int_dtype = np.dtype('int')

     columns = {}
     for name in df.columns:
         series = df[name]
         kind = series.dtype

         # A float column whose present values are all whole numbers is sent
         # to R as an integer vector (presumably it is only float because it
         # holds NaN -- confirm against the data source).
         if kind == float_dtype and can_be_int(series):
             kind = int_dtype

         missing = NAN_TO_NA_MAP[kind]
         values = []
         for is_missing, value in zip(series.isnull(), series):
             values.append(missing if is_missing else value)

         to_r_vector = DTYPE_TO_F[kind]
         columns[name] = to_r_vector(values)

     return robjects.DataFrame(columns)
	import os
	import numpy as np
	import pandas as pd
	import rpy2

	from rpy2 import robjects
	from rpy2.robjects.packages import importr


	# Handle to the R package "Amelia", loaded through rpy2 when this module is
	# imported.
	AMELIA = importr("Amelia")

	# numpy dtype -> the R "NA" constant used for a missing value of that type.
	NAN_TO_NA_MAP = {
	    np.dtype('float'): robjects.NA_Real,
	    np.dtype('int'): robjects.NA_Integer,
	    np.dtype('O'): robjects.NA_Character,
	}

	# numpy dtype -> the rpy2 vector constructor used to build the R column.
	DTYPE_TO_F = {
	    np.dtype('float'): robjects.FloatVector,
	    np.dtype('int'): robjects.IntVector,
	    np.dtype('O'): robjects.StrVector,
	}


	def can_be_int(xs):
	    """Return True if every non-NaN element of ``xs`` is a whole number.

	    NaN entries are skipped, so an empty or all-NaN input yields True.
	    A value that cannot be tested or converted (for example a string,
	    ``None`` or an infinity) makes the result False.

	    Args:
	        xs: Iterable of scalar values to inspect.

	    Returns:
	        bool: True when each non-NaN value ``x`` satisfies ``int(x) == x``.
	    """
	    for x in xs:
	        try:
	            if np.isnan(x):
	                continue
	            # Compare explicitly instead of using ``assert``: assertions are
	            # removed under ``python -O``, which would make every value look
	            # integral.
	            if int(x) != x:
	                return False
	        except (TypeError, ValueError, OverflowError):
	            # np.isnan() rejects non-numeric input (TypeError); int() rejects
	            # infinities (OverflowError) and unconvertible values.
	            return False
	    return True


	# The conversion from a pandas DataFrame to an R data.frame doesn't work well
	# for Amelia imputation. Amelia imputes given an NA, not a NaN. In R, the
	# former indicates a missing value, the latter, a bad operation. Unfortunately,
	# pandas doesn't distinguish between the two. So, this function builds an R
	# data.frame from a pandas one, assuming you want NA, not NaN.
	def df_py_nan_to_r_na(df):
	    """Build an R data.frame from a pandas DataFrame, writing NA for missing.

	    Every entry that ``isnull()`` flags in a column is replaced by the R NA
	    constant registered for that column's dtype in ``NAN_TO_NA_MAP``; the
	    column is then wrapped in the matching rpy2 vector from ``DTYPE_TO_F``.

	    Args:
	        df: pandas DataFrame to convert.

	    Returns:
	        robjects.DataFrame holding one R vector per column of ``df``.

	    Raises:
	        KeyError: if a column's dtype has no entry in the lookup tables.
	    """
	    float_dtype = np.dtype('float')
	    int_dtype = np.dtype('int')

	    columns = {}
	    for name in df.columns:
	        series = df[name]
	        kind = series.dtype

	        # A float column whose present values are all whole numbers is sent
	        # to R as an integer vector (presumably it is only float because it
	        # holds NaN -- confirm against the data source).
	        if kind == float_dtype and can_be_int(series):
	            kind = int_dtype

	        missing = NAN_TO_NA_MAP[kind]
	        values = []
	        for is_missing, value in zip(series.isnull(), series):
	            values.append(missing if is_missing else value)

	        to_r_vector = DTYPE_TO_F[kind]
	        columns[name] = to_r_vector(values)

	    return robjects.DataFrame(columns)