Eligijus112 · October 6, 2022 19:48
diff --git a/dummy_vars.py b/dummy_vars.py
 import pandas as pd 
 from sklearn.preprocessing import OneHotEncoder
 import numpy as np

 # Defining the function for dummy creation 
 def create_dummy(df: pd.DataFrame, dummy_var_list: list) -> tuple:
    """
    Creates dummy variables for the variables in dummy_var_list

    Every listed column is encoded with pd.get_dummies, using the column
    name as prefix and dropping the first category level. The encoded
    column is removed and its dummy columns are appended at the end of
    the dataframe. The dataframe passed in is not modified.

    Arguments
        * df - The dataframe holding the columns to encode
        * dummy_var_list - The names of the columns to encode

    Returns a tuple of the following
        * df - The dataframe with the dummy variables
        * added_features - The list of names of the dummy columns created
    """
    # Placeholder for the names of the created dummy columns
    added_features = []
    for var in dummy_var_list:
        dummy = pd.get_dummies(df[var], prefix=var, drop_first=True)

        # Adding the new features to list
        added_features.extend(dummy.columns)

        # Swapping the original column for its dummies; df is rebound to a
        # new object so the caller's dataframe is left untouched
        df = pd.concat([df, dummy], axis=1).drop(columns=var)

    # Returning the dataframe and the names of the new columns
    return df, added_features

 # Defining a custom one-hot encoding function for an already fitted encoder
 def custom_transform(
    enc: OneHotEncoder,
    x: np.ndarray,
    prefix: str
    ) -> pd.DataFrame:
    """
    Applies an already fitted one-hot encoder to the values of a single
    feature and returns the created dummies as a new dataframe

    Arguments
        * enc - The fitted encoder (assumed to be fitted on one feature)
        * x - The values of the feature; they are reshaped to one column
        * prefix - The name put in front of every category in the column names

    Returns a dataframe of dtype uint8 with one column per encoder output,
    named "<prefix>_<category>"
    """
    # Transforming the data; the values are passed as a single 2D column
    out = enc.transform(np.asarray(x).reshape(-1, 1))

    # Depending on its configuration the encoder returns either a sparse
    # matrix or a dense array; only the sparse one needs converting
    if hasattr(out, "toarray"):
        out = out.toarray()

    # The encoder names its outputs "<input feature>_<category>", where the
    # input feature is the fitted column name or "x0" when none was seen
    input_name = getattr(enc, "feature_names_in_", ["x0"])[0]
    stem = f"{input_name}_"

    # Swapping the input feature name for the prefix. Only the leading name
    # is removed, so a category that itself contains "_" is kept whole and
    # matches the "<prefix>_<category>" names produced by pd.get_dummies
    out_values = []
    for value in enc.get_feature_names_out().tolist():
        if value.startswith(stem):
            category = value[len(stem):]
        else:
            # Unknown naming scheme: keep the text after the last "_"
            category = value.split('_')[-1]
        out_values.append(f"{prefix}_{category}")

    # Converting to a dataframe with the uint8 datatype
    return pd.DataFrame(out, columns=out_values).astype('uint8')
	import pandas as pd
	from sklearn.preprocessing import OneHotEncoder
	import numpy as np

	# Defining the function for dummy creation
	def create_dummy(df: pd.DataFrame, dummy_var_list: list) -> tuple:
	    """
	    Creates dummy variables for the variables in dummy_var_list

	    Every listed column is encoded with pd.get_dummies, using the column
	    name as prefix and dropping the first category level. The encoded
	    column is removed and its dummy columns are appended at the end of
	    the dataframe. The dataframe passed in is not modified.

	    Arguments
	        * df - The dataframe holding the columns to encode
	        * dummy_var_list - The names of the columns to encode

	    Returns a tuple of the following
	        * df - The dataframe with the dummy variables
	        * added_features - The list of names of the dummy columns created
	    """
	    # Placeholder for the names of the created dummy columns
	    added_features = []
	    for var in dummy_var_list:
	        dummy = pd.get_dummies(df[var], prefix=var, drop_first=True)

	        # Adding the new features to list
	        added_features.extend(dummy.columns)

	        # Swapping the original column for its dummies; df is rebound to a
	        # new object so the caller's dataframe is left untouched
	        df = pd.concat([df, dummy], axis=1).drop(columns=var)

	    # Returning the dataframe and the names of the new columns
	    return df, added_features

	# Defining a custom one-hot encoding function for an already fitted encoder
	def custom_transform(
	    enc: OneHotEncoder,
	    x: np.ndarray,
	    prefix: str
	    ) -> pd.DataFrame:
	    """
	    Applies an already fitted one-hot encoder to the values of a single
	    feature and returns the created dummies as a new dataframe

	    Arguments
	        * enc - The fitted encoder (assumed to be fitted on one feature)
	        * x - The values of the feature; they are reshaped to one column
	        * prefix - The name put in front of every category in the column names

	    Returns a dataframe of dtype uint8 with one column per encoder output,
	    named "<prefix>_<category>"
	    """
	    # Transforming the data; the values are passed as a single 2D column
	    out = enc.transform(np.asarray(x).reshape(-1, 1))

	    # Depending on its configuration the encoder returns either a sparse
	    # matrix or a dense array; only the sparse one needs converting
	    if hasattr(out, "toarray"):
	        out = out.toarray()

	    # The encoder names its outputs "<input feature>_<category>", where the
	    # input feature is the fitted column name or "x0" when none was seen
	    input_name = getattr(enc, "feature_names_in_", ["x0"])[0]
	    stem = f"{input_name}_"

	    # Swapping the input feature name for the prefix. Only the leading name
	    # is removed, so a category that itself contains "_" is kept whole and
	    # matches the "<prefix>_<category>" names produced by pd.get_dummies
	    out_values = []
	    for value in enc.get_feature_names_out().tolist():
	        if value.startswith(stem):
	            category = value[len(stem):]
	        else:
	            # Unknown naming scheme: keep the text after the last "_"
	            category = value.split('_')[-1]
	        out_values.append(f"{prefix}_{category}")

	    # Converting to a dataframe with the uint8 datatype
	    return pd.DataFrame(out, columns=out_values).astype('uint8')