Created
May 21, 2019 07:00
-
-
Save rbhatia46/67506bd1e2379fe1edd0b9deefffc7fb to your computer and use it in GitHub Desktop.
Converts data whose categorical values are strings into the format LIME expects, with categorical values encoded as integer labels.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
def convert_to_lime_format(X, categorical_names, col_names=None, invert=False):
    """Convert categorical columns between string labels and LIME integer codes.

    LIME expects categorical features to be encoded as integer labels. This
    function uses ``categorical_names`` (the same dictionary that has to be
    passed to LIME, to ensure consistency) to translate each listed column.

    ``col_names`` and ``invert`` make it possible to rebuild the original
    DataFrame from a numpy array in LIME format, so that it can be passed to
    a Pipeline or an sklearn OneHotEncoder.

    Args:
        X: A ``pandas.DataFrame``, or any data accepted by the
            ``pandas.DataFrame`` constructor (e.g. a 2-D numpy array).
        categorical_names: Mapping of column *position* to the ordered
            sequence of string labels for that column; the position of a
            label in the sequence is its integer code.
        col_names: Column names used when ``X`` is not already a DataFrame.
            Ignored when ``X`` is a DataFrame.
        invert: If ``False`` (default), map string labels to integer codes.
            If ``True``, map integer codes back to string labels.

    Returns:
        A new DataFrame with the listed columns translated. ``X`` itself is
        never modified. Values that are not present in the label map are
        left as NaN by ``Series.map``.
    """
    # If the data isn't a DataFrame, we need to be able to build it.
    if not isinstance(X, pd.DataFrame):
        X_lime = pd.DataFrame(X, columns=col_names)
    else:
        # Work on a copy so the caller's frame is left untouched.
        X_lime = X.copy()

    for k, v in categorical_names.items():
        if invert:
            # integer code -> string label
            label_map = dict(enumerate(v))
        else:
            # string label -> integer code
            label_map = {
                str_label: int_label for int_label, str_label in enumerate(v)
            }

        mapped = X_lime.iloc[:, k].map(label_map)

        # ``iloc[:, k] = ...`` writes into the existing column in place on
        # recent pandas, which keeps the old dtype (ints stuck in an object
        # column) or warns/raises on an incompatible dtype. ``isetitem``
        # replaces the whole column by position, so the mapped dtype is kept.
        if hasattr(X_lime, "isetitem"):
            X_lime.isetitem(k, mapped)
        else:
            # Older pandas without ``isetitem``: keep the original assignment.
            X_lime.iloc[:, k] = mapped

    return X_lime
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment