This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| prediction1, bias1, contributions1 = ti.predict(rf, np.array([selected_df[0]]), joint_contribution=True) | |
| prediction2, bias2, contributions2 = ti.predict(rf, np.array([selected_df[1]]), joint_contribution=True) | |
| aggregated_contributions1 = utils.aggregated_contribution(contributions1) | |
| aggregated_contributions2 = utils.aggregated_contribution(contributions2) | |
| res = [] | |
| for k in set(aggregated_contributions1.keys()).union( | |
| set(aggregated_contributions2.keys())): | |
| res.append(([X_train.columns[index] for index in k] , |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import lime | |
| import lime.lime_tabular | |
# Build a LIME explainer for tabular regression from the training matrix.
# Column 3 is declared categorical; it is expected to be the binary CHAS
# feature (TODO confirm against the column order of X_train).
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    mode='regression',
    feature_names=list(X_train.columns),
    categorical_features=[3],
    # LIME expects a mapping {column index: [name of value 0, name of value 1, ...]}.
    # A bare list such as ['CHAS'] is never matched against a column index, so
    # it was silently ignored. The names below keep the rendered labels
    # ("CHAS=0" / "CHAS=1") unchanged; this assumes CHAS only takes the
    # values 0 and 1.
    categorical_names={3: ['0', '1']},
    discretize_continuous=True,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| mat = scipy.io.loadmat('cover.mat') | |
| X = pd.DataFrame(mat['X']) | |
| y = pd.Series([x[0] for x in mat['y']]) | |
| # define % of anomalies | |
| anomalies_ratio = 0.009 | |
| if_sk = IsolationForest(n_estimators = 100, | |
| max_samples = 256, | |
| contamination = anomalies_ratio, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit the forest from the `iso` module on the raw feature matrix.
# NOTE(review): ExtensionLevel=0 presumably selects the non-extended
# (standard) Isolation Forest variant - confirm against the library docs.
feature_matrix = X.values
if_eif = iso.iForest(
    feature_matrix,
    ntrees=100,
    sample_size=256,
    ExtensionLevel=0,
)

# calculate anomaly scores for the same observations the forest was built on
anomaly_scores = if_eif.compute_paths(X_in=feature_matrix)

# sort the scores: np.argsort returns the positions that would order the
# scores ascending, so this holds row indices rather than score values
anomaly_scores_sorted = np.argsort(anomaly_scores)
| # retrieve indices of anomalous observations |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def pp_plot(x, dist, line=True, ax=None): | |
| ''' | |
| Function for comparing empirical data to a theoretical distribution by using a P-P plot. | |
| Params: | |
| x - empirical data | |
| dist - distribution object from scipy.stats; for example scipy.stats.norm(0, 1) | |
| line - boolean; specify if the reference line (y=x) should be drawn on the plot | |
| ax - specified ax for subplots, None is standalone | |
| ''' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Side-by-side P-P plots of the same sample against N(0, 1):
# statsmodels' implementation on the left, the custom pp_plot helper on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 8))
fig.suptitle('PP-plots', fontsize=22)
statsmodels_ax, custom_ax = ax

# left panel: statsmodels ProbPlot with a 45-degree reference line
statsmodels_probplot = sm.ProbPlot(rv_norm, scs.norm, loc=0, scale=1)
statsmodels_probplot.ppplot(line='45', ax=statsmodels_ax)
statsmodels_ax.set_title('Statsmodels', fontsize=16)

# right panel: the pp_plot helper drawn against the frozen N(0, 1) distribution
pp_plot(rv_norm, scs.norm(loc=0, scale=1), ax=custom_ax)
custom_ax.set_title('pp_plot', fontsize=16)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two panels: a Q-Q plot of the skew-normal sample (left) and normalised
# histograms of both samples (right).
fig, ax = plt.subplots(1, 2, figsize=(15, 8))
qq_ax, hist_ax = ax

# left panel: ProbPlot is given no distribution here, so statsmodels' default
# reference distribution is used; line='s' adds a standardized reference line
sm.ProbPlot(rv_skew_norm).qqplot(line='s', ax=qq_ax)
qq_ax.set_title('Q-Q plot (vs. Normal)', fontsize=16)

# right panel: overlay both samples as density-normalised histograms without a KDE curve.
# NOTE: sns.distplot is deprecated in recent seaborn releases; sns.histplot
# with stat='density' is the documented replacement when upgrading.
hist_opts = dict(kde=False, norm_hist=True, ax=hist_ax)
sns.distplot(rv_std_norm, color='blue', label='Standard Normal', **hist_opts)
sns.distplot(rv_skew_norm, color='red', label='Skew Normal $\\alpha = 5$', **hist_opts)

# title and legend are set on the histogram panel explicitly; the original
# reached the same (last-created) axes through the pyplot state machine
hist_ax.set_title('Comparison of distributions', fontsize=16)
hist_ax.legend();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Q-Q plot of the skew-normal sample against scipy's skewnorm distribution;
# distargs carries the distribution argument (5) passed on to skewnorm.
skewnorm_probplot = sm.ProbPlot(rv_skew_norm, scs.skewnorm, distargs=(5,))
skewnorm_probplot.qqplot(line='s');

# no axes were passed above, so the title goes to pyplot's current axes
plt.title('Q-Q plot (vs. Skew Normal)', fontsize=16);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two panels: a two-sample Q-Q plot (left) and normalised histograms of both
# samples (right).
fig, ax = plt.subplots(1, 2, figsize=(15, 8))
qq_ax, hist_ax = ax

# left panel: compare the quantiles of the two samples directly; fit=False
# means no distribution parameters are estimated from the data
pp_x = sm.ProbPlot(rv_skew_norm, fit=False)
pp_y = sm.ProbPlot(rv_std_norm, fit=False)
fig = pp_x.qqplot(line='s', other=pp_y, ax=qq_ax)
qq_ax.set_title('Q-Q plot (vs. Standard Normal)', fontsize=16)

# right panel: overlay both samples as density-normalised histograms without a KDE curve.
# NOTE: sns.distplot is deprecated in recent seaborn releases; sns.histplot
# with stat='density' is the documented replacement when upgrading.
hist_opts = dict(kde=False, norm_hist=True, ax=hist_ax)
sns.distplot(rv_std_norm, color='blue', label='Standard Normal', **hist_opts)
sns.distplot(rv_skew_norm, color='red', label='Skew Normal $\\alpha = 5$', **hist_opts)

# title the histogram panel explicitly; the original reached the same
# (last-created) axes through the pyplot state machine
hist_ax.set_title('Comparison of distributions', fontsize=16)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from sklearn.datasets import load_boston | |
# load data
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run.
boston = load_boston()

# features: all Boston columns except CHAS, which is dropped up front
X = pd.DataFrame(boston.data, columns=boston.feature_names).drop(columns='CHAS')

# target: stored as a Series named MEDV
y = pd.Series(boston.target, name='MEDV')
| # inspect data |