Last active
June 6, 2018 18:47
-
-
Save robertmaxwilliams/4bcfdfe347488a9fe57b69c8de6d99f7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier\n", | |
"from sklearn.gaussian_process import GaussianProcessRegressor\n", | |
"from sklearn.kernel_ridge import KernelRidge\n", | |
"from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression\n", | |
"from sklearn.neural_network import MLPRegressor, MLPClassifier\n", | |
"from sklearn.svm import SVC, SVR\n", | |
"from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier\n", | |
"from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier\n", | |
"from sklearn.gaussian_process import GaussianProcessRegressor\n", | |
"from sklearn.kernel_ridge import KernelRidge\n", | |
"from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression\n", | |
"from sklearn.neural_network import MLPRegressor, MLPClassifier\n", | |
"from sklearn.svm import SVC, SVR\n", | |
"from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['__abstractmethods__',\n", | |
" '__class__',\n", | |
" '__delattr__',\n", | |
" '__dict__',\n", | |
" '__dir__',\n", | |
" '__doc__',\n", | |
" '__eq__',\n", | |
" '__format__',\n", | |
" '__ge__',\n", | |
" '__getattribute__',\n", | |
" '__getstate__',\n", | |
" '__gt__',\n", | |
" '__hash__',\n", | |
" '__init__',\n", | |
" '__init_subclass__',\n", | |
" '__le__',\n", | |
" '__lt__',\n", | |
" '__module__',\n", | |
" '__ne__',\n", | |
" '__new__',\n", | |
" '__reduce__',\n", | |
" '__reduce_ex__',\n", | |
" '__repr__',\n", | |
" '__setattr__',\n", | |
" '__setstate__',\n", | |
" '__sizeof__',\n", | |
" '__str__',\n", | |
" '__subclasshook__',\n", | |
" '__weakref__',\n", | |
" '_abc_cache',\n", | |
" '_abc_negative_cache',\n", | |
" '_abc_negative_cache_version',\n", | |
" '_abc_registry',\n", | |
" '_estimator_type',\n", | |
" '_get_param_names',\n", | |
" '_validate_X_predict',\n", | |
" 'apply',\n", | |
" 'decision_path',\n", | |
" 'feature_importances_',\n", | |
" 'fit',\n", | |
" 'get_params',\n", | |
" 'predict',\n", | |
" 'predict_log_proba',\n", | |
" 'predict_proba',\n", | |
" 'score',\n", | |
" 'set_params']" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dir(DecisionTreeClassifier)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'predict_log_proba', 'predict_proba'}" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"set(dir(DecisionTreeClassifier)) - set(dir(DecisionTreeRegressor))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"set()" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"set(dir(DecisionTreeRegressor)) - set(dir(DecisionTreeClassifier))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"classy = [ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier, MLPClassifier, LogisticRegression, SVC, DecisionTreeClassifier]\n", | |
"reg = [RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, GaussianProcessRegressor, KernelRidge,\n", | |
" MLPRegressor, SVR, DecisionTreeRegressor, LinearRegression, Lasso]\n", | |
"\n", | |
"from functools import reduce\n", | |
"\n", | |
"class_things = reduce((lambda a, b: a&b), (set(dir(a())) for a in classy))\n", | |
"\n", | |
"reg_things = reduce((lambda a, b: a&b), (set(dir(a())) for a in reg))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'__class__',\n", | |
" '__delattr__',\n", | |
" '__dict__',\n", | |
" '__dir__',\n", | |
" '__doc__',\n", | |
" '__eq__',\n", | |
" '__format__',\n", | |
" '__ge__',\n", | |
" '__getattribute__',\n", | |
" '__getstate__',\n", | |
" '__gt__',\n", | |
" '__hash__',\n", | |
" '__init__',\n", | |
" '__init_subclass__',\n", | |
" '__le__',\n", | |
" '__lt__',\n", | |
" '__module__',\n", | |
" '__ne__',\n", | |
" '__new__',\n", | |
" '__reduce__',\n", | |
" '__reduce_ex__',\n", | |
" '__repr__',\n", | |
" '__setattr__',\n", | |
" '__setstate__',\n", | |
" '__sizeof__',\n", | |
" '__str__',\n", | |
" '__subclasshook__',\n", | |
" '__weakref__',\n", | |
" '_estimator_type',\n", | |
" '_get_param_names',\n", | |
" 'fit',\n", | |
" 'get_params',\n", | |
" 'predict',\n", | |
" 'predict_log_proba',\n", | |
" 'predict_proba',\n", | |
" 'random_state',\n", | |
" 'score',\n", | |
" 'set_params'}" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"class_things" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'__class__',\n", | |
" '__delattr__',\n", | |
" '__dict__',\n", | |
" '__dir__',\n", | |
" '__doc__',\n", | |
" '__eq__',\n", | |
" '__format__',\n", | |
" '__ge__',\n", | |
" '__getattribute__',\n", | |
" '__getstate__',\n", | |
" '__gt__',\n", | |
" '__hash__',\n", | |
" '__init__',\n", | |
" '__init_subclass__',\n", | |
" '__le__',\n", | |
" '__lt__',\n", | |
" '__module__',\n", | |
" '__ne__',\n", | |
" '__new__',\n", | |
" '__reduce__',\n", | |
" '__reduce_ex__',\n", | |
" '__repr__',\n", | |
" '__setattr__',\n", | |
" '__setstate__',\n", | |
" '__sizeof__',\n", | |
" '__str__',\n", | |
" '__subclasshook__',\n", | |
" '__weakref__',\n", | |
" '_estimator_type',\n", | |
" '_get_param_names',\n", | |
" 'fit',\n", | |
" 'get_params',\n", | |
" 'predict',\n", | |
" 'score',\n", | |
" 'set_params'}" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"reg_things" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'predict_log_proba', 'predict_proba', 'random_state'}" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"class_things - reg_things" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"set()" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"reg_things - class_things" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def class_count(module):\n", | |
" return module.__dict__['__doc__'].lower().count('class')\n", | |
"\n", | |
"def reg_count(module):\n", | |
" return module.__dict__['__doc__'].lower().count('regress')\n", | |
"\n", | |
"def is_classifier_doc(module):\n", | |
" \"\"\"rough heuristic comparing how often 'class' and 'regress' appear in the docstring; returns True, False, or 'unsure'\"\"\"\n", | |
" c = class_count(module) \n", | |
" r = reg_count(module)\n", | |
" if r == 0 and c > 0:\n", | |
" return True\n", | |
" if c == 0 and r > 0:\n", | |
" return False\n", | |
" if c - r >= 3:\n", | |
" return True\n", | |
" if r - c >= 3:\n", | |
" return False\n", | |
" return 'unsure'\n", | |
"\n", | |
"def is_classifier_dir(module):\n", | |
" \"\"\"all classifiers have this attribute, and no regressors do\"\"\"\n", | |
" return 'predict_proba' in dir(module)\n", | |
"\n", | |
"def percent_classifier(module):\n", | |
" \"\"\" returns a score from 0 to 1 (not a percentage) for how likely the given sklearn model is a classifier \"\"\"\n", | |
" doc = is_classifier_doc(module) \n", | |
" dirr = is_classifier_dir(module) \n", | |
"\n", | |
" converter = {True: 1, 'unsure': 0.5, False: 0}\n", | |
" return (converter[doc] + converter[dirr])/2\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True\tTrue\t1.0\t24\t0\tExtraTreesClassifier\n", | |
"True\tTrue\t1.0\t28\t0\tRandomForestClassifier\n", | |
"True\tTrue\t1.0\t22\t1\tAdaBoostClassifier\n", | |
"True\tTrue\t1.0\t7\t0\tMLPClassifier\n", | |
"True\tTrue\t1.0\t25\t5\tLogisticRegression\n", | |
"True\tTrue\t1.0\t26\t1\tSVC\n", | |
"True\tTrue\t1.0\t20\t2\tDecisionTreeClassifier\n", | |
"False\tFalse\t0.0\t1\t9\tRandomForestRegressor\n", | |
"False\tFalse\t0.0\t1\t5\tExtraTreesRegressor\n", | |
"False\tFalse\t0.0\t3\t11\tAdaBoostRegressor\n", | |
"False\tFalse\t0.0\t0\t2\tGaussianProcessRegressor\n", | |
"False\tFalse\t0.0\t0\t9\tKernelRidge\n", | |
"unsure\tFalse\t0.25\t2\t2\tMLPRegressor\n", | |
"False\tFalse\t0.0\t0\t4\tSVR\n", | |
"False\tFalse\t0.0\t2\t6\tDecisionTreeRegressor\n", | |
"False\tFalse\t0.0\t1\t4\tLinearRegression\n", | |
"unsure\tFalse\t0.25\t3\t4\tLasso\n" | |
] | |
} | |
], | |
"source": [ | |
"for x in classy + reg:\n", | |
" print(is_classifier_doc(x), is_classifier_dir(x), percent_classifier(x), class_count(x), reg_count(x), x.__name__, sep='\\t')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The only difference when you instantiate the models (rather than inspecting the classes) is that you get
{'predict_log_proba', 'predict_proba', 'random_state'}
as classifier-only attributes.