Scikit-learn feature-engineering examples (GitHub gist stoensin/7bc4875435fe850e440f393b869d8640).
Part 1: feature preprocessing on the iris dataset.
# --- Feature preprocessing on the iris dataset ------------------------------
# Each transformer follows sklearn's fit/transform API: fit_transform learns
# any needed statistics from the data and returns the transformed array.

from numpy import array, log1p, nan, vstack
from sklearn.datasets import load_iris
from sklearn.impute import SimpleImputer  # Imputer was removed in sklearn 0.22
from sklearn.preprocessing import (
    Binarizer,
    FunctionTransformer,
    MinMaxScaler,
    Normalizer,
    OneHotEncoder,
    PolynomialFeatures,
    StandardScaler,
)

iris = load_iris()

# Standardization: zero mean, unit variance per feature (column).
StandardScaler().fit_transform(iris.data)

# Min-max scaling: rescale each feature into the [0, 1] interval.
MinMaxScaler().fit_transform(iris.data)

# Normalization: scale each sample (row) to unit norm.
Normalizer().fit_transform(iris.data)

# Binarization with threshold 3: values > 3 become 1, values <= 3 become 0.
Binarizer(threshold=3).fit_transform(iris.data)

# One-hot (dummy) encoding of the target labels; expects 2-D input,
# hence the reshape to a single column.
OneHotEncoder().fit_transform(iris.target.reshape((-1, 1)))

# Missing-value imputation: prepend a row of NaNs, then fill them in.
# missing_values defaults to NaN; strategy defaults to 'mean'.
SimpleImputer().fit_transform(vstack((array([nan, nan, nan, nan]), iris.data)))

# Polynomial feature expansion; degree defaults to 2.
PolynomialFeatures().fit_transform(iris.data)

# Custom transform: apply log1p element-wise via FunctionTransformer.
# The first argument is a univariate (element-wise) function.
FunctionTransformer(log1p).fit_transform(iris.data)
Part 2: dimensionality reduction.
# --- Dimensionality reduction -----------------------------------------------
# Assumes `iris` is already loaded (e.g. sklearn.datasets.load_iris()).

from sklearn.decomposition import PCA
# sklearn.lda was removed from scikit-learn long ago; LDA now lives in
# sklearn.discriminant_analysis as LinearDiscriminantAnalysis.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Principal component analysis (unsupervised): project the data onto the
# n_components directions of maximal variance.
PCA(n_components=2).fit_transform(iris.data)

# Linear discriminant analysis (supervised): project onto at most
# n_classes - 1 directions that best separate the target classes.
LinearDiscriminantAnalysis(n_components=2).fit_transform(iris.data, iris.target)
Part 3: feature selection.
# --- Feature selection: filter methods ---------------------------------------
# Assumes `iris` is already loaded (e.g. sklearn.datasets.load_iris()).

from numpy import array
from scipy.stats import pearsonr
from sklearn.feature_selection import SelectKBest, VarianceThreshold, chi2

# Variance threshold: drop features whose variance is below `threshold`.
VarianceThreshold(threshold=3).fit_transform(iris.data)


def _pearson_scores(X, y):
    """Score function for SelectKBest based on Pearson correlation.

    SelectKBest expects a callable taking (X, y) and returning the pair
    (scores, p-values), one entry per feature; the score of feature i is
    the Pearson correlation of column i with y.
    """
    # map() is lazy in Python 3, so the old array(map(...)) idiom yields a
    # 0-d object array — materialize with a list comprehension instead.
    results = array([pearsonr(column, y) for column in X.T])
    return results[:, 0], results[:, 1]


# Keep the k features with the best scores under the given score function.
SelectKBest(_pearson_scores, k=2).fit_transform(iris.data, iris.target)

# Chi-squared test between each (non-negative) feature and the class labels.
SelectKBest(chi2, k=2).fit_transform(iris.data, iris.target)
# --- Feature selection: maximal information coefficient (MINE/MIC) -----------
# Assumes `iris` is already loaded (e.g. sklearn.datasets.load_iris()).

from numpy import array
from sklearn.feature_selection import SelectKBest
from minepy import MINE  # third-party; provides the MIC estimator


def mic(x, y):
    """Adapt the stateful MINE API to a functional (score, p-value) call.

    MINE does not produce a p-value, so a fixed placeholder of 0.5 is
    returned as the second element to satisfy SelectKBest's contract.
    """
    m = MINE()
    m.compute_score(x, y)
    return (m.mic(), 0.5)


def _mic_scores(X, y):
    """Per-feature (scores, p-values) arrays computed with `mic`."""
    # map() is lazy in Python 3; build the array from a list comprehension.
    results = array([mic(column, y) for column in X.T])
    return results[:, 0], results[:, 1]


# Keep the k features with the highest MIC against the target.
SelectKBest(_mic_scores, k=2).fit_transform(iris.data, iris.target)
# --- Feature selection: wrapper and embedded methods -------------------------
# Assumes `iris` is already loaded (e.g. sklearn.datasets.load_iris()).

from sklearn.feature_selection import RFE, SelectFromModel
from sklearn.linear_model import LogisticRegression

# Recursive feature elimination: repeatedly fit the base estimator and drop
# the features with the smallest coefficients, round after round, until
# n_features_to_select remain.
RFE(estimator=LogisticRegression(), n_features_to_select=2).fit_transform(
    iris.data, iris.target
)

# Embedded selection with an L1-penalized logistic regression as the base
# model: the L1 penalty drives some coefficients to exactly zero, so the
# selection doubles as dimensionality reduction.
# NOTE: modern scikit-learn's default solver (lbfgs) does not support
# penalty='l1'; liblinear must be requested explicitly.
SelectFromModel(
    LogisticRegression(penalty="l1", C=0.1, solver="liblinear")
).fit_transform(iris.data, iris.target)
(End of gist; GitHub comment-section footer removed.)