Skip to content

Instantly share code, notes, and snippets.

@pjbull
Last active April 18, 2021 12:09
Show Gist options
  • Save pjbull/063a9b4e4f9cfcc4d03cba18fee63de7 to your computer and use it in GitHub Desktop.
Save pjbull/063a9b4e4f9cfcc4d03cba18fee63de7 to your computer and use it in GitHub Desktop.
Sparse Interaction Terms for scikit-learn
from itertools import combinations

import numpy as np
from scipy import sparse
from sklearn.base import BaseEstimator, TransformerMixin
class SparseInteractions(BaseEstimator, TransformerMixin):
    """Append multiplicative interaction terms to a feature matrix, sparsely.

    For every combination of ``k`` distinct columns, with ``k`` ranging from
    2 up to ``degree``, the element-wise product of those columns is added
    as a new column. The original columns are kept and come first in the
    output.

    Parameters
    ----------
    degree : int, default=2
        Highest interaction order to generate. Values below 2 generate no
        interaction columns.
    feature_name_separator : str, default="_"
        String placed between the source column names when naming an
        interaction column.

    Attributes
    ----------
    orig_col_names : numpy.ndarray of str
        Names of the input columns, set by ``transform``.
    feature_names : list of str
        Names of all output columns, set by ``transform``.
    """

    def __init__(self, degree=2, feature_name_separator="_"):
        self.degree = degree
        self.feature_name_separator = feature_name_separator

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    def transform(self, X):
        """Return ``X`` with its interaction columns appended.

        ``X`` may be anything ``scipy.sparse.csc_matrix`` accepts. If it has
        a ``columns`` attribute (e.g. a pandas DataFrame), those labels are
        used as the feature names; otherwise the columns are named by
        position ("0", "1", ...).

        Returns the sparse matrix produced by ``scipy.sparse.hstack``.
        """
        # Column labels must be read BEFORE converting to CSC: the sparse
        # matrix has no `columns` attribute, so checking afterwards would
        # always discard the caller's labels.
        col_names = None
        if hasattr(X, "columns"):
            # Coerce to str so the separator.join(...) used for naming
            # interaction columns works for non-string labels too.
            col_names = np.array([str(col) for col in X.columns])

        if not sparse.isspmatrix_csc(X):
            X = sparse.csc_matrix(X)

        if col_names is None:
            col_names = np.array([str(i) for i in range(X.shape[1])])
        self.orig_col_names = col_names

        return self._create_sparse_interactions(X)

    def get_feature_names(self):
        """Return the output column names computed by the last ``transform``.

        The ``feature_names`` attribute is only assigned during
        ``transform``, so this raises ``AttributeError`` if ``transform``
        has not been called yet.
        """
        return self.feature_names

    def _create_sparse_interactions(self, X):
        """Build the interaction columns for CSC matrix ``X`` and stack them.

        Also rebuilds ``self.feature_names``: the original names followed by
        one name per interaction column, in the same order as the columns.
        """
        out_mat = []
        self.feature_names = self.orig_col_names.tolist()

        for sub_degree in range(2, self.degree + 1):
            for col_ixs in combinations(range(X.shape[1]), sub_degree):
                # Name the new column after the columns it multiplies.
                name = self.feature_name_separator.join(
                    self.orig_col_names[list(col_ixs)]
                )
                self.feature_names.append(name)

                # Element-wise product of the selected columns.
                out = X[:, col_ixs[0]]
                for j in col_ixs[1:]:
                    out = out.multiply(X[:, j])
                out_mat.append(out)

        return sparse.hstack([X] + out_mat)
@AdamSpannbauer
Copy link

Thanks for the code.

The script raises an error because of a missing `import numpy as np`. Other than that, the class has run fine in my Pipeline.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment