Last active
December 20, 2015 03:29
-
-
Save kemaleren/6063691 to your computer and use it in GitHub Desktop.
Line-profiler dumps for SpectralBiclustering and SpectralCoclustering on both dense and sparse matrices.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups_vectorized | |
from sklearn.datasets import make_checkerboard | |
from sklearn.datasets import make_biclusters | |
from sklearn.cluster.bicluster import SpectralBiclustering | |
from sklearn.cluster.bicluster import SpectralCoclustering | |
newsgroups = fetch_20newsgroups_vectorized() | |
# Spectral Co-Clustering | |
a, rows, columns = make_biclusters((40000, 300), 10, noise=2) | |
model = SpectralCoclustering(10, mini_batch=False) | |
lprun -f model._fit model.fit(a) | |
model = SpectralCoclustering(20, mini_batch=False) | |
lprun -f model._fit model.fit(newsgroups.data) | |
model = SpectralCoclustering(20, mini_batch=True) | |
lprun -f model._fit model.fit(newsgroups.data) | |
# Spectral Biclustering | |
a, rows, columns = make_checkerboard((40000, 300), 10, noise=2) | |
model = SpectralBiclustering(10) | |
lprun -f model._fit model.fit(a) | |
model = SpectralBiclustering(20, mini_batch=False) | |
lprun -f model._fit model.fit(newsgroups.data) | |
model = SpectralBiclustering(20, mini_batch=True) | |
lprun -f model._fit model.fit(newsgroups.data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* SpectralCoclustering dense test: 40000 x 300 matrix | |
File: sklearn/cluster/bicluster/spectral.py | |
Function: _fit at line 271 | |
Total time: 2.62557 s | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
271 def _fit(self, X): | |
272 1 124484 124484.0 4.7 normalized_data, row_diag, col_diag = _scale_normalize(X) | |
273 1 48 48.0 0.0 n_sv = 1 + int(np.ceil(np.log2(self.n_clusters))) | |
274 1 1737672 1737672.0 66.2 u, v = self._svd(normalized_data, n_sv, n_discard=1) | |
275 1 830 830.0 0.0 z = np.vstack((row_diag[:, np.newaxis] * u, | |
276 1 243 243.0 0.0 col_diag[:, np.newaxis] * v)) | |
277 | |
278 1 761520 761520.0 29.0 _, labels = self._k_means(z, self.n_clusters) | |
279 | |
280 1 4 4.0 0.0 n_rows = X.shape[0] | |
281 1 4 4.0 0.0 self.row_labels_ = labels[:n_rows] | |
282 1 2 2.0 0.0 self.column_labels_ = labels[n_rows:] | |
283 | |
284 1 3 3.0 0.0 self.rows_ = np.vstack(self.row_labels_ == c | |
285 1 615 615.0 0.0 for c in range(self.n_clusters)) | |
286 1 2 2.0 0.0 self.columns_ = np.vstack(self.column_labels_ == c | |
287 1 143 143.0 0.0 for c in range(self.n_clusters)) | |
* SpectralCoclustering sparse test: 20newsgroups_vectorized, with mini_batch=False | |
File: sklearn/cluster/bicluster/spectral.py | |
Function: _fit at line 271 | |
Total time: 32.9558 s | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
271 def _fit(self, X): | |
272 1 138058 138058.0 0.4 normalized_data, row_diag, col_diag = _scale_normalize(X) | |
273 1 17 17.0 0.0 n_sv = 1 + int(np.ceil(np.log2(self.n_clusters))) | |
274 1 380753 380753.0 1.2 u, v = self._svd(normalized_data, n_sv, n_discard=1) | |
275 1 259 259.0 0.0 z = np.vstack((row_diag[:, np.newaxis] * u, | |
276 1 3901 3901.0 0.0 col_diag[:, np.newaxis] * v)) | |
277 | |
278 1 32428991 32428991.0 98.4 _, labels = self._k_means(z, self.n_clusters) | |
279 | |
280 1 6 6.0 0.0 n_rows = X.shape[0] | |
281 1 4 4.0 0.0 self.row_labels_ = labels[:n_rows] | |
282 1 3 3.0 0.0 self.column_labels_ = labels[n_rows:] | |
283 | |
284 1 3 3.0 0.0 self.rows_ = np.vstack(self.row_labels_ == c | |
285 1 590 590.0 0.0 for c in range(self.n_clusters)) | |
286 1 2 2.0 0.0 self.columns_ = np.vstack(self.column_labels_ == c | |
287 1 3263 3263.0 0.0 for c in range(self.n_clusters)) | |
* SpectralCoclustering sparse test: 20newsgroups_vectorized, with mini_batch=True | |
File: sklearn/cluster/bicluster/spectral.py | |
Function: _fit at line 271 | |
Total time: 1.92757 s | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
271 def _fit(self, X): | |
272 1 140340 140340.0 7.3 normalized_data, row_diag, col_diag = _scale_normalize(X) | |
273 1 17 17.0 0.0 n_sv = 1 + int(np.ceil(np.log2(self.n_clusters))) | |
274 1 401109 401109.0 20.8 u, v = self._svd(normalized_data, n_sv, n_discard=1) | |
275 1 262 262.0 0.0 z = np.vstack((row_diag[:, np.newaxis] * u, | |
276 1 3825 3825.0 0.2 col_diag[:, np.newaxis] * v)) | |
277 | |
278 1 1377933 1377933.0 71.5 _, labels = self._k_means(z, self.n_clusters) | |
279 | |
280 1 9 9.0 0.0 n_rows = X.shape[0] | |
281 1 5 5.0 0.0 self.row_labels_ = labels[:n_rows] | |
282 1 2 2.0 0.0 self.column_labels_ = labels[n_rows:] | |
283 | |
284 1 4 4.0 0.0 self.rows_ = np.vstack(self.row_labels_ == c | |
285 1 562 562.0 0.0 for c in range(self.n_clusters)) | |
286 1 2 2.0 0.0 self.columns_ = np.vstack(self.column_labels_ == c | |
287 1 3501 3501.0 0.2 for c in range(self.n_clusters)) | |
* SpectralBiclustering dense test: 40000 x 300 checkerboard matrix | |
File: sklearn/cluster/bicluster/spectral.py | |
Function: _fit at line 411 | |
Total time: 9.88391 s | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
411 def _fit(self, X): | |
412 1 4 4.0 0.0 n_sv = self.n_components | |
413 1 3 3.0 0.0 if self.method == 'bistochastic': | |
414 1 3718143 3718143.0 37.6 normalized_data = _bistochastic_normalize(X) | |
415 1 3 3.0 0.0 n_sv += 1 | |
416 elif self.method == 'scale': | |
417 normalized_data, _, _ = _scale_normalize(X) | |
418 n_sv += 1 | |
419 elif self.method == 'log': | |
420 normalized_data = _log_normalize(X) | |
421 1 3 3.0 0.0 n_discard = 0 if self.method == 'log' else 1 | |
422 1 2105260 2105260.0 21.3 u, v = self._svd(normalized_data, n_sv, n_discard) | |
423 1 3 3.0 0.0 ut = u.T | |
424 1 2 2.0 0.0 vt = v.T | |
425 | |
426 1 2 2.0 0.0 try: | |
427 1 12 12.0 0.0 n_row_clusters, n_col_clusters = self.n_clusters | |
428 1 3 3.0 0.0 except TypeError: | |
429 1 2 2.0 0.0 n_row_clusters = n_col_clusters = self.n_clusters | |
430 | |
431 1 2 2.0 0.0 best_ut = self._fit_best_piecewise(ut, self.n_best, | |
432 1 3033660 3033660.0 30.7 n_row_clusters) | |
433 | |
434 1 5 5.0 0.0 best_vt = self._fit_best_piecewise(vt, self.n_best, | |
435 1 296716 296716.0 3.0 n_col_clusters) | |
436 | |
437 1 3 3.0 0.0 self.row_labels_ = self._project_and_cluster(X, best_vt.T, | |
438 1 650654 650654.0 6.6 n_row_clusters) | |
439 | |
440 1 9 9.0 0.0 self.column_labels_ = self._project_and_cluster(X.T, best_ut.T, | |
441 1 71899 71899.0 0.7 n_col_clusters) | |
* SpectralBiclustering sparse test: 20newsgroups_vectorized, with mini_batch=False | |
File: sklearn/cluster/bicluster/spectral.py | |
Function: _fit at line 411 | |
Total time: 130.362 s | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
411 def _fit(self, X): | |
412 1 5 5.0 0.0 n_sv = self.n_components | |
413 1 3 3.0 0.0 if self.method == 'bistochastic': | |
414 1 168287 168287.0 0.1 normalized_data = _bistochastic_normalize(X) | |
415 1 3 3.0 0.0 n_sv += 1 | |
416 elif self.method == 'scale': | |
417 normalized_data, _, _ = _scale_normalize(X) | |
418 n_sv += 1 | |
419 elif self.method == 'log': | |
420 normalized_data = _log_normalize(X) | |
421 1 3 3.0 0.0 n_discard = 0 if self.method == 'log' else 1 | |
422 1 421802 421802.0 0.3 u, v = self._svd(normalized_data, n_sv, n_discard) | |
423 1 3 3.0 0.0 ut = u.T | |
424 1 2 2.0 0.0 vt = v.T | |
425 | |
426 1 2 2.0 0.0 try: | |
427 1 13 13.0 0.0 n_row_clusters, n_col_clusters = self.n_clusters | |
428 1 5 5.0 0.0 except TypeError: | |
429 1 1 1.0 0.0 n_row_clusters = n_col_clusters = self.n_clusters | |
430 | |
431 1 2 2.0 0.0 best_ut = self._fit_best_piecewise(ut, self.n_best, | |
432 1 5315974 5315974.0 4.1 n_row_clusters) | |
433 | |
434 1 4 4.0 0.0 best_vt = self._fit_best_piecewise(vt, self.n_best, | |
435 1 104340257 104340257.0 80.0 n_col_clusters) | |
436 | |
437 1 10 10.0 0.0 self.row_labels_ = self._project_and_cluster(X, best_vt.T, | |
438 1 3851173 3851173.0 3.0 n_row_clusters) | |
439 | |
440 1 149 149.0 0.0 self.column_labels_ = self._project_and_cluster(X.T, best_ut.T, | |
441 1 16182799 16182799.0 12.4 n_col_clusters) | |
* SpectralBiclustering sparse test: 20newsgroups_vectorized, with mini_batch=True | |
File: sklearn/cluster/bicluster/spectral.py | |
Function: _fit at line 411 | |
Total time: 3.26277 s | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
411 def _fit(self, X): | |
412 1 3 3.0 0.0 n_sv = self.n_components | |
413 1 2 2.0 0.0 if self.method == 'bistochastic': | |
414 1 151776 151776.0 4.7 normalized_data = _bistochastic_normalize(X) | |
415 1 3 3.0 0.0 n_sv += 1 | |
416 elif self.method == 'scale': | |
417 normalized_data, _, _ = _scale_normalize(X) | |
418 n_sv += 1 | |
419 elif self.method == 'log': | |
420 normalized_data = _log_normalize(X) | |
421 1 3 3.0 0.0 n_discard = 0 if self.method == 'log' else 1 | |
422 1 402177 402177.0 12.3 u, v = self._svd(normalized_data, n_sv, n_discard) | |
423 1 3 3.0 0.0 ut = u.T | |
424 1 2 2.0 0.0 vt = v.T | |
425 | |
426 1 1 1.0 0.0 try: | |
427 1 13 13.0 0.0 n_row_clusters, n_col_clusters = self.n_clusters | |
428 1 4 4.0 0.0 except TypeError: | |
429 1 1 1.0 0.0 n_row_clusters = n_col_clusters = self.n_clusters | |
430 | |
431 1 3 3.0 0.0 best_ut = self._fit_best_piecewise(ut, self.n_best, | |
432 1 760194 760194.0 23.3 n_row_clusters) | |
433 | |
434 1 4 4.0 0.0 best_vt = self._fit_best_piecewise(vt, self.n_best, | |
435 1 1570605 1570605.0 48.1 n_col_clusters) | |
436 | |
437 1 10 10.0 0.0 self.row_labels_ = self._project_and_cluster(X, best_vt.T, | |
438 1 120669 120669.0 3.7 n_row_clusters) | |
439 | |
440 1 159 159.0 0.0 self.column_labels_ = self._project_and_cluster(X.T, best_ut.T, | |
441 1 174007 174007.0 5.3 n_col_clusters) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment