Created September 9, 2014 13:41
profile strong rules
=====================
Lasso and Elastic Net
=====================
Lasso and elastic net (L1 and L2 penalisation) implemented using
coordinate descent.
The coefficients can be forced to be positive.
Computing regularization path using the lasso...
Computing regularization path using the positive lasso...
Computing regularization path using the elastic net...
Computing regularization path using the positive elastic net...
Wrote profile results to plot_lasso_coordinate_descent_path.py.lprof
Timer unit: 1e-06 s
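The tables below are standard line_profiler output: kernprof wrote the
.lprof file named above, and each @profile-decorated function gets one
table. As a minimal sketch, the same kind of report can also be collected
programmatically; the import path of the patched function is an assumption
of this development branch.

    # Hypothetical sketch: collecting a line_profiler report like the ones below.
    from line_profiler import LineProfiler
    from sklearn.linear_model.coordinate_descent import enet_path

    profiler = LineProfiler()
    profiled_enet_path = profiler(enet_path)  # wrap so calls are timed line by line

    # ... call profiled_enet_path(X, y, l1_ratio=0.8) on some data ...

    profiler.print_stats()  # prints "Timer unit" and one table per function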
Total time: 0.016946 s
File: /home/fp985994/dev/scikit-learn/sklearn/linear_model/coordinate_descent.py
Function: compute_strong_active_set at line 38
Line # Hits Time Per Hit % Time Line Contents
==============================================================
38 @profile
39 def compute_strong_active_set(X, y, current_alpha, prev_alpha, l1_ratio, coef,
40 residual):
41 """
42 Compute the features that are active along a regularization path.
43
44 A feature is kept in the active set when it satisfies
45 |X_{j}^T * (y - X*w)| >= 2 * current_alpha - prev_alpha
46
47 Parameters
48 ----------
49 X : {ndarray, sparse matrix}, shape (n_samples, n_features)
50 Training data.
51
52 y : ndarray, shape (n_samples,) or (n_samples, n_outputs)
53 Target values.
54
55 current_alpha : float
56 Current regularization parameter.
57
58 prev_alpha : float
59 Previous regularization parameter while fitting along the grid of
60 alphas.
61
62 l1_ratio : float
63 ElasticNet mixing parameter. An l1_ratio of 1.0 implies an l1 penalty,
64 while 0.0 indicates an l2 penalty.
65
66 coef : ndarray, shape (n_features,)
67 Initial feature vector along the regularization path.
68
69 Returns
70 -------
71 active_features : ndarray
72 Array of active features.
73 """
74 400 657 1.6 3.9 prev_alpha, current_alpha = prev_alpha * l1_ratio, current_alpha * l1_ratio
75
76 # If the right-hand side is negative, there is no point doing
77 # the remaining computation as the left-hand side is always positive.
78 400 1490 3.7 8.8 if 2*current_alpha - prev_alpha < 0:
79 return np.arange(X.shape[1])
80
81 400 486 1.2 2.9 multi_output = (y.ndim >= 2)
82
83 400 267 0.7 1.6 if multi_output:
84 # coef is of shape n_outputs * n_features.
85 # We need n_features * n_outputs.
86 coef = coef.T
87
88 400 5448 13.6 32.1 active_rule = safe_sparse_dot(X.T, residual)
89 400 1724 4.3 10.2 active_rule = np.abs(active_rule, active_rule)
90 400 307 0.8 1.8 if multi_output:
91 active_rule = row_norms(active_rule)
92
93 400 3664 9.2 21.6 active_features = (active_rule >= 2*current_alpha - prev_alpha)
94 400 2302 5.8 13.6 active_features = np.nonzero(active_features)[0]
95 400 352 0.9 2.1 del active_rule
96 400 249 0.6 1.5 return active_features
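To make the screening rule concrete, here is a small self-contained NumPy
sketch of the same sequential strong rule (an illustration with made-up
names, not the profiled implementation):

    # Standalone sketch of the sequential strong rule profiled above.
    import numpy as np

    def strong_rule_survivors(X, residual, current_alpha, prev_alpha):
        """Indices of the features kept by the strong rule.

        A feature j survives when |x_j^T residual| >= 2*current_alpha -
        prev_alpha; features below that threshold are very unlikely to
        become active at current_alpha.
        """
        threshold = 2 * current_alpha - prev_alpha
        if threshold < 0:  # the rule is vacuous: keep every feature
            return np.arange(X.shape[1])
        correlations = np.abs(np.dot(X.T, residual))
        return np.nonzero(correlations >= threshold)[0]

    # At the start of the path coef == 0, so the residual is y itself; with
    # prev_alpha = alpha_max = max_j |x_j^T y|, only strongly correlated
    # features survive the first screening step.
    rng = np.random.RandomState(0)
    X, y = rng.randn(40, 100), rng.randn(40)
    alpha_max = np.abs(np.dot(X.T, y)).max()
    print(strong_rule_survivors(X, y, 0.9 * alpha_max, alpha_max))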
Total time: 0.114876 s
File: /home/fp985994/dev/scikit-learn/sklearn/linear_model/coordinate_descent.py
Function: check_kkt_conditions at line 98
Line # Hits Time Per Hit % Time Line Contents
==============================================================
98 @profile
99 def check_kkt_conditions(X, y, coef, strong_active_set, alpha, l1_ratio,
100 residual, active_set=None,
101 return_active_set=True, rtol=1e-3):
102 """
103 Checks if the KKT conditions are satisfied for ElasticNet and Lasso.
104
105 These necessary conditions are used to check if a given feature vector
106 minimizes the loss function.
107 If return_active_set is set to True, then it returns the set of
108 features for which the KKT conditions are violated (active_features).
109 Else it stops and returns True at the first feature that violates
110 them.
111
112 Parameters
113 ----------
114 X : {ndarray, sparse matrix}, shape (n_samples, n_features)
115 Training data.
116
117 y : ndarray, shape (n_samples,) or (n_samples, n_outputs)
118 Target values.
119
120 coef : ndarray, shape (n_features,)
121 Feature vector.
122
123 strong_active_set : array-like
124 The feature indices for which the KKT conditions are to be verified.
125
126 alpha : float
127 Regularization parameter.
128
129 l1_ratio : float
130 ElasticNet mixing parameter. An l1_ratio of 1.0 implies an l1 penalty,
131 while 0.0 indicates an l2 penalty.
132
133 active_set : array-like, optional
134 If supplied, the features for which the KKT conditions are violated
135 are appended to this.
136
137 return_active_set : bool
138 Whether or not to return the active_set.
139
140 Returns
141 -------
142 active_set : ndarray
143 Active features across the strong_active_set. Returned only if
144 return_active_set is set to True.
145
146 kkt_violations : bool
147 Whether or not the KKT conditions are violated across the
148 strong_active_set.
149 """
150 718 911 1.3 0.8 kkt_violations = False
151
152 718 915 1.3 0.8 if active_set is None:
153 active_set = []
154 718 1638 2.3 1.4 if not isinstance(active_set, list):
155 459 1024 2.2 0.9 active_set = active_set.tolist()
156
157 718 1070 1.5 0.9 multi_output = (y.ndim >= 2)
158 718 795 1.1 0.7 if multi_output:
159 coef = coef.T
160
161 718 2145 3.0 1.9 X_sparse = sparse.isspmatrix(X)
162 718 813 1.1 0.7 if X_sparse:
163 X_indices = X.indices
164 X_data = X.data
165 X_indptr = X.indptr
166
167 3954 6339 1.6 5.5 for i in strong_active_set:
168 3236 3606 1.1 3.1 if X_sparse:
169 startptr = X_indptr[i]
170 endptr = X_indptr[i + 1]
171 dot_data = X_data[startptr: endptr]
172 residual_data = residual.take(X_indices[startptr: endptr],
173 mode='clip')
174 loss_derivative = np.dot(dot_data, residual_data)
175 else:
176 3236 14044 4.3 12.2 feature = X[:, i]
177 3236 13740 4.2 12.0 loss_derivative = np.dot(feature, residual)
178 3236 5011 1.5 4.4 active_coef = coef[i]
179
180 # XXX: I'm not sure about the multi_output case.
181 3236 3719 1.1 3.2 if multi_output:
182 loss_derivative = norm(loss_derivative)
183 mean_coef = np.mean(active_coef)
184 l1_penalty = alpha * l1_ratio
185 if mean_coef < 0:
186 l1_penalty = -l1_penalty
187 active_coef = norm(active_coef)
188 l2_penalty = alpha * (1 - l1_ratio) * active_coef
189 else:
190 3236 5452 1.7 4.7 l1_penalty = alpha * l1_ratio
191 3236 9169 2.8 8.0 if active_coef < 0:
192 416 618 1.5 0.5 l1_penalty = -l1_penalty
193 3236 7357 2.3 6.4 l2_penalty = alpha * (1 - l1_ratio) * active_coef
194
195 3236 5163 1.6 4.5 penalty = l1_penalty + l2_penalty
196 3236 7399 2.3 6.4 tol = 1e-3 + rtol * abs(penalty)
197
198 3236 8742 2.7 7.6 if active_coef != 0 and (abs(loss_derivative - penalty) > tol):
199 1328 2257 1.7 2.0 active_set.append(i)
200 1328 1585 1.2 1.4 kkt_violations = True
201 1328 1618 1.2 1.4 if not return_active_set:
202 break
203 1908 5612 2.9 4.9 elif active_coef == 0 and abs(loss_derivative) > alpha*l1_ratio:
204 588 1012 1.7 0.9 active_set.append(i)
205 588 717 1.2 0.6 kkt_violations = True
206 588 703 1.2 0.6 if not return_active_set:
207 break
208
209 718 860 1.2 0.7 if return_active_set:
210 718 842 1.2 0.7 return active_set, kkt_violations
211 else:
212 return kkt_violations
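For reference, the check above is the (sub)gradient optimality condition of
the elastic-net problem. A minimal single-output sketch with hypothetical
names, ignoring the sparse and multi-output branches handled above:

    # Standalone sketch of the single-output KKT check performed above.
    import numpy as np

    def kkt_violating_features(X, residual, coef, candidates, alpha,
                               l1_ratio, rtol=1e-3):
        """Indices in `candidates` whose optimality condition fails.

        Stationarity in coordinate j requires
            x_j^T residual == alpha*l1_ratio*sign(w_j)
                              + alpha*(1 - l1_ratio)*w_j   if w_j != 0
            |x_j^T residual| <= alpha*l1_ratio             if w_j == 0
        """
        violations = []
        for j in candidates:
            loss_derivative = np.dot(X[:, j], residual)
            w_j = coef[j]
            if w_j != 0:
                penalty = (alpha * l1_ratio * np.sign(w_j)
                           + alpha * (1 - l1_ratio) * w_j)
                if abs(loss_derivative - penalty) > 1e-3 + rtol * abs(penalty):
                    violations.append(j)
            elif abs(loss_derivative) > alpha * l1_ratio:
                violations.append(j)
        return violations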
Total time: 0.600602 s
File: /home/fp985994/dev/scikit-learn/sklearn/linear_model/coordinate_descent.py
Function: enet_path at line 487
Line # Hits Time Per Hit % Time Line Contents
==============================================================
487 @profile
488 def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
489 precompute='auto', Xy=None, fit_intercept=True,
490 normalize=False, copy_X=True, coef_init=None,
491 verbose=False, return_models=False, return_n_iter=False,
492 **params):
493 """Compute elastic net path with coordinate descent
494
495 The elastic net optimization function varies for mono and multi-outputs.
496
497 For mono-output tasks it is::
498
499 1 / (2 * n_samples) * ||y - Xw||^2_2
500 + alpha * l1_ratio * ||w||_1
501 + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
502
503 For multi-output tasks it is::
504
505 (1 / (2 * n_samples)) * ||Y - XW||_Fro^2
506 + alpha * l1_ratio * ||W||_21
507 + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
508
509 Where::
510
511 ||W||_21 = \sum_i \sqrt{\sum_j w_{ij}^2}
512
513 i.e. the sum of the norms of each row.
514
515 Parameters
516 ----------
517 X : {array-like}, shape (n_samples, n_features)
518 Training data. Pass directly as Fortran-contiguous data to avoid
519 unnecessary memory duplication. If ``y`` is mono-output then ``X``
520 can be sparse.
521
522 y : ndarray, shape = (n_samples,) or (n_samples, n_outputs)
523 Target values.
524
525 l1_ratio : float, optional
526 Float between 0 and 1 passed to elastic net (scaling between
527 l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.
528
529 eps : float
530 Length of the path. ``eps=1e-3`` means that
531 ``alpha_min / alpha_max = 1e-3``.
532
533 n_alphas : int, optional
534 Number of alphas along the regularization path.
535
536 alphas : ndarray, optional
537 List of alphas where to compute the models.
538 If None, alphas are set automatically.
539
540 precompute : True | False | 'auto' | array-like
541 Whether to use a precomputed Gram matrix to speed up
542 calculations. If set to ``'auto'`` let us decide. The Gram
543 matrix can also be passed as argument.
544
545 Xy : array-like, optional
546 Xy = np.dot(X.T, y) that can be precomputed. It is useful
547 only when the Gram matrix is precomputed.
548
549 fit_intercept : bool
550 Whether or not to fit an intercept.
551 WARNING : deprecated, will be removed in 0.16.
552
553 normalize : boolean, optional, default False
554 If ``True``, the regressors X will be normalized before regression.
555 WARNING : deprecated, will be removed in 0.16.
556
557 copy_X : boolean, optional, default True
558 If ``True``, X will be copied; else, it may be overwritten.
559
560 coef_init : array, shape (n_features, ) | None
561 The initial values of the coefficients.
562
563 verbose : bool or integer
564 Amount of verbosity.
565
566 return_models : boolean, optional, default False
567 If ``True``, the function will return a list of models. Setting it
568 to ``False`` will change the function output, returning the values
569 of the alphas and the coefficients along the path. Returning the
570 model list will be removed in version 0.16.
571
572 params : kwargs
573 Keyword arguments passed to the coordinate descent solver.
574
575 return_n_iter : bool
576 Whether to return the number of iterations or not.
577
578 Returns
579 -------
580 models : a list of models along the regularization path
581 (Is returned if ``return_models`` is set to ``True``.)
582
583 alphas : array, shape (n_alphas,)
584 The alphas along the path where models are computed.
585 (Is returned, along with ``coefs``, when ``return_models`` is set
586 to ``False``.)
587
588 coefs : array, shape (n_features, n_alphas) or
589 (n_outputs, n_features, n_alphas)
590 Coefficients along the path.
591 (Is returned, along with ``alphas``, when ``return_models`` is set
592 to ``False``.)
593
594 dual_gaps : array, shape (n_alphas,)
595 The dual gaps at the end of the optimization for each alpha.
596 (Is returned, along with ``alphas``, when ``return_models`` is set
597 to ``False``.)
598
599 n_iters : array-like, shape (n_alphas,)
600 The number of iterations taken by the coordinate descent optimizer to
601 reach the specified tolerance for each alpha.
602 (Is returned, along with ``alphas``, when ``return_models`` is set
603 to ``False`` and ``return_n_iter`` is set to True.)
604
605 Notes
606 -----
607 See examples/plot_lasso_coordinate_descent_path.py for an example.
608
609 Deprecation Notice: Setting ``return_models`` to ``False`` will make
610 the Lasso Path return an output in the style used by :func:`lars_path`.
611 This will become the norm as of version 0.15. Leaving ``return_models``
612 set to ``True`` will let the function return a list of models as before.
613
614 See also
615 --------
616 MultiTaskElasticNet
617 MultiTaskElasticNetCV
618 ElasticNet
619 ElasticNetCV
620 """
621 4 12 3.0 0.0 if return_models:
622 warnings.warn("Use enet_path(return_models=False), as it returns the"
623 " coefficients and alphas instead of just a list of"
624 " models as previously `lasso_path`/`enet_path` did."
625 " `return_models` will eventually be removed in 0.16,"
626 " after which, returning alphas and coefs"
627 " will become the norm.",
628 DeprecationWarning, stacklevel=2)
629
630 4 12 3.0 0.0 if normalize is True:
631 warnings.warn("normalize param will be removed in 0.16."
632 " Intercept fitting and feature normalization will be"
633 " done in estimators.",
634 DeprecationWarning, stacklevel=2)
635 else:
636 4 9 2.2 0.0 normalize = False
637
638 4 11 2.8 0.0 if fit_intercept is True or fit_intercept is None:
639 warnings.warn("fit_intercept param will be removed in 0.16."
640 " Intercept fitting and feature normalization will be"
641 " done in estimators.",
642 DeprecationWarning, stacklevel=2)
643
644 4 11 2.8 0.0 if fit_intercept is None:
645 fit_intercept = True
646
647 4 12 3.0 0.0 X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X and
648 4 338 84.5 0.1 fit_intercept)
649 4 16 4.0 0.0 n_samples, n_features = X.shape
650
651 4 11 2.8 0.0 multi_output = False
652 4 12 3.0 0.0 if y.ndim != 1:
653 multi_output = True
654 _, n_outputs = y.shape
655
656 # MultiTaskElasticNet does not support sparse matrices
657 4 9 2.2 0.0 X_sparse_scaling = None
658 4 22 5.5 0.0 if not multi_output and sparse.isspmatrix(X):
659 if 'X_mean' in params:
660 # As sparse matrices are not actually centered we need this
661 # to be passed to the CD solver.
662 X_sparse_scaling = params['X_mean'] / params['X_std']
663 else:
664 X_sparse_scaling = np.ones(n_features)
665
666 X, y, X_mean, y_mean, X_std, precompute, Xy = \
667 4 445 111.2 0.1 _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy=False)
668 4 15 3.8 0.0 if alphas is None:
669 # No need to normalize or fit_intercept: it has been done
670 # above
671 4 12 3.0 0.0 alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
672 4 8 2.0 0.0 fit_intercept=False, eps=eps, n_alphas=n_alphas,
673 4 511 127.8 0.1 normalize=False, copy_X=False)
674 else:
675 alphas = np.sort(alphas)[::-1] # make sure alphas are properly ordered
676
677 4 36 9.0 0.0 feature_array = np.arange(X.shape[1], dtype=np.int32)
678 4 12 3.0 0.0 n_alphas = len(alphas)
679 4 13 3.2 0.0 tol = params.get('tol', 1e-4)
680 4 11 2.8 0.0 positive = params.get('positive', False)
681 4 11 2.8 0.0 max_iter = params.get('max_iter', 1000)
682 4 20 5.0 0.0 dual_gaps = np.empty(n_alphas)
683 4 10 2.5 0.0 n_iters = []
684
685 4 24 6.0 0.0 rng = check_random_state(params.get('random_state', None))
686 4 12 3.0 0.0 selection = params.get('selection', 'cyclic')
687 4 11 2.8 0.0 if selection not in ['random', 'cyclic']:
688 raise ValueError("selection should be either random or cyclic.")
689 4 11 2.8 0.0 random = (selection == 'random')
690 4 11 2.8 0.0 models = []
691
692 4 10 2.5 0.0 if not multi_output:
693 4 22 5.5 0.0 coefs = np.empty((n_features, n_alphas), dtype=np.float64)
694 else:
695 coefs = np.empty((n_outputs, n_features, n_alphas),
696 dtype=np.float64)
697
698 4 10 2.5 0.0 if coef_init is None:
699 4 37 9.2 0.0 coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1]))
700 else:
701 coef_ = np.asfortranarray(coef_init)
702
703 # Compute the non-zero coefficients for the first alpha; we take these
704 # as the active set, which is then checked for the remaining alphas.
705 4 29 7.2 0.0 l1_reg = alphas[0] * l1_ratio * n_samples
706 4 22 5.5 0.0 l2_reg = alphas[0] * (1.0 - l1_ratio) * n_samples
707 4 25 6.2 0.0 active_set = np.arange(n_features, dtype=np.int32)
708 4 10 2.5 0.0 model = _cd_fast_path(
709 4 9 2.2 0.0 X, y, coef_, l1_reg, l2_reg, active_set, max_iter, tol,
710 4 255 63.8 0.0 rng, random, positive, precompute, Xy, X_sparse_scaling
711 )
712 4 12 3.0 0.0 coef_, dual_gap_, eps_, n_iter_ = model
713 4 14 3.5 0.0 n_iters.append(n_iter_)
714 4 13 3.2 0.0 dual_gaps[0] = dual_gap_
715 4 35 8.8 0.0 coefs[..., 0] = coef_
716
717 4 11 2.8 0.0 if multi_output:
718 active_set = np.nonzero(np.abs(coef_.T) > 1e-12)[0]
719 active_set = np.nonzero(np.bincount(active_set) == n_outputs)[0]
720 else:
721 4 81 20.2 0.0 active_set = np.nonzero(np.abs(coef_) > 1e-12)[0]
722
723 400 1309 3.3 0.2 for i, alpha in enumerate(alphas[1:]):
724
725 396 1070 2.7 0.2 kkt_violations = True
726 396 1267 3.2 0.2 residual = None
727 396 2204 5.6 0.4 l1_reg = alpha * l1_ratio * n_samples
728 396 1930 4.9 0.3 l2_reg = alpha * (1.0 - l1_ratio) * n_samples
729
730 796 2384 3.0 0.4 while kkt_violations:
731 400 4083 10.2 0.7 eligible_features = np.copy(active_set)
732 400 1168 2.9 0.2 if residual is None:
733 396 8957 22.6 1.5 residual = compute_residual(X, y, coef_)
734 400 1235 3.1 0.2 strong_active_set = compute_strong_active_set(
735 400 3028 7.6 0.5 X, y, n_samples*alpha, n_samples*alphas[i], l1_ratio,
736 400 24443 61.1 4.1 coef_, residual,
737 )
738 1169 3437 2.9 0.6 while kkt_violations:
739 769 2682 3.5 0.4 if isinstance(eligible_features, list):
740 369 1040 2.8 0.2 eligible_features = np.asarray(eligible_features,
741 369 4200 11.4 0.7 dtype=np.int32)
742 769 2133 2.8 0.4 model = _cd_fast_path(
743 769 2081 2.7 0.3 X, y, coef_, l1_reg, l2_reg, eligible_features,
744 769 2018 2.6 0.3 max_iter, tol, rng, random, positive, precompute,
745 769 136395 177.4 22.7 Xy, X_sparse_scaling
746 )
747 769 21483 27.9 3.6 residual = compute_residual(X, y, coef_)
748 769 2477 3.2 0.4 coef_, dual_gap_, eps_, n_iter_ = model
749
750 # Check KKT conditions for features present in strong_active_set
751 # and not in the eligible predictors. If the KKT conditions are
752 # violated, add them to the eligible features and optimize again.
753 769 2102 2.7 0.3 kkt_violations = False
754 769 2343 3.0 0.4 common_features = np.in1d(strong_active_set, eligible_features,
755 769 49696 64.6 8.3 assume_unique=True)
756 769 6367 8.3 1.1 not_in_active = strong_active_set[~common_features]
757
758 769 16275 21.2 2.7 if np.any(not_in_active):
759 455 1378 3.0 0.2 eligible_features, kkt_violations = check_kkt_conditions(
760 455 2871 6.3 0.5 X, y, coef_, not_in_active, n_samples*alpha, l1_ratio,
761 455 137453 302.1 22.9 residual, eligible_features, return_active_set=True)
762
763 # Check KKT conditions for all predictors. The strong and eligible
764 # predictors have been removed since they have already been optimized
765 # for in the above while loop.
766 400 1109 2.8 0.2 kkt_violations = False
767 400 1193 3.0 0.2 non_active = np.setdiff1d(feature_array, eligible_features,
768 400 31963 79.9 5.3 assume_unique=True)
769 400 1387 3.5 0.2 non_active = np.setdiff1d(non_active, strong_active_set,
770 400 29590 74.0 4.9 assume_unique=True)
771 400 8007 20.0 1.3 if np.any(non_active):
772 263 802 3.0 0.1 active_set, kkt_violations = check_kkt_conditions(
773 263 1610 6.1 0.3 X, y, coef_, non_active, n_samples*alpha, l1_ratio,
774 263 63788 242.5 10.6 residual, active_set, return_active_set=True)
775
776 396 1158 2.9 0.2 if dual_gap_ > eps_:
777 warnings.warn('Objective did not converge.' +
778 ' You might want' +
779 ' to increase the number of iterations',
780 ConvergenceWarning)
781 396 3356 8.5 0.6 coefs[..., i + 1] = coef_
782 396 1407 3.6 0.2 dual_gaps[i + 1] = dual_gap_
783 396 1320 3.3 0.2 n_iters.append(n_iter_)
784 396 1096 2.8 0.2 if return_models:
785 if not multi_output:
786 model = ElasticNet(
787 alpha=alpha, l1_ratio=l1_ratio,
788 fit_intercept=fit_intercept
789 if sparse.isspmatrix(X) else False,
790 precompute=precompute)
791 else:
792 model = MultiTaskElasticNet(
793 alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False)
794 model.dual_gap_ = dual_gaps[i + 1]
795 model.coef_ = coefs[..., i + 1]
796 model.n_iter_ = n_iters[i + 1]
797 if (fit_intercept and not sparse.isspmatrix(X)) or multi_output:
798 model.fit_intercept = True
799 model._set_intercept(X_mean, y_mean, X_std)
800 models.append(model)
801
802 396 1031 2.6 0.2 if verbose:
803 if verbose > 2:
804 print(model)
805 elif verbose > 1:
806 print('Path: %03i out of %03i' % (i, n_alphas))
807 else:
808 sys.stderr.write('.')
809
810 4 12 3.0 0.0 if return_models:
811 return models
812 4 10 2.5 0.0 elif return_n_iter:
813 return alphas, coefs, dual_gaps, n_iters
814 else:
815 4 11 2.8 0.0 return alphas, coefs, dual_gaps
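The stdout lines at the top of this report come from scikit-learn's
examples/plot_lasso_coordinate_descent_path.py. A rough sketch of the
driver that exercises enet_path, with parameter values that are
assumptions rather than the exact ones used here:

    # Rough sketch of the profiled example script (parameters assumed).
    from sklearn import datasets
    from sklearn.linear_model import enet_path, lasso_path

    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target
    X /= X.std(axis=0)  # standardize so the alpha grid is comparable

    eps = 5e-3  # alpha_min / alpha_max along the path

    print("Computing regularization path using the lasso...")
    alphas_lasso, coefs_lasso, _ = lasso_path(
        X, y, eps=eps, fit_intercept=False, return_models=False)

    print("Computing regularization path using the elastic net...")
    alphas_enet, coefs_enet, _ = enet_path(
        X, y, eps=eps, l1_ratio=0.8, fit_intercept=False,
        return_models=False)

Per the % Time columns above, most of the cost sits in the _cd_fast_path
solves (~23%) and the two check_kkt_conditions passes (~23% + ~11%), with
the np.in1d/np.setdiff1d set bookkeeping adding roughly another fifth.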