Created
October 1, 2011 13:37
-
-
Save ogrisel/1256060 to your computer and use it in GitHub Desktop.
load_svmlight_file line profile
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%lprun -f datasets.svmlight_format._load_svmlight_file _ = datasets.load_svmlight_file('competition_data/public_train_data.svmlight.dat')
Timer unit: 1e-06 s
File: /home/ogrisel/coding/scikit-learn/sklearn/datasets/svmlight_format.py
Function: _load_svmlight_file at line 21
Total time: 58.3922 s
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    21                                           def _load_svmlight_file(f, buffer_mb, n_features):
    22         1            7      7.0      0.0      data = []
    23         1            5      5.0      0.0      indptr = []
    24         1            4      4.0      0.0      indices = []
    25         1            5      5.0      0.0      labels = []
    26
    27     50001       230613      4.6      0.4      for line in f:
    28     50000       141327      2.8      0.2          line = line.strip()
    29
    30     50000       168408      3.4      0.3          hash_position = line.find('#')
    31     50000       104337      2.1      0.2          if hash_position == 0:
    32                                                       continue
    33     50000       100519      2.0      0.2          elif hash_position > 0:
    34                                                       line = line[:hash_position].strip()
    35
    36     50000       675187     13.5      1.2          line_parts = line.split()
    37     50000       245655      4.9      0.4          y, features = line_parts[0], line_parts[1:]
    38
    39     50000       147872      3.0      0.3          labels.append(float(y))
    40     50000       118925      2.4      0.2          indptr.append(len(data))
    41
    42   5730286     11460625      2.0     19.6          for feat in features:
    43   5680286     14263551      2.5     24.4              idx, value = feat.split(":")
    44   5680286     15822776      2.8     27.1              indices.append(int(idx))
    45   5680286     14151570      2.5     24.2              data.append(float(value))
    46
    47         1            3      3.0      0.0      indptr.append(len(data))
    48         1        17321  17321.0      0.0      indptr = np.array(indptr, dtype=np.int)
    49
    50         1            5      5.0      0.0      if n_features is not None:
    51                                                   shape = (indptr.shape[0] - 1, n_features)
    52                                               else:
    53         1            2      2.0      0.0          shape = None # inferred
    54
    55         1       297895 297895.0      0.5      X = sp.csr_matrix((np.array(data, dtype=np.double),
    56         1       392693 392693.0      0.7                         np.array(indices, dtype=np.int),
    57         1        45218  45218.0      0.1                         indptr), shape)
    58
    59         1         7638   7638.0      0.0      return X, np.array(labels, dtype=np.double)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment