Created
March 18, 2018 13:51
-
-
Save psinger/39a439428068fff5a8200d776f0f8e63 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute the product of two sparse matrices block by block and store the
# dense result in a compressed on-disk PyTables array ('dot.h5', node
# '/data'), so that only one block of dense output is materialized at a time.
from scipy.sparse import csr_matrix, rand
import tables as tb

# Imagine that many values are stored in this matrix and that its sparsity
# is low.
a = rand(2000, 2000, format='csr')
b = a.T

# Shapes: (l x m) . (m x n) -> (l x n)
l, m, n = a.shape[0], a.shape[1], b.shape[1]

# Number of result columns computed per loop iteration.
# This may not be the most efficient value: look into buffer size usage in
# PyTables and adapt the buffer size of the carray accordingly to improve
# fetching performance specifically.
bl = 1000

# We slice b by columns, and CSC improves the performance of column slicing.
# This could also be changed to slice a by rows instead.
b = b.tocsc()

filters = tb.Filters(complevel=5, complib='blosc')

# The context manager closes the HDF5 file even if a block fails midway.
with tb.open_file('dot.h5', 'w') as f:
    # Results are stored on disk as float32 in an (l x n) compressed array.
    out = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(l, n), filters=filters)

    # Walk over the columns of the result (n of them), one block at a time.
    for i in range(0, n, bl):
        j = min(i + bl, n)  # the last block may be narrower than bl
        out[:, i:j] = a.dot(b[:, i:j]).toarray()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment