Last active
August 1, 2017 04:19
-
-
Save mjwillson/060644552eb037ebb3e7 to your computer and use it in GitHub Desktop.
Matrix of sliding window ngrams without any copying via numpy striding tricks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy.lib.stride_tricks import as_strided | |
def ngrams_via_striding(array, order):
    """Return a zero-copy matrix of the sliding-window n-grams of a 1-D array.

    Row ``i`` of the result holds ``array[i], array[i + 1], ...,
    array[i + order - 1]``.  No data is copied: the result is a strided view
    onto the memory of ``array``, so neighbouring rows overlap in memory.

    Parameters
    ----------
    array : numpy.ndarray
        One-dimensional array.  Any element stride is accepted, so sliced
        views such as ``a[::2]`` or ``a[::-1]`` work as well as contiguous
        arrays.
    order : int
        Window length (the "n" in n-gram).

    Returns
    -------
    numpy.ndarray
        View of shape ``(max(array.size + 1 - order, 0), order)``.  It has
        zero rows when ``array`` is shorter than ``order``.

    Raises
    ------
    ValueError
        If ``array`` is not one-dimensional or ``order`` is negative.

    Notes
    -----
    The view is created with ``as_strided``'s defaults, exactly as before.
    Because rows share memory, writing through the view changes several
    rows (and ``array`` itself) at once; copy the result before mutating it.
    """
    # Explicit checks instead of ``assert``: assertions vanish under
    # ``python -O``, and re-striding an array of the wrong shape would read
    # memory that does not belong to the intended windows.
    if array.ndim != 1:
        raise ValueError(
            "ngrams_via_striding expects a 1-D array, got %d dimensions"
            % array.ndim
        )
    if order < 0:
        raise ValueError("order must be non-negative, got %r" % (order,))

    # Use the array's own element stride rather than assuming ``itemsize``,
    # so non-contiguous 1-D views are windowed correctly.
    (step,) = array.strides
    n_rows = max(array.size + 1 - order, 0)
    # Moving one row down and moving one column right both advance by a
    # single element, hence the same stride on both axes.
    return as_strided(array, shape=(n_rows, order), strides=(step, step))
In [71]: a = numpy.arange(10)
In [72]: ngrams_via_striding(a, 4)
Out[72]:
array([[0, 1, 2, 3],
       [1, 2, 3, 4],
       [2, 3, 4, 5],
       [3, 4, 5, 6],
       [4, 5, 6, 7],
       [5, 6, 7, 8],
       [6, 7, 8, 9]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment