Skip to content

Instantly share code, notes, and snippets.

@mmozeiko
Created July 26, 2014 00:11

Revisions

  1. mmozeiko created this gist Jul 26, 2014.
    63 changes: 63 additions & 0 deletions lz4.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@
    # Warning: this implementation does not check whether reads or writes stay
    # within the input/output buffers, so bad input will typically raise IndexError.

    def LZ4_decompress(source, osize):
        """Decompress raw LZ4 block data (no frame header is parsed).

        The input is read as a series of sequences, each made of a token byte,
        an optional run of literal bytes and an optional back-reference (match)
        into the data that has already been written to the output.

        Args:
            source: compressed data; anything ``bytearray()`` accepts.
            osize: size in bytes of the decompressed data. The output buffer is
                allocated with this size up front.

        Returns:
            The decompressed data as ``bytes`` (the same type as ``str`` on
            Python 2).

        Raises:
            IndexError: typically, when the input is truncated or ``osize`` is
                too small; buffer bounds are not validated explicitly.
            AssertionError: if a match offset of zero is encountered.
        """
        src = bytearray(source)
        isize = len(src)
        dst = bytearray(osize)

        si = 0  # read position in src
        di = 0  # write position in dst

        while True:
            # token: high nibble = literal count, low nibble = match length - 4
            token = src[si]
            si += 1

            literals = token >> 4
            match = token & 0xF

            if literals > 0:
                # process literal section
                if literals == 15:
                    # 15 means extra length bytes follow; every 255 byte adds
                    # 255 and announces another length byte
                    while src[si] == 255:
                        literals += 255
                        si += 1
                    literals += src[si]
                    si += 1

                dst[di:di + literals] = src[si:si + literals]
                di += literals
                si += literals

            # The last sequence carries no match part, so the data ends right
            # after its literals. This is checked even when the token has no
            # literals, because empty data is encoded as a lone 0x00 token.
            # Deliberately "==" and not ">=": running past the end of a
            # truncated input must keep failing instead of returning early.
            if si == isize:
                break

            # process match copy section: 2-byte little-endian offset back
            # from the current write position
            offset = src[si] + (src[si + 1] << 8)
            si += 2
            assert offset != 0, "this doesn't make sense, offset=0 means this must be literal"

            if match == 15:
                # same extra-length-byte scheme as for the literal count
                while src[si] == 255:
                    match += 255
                    si += 1
                match += src[si]
                si += 1

            # stored match length is biased by the minimum match length of 4
            match += 4

            if match <= offset:
                # source and destination ranges do not overlap: one slice copy
                dst[di:di + match] = dst[di - offset:di - offset + match]
            else:
                # ranges overlap: the copy must observe bytes written earlier
                # in this same copy, so go byte by byte
                for i in range(match):
                    dst[di + i] = dst[di - offset + i]

            di += match

        return bytes(dst)