smhanov · April 13, 2025 09:41
diff --git a/copyinsert.py b/copyinsert.py
 #!/usr/bin/env python3
 import time
 import sys

 # Python by Steve Hanov
 # Suffix array construction from:
 # From Ge Nong, Sen Zhang and Wai Hong Chan, Two Efficient Algorithms for Linear Suffix Array Construction, 2008
 # longestMatches from: http://stevehanov.ca/blog/index.php?id=146
 # Released to the public domain.
 class SuffixArray:
    def __init__(self, A, B):
        self.length1 = len(A)+1
        self.length2 = len(B)+1
        self.seq = A + b"\001" + B + b"\000"

        # find the start or end of each bucket
        def getBuckets(s, bkt, n, K, end, os):
            sum = 0
            # clear all buckets
            for i in range(K+1):
                bkt[i] = 0
            
            # compute the size of each bucket
            for i in range(n):
                bkt[s[i+os]] += 1

            for i in range(K+1):
                sum += bkt[i]
                if end:
                    bkt[i] = sum
                else:
                    bkt[i] = sum - bkt[i]

        # compute SAl
        def induceSAl(t, SA, s, bkt, n, K, end, os, oa):
            # find starts of buckets
            getBuckets(s, bkt, n, K, end, os)
            for i in range(n):
                j = SA[i+oa] - 1
                if j >= 0 and not t[j]:
                    SA[bkt[s[j+os]]+oa] = j
                    bkt[s[j+os]] += 1

        # compute SAs
        def induceSAs(t, SA, s, bkt, n, K, end, os, oa):
            # find ends of buckets
            getBuckets(s, bkt, n, K, end, os);
            for i in range(n - 1, -1, -1):
                j = SA[i+oa] - 1
                if j >= 0 and t[j]:
                    bkt[s[j+os]] -= 1
                    SA[bkt[s[j+os]]+oa] = j

        # find the suffix array SA of s[0..n-1] in {1..K}^n
        # require s[n-1]=0 (the sentinel!), n>=2
        # use a working space (excluding s and SA) of
        # at most 2.25n+O(1) for a constant alphabet
        def SA_IS(s, SA, n, K, os = 0, oa = 0):
            def isLms(i):
                return i > 0 and t[i] and not t[i-1]

            #print("Stage 1");
            t = bytearray(n)
            t[n - 2] = 0
            t[n - 1] = 1
            for i in range(n-3, -1, -1):
                if s[i+os] < s[i+1+os] or s[i+os] == s[i+1+os] and t[i+1] == 1:
                    t[i] = 1
                else:
                    t[i] = 0

            #print("Stage 2");
            bkt = [0] * (K+1)
            getBuckets(s, bkt, n, K, True, os)

            for i in range(n):
                SA[i+oa] = -1

            for i in range(1, n):
                if isLms(i):
                    bkt[s[i+os]] -= 1
                    SA[bkt[s[i+os]]+oa] = i

            #print("Stage 3");
            induceSAl(t, SA, s, bkt, n, K, False, os, oa)

            induceSAs(t, SA, s, bkt, n, K, True, os, oa)

            bkt = None

            n1 = 0
            for i in range(n):
                if isLms(SA[i+oa]):
                    SA[n1+oa] = SA[i+oa]
                    n1 += 1

            for i in range(n1, n):
                SA[i+oa] = -1

            #print("Stage 4");
            name = 0
            prev = -1
            for i in range(n1):
                pos = SA[i+oa]
                diff = False
                for d in range(n):
                    if prev == -1 or s[pos+d+os] != s[prev+d+os] or \
                        t[pos+d] != t[prev+d]:
                        diff = True
                        break
                    elif d > 0 and (isLms(pos+d) or isLms(prev+d)):
                        break
                if diff:
                    name += 1
                    prev = pos
                pos = int(pos/2)
                SA[n1 + pos + oa] = name - 1

            j = n - 1
            for i in range(n-1, n1-1, -1):
                if SA[i+oa] >= 0:
                    SA[j+oa] = SA[i+oa]
                    j -= 1

            s1 = SA1 = SA
            s1off = n - n1
            if name < n1:
                SA_IS(s1, SA1, n1, name-1, s1off+oa, oa)
            else:
                for i in range(n1):
                    SA1[s1[i+s1off+oa]+oa] = i

            bkt = [0] * (K+1)

            #print("Stage 5");
            getBuckets(s, bkt, n, K, True, os)

            j = 0
            for i in range(1, n):
                if isLms(i):
                    s1[s1off+oa+j] = i
                    j += 1

            for i in range(n1):
                SA1[i+oa] = s1[oa+s1off+SA1[oa+i]]

            for i in range(n1, n):
                SA[i+oa] = -1

            for i in range(n1-1, -1, -1):
                j = SA[oa+i]
                SA[oa+i] = -1
                bkt[s[os+j]] -= 1
                SA[bkt[s[os+j]]+oa] = j

            #print("Stage 6");
            induceSAl(t, SA, s, bkt, n, K, False, os, oa)
            induceSAs(t, SA, s, bkt, n, K, True, os, oa)

        # Given string s and suffix array sa, compute the longest common prefix 
        # information in O(N) time.
        def calculateLcp(s, sa):
            n = len(sa)
            k = 0
            lcp = [0] * n
            rank = [0] * n

            for i in range(n):
              rank[sa[i]] = i

            i = 0
            while True:
                if i >= n: break

                if rank[i] == n-1:
                    k=0 
                    i += 1
                    continue
            
                j = sa[rank[i]+1]

                while i+k<n and j+k<n and s[i+k] == s[j+k]:
                  k += 1
                
                lcp[rank[i]]=k

                i += 1
                if k: k -= 1
            
            return lcp

        def remap(a, b):
            # Fast SA construction algorithms assume the sequence is
            # numeric, and has an upper value due to the bucket sort.

            # reserve 0 and 1 for the separator and end character.
            nextName = 2

            # Get the set of all characters used.
            items = set(a)
            items.update(b)

            # map each character to its index in the sorted list
            mapping = {}
            for item in sorted(items):
                mapping[item] = nextName
                nextName += 1

            # create a new string that is numeric.
            newString = []
            newString.extend([mapping[item] for item in a])
            newString.append(1)
            newString.extend([mapping[item] for item in b])
            newString.append(0)

            return newString, nextName - 1

        # Here is the code to initialize the suffix array object
        remapped, K = remap(A, B)
        self.sa = [0] * len(remapped)

        if self.length1 + self.length2 > 2:
            SA_IS(remapped, self.sa, len(self.sa), K)
            #self.sa = sais_native(remapped, len(remapped), K)
        self.lcp = calculateLcp(self.seq, self.sa)

    def show(self):
        for i in range(len(self.sa)):
            self.showPosition(i)

    def showPosition(self, saIndex):        
        i = saIndex
        p = self.sa[i]
        if self.sa[i] < self.length1:
            s = "A  "
        else:
            s = "  B"
            p -= self.length1
        print("{3} {0:8} {1:5} |{2}".format(p, self.lcp[i], json.dumps(self.seq[self.sa[i]:self.sa[i]+20]), s))

    def longestMatches(self):
        # returns, for every position in B, a tuple with the longest matching 
        # position in A and the length of that match.
        result = [None] * self.length2

        # forward pass
        lcp = 0
        aIndex = 0
        for i in range(len(self.sa)):
            if self.sa[i] < self.length1:
                # string in A
                lcp = self.lcp[i]
                aIndex = self.sa[i]
            else:
                # string in B.
                result[self.sa[i] - self.length1] = (aIndex, lcp)
                lcp = min(lcp, self.lcp[i])

        # reverse pass
        lcp = 0
        aIndex = 0
        for i in range(len(self.sa)-1, -1, -1):
            if self.sa[i] < self.length1:
                # string in A
                aIndex = self.sa[i]
                if i > 0:
                    lcp = self.lcp[i-1]
            else:
                # string in B.
                lcp = min(lcp, self.lcp[i])
                bIndex = self.sa[i] - self.length1
                oldAIndex, oldLcp = result[bIndex]
                if lcp > oldLcp:
                    result[bIndex] = (aIndex, lcp)

        return result

 class SAFindLongest:
    def __init__(self, A, B):
        self.matches = SuffixArray(A, B).longestMatches()

    def findLongest(self, positionInB):
        return self.matches[positionInB]


 class BruteFindLongest:
    def __init__(self, A, B):
        self.A = A
        self.B = B

    def findLongest(self, positionInB):
        a = self.A
        def prefixLength(position, text):
            l = 0
            while(l < len(text) and position+l < len(a) and text[l] == a[position+l]):
                l += 1
            return l

        text = self.B[positionInB:]

        longest = 0
        longestPos = 0
        for p in range(len(self.A)):
            l = prefixLength(p, text)
            if l > longest:
                longest = l
                longestPos = p

        return longestPos, longest

 class TableFindLongest:
    def __init__(self, A, B):
        self.A = A
        self.B = B
        minlen = 4
        self.table = {}
        for i in range(len(A)-minlen+1):
            code = A[i:i+minlen]
            if True or code not in self.table:
                self.table[code] = [i]
            else:
                self.table[code].append(i)

    def findLongest(self, positionInB):
        minlen = 4
        code = self.B[positionInB:positionInB+minlen]
        if code not in self.table:
            return 0, 0

        a = self.A
        lcp = 0
        pos = 0
        def prefixLength(position, text):
            l = 0
            while(l < len(text) and position+l < len(a) and text[l] == a[position+l]):
                l += 1
            return l


        text = self.B[positionInB:]
        for index in self.table[code]:
            l = prefixLength(index, text)
            if l > lcp:
                pos = index
                lcp = l

        return pos, lcp


 def longestPrefix(A, B):
    l = 0
    m = min(len(A), len(B))
    while l < m and A[l] == B[l]:
        l += 1

    return l

 def longestSuffix(A, B):
    l = 0
    la = len(A)
    lb = len(B)
    m = min(la, lb)
    while l < m and A[la-l-1] == B[lb-l-1]:
        l += 1

    return l


 def DiffEncode(a, b, Algorithm):
    start = time.time()
    cmds = []
    j = 0
    insertFrom = -1
    check = bytearray()
    totalCost = 0
    minimumMatch = 6

    def copy(start, length):
        nonlocal totalCost, check
        totalCost += 9
        cmds.append("COPY {0}, {1}".format(start, length))
        check += a[start:start+length]

    def insert(start, length):
        nonlocal totalCost, check
        totalCost += 1+5+length
        cmds.append("INSERT {0}, {1}".format(start, length))
        check += b[start:start+length]

    j = prefix = longestPrefix(a, b)
    if prefix > 0:
        copy(0, prefix)

    if prefix != len(b):
        suffix = min(longestSuffix(a, b), len(b) - prefix)
        limit = len(b) - suffix

        if suffix + prefix != len(b):
            if len(b) - suffix > prefix:
                algorithm = Algorithm(a[prefix:len(a)-suffix], b[prefix:limit])

                while j < limit:
                    position, length = algorithm.findLongest(j-prefix)
                    position += prefix
                    if length < minimumMatch:
                        if insertFrom == -1: insertFrom = j
                        j += 1
                    else:
                        if insertFrom >= 0:
                            insert(insertFrom, j - insertFrom)
                            insertFrom = -1
                        copy(position, length)
                        j += length

                if insertFrom >= 0:
                    insert(insertFrom, j-insertFrom)

        if limit < len(b):
            copy(len(a)-suffix, suffix)

    if check != b:
        print("\n".join(cmds))
        print("ERROR")
        raise(Exception("Error"))

    totalTime = time.time() - start

    return "\n".join(cmds), totalTime, totalCost

 import os
 def performance():
    # Output a CSV file with the N and the time to construct a suffix
    # array of a random string of length N.
    for i in range(1000, 1000000, 1000):
        s = os.urandom(i//2)
        t = os.urandom(i//2)
        start = time.time()
        s = SuffixArray(s, t)
        print("{0},{1}".format(i, time.time()-start))


 if "--performance" in sys.argv:
    performance()
    sys.exit(0)

 Algorithms = {
    "suffixarray": SAFindLongest,
    "bruteforce": BruteFindLongest,
    "table": TableFindLongest
 }

 import argparse
 parser = argparse.ArgumentParser()
 parser.add_argument("file1", type=str, help="First file to compare")
 parser.add_argument("file2", type=str, help="Second file to compare")
 parser.add_argument("--performance", help="Performance test SA construction")
 parser.add_argument("-a", "--algorithm", type=str, help="Algorithm to use",
    choices=Algorithms.keys(), default="suffixarray")
 args = parser.parse_args()

 f1 = open(args.file1, "rb").read()
 f2 = open(args.file2, "rb").read()

 cmds, time, cost = DiffEncode(f1, f2, Algorithms[args.algorithm])
 print("Cmds:" + cmds)
 print("Total time: {0}".format(time))
 print("Instruction cost: {0}".format(cost))
	#!/usr/bin/env python3
	import time
	import sys

	# Python by Steve Hanov
	# Suffix array construction from:
	# From Ge Nong, Sen Zhang and Wai Hong Chan, Two Efficient Algorithms for Linear Suffix Array Construction, 2008
	# longestMatches from: http://stevehanov.ca/blog/index.php?id=146
	# Released to the public domain.
	class SuffixArray:
	def __init__(self, A, B):
	self.length1 = len(A)+1
	self.length2 = len(B)+1
	self.seq = A + b"\001" + B + b"\000"

	# find the start or end of each bucket
	def getBuckets(s, bkt, n, K, end, os):
	sum = 0
	# clear all buckets
	for i in range(K+1):
	bkt[i] = 0

	# compute the size of each bucket
	for i in range(n):
	bkt[s[i+os]] += 1

	for i in range(K+1):
	sum += bkt[i]
	if end:
	bkt[i] = sum
	else:
	bkt[i] = sum - bkt[i]

	# compute SAl
	def induceSAl(t, SA, s, bkt, n, K, end, os, oa):
	# find starts of buckets
	getBuckets(s, bkt, n, K, end, os)
	for i in range(n):
	j = SA[i+oa] - 1
	if j >= 0 and not t[j]:
	SA[bkt[s[j+os]]+oa] = j
	bkt[s[j+os]] += 1

	# compute SAs
	def induceSAs(t, SA, s, bkt, n, K, end, os, oa):
	# find ends of buckets
	getBuckets(s, bkt, n, K, end, os);
	for i in range(n - 1, -1, -1):
	j = SA[i+oa] - 1
	if j >= 0 and t[j]:
	bkt[s[j+os]] -= 1
	SA[bkt[s[j+os]]+oa] = j

	# find the suffix array SA of s[0..n-1] in {1..K}^n
	# require s[n-1]=0 (the sentinel!), n>=2
	# use a working space (excluding s and SA) of
	# at most 2.25n+O(1) for a constant alphabet
	def SA_IS(s, SA, n, K, os = 0, oa = 0):
	def isLms(i):
	return i > 0 and t[i] and not t[i-1]

	#print("Stage 1");
	t = bytearray(n)
	t[n - 2] = 0
	t[n - 1] = 1
	for i in range(n-3, -1, -1):
	if s[i+os] < s[i+1+os] or s[i+os] == s[i+1+os] and t[i+1] == 1:
	t[i] = 1
	else:
	t[i] = 0

	#print("Stage 2");
	bkt = [0] * (K+1)
	getBuckets(s, bkt, n, K, True, os)

	for i in range(n):
	SA[i+oa] = -1

	for i in range(1, n):
	if isLms(i):
	bkt[s[i+os]] -= 1
	SA[bkt[s[i+os]]+oa] = i

	#print("Stage 3");
	induceSAl(t, SA, s, bkt, n, K, False, os, oa)

	induceSAs(t, SA, s, bkt, n, K, True, os, oa)

	bkt = None

	n1 = 0
	for i in range(n):
	if isLms(SA[i+oa]):
	SA[n1+oa] = SA[i+oa]
	n1 += 1

	for i in range(n1, n):
	SA[i+oa] = -1

	#print("Stage 4");
	name = 0
	prev = -1
	for i in range(n1):
	pos = SA[i+oa]
	diff = False
	for d in range(n):
	if prev == -1 or s[pos+d+os] != s[prev+d+os] or \
	t[pos+d] != t[prev+d]:
	diff = True
	break
	elif d > 0 and (isLms(pos+d) or isLms(prev+d)):
	break
	if diff:
	name += 1
	prev = pos
	pos = int(pos/2)
	SA[n1 + pos + oa] = name - 1

	j = n - 1
	for i in range(n-1, n1-1, -1):
	if SA[i+oa] >= 0:
	SA[j+oa] = SA[i+oa]
	j -= 1

	s1 = SA1 = SA
	s1off = n - n1
	if name < n1:
	SA_IS(s1, SA1, n1, name-1, s1off+oa, oa)
	else:
	for i in range(n1):
	SA1[s1[i+s1off+oa]+oa] = i

	bkt = [0] * (K+1)

	#print("Stage 5");
	getBuckets(s, bkt, n, K, True, os)

	j = 0
	for i in range(1, n):
	if isLms(i):
	s1[s1off+oa+j] = i
	j += 1

	for i in range(n1):
	SA1[i+oa] = s1[oa+s1off+SA1[oa+i]]

	for i in range(n1, n):
	SA[i+oa] = -1

	for i in range(n1-1, -1, -1):
	j = SA[oa+i]
	SA[oa+i] = -1
	bkt[s[os+j]] -= 1
	SA[bkt[s[os+j]]+oa] = j

	#print("Stage 6");
	induceSAl(t, SA, s, bkt, n, K, False, os, oa)
	induceSAs(t, SA, s, bkt, n, K, True, os, oa)

	# Given string s and suffix array sa, compute the longest common prefix
	# information in O(N) time.
	def calculateLcp(s, sa):
	n = len(sa)
	k = 0
	lcp = [0] * n
	rank = [0] * n

	for i in range(n):
	rank[sa[i]] = i

	i = 0
	while True:
	if i >= n: break

	if rank[i] == n-1:
	k=0
	i += 1
	continue

	j = sa[rank[i]+1]

	while i+k<n and j+k<n and s[i+k] == s[j+k]:
	k += 1

	lcp[rank[i]]=k

	i += 1
	if k: k -= 1

	return lcp

	def remap(a, b):
	# Fast SA construction algorithms assume the sequence is
	# numeric, and has an upper value due to the bucket sort.

	# reserve 0 and 1 for the separator and end character.
	nextName = 2

	# Get the set of all characters used.
	items = set(a)
	items.update(b)

	# map each character to its index in the sorted list
	mapping = {}
	for item in sorted(items):
	mapping[item] = nextName
	nextName += 1

	# create a new string that is numeric.
	newString = []
	newString.extend([mapping[item] for item in a])
	newString.append(1)
	newString.extend([mapping[item] for item in b])
	newString.append(0)

	return newString, nextName - 1

	# Here is the code to initialize the suffix array object
	remapped, K = remap(A, B)
	self.sa = [0] * len(remapped)

	if self.length1 + self.length2 > 2:
	SA_IS(remapped, self.sa, len(self.sa), K)
	#self.sa = sais_native(remapped, len(remapped), K)
	self.lcp = calculateLcp(self.seq, self.sa)

	def show(self):
	for i in range(len(self.sa)):
	self.showPosition(i)

	def showPosition(self, saIndex):
	i = saIndex
	p = self.sa[i]
	if self.sa[i] < self.length1:
	s = "A "
	else:
	s = " B"
	p -= self.length1
	print("{3} {0:8} {1:5} \|{2}".format(p, self.lcp[i], json.dumps(self.seq[self.sa[i]:self.sa[i]+20]), s))

	def longestMatches(self):
	# returns, for every position in B, a tuple with the longest matching
	# position in A and the length of that match.
	result = [None] * self.length2

	# forward pass
	lcp = 0
	aIndex = 0
	for i in range(len(self.sa)):
	if self.sa[i] < self.length1:
	# string in A
	lcp = self.lcp[i]
	aIndex = self.sa[i]
	else:
	# string in B.
	result[self.sa[i] - self.length1] = (aIndex, lcp)
	lcp = min(lcp, self.lcp[i])

	# reverse pass
	lcp = 0
	aIndex = 0
	for i in range(len(self.sa)-1, -1, -1):
	if self.sa[i] < self.length1:
	# string in A
	aIndex = self.sa[i]
	if i > 0:
	lcp = self.lcp[i-1]
	else:
	# string in B.
	lcp = min(lcp, self.lcp[i])
	bIndex = self.sa[i] - self.length1
	oldAIndex, oldLcp = result[bIndex]
	if lcp > oldLcp:
	result[bIndex] = (aIndex, lcp)

	return result

	class SAFindLongest:
	def __init__(self, A, B):
	self.matches = SuffixArray(A, B).longestMatches()

	def findLongest(self, positionInB):
	return self.matches[positionInB]


	class BruteFindLongest:
	def __init__(self, A, B):
	self.A = A
	self.B = B

	def findLongest(self, positionInB):
	a = self.A
	def prefixLength(position, text):
	l = 0
	while(l < len(text) and position+l < len(a) and text[l] == a[position+l]):
	l += 1
	return l

	text = self.B[positionInB:]

	longest = 0
	longestPos = 0
	for p in range(len(self.A)):
	l = prefixLength(p, text)
	if l > longest:
	longest = l
	longestPos = p

	return longestPos, longest

	class TableFindLongest:
	def __init__(self, A, B):
	self.A = A
	self.B = B
	minlen = 4
	self.table = {}
	for i in range(len(A)-minlen+1):
	code = A[i:i+minlen]
	if True or code not in self.table:
	self.table[code] = [i]
	else:
	self.table[code].append(i)

	def findLongest(self, positionInB):
	minlen = 4
	code = self.B[positionInB:positionInB+minlen]
	if code not in self.table:
	return 0, 0

	a = self.A
	lcp = 0
	pos = 0
	def prefixLength(position, text):
	l = 0
	while(l < len(text) and position+l < len(a) and text[l] == a[position+l]):
	l += 1
	return l


	text = self.B[positionInB:]
	for index in self.table[code]:
	l = prefixLength(index, text)
	if l > lcp:
	pos = index
	lcp = l

	return pos, lcp


	def longestPrefix(A, B):
	l = 0
	m = min(len(A), len(B))
	while l < m and A[l] == B[l]:
	l += 1

	return l

	def longestSuffix(A, B):
	l = 0
	la = len(A)
	lb = len(B)
	m = min(la, lb)
	while l < m and A[la-l-1] == B[lb-l-1]:
	l += 1

	return l


	def DiffEncode(a, b, Algorithm):
	start = time.time()
	cmds = []
	j = 0
	insertFrom = -1
	check = bytearray()
	totalCost = 0
	minimumMatch = 6

	def copy(start, length):
	nonlocal totalCost, check
	totalCost += 9
	cmds.append("COPY {0}, {1}".format(start, length))
	check += a[start:start+length]

	def insert(start, length):
	nonlocal totalCost, check
	totalCost += 1+5+length
	cmds.append("INSERT {0}, {1}".format(start, length))
	check += b[start:start+length]

	j = prefix = longestPrefix(a, b)
	if prefix > 0:
	copy(0, prefix)

	if prefix != len(b):
	suffix = min(longestSuffix(a, b), len(b) - prefix)
	limit = len(b) - suffix

	if suffix + prefix != len(b):
	if len(b) - suffix > prefix:
	algorithm = Algorithm(a[prefix:len(a)-suffix], b[prefix:limit])

	while j < limit:
	position, length = algorithm.findLongest(j-prefix)
	position += prefix
	if length < minimumMatch:
	if insertFrom == -1: insertFrom = j
	j += 1
	else:
	if insertFrom >= 0:
	insert(insertFrom, j - insertFrom)
	insertFrom = -1
	copy(position, length)
	j += length

	if insertFrom >= 0:
	insert(insertFrom, j-insertFrom)

	if limit < len(b):
	copy(len(a)-suffix, suffix)

	if check != b:
	print("\n".join(cmds))
	print("ERROR")
	raise(Exception("Error"))

	totalTime = time.time() - start

	return "\n".join(cmds), totalTime, totalCost

	import os
	def performance():
	# Output a CSV file with the N and the time to construct a suffix
	# array of a random string of length N.
	for i in range(1000, 1000000, 1000):
	s = os.urandom(i//2)
	t = os.urandom(i//2)
	start = time.time()
	s = SuffixArray(s, t)
	print("{0},{1}".format(i, time.time()-start))


	if "--performance" in sys.argv:
	performance()
	sys.exit(0)

	Algorithms = {
	"suffixarray": SAFindLongest,
	"bruteforce": BruteFindLongest,
	"table": TableFindLongest
	}

	import argparse
	parser = argparse.ArgumentParser()
	parser.add_argument("file1", type=str, help="First file to compare")
	parser.add_argument("file2", type=str, help="Second file to compare")
	parser.add_argument("--performance", help="Performance test SA construction")
	parser.add_argument("-a", "--algorithm", type=str, help="Algorithm to use",
	choices=Algorithms.keys(), default="suffixarray")
	args = parser.parse_args()

	f1 = open(args.file1, "rb").read()
	f2 = open(args.file2, "rb").read()

	cmds, time, cost = DiffEncode(f1, f2, Algorithms[args.algorithm])
	print("Cmds:" + cmds)
	print("Total time: {0}".format(time))
	print("Instruction cost: {0}".format(cost))