mfbx9da4 · March 17, 2018 19:27 · mfbx9da4 · Mar 16, 2018
diff --git a/longest_common_prefix.py b/longest_common_prefix.py
 from timer import Timer
 import matplotlib.pyplot as plt

 class Solution(object):
  def longestCommonPrefix(self, strings):
    if not len(strings):
      return 0
    if len(strings) == 1:
      return len(strings[0])

    smallest_i = 0
    smallest = strings[smallest_i]
    for i, string in enumerate(strings):
      if len(string) < len(smallest):
        smallest_i = i
        smallest = string

    lo = 0
    hi = len(smallest) - 1
    found_prefix = False

    while lo < hi:
      mid = ((hi - lo + 1) // 2) + lo

      broke = False
      for string in strings:
        if string[:mid] != smallest[:mid]:
          hi = mid - 1
          broke = True
          break

      if not broke:
        found_prefix = True
        lo = mid

    # print('prefix', str2[:lo])
    # return str2[:lo]
    return lo

  def longestCommonPrefix2(self, strings):
    smallest = strings[0]
    smallest_i = 0
    for i in range(1, len(strings)):
      string = strings[i]
      if len(string) < len(smallest):
        smallest_i = i
        smallest = string

    for s in range(len(smallest)):
      for i, string in enumerate(strings):
        if i != smallest_i and string[s] != smallest[s]:
          return s

  def longestCommonPrefix3(self, strings):
    strings_as_bytes = [bytes(string, 'utf-8') for string in strings]
    smallest = strings_as_bytes[0]
    smallest_i = 0
    for i in range(1, len(strings_as_bytes)):
      string = strings_as_bytes[i]
      if len(string) < len(smallest):
        smallest_i = i
        smallest = string

    for s in range(len(smallest)):
      for i, string in enumerate(strings_as_bytes):
        if i != smallest_i and string[s] != smallest[s]:
          return s

 solution = Solution()

 string1 = 'aaaabbbbbbbbbbbbbbbbbbbbbbbb'
 string2 = 'aaaacccccccccccccccccccccccc'
 with Timer('short prefix v1') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix([string1, string2]) == 4
 with Timer('short prefix v2') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix2([string1, string2]) == 4
 with Timer('short prefix v3') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix3([string1, string2]) == 4

 string1 = 'aaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb'
 string2 = 'aaaaaaaaaaaaaaaaaaaaaacccccccccccccccccccccccc'
 with Timer('medium prefix v1') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix([string1, string2]) == 22
 with Timer('medium prefix v2') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix2([string1, string2]) == 22
 with Timer('medium prefix v3') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix3([string1, string2]) == 22


 string1 = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb'
 string2 = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacccccccccccccccccccccccc'
 with Timer('long prefix v1') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix([string1, string2]) == 224
 with Timer('long prefix v2') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix2([string1, string2]) == 224
 with Timer('long prefix v3') as t:
  for i in range(10000):
    assert solution.longestCommonPrefix3([string1, string2]) == 224

 v1 = []
 v2 = []
 v3 = []
 for prefix_length in range(1000):
  if prefix_length % 100 == 0:
    print(prefix_length)
  string1 = ('a' * prefix_length) + 'bbbbbbbbbbbbbbbbbbbbbbbb'
  string2 = ('a' * prefix_length) + 'cccccccccccccccccccccccc'
  with Timer(print_message=False) as t:
    for i in range(10):
      assert solution.longestCommonPrefix([string1, string2]) == prefix_length
  v1.append(t.delta)
  with Timer(print_message=False) as t:
    for i in range(10):
      assert solution.longestCommonPrefix2([string1, string2]) == prefix_length
  v2.append(t.delta)
  with Timer(print_message=False) as t:
    for i in range(10):
      assert solution.longestCommonPrefix3([string1, string2]) == prefix_length
  v3.append(t.delta)

 plt.plot(v1, label='binary search, native string comparison')
 plt.plot(v2, label='compare characters')
 plt.plot(v3, label='compare bytes')
 plt.ylabel('time (s)')
 plt.xlabel('prefix_length')
 plt.legend()
 plt.show()
	from timer import Timer
	import matplotlib.pyplot as plt

	class Solution(object):
	def longestCommonPrefix(self, strings):
	if not len(strings):
	return 0
	if len(strings) == 1:
	return len(strings[0])

	smallest_i = 0
	smallest = strings[smallest_i]
	for i, string in enumerate(strings):
	if len(string) < len(smallest):
	smallest_i = i
	smallest = string

	lo = 0
	hi = len(smallest) - 1
	found_prefix = False

	while lo < hi:
	mid = ((hi - lo + 1) // 2) + lo

	broke = False
	for string in strings:
	if string[:mid] != smallest[:mid]:
	hi = mid - 1
	broke = True
	break

	if not broke:
	found_prefix = True
	lo = mid

	# print('prefix', str2[:lo])
	# return str2[:lo]
	return lo

	def longestCommonPrefix2(self, strings):
	smallest = strings[0]
	smallest_i = 0
	for i in range(1, len(strings)):
	string = strings[i]
	if len(string) < len(smallest):
	smallest_i = i
	smallest = string

	for s in range(len(smallest)):
	for i, string in enumerate(strings):
	if i != smallest_i and string[s] != smallest[s]:
	return s

	def longestCommonPrefix3(self, strings):
	strings_as_bytes = [bytes(string, 'utf-8') for string in strings]
	smallest = strings_as_bytes[0]
	smallest_i = 0
	for i in range(1, len(strings_as_bytes)):
	string = strings_as_bytes[i]
	if len(string) < len(smallest):
	smallest_i = i
	smallest = string

	for s in range(len(smallest)):
	for i, string in enumerate(strings_as_bytes):
	if i != smallest_i and string[s] != smallest[s]:
	return s

	solution = Solution()

	string1 = 'aaaabbbbbbbbbbbbbbbbbbbbbbbb'
	string2 = 'aaaacccccccccccccccccccccccc'
	with Timer('short prefix v1') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix([string1, string2]) == 4
	with Timer('short prefix v2') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix2([string1, string2]) == 4
	with Timer('short prefix v3') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix3([string1, string2]) == 4

	string1 = 'aaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb'
	string2 = 'aaaaaaaaaaaaaaaaaaaaaacccccccccccccccccccccccc'
	with Timer('medium prefix v1') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix([string1, string2]) == 22
	with Timer('medium prefix v2') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix2([string1, string2]) == 22
	with Timer('medium prefix v3') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix3([string1, string2]) == 22


	string1 = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb'
	string2 = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacccccccccccccccccccccccc'
	with Timer('long prefix v1') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix([string1, string2]) == 224
	with Timer('long prefix v2') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix2([string1, string2]) == 224
	with Timer('long prefix v3') as t:
	for i in range(10000):
	assert solution.longestCommonPrefix3([string1, string2]) == 224

	v1 = []
	v2 = []
	v3 = []
	for prefix_length in range(1000):
	if prefix_length % 100 == 0:
	print(prefix_length)
	string1 = ('a' * prefix_length) + 'bbbbbbbbbbbbbbbbbbbbbbbb'
	string2 = ('a' * prefix_length) + 'cccccccccccccccccccccccc'
	with Timer(print_message=False) as t:
	for i in range(10):
	assert solution.longestCommonPrefix([string1, string2]) == prefix_length
	v1.append(t.delta)
	with Timer(print_message=False) as t:
	for i in range(10):
	assert solution.longestCommonPrefix2([string1, string2]) == prefix_length
	v2.append(t.delta)
	with Timer(print_message=False) as t:
	for i in range(10):
	assert solution.longestCommonPrefix3([string1, string2]) == prefix_length
	v3.append(t.delta)

	plt.plot(v1, label='binary search, native string comparison')
	plt.plot(v2, label='compare characters')
	plt.plot(v3, label='compare bytes')
	plt.ylabel('time (s)')
	plt.xlabel('prefix_length')
	plt.legend()
	plt.show()