codebynumbers · December 16, 2015 17:39
diff --git a/Substring Serch b/Substring Serch
 # ======================
 # Naive search
 # ======================

 import time
 import random

 input = 'https://www.facebook.com/teefury?hc_location=stream'
 bids = []

 with open('/usr/share/dict/words') as fh:
    for line in fh:
        word = line.strip().lower()
        if len(word) > 3:
            bids.append(word)

 start = time.time()

 for i in xrange(0, len(input)):
    for keyword in bids:
        if input[i:].startswith(keyword):
            print keyword, input[i:]

 print "Elapsed %0.5f" % (time.time() - start)

 # OUTPUT
 facebook facebook.com/teefury?hc_location=stream
 face facebook.com/teefury?hc_location=stream
 book book.com/teefury?hc_location=stream
 fury fury?hc_location=stream
 location location=stream
 cation cation=stream
 stream stream
 ream ream
 Elapsed 2.56454


 # ======================
 # Trie - (memory usage savings)
 # ======================


 import time
 import random
 import marisa_trie

 input = 'https://www.facebook.com/teefury?hc_location=stream'

 bids = []

 with open('/usr/share/dict/words') as fh:
    for line in fh:
        word = line.strip().lower().decode('utf-8')
        if len(word) > 3:
            bids.append(word)

 trie = marisa_trie.Trie(bids)

 start = time.time()

 for i in xrange(0, len(input)):
    # lookahead max 20 characters or until end of string
    for j in xrange(1, min(len(input)-i+1, 20)):
        if unicode("".join(input[i:i+j])) in trie:
            print input[i:i+j]

 print "Elapsed %0.5f" % (time.time() - start)

 ## OUTPUT 
 face
 facebook
 book
 fury
 location
 cation
 stream
 ream
 Elapsed 0.00278


 # ======================
 # Big hash map (memory usage could be an issue)
 # ======================

 import time
 import random

 input = 'https://www.facebook.com/teefury?hc_location=stream'

 bids = {}

 with open('/usr/share/dict/words') as fh:
    for line in fh:
        word = line.strip().lower().decode('utf-8')
        if len(word) > 3:
            bids[word] = True

 start = time.time()

 for i in xrange(0, len(input)):
    for j in xrange(1, min(len(input)-i+1, 20)):
        if bids.get(unicode("".join(input[i:i+j]))):
            print input[i:i+j]

 print "Elapsed %0.5f" % (time.time() - start)

 ## OUTPUT 
 face
 facebook
 book
 fury
 location
 cation
 stream
 ream
 Elapsed 0.00252
	# ======================
	# Naive search
	# ======================

	import time
	import random

	input = 'https://www.facebook.com/teefury?hc_location=stream'
	bids = []

	with open('/usr/share/dict/words') as fh:
	for line in fh:
	word = line.strip().lower()
	if len(word) > 3:
	bids.append(word)

	start = time.time()

	for i in xrange(0, len(input)):
	for keyword in bids:
	if input[i:].startswith(keyword):
	print keyword, input[i:]

	print "Elapsed %0.5f" % (time.time() - start)

	# OUTPUT
	facebook facebook.com/teefury?hc_location=stream
	face facebook.com/teefury?hc_location=stream
	book book.com/teefury?hc_location=stream
	fury fury?hc_location=stream
	location location=stream
	cation cation=stream
	stream stream
	ream ream
	Elapsed 2.56454


	# ======================
	# Trie - (memory usage savings)
	# ======================


	import time
	import random
	import marisa_trie

	input = 'https://www.facebook.com/teefury?hc_location=stream'

	bids = []

	with open('/usr/share/dict/words') as fh:
	for line in fh:
	word = line.strip().lower().decode('utf-8')
	if len(word) > 3:
	bids.append(word)

	trie = marisa_trie.Trie(bids)

	start = time.time()

	for i in xrange(0, len(input)):
	# lookahead max 20 characters or until end of string
	for j in xrange(1, min(len(input)-i+1, 20)):
	if unicode("".join(input[i:i+j])) in trie:
	print input[i:i+j]

	print "Elapsed %0.5f" % (time.time() - start)

	## OUTPUT
	face
	facebook
	book
	fury
	location
	cation
	stream
	ream
	Elapsed 0.00278


	# ======================
	# Big hash map (memory usage could be an issue)
	# ======================

	import time
	import random

	input = 'https://www.facebook.com/teefury?hc_location=stream'

	bids = {}

	with open('/usr/share/dict/words') as fh:
	for line in fh:
	word = line.strip().lower().decode('utf-8')
	if len(word) > 3:
	bids[word] = True

	start = time.time()

	for i in xrange(0, len(input)):
	for j in xrange(1, min(len(input)-i+1, 20)):
	if bids.get(unicode("".join(input[i:i+j]))):
	print input[i:i+j]

	print "Elapsed %0.5f" % (time.time() - start)

	## OUTPUT
	face
	facebook
	book
	fury
	location
	cation
	stream
	ream
	Elapsed 0.00252
No results found