Created
February 8, 2018 02:05
-
-
Save CHBaker/bbf56d01795ef7eee7f27267d4fccf00 to your computer and use it in GitHub Desktop.
Indexing function for storing website keywords and URLs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keyword index: a list of [keyword, [url, ...]] entries.
# Repeat keywords and repeat URLs are meant to be skipped when adding.
index = []
def find(keyword, index):
    """Locate ``keyword`` inside the entries of ``index``.

    Each entry of ``index`` is searched with its own ``index()`` method.
    For every entry that contains ``keyword`` a pair
    ``(entry_position, position_within_entry)`` is yielded; entries that do
    not contain it are skipped.
    """
    for entry_no, entry in enumerate(index):
        try:
            offset = entry.index(keyword)
        except ValueError:
            # This entry does not contain the keyword; move on to the next.
            pass
        else:
            yield entry_no, offset
def add_to_index(index, keyword, url):
    """Associate ``url`` with ``keyword`` in ``index``, skipping repeats.

    ``index`` is a list of ``[keyword, [url, ...]]`` entries and is updated
    in place:

    * if ``keyword`` has no entry yet, ``[keyword, [url]]`` is appended;
    * if ``keyword`` already has an entry, ``url`` is appended to that
      entry's URL list unless it is already present.

    Args:
        index: The list of ``[keyword, urls]`` entries to update.
        keyword: The keyword to file the URL under.
        url: The URL to associate with the keyword.

    Returns:
        None. The result is visible through ``index``.
    """
    for entry in index:
        if entry[0] == keyword:
            urls = entry[1]
            # Use a membership test instead of ``list.index``: a hit at
            # position 0 is falsy and would be mistaken for "not found",
            # which is how duplicate URLs used to slip in.
            if url not in urls:
                urls.append(url)
            return
    # No entry for this keyword yet: start one with its first URL.
    index.append([keyword, [url]])
# 1: first keyword/URL pair
add_to_index(index, 'cats', 'http://cats.com')
# 2: the same keyword/URL pair again
add_to_index(index, 'cats', 'http://cats.com')
# 3: a second keyword with its own URL
add_to_index(index, 'dogs', 'http://dogs.org')
# 4: a hyphenated keyword with its own URL
add_to_index(index, 'chewed-gum', 'http://chewed-gum.org')
# EXPECTED RESULTS (repeat keywords and URLs are skipped):
# 1 >>> [['cats', ['http://cats.com']]]
# 2 >>> [['cats', ['http://cats.com']]]
# 3 >>> [
#     ['cats', ['http://cats.com']],
#     ['dogs', ['http://dogs.org']]
# ]
# 4 >>> [
#     ['cats', ['http://cats.com']],
#     ['dogs', ['http://dogs.org']],
#     ['chewed-gum', ['http://chewed-gum.org']]
# ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment