Skip to content

Instantly share code, notes, and snippets.

@jrgavilanes
Created February 11, 2019 21:31
Show Gist options
  • Save jrgavilanes/2db2020c40d64c0c0ef7b9e86a7e98ce to your computer and use it in GitHub Desktop.
Save jrgavilanes/2db2020c40d64c0c0ef7b9e86a7e98ce to your computer and use it in GitHub Desktop.
text index python
# Module-level inverted index used by main(): maps each word to the list of
# document ids whose text contains it.
superindice = {}
def indexa(id, content, index):
    """Register document ``id`` in ``index`` under every word of ``content``.

    The text is lower-cased and split on whitespace only, so punctuation
    stays attached to its word (``"Hola!"`` is indexed as ``"hola!"``).
    Each distinct word maps to a list of document ids, and a given id is
    stored at most once per word.

    Args:
        id: Identifier of the document being indexed.
        content: Text of the document.
        index: Mapping of word -> list of document ids; updated in place.
    """
    tokens = set(content.lower().split())
    for token in tokens:
        # Fetch the posting list for this word, creating it on first sight.
        postings = index.setdefault(token, [])
        if id not in postings:
            postings.append(id)
def desindexa(id, index):
    """Remove document ``id`` from every posting list in ``index``.

    Every occurrence of the id is dropped from each word's list, and any
    word whose list becomes empty as a result is deleted from the index.
    Words that never referenced the id are left untouched.

    Args:
        id: Identifier of the document to de-index.
        index: Mapping of word -> list of document ids; updated in place.
    """
    # Iterate over a snapshot so keys can be deleted while walking the index.
    for word, postings in list(index.items()):
        if id not in postings:
            continue
        # remove() only drops the first match; loop so that an id stored
        # more than once is fully de-indexed. The list is mutated in place
        # so other references to it observe the change.
        while id in postings:
            postings.remove(id)
        if not postings:
            del index[word]
def busca(words, index):
    """Return the ids of the documents that contain every word of the query.

    The query is lower-cased and split on whitespace; duplicate words are
    ignored. A document matches only if it is listed under all query words.

    Args:
        words: Query string of whitespace-separated words.
        index: Mapping of word -> collection of document ids.

    Returns:
        A list of matching document ids, in no particular order. The list is
        empty when the query has no words, when any word is not in the index,
        or when no document contains all of the words.
    """
    terms = set(words.lower().split())
    if not terms:
        return []

    matches = None
    for term in terms:
        postings = index.get(term)
        if not postings:
            # An unknown word can match nothing, so the intersection is empty.
            return []
        matches = set(postings) if matches is None else matches & set(postings)
        if not matches:
            # Already empty: further intersections cannot add anything back.
            return []
    return list(matches)
def main():
    """Run a small demo of the index: add two documents, drop one, search.

    Both sample texts are indexed into the module-level ``superindice``,
    ``"archivo1"`` is then de-indexed, and the result of searching for
    ``"quiero ola"`` is printed.
    """
    documentos = (
        ("archivo1", "Hola! En un lugar de Holanda"),
        ("archivo2", "de cuyo nombre no quiero acordarme como una ola"),
    )
    for nombre, texto in documentos:
        indexa(nombre, texto, superindice)

    desindexa("archivo1", superindice)

    encontrados = busca("quiero ola", superindice)
    print(encontrados)
# Run the demo only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment