InsiderPhD · March 26, 2018 10:55
diff --git a/InvertedIndex.py b/InvertedIndex.py
 import nltk

 # Sample corpus for the Boolean-IR inverted index: two short texts, kept
 # both as individual names and together in the `documents` list.
 document1 = "I started with my company about six months ago. They hired myself and my boss at the same time but due to her work load they had me helping with her workload and not the job I was recruited and hired for. During this time my boss was always trying to be my friend and have conversations with me so she was well aware I was trying to have a baby. In January they hired a third person and have had me training her how to do all the elements of the job so I could do the job I was hired for. My boss and I had a specific conversation in January when I thought I was pregnant and she asked me the following day if I was. We also had a conversation where I was concerned I was training my replacement and I would have no job security and she assured me I was not."
 document2 = "Fast forward to this week. I am pregnant and told her on Thursday so I would be able to go to Dr. appts etc. she still continued to have me write down all my processes, knowledge etc. Then on Friday, I was called into the office and HR told me they were eliminating my position due to budget cuts and I was being laid off. I am the only one with this position. I could either get two weeks severance or work two weeks, but not both. I couldn’t figure out why I would continue to work and they didn’t explain it thoroughly. Thy handed me paperwork and said if I wanted the money I needed to sign it. They did not explain it, just simply asked if I had questions. I was blindsided, shaking and could barely read it. I knew I had 7 days to revoke so I just signed it so I could leave."

 # Document ids used later are 1-based positions in this list.
 documents = [document1, document2]
 # tokenise the text: delete full stops and commas, then let NLTK's
 # SpaceTokenizer split what is left into tokens
 tokeniser = nltk.tokenize.SpaceTokenizer()
 document1Tokens, document2Tokens = [
     tokeniser.tokenize(text.translate(str.maketrans('', '', '.,')))
     for text in (document1, document2)
 ]

 # normalise the tokens: run every token of each document through the
 # Porter stemmer, keeping one stemmed list per document
 porter = nltk.PorterStemmer()
 document1TokensNormalised = [porter.stem(token) for token in document1Tokens]
 document2TokensNormalised = [porter.stem(token) for token in document2Tokens]

 # every stemmed token of document 1 followed by every one of document 2
 allTokens = document1TokensNormalised + document2TokensNormalised

 # vocabulary: each distinct token exactly once, in order of first appearance.
 # A set remembers what has already been emitted, so each token costs one
 # hash lookup instead of a scan over the whole vocabulary built so far.
 seen = set()
 singleTokens = list()
 for t in allTokens:
     if t not in seen:
         seen.add(t)
         singleTokens.append(t)

 # build the index: one [token, postings] entry per vocabulary token, where
 # postings holds the ids of the documents containing the token
 # (1 for document1, 2 for document2), in ascending order
 document1Terms = set(document1TokensNormalised)
 document2Terms = set(document2TokensNormalised)
 invertedIndex = list()
 for t in singleTokens:
     postings = list()
     # a set membership test replaces a full scan of each document per token
     if t in document1Terms:
         postings.append(1)
     if t in document2Terms:
         postings.append(2)
     tokenIndex = [t, postings]
     invertedIndex.append(tokenIndex)

 print(invertedIndex)
	import nltk

	# Sample corpus for the Boolean-IR inverted index: two short texts, kept
	# both as individual names and together in the `documents` list.
	document1 = "I started with my company about six months ago. They hired myself and my boss at the same time but due to her work load they had me helping with her workload and not the job I was recruited and hired for. During this time my boss was always trying to be my friend and have conversations with me so she was well aware I was trying to have a baby. In January they hired a third person and have had me training her how to do all the elements of the job so I could do the job I was hired for. My boss and I had a specific conversation in January when I thought I was pregnant and she asked me the following day if I was. We also had a conversation where I was concerned I was training my replacement and I would have no job security and she assured me I was not."
	document2 = "Fast forward to this week. I am pregnant and told her on Thursday so I would be able to go to Dr. appts etc. she still continued to have me write down all my processes, knowledge etc. Then on Friday, I was called into the office and HR told me they were eliminating my position due to budget cuts and I was being laid off. I am the only one with this position. I could either get two weeks severance or work two weeks, but not both. I couldn’t figure out why I would continue to work and they didn’t explain it thoroughly. Thy handed me paperwork and said if I wanted the money I needed to sign it. They did not explain it, just simply asked if I had questions. I was blindsided, shaking and could barely read it. I knew I had 7 days to revoke so I just signed it so I could leave."

	# Document ids used later are 1-based positions in this list.
	documents = [document1, document2]
	# tokenise the text: strip full stops and commas from each document, then
	# hand the cleaned string to NLTK's SpaceTokenizer
	tokeniser = nltk.tokenize.SpaceTokenizer()
	document1Tokens, document2Tokens = map(
	    tokeniser.tokenize,
	    (text.replace('.', '').replace(',', '') for text in (document1, document2)),
	)

	# normalise the tokens: reduce every token of each document to its Porter
	# stem, producing one stemmed list per document
	porter = nltk.PorterStemmer()
	document1TokensNormalised = list(map(porter.stem, document1Tokens))
	document2TokensNormalised = list(map(porter.stem, document2Tokens))

	# every stemmed token of document 1 followed by every one of document 2
	allTokens = list(document1TokensNormalised)
	allTokens.extend(document2TokensNormalised)

	# vocabulary: each distinct token exactly once, in order of first appearance.
	# The `seen` set makes the "already emitted?" check a single hash lookup
	# rather than a scan over the vocabulary built so far.
	seen = set()
	singleTokens = list()
	for t in allTokens:
	    if t in seen:
	        continue
	    seen.add(t)
	    singleTokens.append(t)

	# build the index: one [token, postings] entry per vocabulary token, where
	# postings holds the 1-based ids of the documents containing the token
	# (1 for document1, 2 for document2), in ascending order
	documentTerms = [set(document1TokensNormalised), set(document2TokensNormalised)]
	invertedIndex = list()
	for t in singleTokens:
	    # set membership replaces a full scan of each document per token
	    postings = [docId for docId, terms in enumerate(documentTerms, start=1) if t in terms]
	    tokenIndex = [t, postings]
	    invertedIndex.append(tokenIndex)

	print(invertedIndex)