Harshdeep Singh HarshSingh16

Data Science Professional. Add me here: https://www.linkedin.com/in/harshsingh12/

HarshSingh16 / Importing_Libraries.py

Created February 7, 2019 08:58

HarshSingh16 / Load_Data.py

Created February 7, 2019 09:06

	#Loading the datasets
	# Each file is read in full and split on "\n" into a list of rows.
	# errors="ignore" drops undecodable bytes instead of raising.
	# The context managers make sure both file handles are closed after reading.
	with open("movie_lines.txt",encoding="utf-8",errors="ignore") as lines_file:
		lines=lines_file.read().split("\n")
	with open("movie_conversations.txt",encoding="utf-8",errors="ignore") as conversations_file:
		conversations=conversations_file.read().split("\n")

HarshSingh16 / id2linedict.py

Created February 7, 2019 18:05

HarshSingh16 / conversations_list.py

Last active February 7, 2019 18:39

	#Creating a list of conversations, each one a list of line ids
	# For every row, the last " +++$+++ "-separated field is taken, its first and
	# last characters are dropped, quotes and spaces are removed, and the rest is
	# split on commas.
	# conversations[:-1] leaves out the final entry of the split file.
	conversation_ids=[
		row.split(" +++$+++ ")[-1][1:-1].replace("'","").replace(" ","").split(",")
		for row in conversations[:-1]
	]
	# The list was originally bound to the misspelled name below while later code
	# reads `conversation_ids`; keep the old spelling as an alias of the same list.
	coversation_ids=conversation_ids

HarshSingh16 / QuestionsAnswers.py

Created February 8, 2019 06:18

	##Mapping Questions and Answers
	# Walk each conversation as adjacent pairs of line ids: the text of the
	# earlier id is stored as a question, the text of the following id as its answer.
	Questions=[]
	Answers=[]
	for line_ids in conversation_ids:
		for asked_id,replied_id in zip(line_ids,line_ids[1:]):
			Questions.append(id2line[asked_id])
			Answers.append(id2line[replied_id])

HarshSingh16 / Clean_text.py

Created February 8, 2019 06:52

	##Cleaning the text
	def clean_text(text):
		"""Lowercase *text* and expand a handful of English contractions.

		Args:
			text: The string to normalise.

		Returns:
			The lowercased string with "he's", "she's", "i'm", "that's",
			"what's", "where's" and "'ll" expanded to their long forms.
		"""
		# Applied in this exact order; "he's" runs first and also rewrites the
		# tail of "she's", which still yields "she is".
		replacements=(
			(r"he's","he is"),
			(r"she's","she is"),
			(r"i'm","i am"),
			(r"that's","that is"),
			(r"what's","what is"),
			(r"where's","where is"),
			(r"\'ll"," will"),
		)
		text=text.lower()
		for pattern,expansion in replacements:
			text=re.sub(pattern,expansion,text)
		# Without this return the function would hand back None to every caller.
		return text

HarshSingh16 / Clean_Qn_Ans.py

Created February 8, 2019 07:08

	#Cleaning Questions
	# One cleaned string per entry of Questions, in the same order.
	Clean_questions=[clean_text(question) for question in Questions]

	#Cleaning Answers
	# Each cleaned answer is actually collected here, so Clean_answers ends up
	# with one entry per entry of Answers instead of staying empty.
	Clean_answers=[clean_text(answer) for answer in Answers]

HarshSingh16 / WordCountDict.py

Created February 8, 2019 07:46

	#Creating a dictionary that maps each word to its occurrences
	# Words are the whitespace-separated tokens of every cleaned sentence.
	# Both the questions and the answers contribute to the same counts.
	word2count={}
	for sentences in (Clean_questions,Clean_answers):
		for sentence in sentences:
			for word in sentence.split():
				# .get supplies 0 the first time a word is seen.
				word2count[word]=word2count.get(word,0)+1

HarshSingh16 / gist:8b5c598e4c006c45ce1e5b562a3d0b23

Created February 8, 2019 08:31


	#Creating a dictionary that maps each word to a unique integer and also checking if the frequency threshold is met

	# Minimum count a word needs to pass the `count>=threshold` check below.
	threshold=20
	# Starts empty; presumably receives the word -> integer entries — TODO confirm.
	questionswordstoint={}
	# Starts at 0; presumably the next integer id to assign — TODO confirm.
	wordnumber=0

	# Iterate over every (word, count) pair recorded in word2count.
	for word,count in word2count.items():

	# NOTE(review): the body of this condition is not visible in this snippet.
	if count>=threshold:

HarshSingh16 / word2integer.py

Created February 8, 2019 18:37

	#SETTING A THRESHOLD AND MAPPING EACH WORD TO A UNIQUE INTEGER
	# Only words that occur strictly more than `threshold` times receive an id.
	# Ids are consecutive integers starting at 0, in word2count iteration order.
	threshold=20
	# Compare against the named threshold rather than repeating the literal 20.
	frequent_words=[word for word,frequency in word2count.items() if frequency>threshold]
	dict_integer={word:word_id for word_id,word in enumerate(frequent_words)}
	# Number of ids handed out, which is also the next unused id.
	word_number=len(dict_integer)