This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
challenge_train_preds = [] | |
challenge_dev_preds = [] | |
challenge_test_preds = [] | |
for i in range(len(df_challenge_train)): | |
if '(1)' in df_challenge_train['only_answers'].iloc[i]: | |
challenge_train_preds.append(random.choice(num_list)) | |
else: | |
if '(E)' in df_challenge_train['only_answers'].iloc[i]: | |
challenge_train_preds.append(random.choice(char_list_2)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
easy_train_preds = [] | |
easy_dev_preds = [] | |
easy_test_preds = [] | |
for i in range(len(df_easy_train)): | |
if '(1)' in df_easy_train['only_answers'].iloc[i]: | |
easy_train_preds.append(random.choice(num_list)) | |
else: | |
if '(E)' in df_easy_train['only_answers'].iloc[i]: | |
easy_train_preds.append(random.choice(char_list_2)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_context(question, options): | |
''' | |
This function will return a context after joining a question | |
and its options separately. | |
For example: question + option_1 | |
question + option_2 | |
question + option_3 | |
question + option_4 | |
''' | |
scores = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_context_for_each_candidate(question,options_list): | |
""" Searches the user query and finds the best matches using elasticsearch.""" | |
#query = input("Enter query: ") | |
"""This function will return a context after joining a question | |
and its options separately. | |
For example: question + option_1 | |
question + option_2 | |
question + option_3 | |
question + option_4""" | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def data_generator(a): | |
''' | |
This function will accept a data point and returns a list of options | |
converts the only_options list of options. | |
Output will be as follows: | |
options = [option_1, option_2, option_3, option_4, option_5] | |
Note: If there's no option E then it will be written as 'None of the above'. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from elasticsearch import Elasticsearch | |
from elasticsearch.helpers import parallel_bulk | |
import pandas as pd | |
import numpy as np | |
import re | |
es = Elasticsearch([{'host':'localhost', 'port':9200}]) | |
corpus2 = open('drive/MyDrive/ARC/ARC_Corpus.txt', 'r', encoding='utf-8') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_answers(question): | |
''' | |
This function takes a value from the question column and splits the string
at (A) or (1). After splitting, it keeps the part right after that marker (the answer options) and puts the marker back in front of it.
''' | |
if '(A)' in question: | |
x = '(A) ' + question.split('(A)')[1] | |
elif '(1)' in question: | |
x = '(1) ' + question.split('(1)')[1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_question(question): | |
''' | |
This function will take a value in question column then, will split the string | |
at (A) or (1). After splitting we will take only first part which is the question. | |
''' | |
if '(A)' in question: | |
x = question.split('(A)')[0] | |
elif '(1)' in question: | |
x = question.split('(1)')[0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Year_graph(type_):
    """Plot, for each split, how many questions fall in each year.

    Draws one horizontal bar chart per split ("Train", "Test", "Dev", in that
    order) from the ``year`` column of the module-level object named
    ``<split>_multiple_main_<type_>`` (presumably a pandas DataFrame --
    confirm against where those variables are created).

    Args:
        type_: String suffix appended to ``<split>_multiple_main_`` to form
            the name of the global variable to read.

    Raises:
        NameError: If no module-level variable with the derived name exists.
    """
    print(type_ + " dataset Analysis ...............")
    for split in ("Train", "Test", "Dev"):
        frame_name = split + "_multiple_main_" + type_
        # Resolve the frame by name instead of eval()-ing a built string, so
        # the contents of type_ can never be executed as code.
        try:
            frame = globals()[frame_name]
        except KeyError:
            # Keep the exception type callers saw from the eval() version.
            raise NameError("name '" + frame_name + "' is not defined") from None
        year_counts = frame["year"].value_counts()

        plt.figure(figsize=(9, 6))
        # The chart shows the "year" column, so label it as such (the title
        # previously said "ExamName", copied from Exam_graph).
        plt.title(split + "_Year vs No_of_qustions_of_particular_catagory(" + type_ + ")")
        plt.barh(list(year_counts.index), list(year_counts))
        plt.xlabel("No_of_qustions_of_particular_catagory")
        plt.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Exam_graph(type_):
    """Plot, for each split, how many questions come from each exam.

    Draws one horizontal bar chart per split ("Train", "Test", "Dev", in that
    order) from the ``examName`` column of the module-level object named
    ``<split>_multiple_main_<type_>`` (presumably a pandas DataFrame --
    confirm against where those variables are created).

    Args:
        type_: String suffix appended to ``<split>_multiple_main_`` to form
            the name of the global variable to read.

    Raises:
        NameError: If no module-level variable with the derived name exists.
    """
    print(type_ + " dataset Analysis ...............")
    for split in ("Train", "Test", "Dev"):
        frame_name = split + "_multiple_main_" + type_
        # Resolve the frame by name instead of eval()-ing a built string, so
        # the contents of type_ can never be executed as code.
        try:
            frame = globals()[frame_name]
        except KeyError:
            # Keep the exception type callers saw from the eval() version.
            raise NameError("name '" + frame_name + "' is not defined") from None
        exam_counts = frame["examName"].value_counts()

        plt.figure(figsize=(9, 6))
        plt.title(split + "_ExamName vs No_of_qustions_of_particular_catagory(" + type_ + ")")
        plt.barh(list(exam_counts.index), list(exam_counts))
        plt.xlabel("No_of_qustions_of_particular_catagory")
        plt.show()