Skip to content

Instantly share code, notes, and snippets.

# Randomly guessed predictions for the challenge split.
# NOTE(review): only the train list is filled in the visible code; the dev and
# test lists are initialised here and presumably filled elsewhere.
challenge_train_preds = []
challenge_dev_preds = []
challenge_test_preds = []
# Iterating the column visits the rows in positional order, exactly like
# indexing with .iloc[i] for i in range(len(df)).
for answers in df_challenge_train['only_answers']:
    if '(1)' in answers:
        # Options are numbered: guess one entry of num_list.
        challenge_train_preds.append(random.choice(num_list))
    elif '(E)' in answers:
        # Lettered options that include an (E): guess from char_list_2.
        challenge_train_preds.append(random.choice(char_list_2))
    # NOTE(review): a row matching neither marker appends nothing in the
    # visible code, so this list can end up shorter than the frame -- confirm
    # whether a fallback branch was lost from this excerpt.
# Randomly guessed predictions for the easy split.
# NOTE(review): only the train list is filled in the visible code; the dev and
# test lists are initialised here and presumably filled elsewhere.
easy_train_preds = []
easy_dev_preds = []
easy_test_preds = []
# Iterating the column visits the rows in positional order, exactly like
# indexing with .iloc[i] for i in range(len(df)).
for answers in df_easy_train['only_answers']:
    if '(1)' in answers:
        # Options are numbered: guess one entry of num_list.
        easy_train_preds.append(random.choice(num_list))
    elif '(E)' in answers:
        # Lettered options that include an (E): guess from char_list_2.
        easy_train_preds.append(random.choice(char_list_2))
    # NOTE(review): a row matching neither marker appends nothing in the
    # visible code, so this list can end up shorter than the frame -- confirm
    # whether a fallback branch was lost from this excerpt.
def get_context(question, options):
    '''
    Intended behaviour (per the original author's notes): return a context
    built by joining the question with each of its options separately.
    For example: question + option_1
                 question + option_2
                 question + option_3
                 question + option_4

    NOTE(review): only the first statement of the body is present in this
    excerpt, so as written the function returns None. Restore the rest of
    the body from the original source before relying on it.
    '''
    # Starts empty; nothing visible here appends to it or returns it.
    scores = []
def get_context_for_each_candidate(question,options_list):
    """
    Intended behaviour (per the original author's notes): search with
    elasticsearch for the best matches, returning a context for the question
    joined with each option separately.
    For example: question + option_1
                 question + option_2
                 question + option_3
                 question + option_4

    NOTE(review): no executable statements of the body are present in this
    excerpt, so as written the function returns None. Restore the body from
    the original source before relying on it.
    """
def data_generator(a):
    '''
    Intended behaviour (per the original author's notes): accept a data point
    and return its only_options value converted to a list of options.
    Output will be as follows:
        options = [option_1, option_2, option_3, option_4, option_5]
    Note: if there is no option E, it is meant to be written as
    'None of the above'.

    NOTE(review): no executable statements of the body are present in this
    excerpt, so as written the function returns None. Restore the body from
    the original source before relying on it.
    '''
from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk
import pandas as pd
import numpy as np
import re
# Client for the Elasticsearch node at localhost:9200.
es = Elasticsearch([{'host':'localhost', 'port':9200}])
# Read-only, UTF-8 text handle on the ARC_Corpus.txt file.
# NOTE(review): nothing visible here closes this handle -- confirm it is
# closed (or wrapped in a `with` block) where the corpus is consumed.
corpus2 = open('drive/MyDrive/ARC/ARC_Corpus.txt', 'r', encoding='utf-8')
def extract_answers(question: str) -> str:
    '''
    Return the answer-options part of a raw question string.

    The options are taken to start at the first '(A)' marker, or at the
    first '(1)' marker when the string contains no '(A)'. The result is the
    marker, one space, and everything that followed the marker in the input
    (so the spacing of the original text after the marker is kept as is).

    Returns an empty string when neither marker is present.
    '''
    # '(A)' takes priority over '(1)', matching the original if/elif order.
    for marker in ('(A)', '(1)'):
        # partition() keeps the whole remainder, so a marker that happens to
        # reappear inside an option no longer truncates the options text.
        _, found, remainder = question.partition(marker)
        if found:
            return marker + ' ' + remainder
    return ''
def extract_question(question: str) -> str:
    '''
    Return the question part of a raw question string.

    The string is cut at the first '(A)' marker, or at the first '(1)'
    marker when it contains no '(A)', and the text before the marker is
    returned unchanged (trailing whitespace included).

    Returns the input unchanged when neither marker is present.
    '''
    # '(A)' takes priority over '(1)', matching the original if/elif order.
    for marker in ('(A)', '(1)'):
        stem, found, _ = question.partition(marker)
        if found:
            return stem
    # No options to strip: the whole string is the question.
    return question
def Year_graph(type_):
    """Show, for each split, a horizontal bar chart of question counts per year.

    For each of "Train", "Test" and "Dev" the module-level object named
    ``<split>_multiple_main_<type_>`` is looked up, the ``value_counts()`` of
    its ``"year"`` column is computed, and one matplotlib figure is shown.

    Args:
        type_: Suffix of the module-level names to analyse (a string).

    Raises:
        NameError: If one of the required module-level names is not defined.
    """
    print(type_+" dataset Analysis ...............")
    for split in ("Train", "Test", "Dev"):
        frame_name = split + "_multiple_main_" + type_
        # Plain namespace lookup instead of eval(): resolves the same
        # module-level object, but a crafted type_ can no longer run code.
        try:
            frame = globals()[frame_name]
        except KeyError:
            # Keep the exception type the eval()-based lookup produced.
            raise NameError("name %r is not defined" % frame_name) from None
        counts = frame["year"].value_counts()
        plt.figure(figsize=(9,6))
        # The title previously said "ExamName" although the year column is plotted.
        plt.title(split+"_Year vs No_of_qustions_of_particular_catagory("+type_+")")
        plt.barh(list(counts.index), list(counts.values))
        plt.xlabel("No_of_qustions_of_particular_catagory")
        plt.show()
def Exam_graph(type_):
    """Show, for each split, a horizontal bar chart of question counts per exam name.

    For each of "Train", "Test" and "Dev" the module-level object named
    ``<split>_multiple_main_<type_>`` is looked up, the ``value_counts()`` of
    its ``"examName"`` column is computed, and one matplotlib figure is shown.

    Args:
        type_: Suffix of the module-level names to analyse (a string).

    Raises:
        NameError: If one of the required module-level names is not defined.
    """
    print(type_+" dataset Analysis ...............")
    for split in ("Train", "Test", "Dev"):
        frame_name = split + "_multiple_main_" + type_
        # Plain namespace lookup instead of eval(): resolves the same
        # module-level object, but a crafted type_ can no longer run code.
        try:
            frame = globals()[frame_name]
        except KeyError:
            # Keep the exception type the eval()-based lookup produced.
            raise NameError("name %r is not defined" % frame_name) from None
        counts = frame["examName"].value_counts()
        plt.figure(figsize=(9,6))
        plt.title(split+"_ExamName vs No_of_qustions_of_particular_catagory("+type_+")")
        plt.barh(list(counts.index), list(counts.values))
        plt.xlabel("No_of_qustions_of_particular_catagory")
        plt.show()