Created
October 10, 2019 20:28
-
-
Save ronghanghu/ec6efe1e0fb8fee65866912006711de3 to your computer and use it in GitHub Desktop.
An offline evaluation script for TextVQA https://textvqa.org/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import re | |
class EvalAIAnswerProcessor:
    """
    Normalize an answer string in a way similar to Eval AI.

    Calling an instance lowercases the text, strips commas and question
    marks, removes or blanks out punctuation, maps number words to digits,
    drops articles and restores apostrophes in known contractions.

    copied from
    https://github.com/facebookresearch/pythia/blob/c46b3b3391275b4181567db80943473a89ab98ab/pythia/tasks/processors.py#L897
    """

    # Apostrophe-less (or partially apostrophized) spellings -> contraction.
    # Lookups happen on lowercased words (see process_digit_article), so the
    # capitalized keys ("Im", "Ive", ...) can never match there.
    CONTRACTIONS = {
        "aint": "ain't",
        "arent": "aren't",
        "cant": "can't",
        "couldve": "could've",
        "couldnt": "couldn't",
        "couldn'tve": "couldn't've",
        "couldnt've": "couldn't've",
        "didnt": "didn't",
        "doesnt": "doesn't",
        "dont": "don't",
        "hadnt": "hadn't",
        "hadnt've": "hadn't've",
        "hadn'tve": "hadn't've",
        "hasnt": "hasn't",
        "havent": "haven't",
        "hed": "he'd",
        "hed've": "he'd've",
        "he'dve": "he'd've",
        "hes": "he's",
        "howd": "how'd",
        "howll": "how'll",
        "hows": "how's",
        "Id've": "I'd've",
        "I'dve": "I'd've",
        "Im": "I'm",
        "Ive": "I've",
        "isnt": "isn't",
        "itd": "it'd",
        "itd've": "it'd've",
        "it'dve": "it'd've",
        "itll": "it'll",
        "let's": "let's",
        "maam": "ma'am",
        "mightnt": "mightn't",
        "mightnt've": "mightn't've",
        "mightn'tve": "mightn't've",
        "mightve": "might've",
        "mustnt": "mustn't",
        "mustve": "must've",
        "neednt": "needn't",
        "notve": "not've",
        "oclock": "o'clock",
        "oughtnt": "oughtn't",
        "ow's'at": "'ow's'at",
        "'ows'at": "'ow's'at",
        "'ow'sat": "'ow's'at",
        "shant": "shan't",
        "shed've": "she'd've",
        "she'dve": "she'd've",
        "she's": "she's",
        "shouldve": "should've",
        "shouldnt": "shouldn't",
        "shouldnt've": "shouldn't've",
        "shouldn'tve": "shouldn't've",
        # NOTE(review): this entry maps the apostrophized form to the plain
        # one, the reverse of every other entry. Kept verbatim, presumably
        # for parity with the reference implementation -- confirm before
        # changing, since it affects scores.
        "somebody'd": "somebodyd",
        "somebodyd've": "somebody'd've",
        "somebody'dve": "somebody'd've",
        "somebodyll": "somebody'll",
        "somebodys": "somebody's",
        "someoned": "someone'd",
        "someoned've": "someone'd've",
        "someone'dve": "someone'd've",
        "someonell": "someone'll",
        "someones": "someone's",
        "somethingd": "something'd",
        "somethingd've": "something'd've",
        "something'dve": "something'd've",
        "somethingll": "something'll",
        "thats": "that's",
        "thered": "there'd",
        "thered've": "there'd've",
        "there'dve": "there'd've",
        "therere": "there're",
        "theres": "there's",
        "theyd": "they'd",
        "theyd've": "they'd've",
        "they'dve": "they'd've",
        "theyll": "they'll",
        "theyre": "they're",
        "theyve": "they've",
        "twas": "'twas",
        "wasnt": "wasn't",
        "wed've": "we'd've",
        "we'dve": "we'd've",
        "weve": "we've",
        "werent": "weren't",
        "whatll": "what'll",
        "whatre": "what're",
        "whats": "what's",
        "whatve": "what've",
        "whens": "when's",
        "whered": "where'd",
        "wheres": "where's",
        "whereve": "where've",
        "whod": "who'd",
        "whod've": "who'd've",
        "who'dve": "who'd've",
        "wholl": "who'll",
        "whos": "who's",
        "whove": "who've",
        "whyll": "why'll",
        "whyre": "why're",
        "whys": "why's",
        "wont": "won't",
        "wouldve": "would've",
        "wouldnt": "wouldn't",
        "wouldnt've": "wouldn't've",
        "wouldn'tve": "wouldn't've",
        "yall": "y'all",
        "yall'll": "y'all'll",
        "y'allll": "y'all'll",
        "yall'd've": "y'all'd've",
        "y'alld've": "y'all'd've",
        "y'all'dve": "y'all'd've",
        "youd": "you'd",
        "youd've": "you'd've",
        "you'dve": "you'd've",
        "youll": "you'll",
        "youre": "you're",
        "youve": "you've",
    }

    # Number words -> digit strings. Read-only: never mutate at runtime.
    NUMBER_MAP = {
        "none": "0",
        "zero": "0",
        "one": "1",
        "two": "2",
        "three": "3",
        "four": "4",
        "five": "5",
        "six": "6",
        "seven": "7",
        "eight": "8",
        "nine": "9",
        "ten": "10",
    }

    ARTICLES = ["a", "an", "the"]

    # NOTE(review): "(?!<=\d)" is a negative lookahead for the literal text
    # "<=" followed by a digit, not the lookbehind "(?<!\d)". At a "." it
    # always succeeds, so this strips every period not followed by a digit.
    # The pattern is kept unchanged (only made a raw string) so results stay
    # the same -- confirm against the reference before "fixing" it.
    PERIOD_STRIP = re.compile(r"(?!<=\d)(\.)(?!\d)")
    # One or more commas sitting between two digits, e.g. "1,000".
    COMMA_STRIP = re.compile(r"(?<=\d)(\,)+(?=\d)")

    PUNCTUATIONS = [
        ";",
        r"/",
        "[",
        "]",
        '"',
        "{",
        "}",
        "(",
        ")",
        "=",
        "+",
        "\\",
        "_",
        "-",
        ">",
        "<",
        "@",
        "`",
        ",",
        "?",
        "!",
    ]

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; the processor is stateless."""
        pass

    def word_tokenize(self, word):
        """Lowercase ``word``, drop "," and "?", and split off "'s"."""
        word = word.lower()
        word = word.replace(",", "").replace("?", "").replace("'s", " 's")
        return word.strip()

    def process_punctuation(self, in_text):
        """
        Remove or blank out the characters listed in ``PUNCTUATIONS``.

        A punctuation mark is deleted outright when it touches a space in
        ``in_text`` or when ``in_text`` contains a comma between digits;
        otherwise it is replaced by a space. Afterwards every period matched
        by ``PERIOD_STRIP`` is removed.
        """
        # Loop-invariant: depends only on the input text, not on the mark.
        has_digit_comma = self.COMMA_STRIP.search(in_text) is not None
        out_text = in_text
        for p in self.PUNCTUATIONS:
            if has_digit_comma or p + " " in in_text or " " + p in in_text:
                out_text = out_text.replace(p, "")
            else:
                out_text = out_text.replace(p, " ")
        # No third positional argument here: Pattern.sub's third parameter
        # is ``count``, so passing re.UNICODE would cap the number of
        # replacements at the flag's integer value.
        return self.PERIOD_STRIP.sub("", out_text)

    def process_digit_article(self, in_text):
        """
        Map number words to digits, drop articles and fix contractions.

        The text is lowercased and split on whitespace; surviving words are
        re-joined with single spaces.
        """
        out_text = []
        for word in in_text.lower().split():
            # .get (not .setdefault) so the shared class-level table is
            # never modified by lookups.
            word = self.NUMBER_MAP.get(word, word)
            if word not in self.ARTICLES:
                out_text.append(self.CONTRACTIONS.get(word, word))
        return " ".join(out_text)

    def __call__(self, item):
        """Run the full normalization pipeline on the string ``item``."""
        item = self.word_tokenize(item)
        item = item.replace("\n", " ").replace("\t", " ").strip()
        item = self.process_punctuation(item)
        item = self.process_digit_article(item)
        return item
class TextVQAEvaluator:
    """
    Offline scorer for TextVQA prediction files.

    On construction the dataset JSON is loaded and, for every entry under
    its ``'data'`` key, a mapping from each normalized human answer to its
    soft score is precomputed and stored in ``dataset_answer_scores`` keyed
    by ``question_id``.
    """

    def __init__(self, dataset_json_file):
        """
        Load the dataset and precompute per-question answer scores.

        Args:
            dataset_json_file: path to a JSON file with a top-level
                ``'data'`` list whose entries have ``'question_id'`` and
                ``'answers'`` keys.
        """
        self.answer_processor = EvalAIAnswerProcessor()
        # JSON is read as UTF-8 explicitly so the result does not depend on
        # the platform's default encoding.
        with open(dataset_json_file, encoding="utf-8") as f:
            dataset_json = json.load(f)
        self.dataset_answer_scores = {}
        for entry in dataset_json['data']:
            self.dataset_answer_scores[entry['question_id']] = (
                self._compute_answer_scores(entry['answers'])
            )

    def _compute_answer_scores(self, raw_answers):
        """
        compute the accuracy (soft score) of human answers

        For each distinct normalized answer, every annotator is left out in
        turn and the answer is credited ``min(1, matches / 3)`` where
        ``matches`` counts the remaining annotators who gave that answer;
        the credits are then averaged.

        Args:
            raw_answers: list of raw human answer strings for one question.

        Returns:
            dict mapping each distinct normalized answer to its score; empty
            when ``raw_answers`` is empty.
        """
        answers = [self.answer_processor(a) for a in raw_answers]
        unique_answer_scores = {}
        for unique_answer in set(answers):
            total = answers.count(unique_answer)
            # Leaving out annotator i removes one match exactly when that
            # annotator's own answer equals the candidate.
            accs = [
                min(1, float(total - (answer == unique_answer)) / 3)
                for answer in answers
            ]
            unique_answer_scores[unique_answer] = sum(accs) / len(accs)
        return unique_answer_scores

    def evaluate_pred_file(self, pred_json_file):
        """
        evaluate a textvqa prediction file

        Args:
            pred_json_file: path to a JSON file holding a list of entries
                with ``'question_id'`` and ``'answer'`` keys.

        Returns:
            Mean soft score over all prediction entries, or ``0.0`` when
            there are none.

        Raises:
            AssertionError: if the set of predicted question ids differs
                from the set of question ids in the dataset.
        """
        with open(pred_json_file, encoding="utf-8") as f:
            pred_json = json.load(f)
        pred_question_ids = {entry['question_id'] for entry in pred_json}
        expected_ids = set(self.dataset_answer_scores)
        if pred_question_ids != expected_ids:
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs under ``python -O``; the exception type is kept
            # for callers that already catch AssertionError.
            raise AssertionError(
                'prediction question ids do not match the dataset: '
                '{} missing, {} unexpected'.format(
                    len(expected_ids - pred_question_ids),
                    len(pred_question_ids - expected_ids),
                )
            )
        pred_scores = [
            self.dataset_answer_scores[entry['question_id']].get(
                self.answer_processor(entry['answer']), 0.
            )
            for entry in pred_json
        ]
        if not pred_scores:
            # Nothing to average; avoid dividing by zero.
            return 0.0
        return sum(pred_scores) / len(pred_scores)
if __name__ == '__main__':
    # Command-line entry point: score one prediction file against a dataset.
    #
    # Usage example:
    #   python textvqa_eval.py \
    #       --dataset /your/path/to/TextVQA_0.5_val.json \
    #       --pred /your/path/to/textvqa_lorra_val_pred_2019-10-10T09-35-38.json
    import argparse

    cli = argparse.ArgumentParser()
    # Both paths are mandatory string options.
    for flag in ('--dataset', '--pred'):
        cli.add_argument(flag, type=str, required=True)
    options = cli.parse_args()

    result = TextVQAEvaluator(options.dataset).evaluate_pred_file(options.pred)
    print('accuracy: {:.4f}'.format(result))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment