Last active
August 29, 2015 14:05
-
-
Save minhoryang/434769cc3b38309d553d to your computer and use it in GitHub Desktop.
(Snippet) Reading a file with a progress bar (tqdm); it also works when tqdm is not installed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""TestResult scorer for MusicID (in-house project).""" | |
import mmap | |
import os | |
import re | |
import sys | |
try: | |
from tqdm import tqdm | |
except ImportError: | |
tqdm = lambda *i, **kwargs: i[0] # pylint:disable=invalid-name | |
# pylint: disable=too-few-public-methods | |
class FileReaderWithTQDM(object):
    """Iterator base class that reads a file line by line through tqdm.

    ``self.tqdm`` is an iterator over the lines of the file.  It is built by
    passing the open file to the module-level ``tqdm`` callable together with
    the file's line count as ``total``.  Subclasses implement ``__next__`` on
    top of ``self.tqdm``.
    """

    def __init__(self, filename, **kwargs):
        """Open *filename* and prepare the line iterator.

        Any extra keyword arguments are forwarded to ``tqdm`` unchanged.
        The ``tqdm`` call itself is deferred until the first line is
        requested.
        """
        total = self.get_line_number(filename)
        self.tqdm = self._iter_lines(open(filename, "r"), total, kwargs)

    def __iter__(self):
        return self

    @staticmethod
    def _iter_lines(handle, total, options):
        """Yield the lines of *handle* through tqdm, closing it when done."""
        # Subclasses call ``self.tqdm.__next__()``.  A generator always has
        # ``__next__``, whereas the object returned by ``tqdm`` is only
        # guaranteed to be iterable.  The ``with`` block also makes sure the
        # file is closed once the lines are exhausted (or the generator is
        # discarded after it has started).
        with handle:
            yield from tqdm(handle, total=total, **options)

    @staticmethod
    def get_line_number(this):
        """Return the number of lines in the file at path *this*.

        Lines are delimited by ``\\n``; a final line without a trailing
        newline is counted too.  An empty file has zero lines.
        """
        # Read-only binary iteration: needs no write permission, works on
        # empty files (which cannot be memory-mapped) and releases the file
        # handle as soon as counting is done.
        with open(this, "rb") as handle:
            return sum(1 for _ in handle)

    def __next__(self):
        """Need to Implement with self.tqdm.__next__()."""
        raise NotImplementedError
class MappingTableLoader(FileReaderWithTQDM):
    """MusicID Mapping Table Format.

    Iterating yields one 2-tuple of strings per line that contains two
    double-quoted fields separated by a single space, i.e. ``"first" "second"``.
    """

    REGEX = re.compile(r"\"(.*)\" \"(.*)\"")

    def __next__(self):
        """Return the ``(first, second)`` pair from the next matching line.

        Lines that do not match the mapping format are skipped instead of
        being reported as ``None``, so callers can always unpack the result
        into two names.

        Raises:
            StopIteration: when the underlying file is exhausted.
        """
        while True:
            # next() raises StopIteration at end of file, which ends the
            # iteration of this loader as well.
            match = self.REGEX.search(next(self.tqdm))
            if match is not None:
                return match.groups()
class ResultLoader(FileReaderWithTQDM):
    """MusicID TestResult Format.

    Each iteration step yields one record as a dict with the keys
    ``'Data'`` and ``'Results'``.
    """

    REGEX = {'findTest': re.compile("Seek Time = .*"),
             'getTestRank': re.compile("[0-9]+\t[0-9]+\t(.*)")}

    def __next__(self):
        """Read the next test record from the result file.

        Lines are discarded until one contains ``Seek Time = ``.  The line
        right after it, stripped, becomes ``'Data'``.  From each following
        line of the form ``<digits> TAB <digits> TAB <text>`` the ``<text>``
        part is appended to ``'Results'``; the first line that does not have
        this form ends the record (that line is consumed as well).

        Raises:
            StopIteration: when the file ends before a record's data line
                could be read.
        """
        header_pattern = self.REGEX['findTest']
        rank_pattern = self.REGEX['getTestRank']

        # Skip everything in front of the record header.
        for candidate in self.tqdm:
            if header_pattern.search(candidate) is not None:
                break

        # The line following the header names the tested data.
        data = self.tqdm.__next__().strip()

        results = []
        for candidate in self.tqdm:
            ranked = rank_pattern.findall(candidate)
            if not ranked:
                break
            results += ranked
        return {'Data': data, 'Results': results}
def musicid_results_scorer(files):
    """Load tables, and Score results while reading.

    Args:
        files: sequence of three paths, in this order: the train mapping
            table, the test mapping table and the test result file.

    For every record of the result file, the basename mapped from the
    record's data name (via the test table) is compared with the basenames
    mapped from its ranked results (via the train table).  The position of
    the first equal basename among the first ten results is tallied; records
    without such a hit are tallied as "NotFound".  The tally and the
    Top 1 / Top 5 / Top 10 ratios are printed to stdout.

    Raises:
        KeyError: if a data or result name (plus ``.pcm``) is missing from
            the corresponding mapping table.
    """
    not_found = 10  # scores[0..9] are hit ranks, scores[10] is "NotFound".

    # The mapping loader may hand back None for lines that are not mapping
    # entries; drop those instead of failing while unpacking them.
    train_table = dict(pair for pair in MappingTableLoader(
        files[0], desc='TrainData', leave=True) if pair)
    test_table = dict(pair for pair in MappingTableLoader(
        files[1], desc='TestData ', leave=True) if pair)

    scores = [0] * (not_found + 1)
    for test in ResultLoader(files[2], desc='Ranking ', leave=True):
        target = os.path.basename(test_table[test['Data'] + '.pcm'])
        # Start from "NotFound" and only move to a rank bucket on a real
        # hit.  This keeps a miss in a short result list from being counted
        # as a hit, and keeps long result lists from indexing past the end
        # of ``scores``.
        bucket = not_found
        for rank, cur in enumerate(test['Results'][:not_found]):
            if target == os.path.basename(train_table[cur + '.pcm']):
                bucket = rank
                break
        scores[bucket] += 1

    # With no records at all every count is zero; divide by 1 so the
    # ratios are reported as 0.0 instead of raising ZeroDivisionError.
    total = sum(scores) or 1

    # XXX([email protected]) : Chunghee asked to print like these.
    print('Scores:')
    for i in range(0, not_found):
        print('\tHit %sth - %s' % (i+1, scores[i]))
    print('\tNotFound - %s' % (scores[not_found],))
    print("Top 1: %s" % (scores[0] / total,))
    print("Top 5: %s" % (sum(scores[:5]) / total,))
    print("Top 10: %s" % (sum(scores[:10]) / total,))
if __name__ == "__main__":
    # Script entry point: exactly three arguments (train mapping table,
    # test mapping table, result file) start the scorer; anything else
    # prints the usage line.
    if len(sys.argv) == 4:
        musicid_results_scorer(sys.argv[1:])
    else:
        print("%s $TrainMappingTable $TestMappingTable $result" % (
            sys.argv[0],))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment