Skip to content

Instantly share code, notes, and snippets.

@y3nr1ng
Created May 19, 2016 17:49
Show Gist options
  • Save y3nr1ng/75fd0f04abd40f169f5ef01baa0e7b77 to your computer and use it in GitHub Desktop.
Save y3nr1ng/75fd0f04abd40f169f5ef01baa0e7b77 to your computer and use it in GitHub Desktop.
Task 2 - Voting
#!/usr/bin/env python3
"""Evaluate the combined voting result against the ground-truth labels.

The first of the top-3 predicted emoticons that matches the ground truth
earns 1, 0.5 or 0.333 points depending on its rank; "accuracy" is the mean
score over all evaluated sentences.
"""
import os, sys, argparse, logging

baseline = '/tmp2/b03902036/train-punc.pro'
result = '/tmp2/b03902036/result.cmb'

# Map sentence id -> ground-truth emoticon id, parsed from the baseline file.
ground_truth = dict()
with open(baseline, 'r') as in_file:
    for line in in_file:
        sid, content = line.strip().split('\t', maxsplit=1)
        sid = int(sid)
        try:
            emot, txt = content.strip().split('\t', maxsplit=1)
        except ValueError:
            # Lines with empty text after the filtering process lack the
            # second tab; report and skip them.
            print('{:d} has value error'.format(sid))
            # BUG FIX: without this `continue` the assignment below reused
            # `emot` from the previous iteration (or raised NameError on the
            # very first malformed line).
            continue
        ground_truth[sid] = int(emot)

# Rank-based weights for the 1st, 2nd and 3rd predicted candidates.
eval_weight = [1, 0.5, 0.333]
total_score = 0
total_trial = 0
with open(result, 'r') as in_file:
    next(in_file)  # skip the CSV header
    for line in in_file:
        sid, emot = line.strip().split(',', maxsplit=1)
        emot_cand = emot.strip().split(' ')
        # zip stops at the shorter sequence, so lines with fewer than 3
        # candidates no longer risk an IndexError.
        for cand, weight in zip(emot_cand, eval_weight):
            if int(cand) == ground_truth[int(sid)]:
                print('{:d} -> {:s}'.format(ground_truth[int(sid)], str(emot_cand)))
                total_score += weight
                break
        total_trial += 1

print('{:f} / {:f} = {:f}'.format(total_score, total_trial, (total_score / total_trial)))
print('accuracy = {:f}'.format(total_score / total_trial))
#!/usr/bin/env python3
import os, sys, argparse, logging
# search for files
import glob
TOTAL_EMOTICON_TYPES = 40
N_TOP = 3
logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s')
def load_file(file_path, sent_dict, weight=(1, 1, 1), logger=None):
    """Accumulate weighted votes from one ``.ans`` file into *sent_dict*.

    Each line after the CSV header has the form ``sid,e1 e2 e3 ...``.  The
    i-th predicted emoticon on a line receives ``weight[i]`` votes;
    predictions beyond ``len(weight)`` are ignored.

    :param file_path: path of the answer file to read
    :param sent_dict: dict mapping sid -> per-emoticon vote tally (a list of
                      ``TOTAL_EMOTICON_TYPES`` numbers); mutated in place
    :param weight: vote weight per prediction rank.  FIX: a tuple now --
                   the original used a mutable default argument.
    :param logger: optional logger for per-sentence debug output
    :return: the (mutated) *sent_dict*, for call chaining
    """
    with open(file_path, 'r') as in_file:
        # skip the CSV header
        next(in_file)
        for line in in_file:
            sid, emot = line.strip().split(',', maxsplit=1)
            emot = [int(x) for x in emot.split()]
            sid = int(sid)
            if sid not in sent_dict:
                sent_dict[sid] = [0] * TOTAL_EMOTICON_TYPES
            # zip stops at the shorter sequence, giving the same cutoff as
            # the original's index check + break.
            for e, w in zip(emot, weight):
                # emoticon ids are 1-based; tally slots are 0-based
                sent_dict[sid][e - 1] += w
            if logger:
                logger.debug('{:d} {:s}'.format(sid, str(sent_dict[sid])))
    return sent_dict
def get_args():
    """Build the command-line parser and return the parsed arguments."""
    arg_parser = argparse.ArgumentParser(
        description='Train the classifier using trained doc2vec model.')
    # (flag names, add_argument options) — declarative spec of the CLI
    argument_specs = [
        (('--verbose', '-v'),
         dict(dest='verbose', action='count', default=0,
              help='control the display level of output logs')),
        (('--outdir', '-o'),
         dict(dest='out_dir', default='/tmp2/b03902036',
              help='destination directory for the model file')),
        (('ans_dir',),
         dict(nargs='+',
              help='The directory that stores the answer files')),
    ]
    for names, options in argument_specs:
        arg_parser.add_argument(*names, **options)
    return arg_parser.parse_args()
if __name__ == '__main__':
    # parse the command line arguments
    args = get_args()

    # root-logger verbosity: WARNING by default, INFO at -v, DEBUG at -vv+
    logger = logging.getLogger()
    if args.verbose == 0:
        logger.setLevel(logging.WARNING)
    elif args.verbose == 1:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.DEBUG)

    # only the first supplied directory is honoured
    if len(args.ans_dir) > 1:
        logger.warning('currently only 1 directory is supported')
    args.ans_dir = args.ans_dir[0]

    logger.info('scanning in {:s}'.format(args.ans_dir))
    file_list = glob.glob(os.path.join(args.ans_dir, '**/*.ans'), recursive=True)

    # accumulate weighted votes from every .ans file found
    sent_dict = dict()
    for file_path in file_list:
        logger.info('processing "{:s}"...'.format(file_path))
        sent_dict = load_file(file_path, sent_dict, logger=logger)

    # reduce each per-sentence tally to its N_TOP emoticon ids, best first
    logger.info('voting in progress')
    for sid in sent_dict:
        tally = sent_dict[sid]
        # rank 1-based emoticon ids by their vote count (ascending), then
        # keep the last N_TOP and reverse so the strongest comes first
        ranked = sorted(range(1, len(tally) + 1), key=lambda e: tally[e - 1])
        sent_dict[sid] = ranked[-N_TOP:][::-1]
        logger.debug('{:d} {:s}'.format(sid, str(sent_dict[sid])))

    # dump the predictions as an "Id,Emoticon" CSV
    new_filepath = os.path.join(args.out_dir, 'result.cmb')
    with open(new_filepath, 'w') as out_file:
        out_file.write('Id,Emoticon\n')
        for sid, top in sent_dict.items():
            out_file.write('{:d},{:s}\n'.format(sid, ' '.join(str(e) for e in top)))
    logger.info('saved to {:s}'.format(new_filepath))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment