y3nr1ng · May 19, 2016 17:49
diff --git a/cmb-test.py b/cmb-test.py
 #!/usr/bin/env python3

 import os, sys, argparse, logging

 # Score a combined prediction file against the labelled training data.
 #
 # baseline : tab separated lines "<sentence id>\t<emoticon id>\t<text>"
 # result   : a header line followed by "<sentence id>,<cand> <cand> <cand>"
 baseline = '/tmp2/b03902036/train-punc.pro'
 result = '/tmp2/b03902036/result.cmb'

 # sentence id -> true emoticon id
 ground_truth = dict()
 with open(baseline, 'r') as in_file :
     for line in in_file :
         line = line.strip()
         if not line :
             continue
         sid, content = line.split('\t', maxsplit=1)
         sid = int(sid)
         try :
             emot, txt = content.strip().split('\t', maxsplit=1)
         except ValueError :
             # ignore lines with empty text after the filtering process;
             # without the 'continue' the label of a previous line (or an
             # undefined name) would be stored for this sentence
             print('{:d} has value error'.format(sid))
             continue
         ground_truth[sid] = int(emot)

 # credit given when the 1st, 2nd or 3rd candidate is the true emoticon
 eval_weight=[1, 0.5, 0.333]

 total_score = 0
 total_trial = 0

 with open(result, 'r') as in_file :
     # skip the header, tolerating a completely empty file
     next(in_file, None)
     for line in in_file :
         line = line.strip()
         if not line :
             continue
         sid, emot = line.split(',', maxsplit=1)
         sid = int(sid)
         if sid not in ground_truth :
             # the matching baseline line was ignored, nothing to compare with
             print('{:d} has no ground truth, skipped'.format(sid))
             continue
         truth = ground_truth[sid]
         emot_cand = emot.split()
         # zip() stops at the shorter sequence, so a prediction with fewer
         # candidates than weights no longer raises IndexError
         for cand, weight in zip(emot_cand, eval_weight) :
             if int(cand) == truth :
                 print('{:d} -> {:s}'.format(truth, str(emot_cand)))
                 total_score += weight
                 break
         total_trial += 1
         print('{:f} / {:f} = {:f}'.format(total_score, total_trial, (total_score/total_trial)))

 if total_trial :
     print('accuracy = {:f}'.format(total_score/total_trial))
 else :
     # avoid dividing by zero when no prediction could be scored
     print('no predictions were evaluated')
diff --git a/vote b/vote
 #!/usr/bin/env python3

 import os, sys, argparse, logging
 # search for files
 import glob

 # length of the per-sentence vote list built by load_file(); emoticon id e
 # is counted in slot e-1
 TOTAL_EMOTICON_TYPES = 40
 # number of highest-voted emoticons kept for every sentence
 N_TOP = 3

 # configure the root logger: timestamp, level name, then the message
 logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s')

 def load_file(file_path, sent_dict, weight=(1, 1, 1), logger=None) :
     """Add the predictions of one answer file to the vote table.

     The file holds a header line followed by lines of the form
     "<sentence id>,<emoticon id> <emoticon id> ...".  The i-th emoticon of a
     line receives weight[i] votes; emoticons beyond len(weight) are ignored.

     Parameters:
         file_path -- path of the answer file to read
         sent_dict -- dict mapping a sentence id to a list of
                      TOTAL_EMOTICON_TYPES vote counts; updated in place
         weight    -- votes granted per candidate position
         logger    -- optional logger receiving the per-line debug output

     Returns the same sent_dict object that was passed in.

     Blank lines are skipped.  Emoticon ids outside 1..TOTAL_EMOTICON_TYPES
     are reported as a warning and ignored.  A line that is not
     "<int>,<ints>" still raises ValueError.
     """
     with open(file_path, 'r') as in_file :
         # skip the header, tolerating a completely empty file
         next(in_file, None)

         for line in in_file :
             line = line.strip()
             if not line :
                 continue

             sid, emot = line.split(',', maxsplit=1)
             sid = int(sid)
             emot = [int(x) for x in emot.split()]

             if sid not in sent_dict :
                 sent_dict[sid] = [0] * TOTAL_EMOTICON_TYPES
             votes = sent_dict[sid]

             # zip() pairs each candidate with the weight of its position
             # and stops once the weights are used up
             for e, w in zip(emot, weight) :
                 if not 1 <= e <= len(votes) :
                     # e-1 would wrap around (e <= 0) or overflow the list
                     (logger or logging.getLogger()).warning(
                         '{:s}: emoticon id {:d} of sentence {:d} is out of range, ignored'.format(file_path, e, sid))
                     continue
                 votes[e-1] += w

             if logger :
                 logger.debug('{:d} {:s}'.format(sid, str(votes)))

     return sent_dict

 def get_args(argv=None) :
     """Parse the command line options of the voting script.

     Parameters:
         argv -- list of argument strings; None (the default) makes argparse
                 read sys.argv[1:]

     Returns the argparse namespace with the attributes
         verbose -- number of -v flags given (0 when absent)
         out_dir -- directory that receives the combined result
         ans_dir -- list holding one or more answer directories
     """
     parser = argparse.ArgumentParser(description='Combine the predictions of several answer files by voting.')
     parser.add_argument('--verbose', '-v', dest='verbose',
                         action='count', default=0,
                         help='control the display level of output logs')
     parser.add_argument('--outdir', '-o', dest='out_dir',
                         default='/tmp2/b03902036',
                         help='destination directory for the combined result file')
     parser.add_argument('ans_dir', nargs='+',
                         help='The directory that stores the answer files')

     return parser.parse_args(argv)

 if __name__ == '__main__' :
     # read the command line options
     args = get_args()

     # translate the number of -v flags into the level of the root logger
     logger = logging.getLogger()
     if args.verbose >= 2 :
         level = logging.DEBUG
     elif args.verbose >= 1 :
         level = logging.INFO
     else :
         level = logging.WARNING
     logger.setLevel(level)

     # only the first directory is used, extra ones are reported and dropped
     if len(args.ans_dir) > 1 :
         logger.warning('currently only 1 directory is supported')
     args.ans_dir = args.ans_dir[0]
     logger.info('scanning in {:s}'.format(args.ans_dir))

     pattern = os.path.join(args.ans_dir, '**/*.ans')
     file_list = glob.glob(pattern, recursive=True)

     # accumulate the votes of every .ans file in a single table
     sent_dict = dict()
     for file_path in file_list :
         logger.info('processing "{:s}"...'.format(file_path))
         sent_dict = load_file(file_path, sent_dict, logger=logger)

     # replace each vote list by the ids of its N_TOP most voted emoticons
     logger.info('voting in progress')
     for key in sent_dict :
         value = sent_dict[key]
         # ascending stable sort of the 1-based ids by vote count; the tail
         # holds the winners and is flipped so the most voted id comes first
         ranking = sorted(range(1, len(value)+1), key=lambda i: value[i-1])
         winners = ranking[-N_TOP:]
         winners.reverse()
         sent_dict[key] = winners

         logger.debug('{:d} {:s}'.format(key, str(winners)))

     # write one "<sentence id>,<ids separated by blanks>" line per sentence
     new_filepath = os.path.join(args.out_dir, 'result.cmb')
     with open(new_filepath, 'w') as out_file :
         out_file.write('Id,Emoticon\n')
         for key, value in sent_dict.items() :
             out_file.write('{:d},{:s}\n'.format(key, ' '.join(map(str, value))))

     logger.info('saved to {:s}'.format(new_filepath))
	#!/usr/bin/env python3

	import os, sys, argparse, logging

	# Score a combined prediction file against the labelled training data.
	#
	# baseline : tab separated lines "<sentence id>\t<emoticon id>\t<text>"
	# result   : a header line followed by "<sentence id>,<cand> <cand> <cand>"
	baseline = '/tmp2/b03902036/train-punc.pro'
	result = '/tmp2/b03902036/result.cmb'

	# sentence id -> true emoticon id
	ground_truth = dict()
	with open(baseline, 'r') as in_file :
	    for line in in_file :
	        line = line.strip()
	        if not line :
	            continue
	        sid, content = line.split('\t', maxsplit=1)
	        sid = int(sid)
	        try :
	            emot, txt = content.strip().split('\t', maxsplit=1)
	        except ValueError :
	            # ignore lines with empty text after the filtering process;
	            # without the 'continue' the label of a previous line (or an
	            # undefined name) would be stored for this sentence
	            print('{:d} has value error'.format(sid))
	            continue
	        ground_truth[sid] = int(emot)

	# credit given when the 1st, 2nd or 3rd candidate is the true emoticon
	eval_weight=[1, 0.5, 0.333]

	total_score = 0
	total_trial = 0

	with open(result, 'r') as in_file :
	    # skip the header, tolerating a completely empty file
	    next(in_file, None)
	    for line in in_file :
	        line = line.strip()
	        if not line :
	            continue
	        sid, emot = line.split(',', maxsplit=1)
	        sid = int(sid)
	        if sid not in ground_truth :
	            # the matching baseline line was ignored, nothing to compare with
	            print('{:d} has no ground truth, skipped'.format(sid))
	            continue
	        truth = ground_truth[sid]
	        emot_cand = emot.split()
	        # zip() stops at the shorter sequence, so a prediction with fewer
	        # candidates than weights no longer raises IndexError
	        for cand, weight in zip(emot_cand, eval_weight) :
	            if int(cand) == truth :
	                print('{:d} -> {:s}'.format(truth, str(emot_cand)))
	                total_score += weight
	                break
	        total_trial += 1
	        print('{:f} / {:f} = {:f}'.format(total_score, total_trial, (total_score/total_trial)))

	if total_trial :
	    print('accuracy = {:f}'.format(total_score/total_trial))
	else :
	    # avoid dividing by zero when no prediction could be scored
	    print('no predictions were evaluated')
	#!/usr/bin/env python3

	import os, sys, argparse, logging
	# search for files
	import glob

	# length of the per-sentence vote list built by load_file(); emoticon id e
	# is counted in slot e-1
	TOTAL_EMOTICON_TYPES = 40
	# number of highest-voted emoticons kept for every sentence
	N_TOP = 3

	# configure the root logger: timestamp, level name, then the message
	logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s')

	def load_file(file_path, sent_dict, weight=(1, 1, 1), logger=None) :
	    """Add the predictions of one answer file to the vote table.

	    The file holds a header line followed by lines of the form
	    "<sentence id>,<emoticon id> <emoticon id> ...".  The i-th emoticon of a
	    line receives weight[i] votes; emoticons beyond len(weight) are ignored.

	    Parameters:
	        file_path -- path of the answer file to read
	        sent_dict -- dict mapping a sentence id to a list of
	                     TOTAL_EMOTICON_TYPES vote counts; updated in place
	        weight    -- votes granted per candidate position
	        logger    -- optional logger receiving the per-line debug output

	    Returns the same sent_dict object that was passed in.

	    Blank lines are skipped.  Emoticon ids outside 1..TOTAL_EMOTICON_TYPES
	    are reported as a warning and ignored.  A line that is not
	    "<int>,<ints>" still raises ValueError.
	    """
	    with open(file_path, 'r') as in_file :
	        # skip the header, tolerating a completely empty file
	        next(in_file, None)

	        for line in in_file :
	            line = line.strip()
	            if not line :
	                continue

	            sid, emot = line.split(',', maxsplit=1)
	            sid = int(sid)
	            emot = [int(x) for x in emot.split()]

	            if sid not in sent_dict :
	                sent_dict[sid] = [0] * TOTAL_EMOTICON_TYPES
	            votes = sent_dict[sid]

	            # zip() pairs each candidate with the weight of its position
	            # and stops once the weights are used up
	            for e, w in zip(emot, weight) :
	                if not 1 <= e <= len(votes) :
	                    # e-1 would wrap around (e <= 0) or overflow the list
	                    (logger or logging.getLogger()).warning(
	                        '{:s}: emoticon id {:d} of sentence {:d} is out of range, ignored'.format(file_path, e, sid))
	                    continue
	                votes[e-1] += w

	            if logger :
	                logger.debug('{:d} {:s}'.format(sid, str(votes)))

	    return sent_dict

	def get_args(argv=None) :
	    """Parse the command line options of the voting script.

	    Parameters:
	        argv -- list of argument strings; None (the default) makes argparse
	                read sys.argv[1:]

	    Returns the argparse namespace with the attributes
	        verbose -- number of -v flags given (0 when absent)
	        out_dir -- directory that receives the combined result
	        ans_dir -- list holding one or more answer directories
	    """
	    parser = argparse.ArgumentParser(description='Combine the predictions of several answer files by voting.')
	    parser.add_argument('--verbose', '-v', dest='verbose',
	                        action='count', default=0,
	                        help='control the display level of output logs')
	    parser.add_argument('--outdir', '-o', dest='out_dir',
	                        default='/tmp2/b03902036',
	                        help='destination directory for the combined result file')
	    parser.add_argument('ans_dir', nargs='+',
	                        help='The directory that stores the answer files')

	    return parser.parse_args(argv)

	if __name__ == '__main__' :
	    # read the command line options
	    args = get_args()

	    # translate the number of -v flags into the level of the root logger
	    logger = logging.getLogger()
	    if args.verbose >= 2 :
	        level = logging.DEBUG
	    elif args.verbose >= 1 :
	        level = logging.INFO
	    else :
	        level = logging.WARNING
	    logger.setLevel(level)

	    # only the first directory is used, extra ones are reported and dropped
	    if len(args.ans_dir) > 1 :
	        logger.warning('currently only 1 directory is supported')
	    args.ans_dir = args.ans_dir[0]
	    logger.info('scanning in {:s}'.format(args.ans_dir))

	    # '**' combined with recursive=True descends into every sub-directory;
	    # the former pattern '*/.ans' only matched files literally named
	    # ".ans", so no answer file was ever picked up
	    pattern = os.path.join(args.ans_dir, '**/*.ans')
	    file_list = glob.glob(pattern, recursive=True)

	    # accumulate the votes of every .ans file in a single table
	    sent_dict = dict()
	    for file_path in file_list :
	        logger.info('processing "{:s}"...'.format(file_path))
	        sent_dict = load_file(file_path, sent_dict, logger=logger)

	    # replace each vote list by the ids of its N_TOP most voted emoticons
	    logger.info('voting in progress')
	    for key in sent_dict :
	        value = sent_dict[key]
	        # ascending stable sort of the 1-based ids by vote count; the tail
	        # holds the winners and is flipped so the most voted id comes first
	        ranking = sorted(range(1, len(value)+1), key=lambda i: value[i-1])
	        winners = ranking[-N_TOP:]
	        winners.reverse()
	        sent_dict[key] = winners

	        logger.debug('{:d} {:s}'.format(key, str(winners)))

	    # write one "<sentence id>,<ids separated by blanks>" line per sentence
	    new_filepath = os.path.join(args.out_dir, 'result.cmb')
	    with open(new_filepath, 'w') as out_file :
	        out_file.write('Id,Emoticon\n')
	        for key, value in sent_dict.items() :
	            out_file.write('{:d},{:s}\n'.format(key, ' '.join(map(str, value))))

	    logger.info('saved to {:s}'.format(new_filepath))