Created
November 6, 2015 18:58
-
-
Save ramntry/b572a3073cf994ee4f9c to your computer and use it in GitHub Desktop.
codeforces parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| # Python 2->3 libraries that were renamed. | |
| try: | |
| from urllib2 import urlopen | |
| except: | |
| from urllib.request import urlopen | |
| try: | |
| from HTMLParser import HTMLParser | |
| except: | |
| from html.parser import HTMLParser | |
| # Other libraries. | |
| from sys import argv | |
| from subprocess import call | |
| from functools import partial, wraps | |
| import re | |
| # User modifiable constants: | |
| TEMPLATE='main.cc' | |
| COMPILE_CMD='g++-4.9 -g -Wall -Wextra -pedantic -std=c++11 -O2 -Wshadow -Wformat=2 -Wfloat-equal -Wconversion -Wlogical-op -Wcast-qual -Wcast-align -D_GLIBCXX_DEBUG -D_GLIBCXX_DEBUG_PEDANTIC -fstack-protector -lmcheck -D_FORTIFY_SOURCE=2 $DBG' | |
| SAMPLE_INPUT='input' | |
| SAMPLE_OUTPUT='output' | |
| MY_OUTPUT='my_output' | |
| # Do not modify these! | |
| VERSION='CodeForces Parser v1.4.1: https://github.com/johnathan79717/codeforces-parser' | |
| RED_F='\033[31m' | |
| GREEN_F='\033[32m' | |
| BOLD='\033[1m' | |
| NORM='\033[0m' | |
| TIME_CMD='`which time` -o time.out -f "(%es)"' | |
| TIME_AP='`cat time.out`' | |
| # Problems parser. | |
| class CodeforcesProblemParser(HTMLParser): | |
| def __init__(self, folder): | |
| HTMLParser.__init__(self) | |
| self.folder = folder | |
| self.num_tests = 0 | |
| self.testcase = None | |
| self.start_copy = False | |
| def handle_starttag(self, tag, attrs): | |
| if tag == 'div': | |
| if attrs == [('class', 'input')]: | |
| self.num_tests += 1 | |
| self.testcase = open( | |
| '%s/%s%d' % (self.folder, SAMPLE_INPUT, self.num_tests), 'w') | |
| elif attrs == [('class', 'output')]: | |
| self.testcase = open( | |
| '%s/%s%d' % (self.folder, SAMPLE_OUTPUT, self.num_tests), 'w') | |
| elif tag == 'pre': | |
| if self.testcase != None: | |
| self.start_copy = True | |
| def handle_endtag(self, tag): | |
| if tag == 'br': | |
| if self.start_copy: | |
| self.testcase.write('\n') | |
| self.end_line = True | |
| if tag == 'pre': | |
| if self.start_copy: | |
| if not self.end_line: | |
| self.testcase.write('\n') | |
| self.testcase.close() | |
| self.testcase = None | |
| self.start_copy = False | |
| def handle_entityref(self, name): | |
| if self.start_copy: | |
| self.testcase.write(self.unescape(('&%s;' % name))) | |
| def handle_data(self, data): | |
| if self.start_copy: | |
| self.testcase.write(data) | |
| self.end_line = False | |
| # Contest parser. | |
| class CodeforcesContestParser(HTMLParser): | |
| def __init__(self, contest): | |
| HTMLParser.__init__(self) | |
| self.contest = contest | |
| self.start_contest = False | |
| self.start_problem = False | |
| self.name = '' | |
| self.problems = [] | |
| self.problem_names = [] | |
| def handle_starttag(self, tag, attrs): | |
| if self.name == '' and attrs == [('style', 'color: black'), ('href', '/contest/%s' % (self.contest))]: | |
| self.start_contest = True | |
| elif tag == 'option': | |
| if len(attrs) == 1: | |
| regexp = re.compile(r"u'[A-Z]'") | |
| string = str(attrs[0]) | |
| search = regexp.search(string) | |
| if search is not None: | |
| self.problems.append(search.group(0).split("'")[-2]) | |
| self.start_problem = True | |
| def handle_endtag(self, tag): | |
| if tag == 'a' and self.start_contest: | |
| self.start_contest = False | |
| elif self.start_problem: | |
| self.start_problem = False | |
| def handle_data(self, data): | |
| if self.start_contest: | |
| self.name = data | |
| elif self.start_problem: | |
| self.problem_names.append(data) | |
| # Parses each problem page. | |
| def parse_problem(folder, contest, problem): | |
| url = 'http://codeforces.com/contest/%s/problem/%s' % (contest, problem) | |
| html = urlopen(url).read() | |
| parser = CodeforcesProblemParser(folder) | |
| parser.feed(html.decode('utf-8').encode('utf-8')) # Should fix special chars problems. | |
| return parser.num_tests | |
| # Parses the contest page. | |
| def parse_contest(contest): | |
| url = 'http://codeforces.com/contest/%s' % (contest) | |
| html = urlopen(url).read() | |
| parser = CodeforcesContestParser(contest) | |
| parser.feed(html.decode('utf-8')) | |
| return parser | |
| # Generates the test script. | |
| def generate_test_script(folder, num_tests, problem): | |
| with open(folder + 'test.sh', 'w') as test: | |
| test.write( | |
| ('#!/bin/bash\n' | |
| 'DBG=""\n' | |
| 'while getopts ":d" opt; do\n' | |
| ' case $opt in\n' | |
| ' d)\n' | |
| ' echo "-d was selected; compiling in DEBUG mode!" >&2\n' | |
| ' DBG="-DDEBUG"\n' | |
| ' ;;\n' | |
| ' \?)\n' | |
| ' echo "Invalid option: -$OPTARG" >&2\n' | |
| ' ;;\n' | |
| ' esac\n' | |
| 'done\n' | |
| '\n' | |
| 'if ! '+COMPILE_CMD+' {0}.cc; then\n' | |
| ' exit\n' | |
| 'fi\n' | |
| 'INPUT_NAME='+SAMPLE_INPUT+'\n' | |
| 'OUTPUT_NAME='+SAMPLE_OUTPUT+'\n' | |
| 'MY_NAME='+MY_OUTPUT+'\n').format(problem)) | |
| test.write( | |
| 'for test_file in $INPUT_NAME*\n' | |
| 'do\n' | |
| ' i=$((${{#INPUT_NAME}}))\n' | |
| ' test_case=${{test_file:$i}}\n' | |
| ' rm -R $MY_NAME*\n' | |
| ' if ! {5} ./a.out < $INPUT_NAME$test_case > $MY_NAME$test_case; then\n' | |
| ' echo {1}{4}Sample test \#$test_case: Runtime Error{2} {6}\n' | |
| ' echo ========================================\n' | |
| ' echo Sample Input \#$test_case\n' | |
| ' cat $INPUT_NAME$test_case\n' | |
| ' else\n' | |
| ' if diff --brief $MY_NAME$test_case $OUTPUT_NAME$test_case; then\n' | |
| ' echo {1}{3}Sample test \#$test_case: Accepted{2} {6}\n' | |
| ' else\n' | |
| ' echo {1}{4}Sample test \#$test_case: Wrong Answer{2} {6}\n' | |
| ' echo ========================================\n' | |
| ' echo Sample Input \#$test_case\n' | |
| ' cat $INPUT_NAME$test_case\n' | |
| ' echo ========================================\n' | |
| ' echo Sample Output \#$test_case\n' | |
| ' cat $OUTPUT_NAME$test_case\n' | |
| ' echo ========================================\n' | |
| ' echo My Output \#$test_case\n' | |
| ' cat $MY_NAME$test_case\n' | |
| ' echo ========================================\n' | |
| ' fi\n' | |
| ' fi\n' | |
| 'done\n' | |
| .format(num_tests, BOLD, NORM, GREEN_F, RED_F, TIME_CMD, TIME_AP)) | |
| call(['chmod', '+x', folder + 'test.sh']) | |
| # Main function. | |
| def main(): | |
| print (VERSION) | |
| if(len(argv) < 2): | |
| print('USAGE: ./parse.py 435') | |
| return | |
| contest = argv[1] | |
| # Find contest and problems. | |
| print ('Parsing contest %s, please wait...' % contest) | |
| content = parse_contest(contest) | |
| print (BOLD+GREEN_F+'*** Round name: '+content.name+' ***'+NORM) | |
| print ('Found %d problems!' % (len(content.problems))) | |
| # Find problems and test cases. | |
| for index, problem in enumerate(content.problems): | |
| print ('Downloading Problem %s: %s...' % (problem, content.problem_names[index])) | |
| folder = '%s/%s/' % (contest, problem) | |
| call(['mkdir', '-p', folder]) | |
| call(['cp', '-n', TEMPLATE, '%s/%s/%s.cc' % (contest, problem, problem)]) | |
| num_tests = parse_problem(folder, contest, problem) | |
| print('%d sample test(s) found.' % num_tests) | |
| generate_test_script(folder, num_tests, problem) | |
| print ('========================================') | |
| print ('Use ./test.sh to run sample tests in each directory.') | |
| if __name__ == '__main__': | |
| main() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment