Created
August 8, 2010 14:06
-
-
Save cou929/514061 to your computer and use it in GitHub Desktop.
Fetch a TopCoder problem statement, its test cases, and the expected results of the system test, and save this data to files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
''' | |
tcget.py | |
Kosei Moriyama <[email protected]> | |
''' | |
import BeautifulSoup | |
import html2text | |
import urllib, urllib2, cookielib, re, os, sys | |
from optparse import OptionParser | |
# Default TopCoder credentials. Edit these placeholders, or override them
# with the -u / -p command-line options (they are the option defaults).
tc_user_name = 'write your username of topcoder account here, or input it via command-line'
tc_password = 'write your password of topcoder account here, or input it via command-line'

# URL prefixes: the search query / problem id is appended to these.
prefix_search_from = 'http://www.topcoder.com/tc?module=ProblemArchive&class='
prefix_problem_statement = 'http://www.topcoder.com/stat?c=problem_statement&pm='

# URL templates: <rd>, <pm> and <cr> are placeholders filled in with
# str.replace (round id, problem id and top-submission id respectively).
url_round_result_page = 'http://www.topcoder.com/tc?module=ProblemDetail&rd=<rd>&pm=<pm>'
url_tc_secure = 'https://www.topcoder.com/tc'
url_problem_solution = 'http://www.topcoder.com/stat?c=problem_solution&cr=<cr>&rd=<rd>&pm=<pm>'

# Shared URL opener used by the fetch functions. It is assigned by the main
# script after login; None marks "not logged in yet".
opener = None
def connection(user_name, password):
    """Log in to TopCoder and return a cookie-aware URL opener.

    The credentials are POSTed as a 'Login' form to ``url_tc_secure``.

    Returns the opener (which keeps the session cookies), or False when
    the response body contains 'Username or password incorrect'.
    """
    home_url = 'http://www.topcoder.com/'
    cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    session = urllib2.build_opener(cookie_handler)
    session.addheaders = [('User-agent', 'Mozilla/4.0 (compatible MSIE 6.0 Windows NT 5.1)')]
    form = urllib.urlencode({
        'module': 'Login',
        'nextpage': home_url,
        'username': user_name,
        'password': password,
    })
    body = str(session.open(url_tc_secure, form).read())
    if 'Username or password incorrect' in body:
        return False
    return session
def searchProblem(search_query):
    """Fetch the problem-archive search result page for a query.

    search_query is appended to ``prefix_search_from`` as the value of the
    ``class`` URL parameter. It is URL-encoded first so that spaces, '&'
    and similar characters cannot corrupt the request URL.

    Returns the response body read from the module-level ``opener``.
    """
    res = opener.open(prefix_search_from + urllib.quote_plus(search_query))
    return res.read()
def getProblemAndRoundId(html):
    """Extract round/problem ids from ProblemDetail links in a page.

    Returns a list of dicts ``{'round_id': ..., 'problem_id': ...}`` (ids
    as strings), one per matching link in document order; an empty list
    when nothing matches.
    """
    # Example of a matching link:
    # <A HREF="/tc?module=ProblemDetail&rd=10767&pm=7968" class="statText">
    # '&' may also appear HTML-escaped as '&amp;', as in getTopSubmissionId.
    pattern = (r'"/tc\?module=ProblemDetail&(?:amp;)*rd=([0-9]+?)'
               r'&(?:amp;)*pm=([0-9]+?)"\sclass="statText"')
    return [{'round_id': round_id, 'problem_id': problem_id}
            for round_id, problem_id in re.findall(pattern, html, re.I)]
def getProblemParameters(search_query):
    """Search for a problem and return the ids found on the result page.

    Returns whatever getProblemAndRoundId extracts from the page fetched
    by searchProblem.
    """
    result_page = searchProblem(search_query)
    return getProblemAndRoundId(result_page)
def getProblemStatementPage(problem_id):
    """Fetch the problem statement page for problem_id.

    problem_id is a string appended to ``prefix_problem_statement``; the
    page is requested through the module-level ``opener``.
    """
    statement_url = prefix_problem_statement + problem_id
    response = opener.open(statement_url)
    return response.read()
def getRoundResultPage(problem_id, round_id):
    """Fetch the round result (ProblemDetail) page for a problem.

    Both ids are strings substituted into ``url_round_result_page``.
    """
    page_url = url_round_result_page.replace('<pm>', problem_id)
    page_url = page_url.replace('<rd>', round_id)
    return opener.open(page_url).read()
def getTopSubmissionId(html):
    """Extract the ``cr`` ids from problem_solution links in a page.

    Returns a list of id strings in document order; an empty list when
    the page contains no matching link.
    """
    # Example of a matching link:
    # <a href="/stat?c=problem_solution&cr=14970299&rd=14156&pm=10880" class="statText">view</a>
    # Each '&' may also appear HTML-escaped as '&amp;'.
    pattern = (r'href="/stat\?c=problem_solution&(?:amp;)*cr=([0-9]+?)'
               r'&(?:amp;)*rd=[0-9]+?&(?:amp;)*pm=[0-9]+?" class="statText"')
    return re.findall(pattern, html, re.I)
def getTopSubmissionPage(problem_id, round_id, top_submission_id):
    """Fetch the problem_solution page of one submission.

    The three ids are strings substituted into ``url_problem_solution``.
    """
    substitutions = (
        ('<pm>', problem_id),
        ('<rd>', round_id),
        ('<cr>', top_submission_id),
    )
    page_url = url_problem_solution
    for placeholder, value in substitutions:
        page_url = page_url.replace(placeholder, value)
    return opener.open(page_url).read()
def getPages(search_query):
    """Fetch the statement page and a submission page for a problem.

    Only the first search hit is used, and only the first submission link
    found on its round result page.

    Returns a dict with keys 'problem_statement_page' and
    'top_submission_page' (raw page bodies), or False when the search
    finds no problem or the round result page has no submission link.
    """
    params = getProblemParameters(search_query)
    if not params:
        return False
    problem_id = params[0]['problem_id']
    round_id = params[0]['round_id']
    top_ids = getTopSubmissionId(getRoundResultPage(problem_id, round_id))
    if not top_ids:
        # Without a submission link there is no system test data to fetch;
        # report failure instead of raising IndexError on top_ids[0].
        return False
    return {
        'problem_statement_page': getProblemStatementPage(problem_id),
        'top_submission_page': getTopSubmissionPage(problem_id, round_id, top_ids[0]),
    }
def getProblemStatement(html):
    """Parse a problem statement page into its title and statement text.

    Returns a dict with keys 'title' and 'problem_statement'; the
    statement is the table inside the 'problemText' cell converted to
    plain text by html2text.
    """
    soup = BeautifulSoup.BeautifulSoup(html)
    heading = soup.find('td', {'class': 'statTextBig'}).contents[0]
    statement_table = soup.find('td', {'class': 'problemText'}).table
    # NOTE(review): the first 28 characters of the heading are dropped,
    # presumably a fixed prefix before the problem name -- confirm.
    title_text = str(heading)[28:]
    statement_text = html2text.html2text(str(statement_table))
    return {'title': title_text, 'problem_statement': statement_text}
def getSystemTestData(html):
    """Parse system test inputs and outputs from a submission page.

    Every ``<tr valign="top">`` row contributes one input (child index 3)
    and one output (child index 7); commas, braces and double quotes are
    removed from both.

    Returns a dict with keys 'system_input' and 'system_output', each a
    list with one entry per row.
    """
    def strip_punctuation(text):
        # Remove list/string punctuation so only the bare values remain.
        for char in (',', '{', '}', '"'):
            text = text.replace(char, '')
        return text

    soup = BeautifulSoup.BeautifulSoup(html)
    inputs = []
    outputs = []
    for row in soup.findAll('tr', {'valign': 'top'}):
        # NOTE(review): indices 3 and 7 are presumably the arguments and
        # expected-result cells of the row -- confirm against the page.
        inputs.append(strip_punctuation(row.contents[3].contents[0]))
        outputs.append(strip_punctuation(row.contents[7].contents[0]))
    return {'system_input': inputs, 'system_output': outputs}
def fetchProblemData(query):
    """Fetch and parse everything needed to save a problem.

    Returns a dict merging the results of getProblemStatement and
    getSystemTestData, or False when getPages reports failure.
    """
    pages = getPages(query)
    if not pages:
        return False
    merged = {}
    merged.update(getProblemStatement(pages['problem_statement_page']))
    merged.update(getSystemTestData(pages['top_submission_page']))
    return merged
def saveFiles(data):
    """Write the problem data to a new directory named after its title.

    data must provide 'title', 'problem_statement' (text), and
    'system_input' / 'system_output' (lists of strings, written one entry
    per line). Creates problem_statement.txt, system_input.txt and
    system_output.txt inside the directory.

    If the directory already exists, a message is printed and nothing is
    written. Returns None.
    """
    directory = data['title']
    if os.path.isdir(directory):
        # Single-argument form prints the same text on Python 2 and 3.
        print('The directory %s already exists.' % directory)
        return
    os.mkdir(directory)
    outputs = (
        ('problem_statement.txt', data['problem_statement']),
        ('system_input.txt', '\n'.join(data['system_input'])),
        ('system_output.txt', '\n'.join(data['system_output'])),
    )
    for file_name, content in outputs:
        # 'with' closes the file even if the write fails.
        with open(os.path.join(directory, file_name), 'w') as f:
            f.write(content)
def tcget(query):
    """Fetch the problem matching query and save it to disk.

    Returns True after saveFiles has been called, or False when
    fetchProblemData reports failure.
    """
    problem_data = fetchProblemData(query)
    if problem_data:
        saveFiles(problem_data)
        return True
    return False
# ==============
# main
# ==============

## parse options
usage = 'usage: %prog <PROBLEM NAME> [options]'
description = 'Search the specified problem of TopCoder, and save the problem statement and it\'s input/output of system test as a file.'
parser = OptionParser(usage=usage, description=description)
parser.add_option('-u', '--user_name', dest='user_name', help='user name of topcoder account', default=tc_user_name)
parser.add_option('-p', '--password', dest='password', help='password of topcoder account', default=tc_password)
(options, args) = parser.parse_args()

# Take the problem name from the positional arguments left over after
# option parsing. sys.argv[1] would be an option flag whenever options are
# given before the problem name (e.g. "tcget.py -u me ProblemName").
if not args:
    parser.print_help()
    sys.exit()
problem_name = args[0]
user_name = options.user_name
password = options.password

## search problem and save to file
# The fetch functions read this module-level opener.
opener = connection(user_name, password)
if not opener:
    # Single-argument print form gives the same output on Python 2 and 3.
    print('Username or password incorrect.')
    sys.exit()
if not tcget(problem_name):
    print("The problem '%s' did not find" % problem_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment