Skip to content

Instantly share code, notes, and snippets.

@cou929
Created August 8, 2010 14:06
Show Gist options
  • Save cou929/514061 to your computer and use it in GitHub Desktop.
Save cou929/514061 to your computer and use it in GitHub Desktop.
Fetch a TopCoder problem statement together with the system-test inputs and expected outputs, and save them to files.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
'''
tcget.py
Kosei Moriyama <[email protected]>
'''
import BeautifulSoup
import html2text
import urllib, urllib2, cookielib, re, os, sys
from optparse import OptionParser
# Default TopCoder credentials. They are used as the defaults of the
# -u/--user_name and -p/--password command-line options.
# NOTE(review): a real password stored here sits in plain text in the source.
tc_user_name = 'write your username of topcoder account here, or input it via command-line'
tc_password = 'write your password of topcoder account here, or input it via command-line'
# URL prefixes; the search query / problem id is appended directly to them.
prefix_search_from = 'http://www.topcoder.com/tc?module=ProblemArchive&class='
prefix_problem_statement = 'http://www.topcoder.com/stat?c=problem_statement&pm='
# URL templates; the <rd>, <pm> and <cr> placeholders are substituted with
# str.replace by the page-fetching functions below.
url_round_result_page = 'http://www.topcoder.com/tc?module=ProblemDetail&rd=<rd>&pm=<pm>'
# Target of the login POST performed in connection().
url_tc_secure = 'https://www.topcoder.com/tc'
url_problem_solution = 'http://www.topcoder.com/stat?c=problem_solution&cr=<cr>&rd=<rd>&pm=<pm>'
# Placeholder for the shared, cookie-aware URL opener. The main section
# rebinds it to the result of connection() before any page is fetched.
opener = ''
def connection(user_name, password):
    '''
    Log in to TopCoder and return a cookie-aware opener.

    A login form is POSTed to url_tc_secure through a freshly built opener
    that keeps cookies in an in-memory CookieJar.

    Returns the opener on success, or False when the response body contains
    the text 'Username or password incorrect'.
    '''
    home_url = 'http://www.topcoder.com/'
    cookie_jar = cookielib.CookieJar()
    session = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    session.addheaders = [('User-agent', 'Mozilla/4.0 (compatible MSIE 6.0 Windows NT 5.1)')]
    form = urllib.urlencode({
        'module': 'Login',
        'nextpage': home_url,
        'username': user_name,
        'password': password,
    })
    # Passing a data argument makes the opener issue a POST.
    body = str(session.open(url_tc_secure, form).read())
    if 'Username or password incorrect' in body:
        return False
    return session
def searchProblem(search_query):
    '''
    Run a problem-archive search and return the result page as HTML.

    search_query is the problem (class) name to look for. It is
    percent-encoded before being appended to prefix_search_from, so names
    containing spaces or reserved URL characters still form a valid URL.
    Plain alphanumeric names are sent unchanged.

    The request goes through the module-level opener, which must already
    have been set by a successful connection() call.
    '''
    url = prefix_search_from + urllib.quote_plus(search_query)
    return opener.open(url).read()
def getProblemAndRoundId(html):
    '''
    Extract (round id, problem id) pairs from a problem-archive search page.

    Returns a list of dicts of the form
    {'round_id': '<digits>', 'problem_id': '<digits>'}, one per matching
    link, in document order. The list is empty when nothing matches.
    Matching is case-insensitive.
    '''
    # Example of a matching link:
    # <A HREF="/tc?module=ProblemDetail&rd=10767&pm=7968" class="statText">
    # Raw string so that \? and \s reach the regex engine untouched.
    pattern = r'"/tc\?module=ProblemDetail&rd=([0-9]+?)&pm=([0-9]+?)"\sclass="statText"'
    # A real list (not a lazy map) so callers can index it and test emptiness.
    return [{'round_id': round_id, 'problem_id': problem_id}
            for round_id, problem_id in re.findall(pattern, html, re.I)]
def getProblemParameters(search_query):
    '''
    Search for a problem and return the ids found on the result page.

    Fetches the search page with searchProblem() and passes its HTML to
    getProblemAndRoundId(); the return value is whatever that parser yields.
    '''
    search_html = searchProblem(search_query)
    return getProblemAndRoundId(search_html)
def getProblemStatementPage(problem_id):
    '''
    Download the problem-statement page for problem_id and return its HTML.

    problem_id is a string; it is appended to prefix_problem_statement.
    The request goes through the module-level opener.
    '''
    statement_url = prefix_problem_statement + problem_id
    response = opener.open(statement_url)
    return response.read()
def getRoundResultPage(problem_id, round_id):
    '''
    Download the problem-detail page of a problem within a round.

    Both ids are strings. They are substituted for the <pm> and <rd>
    placeholders of url_round_result_page. Returns the page HTML.
    '''
    page_url = url_round_result_page.replace('<pm>', problem_id)
    page_url = page_url.replace('<rd>', round_id)
    return opener.open(page_url).read()
def getTopSubmissionId(html):
    '''
    Extract coder ids from the "view solution" links of a problem-detail page.

    Returns the list of cr= values (digit strings) of every matching link,
    in document order; empty when there is none. The separators may be
    written either as '&' or as '&amp;'. Matching is case-insensitive.
    '''
    # Example of a matching link:
    # <a href="/stat?c=problem_solution&amp;cr=14970299&amp;rd=14156&amp;pm=10880" class="statText">view</a>
    # Raw string so that \? reaches the regex engine untouched.
    pattern = (r'href="/stat\?c=problem_solution&(?:amp;)*cr=([0-9]+?)'
               r'&(?:amp;)*rd=[0-9]+?&(?:amp;)*pm=[0-9]+?" class="statText"')
    return re.findall(pattern, html, re.I)
def getTopSubmissionPage(problem_id, round_id, top_submission_id):
    '''
    Download the solution page of one coder for a problem in a round.

    All three ids are strings. They replace the <pm>, <rd> and <cr>
    placeholders of url_problem_solution, in that order. Returns the page
    HTML.
    '''
    page_url = url_problem_solution.replace('<pm>', problem_id)
    page_url = page_url.replace('<rd>', round_id)
    page_url = page_url.replace('<cr>', top_submission_id)
    return opener.open(page_url).read()
def getPages(search_query):
    '''
    Download the raw HTML pages needed for one problem.

    Only the first search hit for search_query is used.

    Returns a dict with the keys 'problem_statement_page' and
    'top_submission_page' (both HTML strings), or False when the search
    yields no problem or the problem-detail page lists no solution link.
    '''
    params = getProblemParameters(search_query)
    if not params:
        return False
    first_hit = params[0]
    problem_id = first_hit['problem_id']
    round_id = first_hit['round_id']
    top_html = getRoundResultPage(problem_id, round_id)
    top_ids = getTopSubmissionId(top_html)
    if not top_ids:
        # Without a solution page there is no system-test data to save;
        # report failure instead of raising IndexError on top_ids[0].
        return False
    return {
        'problem_statement_page': getProblemStatementPage(problem_id),
        'top_submission_page': getTopSubmissionPage(problem_id, round_id, top_ids[0]),
    }
def getProblemStatement(html):
    '''
    Parse a problem-statement page into its title and statement text.

    Returns {'title': ..., 'problem_statement': ...}. The statement is the
    first table inside the td of class 'problemText', converted with
    html2text. The title comes from the first child of the td of class
    'statTextBig'.
    '''
    document = BeautifulSoup.BeautifulSoup(html)
    heading = document.find('td', {'class': 'statTextBig'}).contents[0]
    statement_table = document.find('td', {'class': 'problemText'}).table
    # The first 28 characters of the heading are dropped.
    # NOTE(review): presumably a fixed prefix such as
    # '&nbsp;Problem Statement for ' (28 characters) - confirm on a live page.
    heading_text = str(heading)
    statement_text = html2text.html2text(str(statement_table))
    return {'title': heading_text[28:], 'problem_statement': statement_text}
def _cleanTestValue(text):
    '''Return text with every comma, brace and double quote removed.'''
    for unwanted in (',', '{', '}', '"'):
        text = text.replace(unwanted, '')
    return text


def getSystemTestData(html):
    '''
    Extract system-test inputs and expected outputs from a solution page.

    Every tr with valign="top" is treated as one test case: the first
    child of its contents[3] is the input and the first child of its
    contents[7] is the expected output. Commas, braces and double quotes
    are stripped from both.

    Returns {'system_input': [...], 'system_output': [...]}, two lists of
    equal length in page order.
    '''
    soup = BeautifulSoup.BeautifulSoup(html)
    # Named inputs/outputs so the builtin input() is not shadowed.
    inputs = []
    outputs = []
    for row in soup.findAll('tr', {'valign': 'top'}):
        inputs.append(_cleanTestValue(row.contents[3].contents[0]))
        outputs.append(_cleanTestValue(row.contents[7].contents[0]))
    return {'system_input': inputs, 'system_output': outputs}
def fetchProblemData(query):
    '''
    Fetch and parse everything that is saved for one problem.

    Returns a dict merging the results of getProblemStatement() and
    getSystemTestData(), i.e. the keys 'title', 'problem_statement',
    'system_input' and 'system_output'. Returns False when getPages()
    could not obtain the pages.
    '''
    pages = getPages(query)
    if not pages:
        return False
    res = {}
    # dict.update merges each parsed part; later keys would win on a clash,
    # exactly like the key-by-key copy it replaces.
    res.update(getProblemStatement(pages['problem_statement_page']))
    res.update(getSystemTestData(pages['top_submission_page']))
    return res
def saveFiles(data):
    '''
    Write the fetched problem data into a new directory named after it.

    data must provide 'title' (the directory path), 'problem_statement'
    (a string) and 'system_input' / 'system_output' (lists of strings,
    written one item per line).

    Creates <title>/problem_statement.txt, <title>/system_input.txt and
    <title>/system_output.txt. When the directory already exists, a notice
    is printed and nothing is written. Returns None in both cases.
    '''
    directory = data['title']
    if os.path.isdir(directory):
        print('The directory %s already exists.' % directory)
        return
    # Build all contents first so a missing key fails before anything is
    # created on disk.
    files = (
        ('problem_statement.txt', data['problem_statement']),
        ('system_input.txt', '\n'.join(data['system_input'])),
        ('system_output.txt', '\n'.join(data['system_output'])),
    )
    os.mkdir(directory)
    for file_name, content in files:
        # The with-block closes the handle even if write() raises.
        with open(os.path.join(directory, file_name), 'w') as f:
            f.write(content)
def tcget(query):
    '''
    Fetch the problem matching query and save it to disk.

    Returns True once saveFiles() has been called with the fetched data,
    and False when fetchProblemData() produced nothing.
    '''
    problem = fetchProblemData(query)
    if problem:
        saveFiles(problem)
        return True
    return False
# ==============
# main
# ==============
## parse options
usage = 'usage: %prog <PROBLEM NAME> [options]'
description = 'Search the specified problem of TopCoder, and save the problem statement and it\'s input/output of system test as a file.'
parser = OptionParser(usage=usage, description=description)
parser.add_option('-u', '--user_name', dest='user_name', help='user name of topcoder account', default=tc_user_name)
parser.add_option('-p', '--password', dest='password', help='password of topcoder account', default=tc_password)
(options, args) = parser.parse_args()
# The problem name is the first positional argument left over after option
# parsing. Reading sys.argv[1] instead would pick up '-u' or '-p' whenever
# the options are given before the name.
if not args:
    parser.print_help()
    sys.exit()
problem_name = args[0]
user_name = options.user_name
password = options.password
## search problem and save to file
# Rebinds the module-level opener that every page-fetching function uses.
opener = connection(user_name, password)
if not opener:
    print('Username or password incorrect.')
    sys.exit()
if not tcget(problem_name):
    print("The problem '%s' did not find" % problem_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment