Skip to content

Instantly share code, notes, and snippets.

@kyle-go
Last active January 26, 2016 12:50
Show Gist options
  • Save kyle-go/4e2e1aa3c49eb41b3345 to your computer and use it in GitHub Desktop.
Save kyle-go/4e2e1aa3c49eb41b3345 to your computer and use it in GitHub Desktop.
python小爬虫
from urllib import urlencode
import cookielib, urllib2, urllib, re, os
_loginweb = "http://0.0.0.0:8080/"
_userinfo = { 'os_username': "wangkangan",
'os_password': "***",
'os_cookie':'true'}
# Matches one author cell of the Subversion-commits tab panel. The capture
# group is the bare author name: lowercase letters optionally followed by
# digits. Compiled once here instead of on every call.
_AUTHOR_CELL_RE = re.compile(
    r'<td bgcolor="#ffffff" width="10%" valign="top" rowspan="3">'
    r'([a-z]+[0-9]*)</td>'
)


def issue(opener, issue):
    """Return the distinct authors listed on an issue's Subversion-commits tab.

    Args:
        opener: Object whose ``open(url)`` returns a response offering
            ``read()`` and ``close()`` (e.g. a urllib2 opener).
        issue: Issue key inserted after ``browse/`` in the request URL.
            The name shadows this function inside its own body; it is kept
            unchanged so the call signature stays the same.

    Returns:
        A dict mapping every author name found on the page to itself, so
        duplicates collapse into one entry. Empty when no cell matches.
    """
    url = (_loginweb + "browse/" + issue +
           "?page=com.atlassian.jira.plugin.ext.subversion:subversion-commits-tabpanel")
    response = opener.open(url)
    try:
        text = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    # findall() yields only the captured names, so no manual tag stripping
    # is needed; repeated names simply overwrite the same key.
    return dict((author, author) for author in _AUTHOR_CELL_RE.findall(text))
# Every request goes through one opener backed by a cookie jar, so cookies
# set by the login response are sent along with the later issue requests.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
data = urllib.urlencode(_userinfo)
# Passing a body makes this a POST; the response content is not needed, so
# the response is closed right away.
opener.open(_loginweb + "login.jsp", data).close()

# Start from a fresh output file on every run.
if os.path.exists('result.txt'):
    os.remove("result.txt")

# input.txt holds one issue key per line; each key produces one
# "KEY:author1,author2," line in result.txt. The `with` blocks close both
# files even if a request raises part-way through.
with open("result.txt", 'w') as wfile:
    with open("input.txt", 'r') as rfile:
        for line in rfile:
            # Drop spaces, tabs and line-ending characters anywhere in the line.
            for ch in " \t\r\n":
                line = line.replace(ch, "")
            if not line:
                # A blank line would otherwise request "browse/?page=..."
                # with an empty issue key.
                continue
            result = issue(opener, line)
            # Each author is followed by a comma (including the last one),
            # which is the established output format.
            authors = "".join(v + "," for v in result)
            wfile.write(line + ":" + authors + "\r\n")

print("****** Everything is OK! ******")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment