iamued · June 6, 2016 16:24 · iamued · Jan 15, 2014
diff --git a/cnzz.py b/cnzz.py
 __author__ = 'richie'
 # -*- coding: utf-8 -*-
 import urllib, urllib2, cookielib, re, os, codecs, time, subprocess,sys,json
 #import simplejson as json
 import dateutil
 class cnzz(object):
    def __init__(self, username = '', password = '',othername=''):
        self.__username = username
        self.__othername = othername
        self.__password = password
        self.__opener = ''
        self.__sitelist = []
    def login(self):
        """Log in and keep the session cookies on this instance.

        Builds a cookie-aware opener, stores it for the other methods and
        POSTs the stored credentials to the login page.

        Returns:
            True when the response body contains the marker ``_username``,
            False otherwise.
        """
        cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
        self.__opener = urllib2.build_opener(cookie_handler)
        form = urllib.urlencode({
            'username': self.__username,
            'password': self.__password,
        })
        request = urllib2.Request('http://new.cnzz.com/user/login.php', form)
        body = self.__opener.open(request).read()
        # Membership test instead of ``find(...) > 1``: the old comparison
        # reported a failed login when the marker sat at offset 0 or 1.
        return '_username' in body
    def getSiteListPageCount(self):
        """Read the total page count from the first site-list page.

        The page is fetched through the logged-in opener, converted from GBK
        to UTF-8 and searched for the "page 1/N" pager text.

        Returns:
            N as an int, or None when the pager text is not found.
        """
        list_url = "http://new.cnzz.com/v1/main.php?s=site_list&sort=0&everypage=30&setpage"
        raw = self.__opener.open(urllib2.Request(list_url)).read()
        page = raw.decode('gbk').encode('utf-8')
        found = re.search(r'第1/(?P<pagecount>\d+?)页 ', page)
        if not found:
            return None
        return int(found.group('pagecount'))
    def getSiteList(self):
        """Collect "name@@@siteid" entries from every page of the site list.

        Each page is fetched, converted from GBK to UTF-8 and scraped for the
        site ids (``site_data('...')`` script calls) and the site names
        (``col-1`` divs). Every entry is printed and appended to the list
        kept on the instance.

        Returns:
            The instance's accumulated list of "name@@@siteid" strings.

        If the page count cannot be read, or a page yields no names/ids or
        more names than ids, 'getSiteList Error' is printed and the process
        exits through sys.exit().
        """
        pagecount = self.getSiteListPageCount()
        if pagecount is None:
            # Pager text not found; this used to crash with a TypeError on
            # ``pagecount+1`` instead of taking the error path below.
            print('getSiteList Error')
            sys.exit()
        print("count page :" + str(pagecount))
        id_pattern = re.compile(r'<script>site_data\(\'(.+)\'\);</script>')
        name_pattern = re.compile(r'<div class="col-1">(?P<sitename>.+)')
        for pageno in range(1, pagecount + 1):
            url = "http://new.cnzz.com/v1/main.php?s=site_list&sort=0&page=" + str(pageno) + "&everypage=30"
            html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
            site_ids = id_pattern.findall(html)
            site_names = name_pattern.findall(html)
            # More names than ids would previously raise IndexError halfway
            # through the page; treat it like any other parse failure.
            if not site_ids or not site_names or len(site_names) > len(site_ids):
                print('getSiteList Error')
                sys.exit()
            for site_name, site_id in zip(site_names, site_ids):
                # The last captured character of the name is dropped, as the
                # original slicing did.
                entry = site_name[0:-1] + "@@@" + site_id
                print(entry)
                self.__sitelist.append(entry)
        print('getSiteList OK')
        return self.__sitelist

    def yesterdayinfo(self,siteid=''):
        if(siteid != ''):
            url ="http://new.cnzz.com/v1/data/site_list_data.php?siteid="+siteid

            req=urllib2.Request(url)
            try:
                html= self.__opener.open(req).read().decode('gbk').encode('utf-8')
            except :
                return self.yesterdayinfo(siteid)
            #print siteid+'ok'
            #data=eval(html)[1]
            data=json.loads(html)
            #print data
            return data
            #data=json.loads("{"+html+"}")
            #print data
        else:
            print 'no siteid'
    def getSiteInfoByDate(self, siteid='', startdate='', enddate=''):
        """Scrape pv/uv/ip per day from the "timeflux" report page.

        The report for ``siteid`` between ``startdate`` and ``enddate`` is
        fetched once; then, for every day label returned by
        dateutil.getDays() (walked in reverse), the matching table row is
        searched in the page.

        Returns:
            dict mapping each matched day label, with the Saturday/Sunday
            weekday markers removed, to ``[pv, uv, ip]`` (captured strings).
            Days without a matching row are left out.
        """
        report_url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=timeflux&st=" + startdate + "&et=" + enddate
        page = self.__opener.open(urllib2.Request(report_url)).read().decode('gbk').encode('utf-8')
        stats = {}
        for day in reversed(dateutil.getDays(startdate, enddate)):
            # The day label is inserted into the pattern verbatim.
            row = re.search(
                r'<td>' + day + r'</td>\s+<td class="num1">(?P<pv>.+)</td>\s+<td class="num1">(?P<uv>.+)</td>\s+<td class="num1">(?P<ip>.+)</td>',
                page,
                re.I)
            if not row:
                continue
            label = day.replace('星期六', '').replace('星期天', '')
            stats[label] = [row.group('pv'), row.group('uv'), row.group('ip')]
        return stats
    def getHistoryKeyByDate(self, siteid='', startdate='', enddate='', page=1, maxpages=3):
        """Scrape the keyword report and return one row per keyword.

        Args:
            siteid: site id as a string.
            startdate: start of the range, inserted into the URL as ``st``.
            enddate: end of the range, inserted into the URL as ``et``.
            page: unused; kept so existing callers keep working.
            maxpages: upper bound on the number of report pages fetched.
                The default of 3 is the fixed number the previous code
                always requested.

        Returns:
            list of ``[key, snum, uv]`` lists holding the captured strings.
        """
        base_url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=key&st=" + startdate + "&et=" + enddate
        print(base_url)
        html = self.__opener.open(urllib2.Request(base_url)).read().decode('gbk').encode('utf-8')
        pager = re.search(r'第1/(?P<pagecount>\d+?)页 ', html)
        # The parsed page count used to be overwritten by a hard-coded 3, so
        # reports with fewer pages were over-fetched. Honour the parsed value,
        # capped by maxpages; fall back to maxpages when no pager is found.
        pagecount = maxpages
        if pager:
            pagecount = min(int(pager.group('pagecount')), maxpages)
        # Row layout being matched (one <td title> cell followed by numeric
        # <td class='all_right'> cells), e.g.:
        #   <td title='www.jxeea.cn '>www.jxeea.cn</td>
        #   <td class='all_right'>38024</td>
        #   <td class='all_right'>29859</td>
        row_pattern = re.compile(r'<td title=\'(?P<key>.+) \'>(?P<key1>.+)</td>\s+<td class=\'all_right\'>(?P<snum>.+)</td>\s+<td class=\'all_right\'>(?P<uv>.+)</td>')
        keyinfos = []
        for pageno in range(1, pagecount + 1):
            print('正在抓取关键词列表第' + str(pageno) + '页')
            url = base_url + "&page=" + str(pageno)
            # Undecodable bytes are dropped here rather than aborting the run.
            html = self.__opener.open(urllib2.Request(url)).read().decode('gbk', 'ignore').encode('utf-8')
            for row in row_pattern.findall(html):
                # findall yields (key, key1, snum, uv); key1 is not kept.
                keyinfos.append([row[0], row[2], row[3]])
        print('共抓取关键词' + str(len(keyinfos)) + '个')
        return keyinfos
    def getKeyHistory(self, siteid='', startdate='', enddate='', key=''):
        """Find the days with the highest and lowest uv for one keyword.

        The keyword (UTF-8 bytes) is re-encoded to GBK and URL-quoted, the
        "history_key" report is fetched, and for each day label returned by
        dateutil.getDays2() (walked in reverse) the uv column of the
        matching row is read as an int.

        Returns:
            ``[(day, uv) with the largest uv, (day, uv) with the smallest
            uv]``; day labels have the Saturday/Sunday markers removed.

        Raises:
            IndexError: when no day row matched at all.
        """
        # Look up the highest and lowest search volume for `key` in the period.
        url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=history_key&st=" + startdate + "&et=" + enddate + "&url=" + urllib2.quote(key.decode('utf-8').encode('gbk'))
        page = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
        uv_by_day = {}
        for day in reversed(dateutil.getDays2(startdate, enddate)):
            hit = re.search(
                r'<td> ' + day + r' </td>\s+<td class="num1">(?P<snum>.+)</td>\s+<td class="num1">(?P<uv>.+)</td>',
                page,
                re.I)
            if not hit:
                continue
            label = day.replace('星期六', '').replace('星期天', '')
            uv_by_day[label] = int(hit.group('uv'))
        ranked = self.sort_by_value(uv_by_day)
        return [ranked[-1], ranked[0]]
    def sort_by_value(self, d):
        """Return the (key, value) pairs of ``d`` ordered by ascending value."""
        pairs = list(d.items())
        pairs.sort(key=lambda pair: pair[1])
        return pairs
 '''
 if __name__ == '__main__':

    CnzzTool=cnzz('username','password','')
    if(CnzzTool.login()):
        print "LoginOk"

    else:
        print "LoginError"
 '''
diff --git a/newcnzz.py b/newcnzz.py
 __author__ = 'richie'
 # -*- coding: utf-8 -*-
 import urllib, urllib2, cookielib, re, os, codecs, time, subprocess,sys,json,math
 #import simplejson as json
 import dateutil
 class cnzz(object):
    def __init__(self, username = '', password = '',othername=''):
        self.__username = username
        self.__othername = othername
        self.__password = password
        self.__opener = ''
        self.__sitelist = []
    def login(self):
        """Log in and keep the session cookies on this instance.

        Builds a cookie-aware opener, stores it for the other methods and
        POSTs the stored credentials to the login page.

        Returns:
            True when the raw response body contains either of the two
            success markers checked below, False otherwise.
        """
        cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
        self.__opener = urllib2.build_opener(cookie_handler)
        form = urllib.urlencode({
            'username': self.__username,
            'password': self.__password,
        })
        request = urllib2.Request('http://new.cnzz.com/user/login.php', form)
        body = self.__opener.open(request).read()
        # Membership tests instead of ``find(...) > 1``: the old comparison
        # reported a failed login when a marker sat at offset 0 or 1.
        return '_username' in body or '登陆进入旧版站长' in body
    def getSiteListPageCount(self):
        """Return how many 90-item pages the site list spans.

        The total number of sites is read from the ``gettotallist`` module of
        the JSON response and divided by 90, rounding up.

        Returns:
            The page count as an int.
        """
        # The trailing ``_`` parameter carries the current Unix time.
        url = "http://tongji.cnzz.com/main.php?c=site&a=show&ajax=module=gettotallist|module=list|module=isOpenTongji&sort=1&search=&currentPage=1&pageType=30&_=" + str(int(time.time()))
        html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
        totalsite = int(json.loads(html)['data']['gettotallist']['totalsite'])
        # math.ceil() returns a float on Python 2; convert so the result can
        # be used directly in range() by callers.
        return int(math.ceil(totalsite / 90.0))
    def getuserdetail(self, url):
        """Fetch ``url``, print the UTF-8 body and return it parsed as JSON.

        The body is parsed before it is printed, so nothing is printed when
        parsing fails.
        """
        response = self.__opener.open(urllib2.Request(url))
        body = response.read().decode('gbk').encode('utf-8')
        parsed = json.loads(body)
        print(body)
        return parsed
    def getSiteList(self):
        """Collect "name@@@siteid" entries from every page of the site list.

        The page count comes from getSiteListPageCount(); each page is
        requested with 90 items and its ``data.list.items`` entries are
        appended to the list kept on the instance.

        Returns:
            The instance's accumulated list of "name@@@siteid" strings.
        """
        total_pages = int(self.getSiteListPageCount())
        print("count page :" + str(total_pages))
        for pageno in range(1, total_pages + 1):
            url = "http://tongji.cnzz.com/main.php?c=site&a=show&ajax=module=gettotallist|module=list|module=isOpenTongji&sort=1&search=&currentPage=" + str(pageno) + "&pageType=90&_=1385011097947"
            body = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
            items = json.loads(body)['data']['list']['items']
            for index in range(len(items)):
                site = items[index]
                self.__sitelist.append(site['name'] + "@@@" + site['siteid'])
        print('getSiteList OK')
        return self.__sitelist

    def yesterdayinfo(self,siteid=''):
        if(siteid != ''):
            #url ="http://new.cnzz.com/v1/data/site_list_data.php?siteid="+siteid
            url="http://tongji.cnzz.com/main.php?c=site&a=show&ajax=module=gettotallist|module=list|module=isOpenTongji&sort=1&search="+siteid+"&currentPage=1&pageType=90&_=1385012521584"
            req=urllib2.Request(url)
            try:
                html= self.__opener.open(req).read().decode('gbk').encode('utf-8')
            except :
                return self.yesterdayinfo(siteid)
            siteinfo= json.loads(html)['data']['list']['items']
            siteyinfo=[(),()]
            if(len(siteinfo)==1):
                #print siteinfo[0]['y_uv']
                siteyinfo[1]=[siteinfo[0]['y_pv'],siteinfo[0]['y_uv'],siteinfo[0]['y_ip']]
            #print siteyinfo
            #print siteid+'ok'
            #data=eval(html)[1]
            #data=json.dumps(html)
            #print data

            #print data
            return list(siteyinfo)
            #data=json.loads("{"+html+"}")
            #print data
        else:
            print 'no siteid'
    def getSiteInfoByDate(self, siteid='', startdate='', enddate=''):
        """Fetch pv/uv/ip per entry from the flow-trend JSON endpoint.

        Each entry's ``key`` is printed as it is processed.

        Returns:
            dict mapping every item's ``key`` in ``data.fluxList.items`` to
            ``[pv, uv, ip]`` taken from the same item.
        """
        url = "http://tongji.cnzz.com/main.php?c=flow&a=trend&ajax=module%3Dsummary%7Cmodule%3DfluxList_currentPage%3D1_pageType%3D90&siteid=" + siteid + "&st=" + startdate + "&et=" + enddate + "&_=1385013202955"
        body = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
        rows = json.loads(body)['data']['fluxList']['items']
        stats = {}
        for index in range(len(rows)):
            row = rows[index]
            print(row['key'])
            stats[row['key']] = [row['pv'], row['uv'], row['ip']]
        return stats
    def getHistoryKeyByDate(self, siteid='', startdate='', enddate='', page=1, maxpages=3):
        """Scrape the keyword report and return one row per keyword.

        Args:
            siteid: site id as a string.
            startdate: start of the range, inserted into the URL as ``st``.
            enddate: end of the range, inserted into the URL as ``et``.
            page: unused; kept so existing callers keep working.
            maxpages: upper bound on the number of report pages fetched.
                The default of 3 is the fixed number the previous code
                always requested.

        Returns:
            list of ``[key, snum, uv]`` lists holding the captured strings.
        """
        base_url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=key&st=" + startdate + "&et=" + enddate
        print(base_url)
        html = self.__opener.open(urllib2.Request(base_url)).read().decode('gbk').encode('utf-8')
        pager = re.search(r'第1/(?P<pagecount>\d+?)页 ', html)
        # The parsed page count used to be overwritten by a hard-coded 3, so
        # reports with fewer pages were over-fetched. Honour the parsed value,
        # capped by maxpages; fall back to maxpages when no pager is found.
        pagecount = maxpages
        if pager:
            pagecount = min(int(pager.group('pagecount')), maxpages)
        # Row layout being matched (one <td title> cell followed by numeric
        # <td class='all_right'> cells), e.g.:
        #   <td title='www.jxeea.cn '>www.jxeea.cn</td>
        #   <td class='all_right'>38024</td>
        #   <td class='all_right'>29859</td>
        row_pattern = re.compile(r'<td title=\'(?P<key>.+) \'>(?P<key1>.+)</td>\s+<td class=\'all_right\'>(?P<snum>.+)</td>\s+<td class=\'all_right\'>(?P<uv>.+)</td>')
        keyinfos = []
        for pageno in range(1, pagecount + 1):
            print('正在抓取关键词列表第' + str(pageno) + '页')
            url = base_url + "&page=" + str(pageno)
            # Undecodable bytes are dropped here rather than aborting the run.
            html = self.__opener.open(urllib2.Request(url)).read().decode('gbk', 'ignore').encode('utf-8')
            for row in row_pattern.findall(html):
                # findall yields (key, key1, snum, uv); key1 is not kept.
                keyinfos.append([row[0], row[2], row[3]])
        print('共抓取关键词' + str(len(keyinfos)) + '个')
        return keyinfos
    def getKeyHistory(self, siteid='', startdate='', enddate='', key=''):
        """Find the days with the highest and lowest uv for one keyword.

        The keyword (UTF-8 bytes) is re-encoded to GBK and URL-quoted, the
        "history_key" report is fetched, and for each day label returned by
        dateutil.getDays2() (walked in reverse) the uv column of the
        matching row is read as an int.

        Returns:
            ``[(day, uv) with the largest uv, (day, uv) with the smallest
            uv]``; day labels have the Saturday/Sunday markers removed.

        Raises:
            IndexError: when no day row matched at all.
        """
        # Look up the highest and lowest search volume for `key` in the period.
        url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=history_key&st=" + startdate + "&et=" + enddate + "&url=" + urllib2.quote(key.decode('utf-8').encode('gbk'))
        page = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
        uv_by_day = {}
        for day in reversed(dateutil.getDays2(startdate, enddate)):
            hit = re.search(
                r'<td> ' + day + r' </td>\s+<td class="num1">(?P<snum>.+)</td>\s+<td class="num1">(?P<uv>.+)</td>',
                page,
                re.I)
            if not hit:
                continue
            label = day.replace('星期六', '').replace('星期天', '')
            uv_by_day[label] = int(hit.group('uv'))
        ranked = self.sort_by_value(uv_by_day)
        return [ranked[-1], ranked[0]]
    def sort_by_value(self, d):
        """Return the (key, value) pairs of ``d`` ordered by ascending value."""
        pairs = list(d.items())
        pairs.sort(key=lambda pair: pair[1])
        return pairs
 '''
 if __name__ == '__main__':
    CnzzTool=cnzz('cnzzusername','password','mygod')
    if(CnzzTool.login()):
        print "LoginOk"
        #CnzzTool.getSiteListPageCount()
        #CnzzTool.getSiteList()
        #print CnzzTool.yesterdayinfo('2918848')
        #CnzzTool.getSiteInfoByDate('2918848','2012-05-20','2012-05-24')
    else:
        print "LoginError"
 '''
	__author__ = 'richie'
	# -- coding: utf-8 --
	import urllib, urllib2, cookielib, re, os, codecs, time, subprocess,sys,json
	#import simplejson as json
	import dateutil
	class cnzz(object):
	    """Scraper for the new.cnzz.com statistics pages.

	    Call login() first; the other methods reuse the cookie-aware opener
	    it stores on the instance. The block structure of this copy had been
	    flattened to a single indent level and is rebuilt here.
	    """

	    def __init__(self, username='', password='', othername=''):
	        """Remember the account details and start with empty session state."""
	        self.__username = username
	        self.__othername = othername
	        self.__password = password
	        # Placeholder value; login() replaces it with a real opener.
	        self.__opener = ''
	        # Accumulates the "name@@@siteid" entries from getSiteList().
	        self.__sitelist = []

	    def login(self):
	        """POST the credentials; return True when the response body
	        contains the marker ``_username``."""
	        cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
	        self.__opener = urllib2.build_opener(cookie_handler)
	        form = urllib.urlencode({
	            'username': self.__username,
	            'password': self.__password,
	        })
	        request = urllib2.Request('http://new.cnzz.com/user/login.php', form)
	        body = self.__opener.open(request).read()
	        # Membership test instead of ``find(...) > 1``, which reported a
	        # failed login when the marker sat at offset 0 or 1.
	        return '_username' in body

	    def getSiteListPageCount(self):
	        """Return the site-list page count from the "page 1/N" pager text,
	        or None when that text is not found."""
	        url = "http://new.cnzz.com/v1/main.php?s=site_list&sort=0&everypage=30&setpage"
	        html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
	        match = re.search(r'第1/(?P<pagecount>\d+?)页 ', html)
	        if match:
	            return int(match.group('pagecount'))
	        return None

	    def getSiteList(self):
	        """Collect, print and return "name@@@siteid" entries for all pages.

	        Prints 'getSiteList Error' and exits through sys.exit() when the
	        page count cannot be read, or a page yields no names/ids or more
	        names than ids.
	        """
	        pagecount = self.getSiteListPageCount()
	        if pagecount is None:
	            # Previously crashed with a TypeError on ``pagecount+1``.
	            print('getSiteList Error')
	            sys.exit()
	        print("count page :" + str(pagecount))
	        id_pattern = re.compile(r'<script>site_data\(\'(.+)\'\);</script>')
	        name_pattern = re.compile(r'<div class="col-1">(?P<sitename>.+)')
	        for pageno in range(1, pagecount + 1):
	            url = "http://new.cnzz.com/v1/main.php?s=site_list&sort=0&page=" + str(pageno) + "&everypage=30"
	            html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
	            site_ids = id_pattern.findall(html)
	            site_names = name_pattern.findall(html)
	            # More names than ids used to raise IndexError mid-page.
	            if not site_ids or not site_names or len(site_names) > len(site_ids):
	                print('getSiteList Error')
	                sys.exit()
	            for site_name, site_id in zip(site_names, site_ids):
	                # The last captured character of the name is dropped.
	                entry = site_name[0:-1] + "@@@" + site_id
	                print(entry)
	                self.__sitelist.append(entry)
	        print('getSiteList OK')
	        return self.__sitelist

	    def yesterdayinfo(self, siteid='', retries=5):
	        """Fetch the site_list_data JSON for ``siteid``.

	        An empty ``siteid`` prints 'no siteid' and returns None. The
	        request is attempted up to ``retries`` times (at least once)
	        before the last error is re-raised.
	        """
	        if siteid == '':
	            print('no siteid')
	            return None
	        url = "http://new.cnzz.com/v1/data/site_list_data.php?siteid=" + siteid
	        attempts = max(1, retries)
	        for attempt in range(attempts):
	            try:
	                html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
	                break
	            except Exception:
	                # Bounded retry replaces the bare ``except`` that retried
	                # by recursion without limit.
	                if attempt == attempts - 1:
	                    raise
	        return json.loads(html)

	    def getSiteInfoByDate(self, siteid='', startdate='', enddate=''):
	        """Return {day label: [pv, uv, ip]} scraped from the "timeflux"
	        report; Saturday/Sunday markers are removed from the labels."""
	        url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=timeflux&st=" + startdate + "&et=" + enddate
	        html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
	        siteinfo = {}
	        for day in reversed(dateutil.getDays(startdate, enddate)):
	            row = re.search(
	                r'<td>' + day + r'</td>\s+<td class="num1">(?P<pv>.+)</td>\s+<td class="num1">(?P<uv>.+)</td>\s+<td class="num1">(?P<ip>.+)</td>',
	                html,
	                re.I)
	            if row:
	                label = day.replace('星期六', '').replace('星期天', '')
	                siteinfo[label] = [row.group('pv'), row.group('uv'), row.group('ip')]
	        return siteinfo

	    def getHistoryKeyByDate(self, siteid='', startdate='', enddate='', page=1, maxpages=3):
	        """Return [key, snum, uv] rows scraped from the keyword report.

	        ``page`` is unused and kept for compatibility. ``maxpages`` caps
	        how many report pages are fetched (default 3, the fixed number the
	        previous code always requested).
	        """
	        base_url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=key&st=" + startdate + "&et=" + enddate
	        print(base_url)
	        html = self.__opener.open(urllib2.Request(base_url)).read().decode('gbk').encode('utf-8')
	        pager = re.search(r'第1/(?P<pagecount>\d+?)页 ', html)
	        # Honour the parsed page count (it used to be overwritten by a
	        # hard-coded 3), capped by maxpages.
	        pagecount = maxpages
	        if pager:
	            pagecount = min(int(pager.group('pagecount')), maxpages)
	        row_pattern = re.compile(r'<td title=\'(?P<key>.+) \'>(?P<key1>.+)</td>\s+<td class=\'all_right\'>(?P<snum>.+)</td>\s+<td class=\'all_right\'>(?P<uv>.+)</td>')
	        keyinfos = []
	        for pageno in range(1, pagecount + 1):
	            print('正在抓取关键词列表第' + str(pageno) + '页')
	            url = base_url + "&page=" + str(pageno)
	            html = self.__opener.open(urllib2.Request(url)).read().decode('gbk', 'ignore').encode('utf-8')
	            for row in row_pattern.findall(html):
	                # findall yields (key, key1, snum, uv); key1 is not kept.
	                keyinfos.append([row[0], row[2], row[3]])
	        print('共抓取关键词' + str(len(keyinfos)) + '个')
	        return keyinfos

	    def getKeyHistory(self, siteid='', startdate='', enddate='', key=''):
	        """Return [(day, uv) with the highest uv, (day, uv) with the lowest
	        uv] for one keyword; raises IndexError when no day row matched."""
	        url = "http://new.cnzz.com/v1/main.php?siteid=" + siteid + "&s=history_key&st=" + startdate + "&et=" + enddate + "&url=" + urllib2.quote(key.decode('utf-8').encode('gbk'))
	        html = self.__opener.open(urllib2.Request(url)).read().decode('gbk').encode('utf-8')
	        uvlist = {}
	        for day in reversed(dateutil.getDays2(startdate, enddate)):
	            row = re.search(
	                r'<td> ' + day + r' </td>\s+<td class="num1">(?P<snum>.+)</td>\s+<td class="num1">(?P<uv>.+)</td>',
	                html,
	                re.I)
	            if row:
	                label = day.replace('星期六', '').replace('星期天', '')
	                uvlist[label] = int(row.group('uv'))
	        ranked = self.sort_by_value(uvlist)
	        return [ranked[-1], ranked[0]]

	    def sort_by_value(self, d):
	        """Return the (key, value) pairs of ``d`` ordered by ascending value."""
	        return sorted(d.items(), key=lambda pair: pair[1])
	'''
	if __name__ == '__main__':

	CnzzTool=cnzz('username','password','')
	if(CnzzTool.login()):
	print "LoginOk"

	else:
	print "LoginError"
	'''