-
-
Save viruthagiri/cce6593c2b81ed21306c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*- | |
import MySQLdb | |
import Queue | |
import re | |
import requests | |
import threading | |
import time | |
import sys | |
class WorkManager(object):
    """Seed the shared work queue from a chart page and run a worker pool.

    Construction fetches ``start_url``, enqueues one ``(href, label)`` tuple
    per chart row onto the module-level ``queue``, and then creates
    ``thread_num`` ``Work`` threads.
    """

    def __init__(self, start_url, thread_num=10):
        """Fill the queue from ``start_url`` and create ``thread_num`` workers."""
        self.threads = []
        # The queue is filled before any worker exists: workers exit as soon
        # as they find the queue empty.
        self.__init_work_queue(start_url)
        self.__init_thread_pool(thread_num)

    def __init_work_queue(self, start_url):
        """Fetch the chart page and enqueue every ``(href, label)`` row.

        Any failure is printed and the process exits with status 1.
        """
        try:
            r = requests.get(start_url, timeout=30)
            # 'ignore' is a registered codec error handler; the previous
            # '//ignore' is not and would raise LookupError if ever consulted.
            text = r.text.encode('utf-8', 'ignore')
            p = re.compile(r'<td id="rk\d+?" class="chart">\s*?<a href="(.*?)">(.*?)</a>')
            for m in p.finditer(text):
                queue.put((m.group(1), m.group(2)))
        except Exception as e:
            print(e)
            # Exit non-zero so callers can tell the seed fetch failed.
            sys.exit(1)

    def __init_thread_pool(self, thread_num):
        """Create ``thread_num`` ``Work`` instances and keep references to them."""
        for _ in range(thread_num):
            self.threads.append(Work())

    def wait_all_complete(self):
        """Block until every worker thread has finished."""
        for item in self.threads:
            item.join()
class Work(threading.Thread):
    """Worker thread that drains the module-level ``queue`` through ``spider``.

    The thread starts itself on construction and exits once the queue is
    found empty.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.start()

    def run(self):
        """Process ``(url, mobile)`` items until the queue is empty."""
        while True:
            # Pause between iterations, as the original loop did.
            time.sleep(0.5)
            print("thread:" + self.getName())
            try:
                item = queue.get(block=False)
            except Queue.Empty:
                # Nothing left to do: the normal way for a worker to finish.
                break
            try:
                url, mobile = item
                spider(url, mobile)
            except Exception as e:
                # One bad item must not take the whole worker down.
                print(e)
            finally:
                # Every successful get() is paired with exactly one task_done().
                queue.task_done()
def spider(url, mobile, max_attempts=3):
    """Fetch one device page and append its specs to ``./data.txt``.

    The page at ``domain_name + url`` is searched for the "CPU Type",
    "Number of Cores" and "Total RAM" rows. When all three are present, one
    line ``mobile###cpu###num###ram`` terminated by CRLF is appended to
    ``./data.txt``.

    Args:
        url: Path of the device page, appended to the module-level
            ``domain_name``.
        mobile: Device label written as the first field of the record.
        max_attempts: How many times the HTTP request is tried before the
            device is given up on.

    Failed requests are retried in place. The previous ``queue.put(url,
    mobile)`` passed ``mobile`` as the ``block`` flag and enqueued a bare URL
    string that the worker could not unpack into ``(url, mobile)``.
    """
    response = None
    for _ in range(max_attempts):
        try:
            response = requests.get(domain_name + url, timeout=10)
            break
        except requests.RequestException as e:
            print(e)
    if response is None:
        # Every attempt failed; each error has already been printed.
        return

    text = response.text.encode('utf-8', 'ignore')
    p = re.compile(r'<span class="test_labels">(CPU Type|Number of Cores|Total RAM)</span></td>[\s|\S]*?<td class="altcompare">(.*?)</td>')
    fields = {}
    for m in p.finditer(text):
        # A later match for the same label overwrites an earlier one.
        fields[m.group(1)] = m.group(2)

    labels = ("CPU Type", "Number of Cores", "Total RAM")
    missing = [label for label in labels if label not in fields]
    if missing:
        # Never write a partial record; report what the page lacked instead.
        print("missing " + ", ".join(missing) + " for " + mobile)
        return

    record = "###".join([mobile] + [fields[label] for label in labels])
    try:
        with open("./data.txt", 'a') as f:
            f.write(record + "\r\n")
    except EnvironmentError as e:
        print(e)
def data():
    """Load ``./data.txt`` into the ``benchmarks`` table.

    Each non-blank line is expected to be ``products###cpu###num###ram``.
    ``products`` is split at its first space into ``brand`` and ``product``
    (``product`` is empty when there is no space). Rows are inserted through
    the module-level cursor ``cur``. Malformed lines and failed inserts are
    printed and skipped, so one bad record does not stop the import.
    """
    # Values are passed separately from the statement so the driver quotes
    # them; names containing quotes no longer break or alter the SQL.
    insert_sql = ("insert into benchmarks (brand,product,cpu,num,ram) "
                  "values (%s,%s,%s,%s,%s)")
    with open("./data.txt", 'r') as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            parts = line.split("###")
            if len(parts) != 4:
                print("skipping malformed record: " + line)
                continue
            products, cpu, num, ram = parts
            brand, _, product = products.partition(" ")
            try:
                cur.execute(insert_sql, (brand, product, cpu, num, ram))
            except Exception as e:
                # Best-effort import: report the failed row and keep going.
                print(e)
if __name__ == "__main__":
    # Module-level names read by WorkManager, Work, spider and data.
    queue = Queue.Queue()
    start_url = "http://www.androidbenchmark.net/cpumark_chart.html"
    domain_name = "http://www.androidbenchmark.net/"
    conn = MySQLdb.connect(host='localhost', user='root', passwd='', db='test', port=3306)
    try:
        cur = conn.cursor()
        try:
            # Crawl every device page into ./data.txt, then load it.
            wm = WorkManager(start_url)
            wm.wait_all_complete()
            data()
            # MySQLdb does not autocommit by default; without this the
            # inserts are discarded on close with a transactional engine.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment