Created
September 11, 2013 08:20
-
-
Save tdoly/6520724 to your computer and use it in GitHub Desktop.
python线程
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf-8 -*- | |
''' | |
Created on 2013-8-1 | |
@author: mingdong.li | |
Using threads with queues
创建一个 Queue.Queue() 的实例,然后使用数据对它进行填充。 | |
将经过填充数据的实例传递给线程类,后者是通过继承 threading.Thread 的方式创建的。 | |
生成守护线程池。 | |
每次从队列中取出一个项目,并使用该线程中的数据和 run 方法以执行相应的工作。 | |
在完成这项工作之后,使用 queue.task_done() 函数向任务已经完成的队列发送一个信号。 | |
对队列执行 join 操作,实际上意味着等到队列为空,再退出主程序。 | |
在使用这个模式时需要注意一点:将工作线程设置为守护线程(daemon 为 true)之后,当只剩下守护线程仍处于活动状态时,主线程或者程序就可以退出。
这种方式创建了一种简单的方式以控制程序流程,因为在退出之前,您可以对队列执行 join 操作、或者等到队列为空。 | |
[http://www.ibm.com/developerworks/cn/aix/library/au-threadingpython/index.html#resources] | |
join() | |
保持阻塞状态,直到处理了队列中的所有项目为止。在将一个项目添加到该队列时,未完成的任务的总数就会增加。 | |
当使用者线程调用 task_done() 以表示检索了该项目、并完成了所有的工作时,那么未完成的任务的总数就会减少。 | |
当未完成的任务的总数减少到零时,join() 就会结束阻塞状态。 | |
''' | |
import Queue | |
import threading | |
import urllib2 | |
import time | |
from beautifulsoup.BeautifulSoup import BeautifulSoup | |
hosts = ["http://weibo.com/", "http://google.com.hk", "http://csdn.net", | |
"http://moyoyo.com", "http://www.baidu.com"] | |
queue = Queue.Queue() | |
out_queue = Queue.Queue() | |
class ThreadUrl(threading.Thread):
    '''Worker thread that downloads pages for host URLs taken from a queue.

    Every host pulled from ``queue`` is opened with ``urllib2.urlopen`` and
    the complete response body is placed on ``out_queue``.
    '''

    def __init__(self, queue, out_queue):
        '''Remember the queue of host URLs and the queue for page bodies.'''
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue

    def run(self):
        '''Fetch hosts in an endless loop, marking each item done exactly once.'''
        import sys  # local import: only needed for error reporting

        while True:
            # Block until a host URL is available.
            host = self.queue.get()
            try:
                response = urllib2.urlopen(host)
                try:
                    # Read the whole response body.
                    chunk = response.read()
                finally:
                    # Release the connection even when the read fails.
                    response.close()
                # Hand the page body to the parsing stage.
                self.out_queue.put(chunk)
            except Exception as exc:
                # Worker-loop boundary: report the failure and keep serving
                # the remaining hosts instead of letting the thread die.
                sys.stderr.write("Failed to fetch %s: %s\n" % (host, exc))
            finally:
                # Always signal completion; a skipped task_done() would make
                # queue.join() block forever.
                self.queue.task_done()
class DatamineThread(threading.Thread):
    '''Worker thread that parses downloaded pages and prints their title tags.

    Page bodies are taken from ``out_queue``, parsed with ``BeautifulSoup``,
    and the result of ``findAll(['title'])`` is printed.
    '''

    def __init__(self, out_queue):
        '''Remember the queue that supplies downloaded page bodies.'''
        threading.Thread.__init__(self)
        self.out_queue = out_queue

    def run(self):
        '''Parse page bodies in an endless loop, marking each item done.'''
        import sys  # local import: only needed for error reporting

        while True:
            # Use the queue passed to this instance, not the module-level
            # global of the same name.
            chunk = self.out_queue.get()
            try:
                # Parse the page and print every <title> element found.
                soup = BeautifulSoup(chunk)
                print(soup.findAll(['title']))
            except Exception as exc:
                # Worker-loop boundary: report the failure and keep serving
                # the remaining pages instead of letting the thread die.
                sys.stderr.write("Failed to parse page: %s\n" % (exc,))
            finally:
                # Always signal completion; a skipped task_done() would make
                # out_queue.join() block forever.
                self.out_queue.task_done()
# Wall-clock reference taken before any worker thread is created.
start = time.time()


def main():
    '''Start both worker pools, enqueue every host, and wait for completion.'''
    # Spawn a pool of five downloader threads sharing the two queues.
    for _ in range(5):
        fetcher = ThreadUrl(queue, out_queue)
        fetcher.daemon = True  # daemon thread
        fetcher.start()

    # Populate the input queue with the hosts to download.
    for url in hosts:
        queue.put(url)

    # Spawn a pool of five parser threads consuming the downloaded pages.
    for _ in range(5):
        miner = DatamineThread(out_queue)
        miner.daemon = True
        miner.start()

    # Wait until every item on each queue has been marked as done.
    queue.join()
    out_queue.join()
# Run the download/parse pipeline, then report the elapsed wall-clock time.
main()
print("Elapsed Time: %s" % (time.time() - start,))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on 2013-8-1 | |
@author: mingdong.li | |
Introduction to threads
''' | |
import threading | |
import datetime | |
class ThreadClass(threading.Thread):
    '''Thread that prints its own name together with the current time.'''

    def run(self):
        '''Print "<thread name> run time is: <current datetime>" once.'''
        timestamp = datetime.datetime.now()
        message = "%s run time is: %s" % (self.name, timestamp)
        print(message)
# Start two threads; each one runs ThreadClass.run once.
for _ in range(2):
    ThreadClass().start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment