Skip to content

Instantly share code, notes, and snippets.

@JeffreyZhao
Created July 17, 2009 08:39
Show Gist options
  • Save JeffreyZhao/148949 to your computer and use it in GitHub Desktop.
Save JeffreyZhao/148949 to your computer and use it in GitHub Desktop.
namespace StrongCrawling
{
using System;
using System.Collections.Generic;
using System.Net;
using ActorLite;
/// <summary>
/// Operations that can be requested of a crawler. In this file the interface is
/// implemented by <see cref="Crawler"/>, whose messages are delegates taking an
/// <see cref="ICrawlRequestHandler"/>.
/// </summary>
public interface ICrawlRequestHandler
{
/// <summary>Requests that <paramref name="url"/> be crawled.</summary>
/// <param name="monitor">The monitor passed along with the request; the <see cref="Crawler"/> implementation posts its result back to it.</param>
/// <param name="url">The URL to crawl.</param>
void Crawl(Monitor monitor, string url);
}
/// <summary>
/// Operations that can be requested of the coordinating side of the crawl. In this
/// file the interface is implemented by <see cref="Monitor"/>, whose messages are
/// delegates taking an <see cref="ICrawlResponseHandler"/>.
/// </summary>
public interface ICrawlResponseHandler
{
/// <summary>Reports that <paramref name="crawler"/> has finished working on <paramref name="url"/>.</summary>
/// <param name="crawler">The crawler reporting the result.</param>
/// <param name="url">The URL the crawler was asked to crawl.</param>
/// <param name="content">The content obtained for <paramref name="url"/>, as reported by the crawler.</param>
void Crawled(Crawler crawler, string url, string content);
/// <summary>Requests that <paramref name="url"/> be crawled.</summary>
/// <param name="url">The URL to crawl.</param>
void Crawl(string url);
}
/// <summary>
/// Actor that hands crawl requests out to a fixed pool of <see cref="Crawler"/>
/// instances. Each message is a delegate that is invoked with this instance as an
/// <see cref="ICrawlResponseHandler"/>.
/// </summary>
public class Monitor : Actor<Action<ICrawlResponseHandler>>, ICrawlResponseHandler
{
    // Crawlers that currently have no URL assigned.
    private readonly Stack<Crawler> m_crawlers;

    // URLs waiting for a crawler to become available.
    private readonly Queue<string> m_readyToCrawl;

    /// <summary>Creates a monitor that owns <paramref name="crawlerCount"/> crawlers.</summary>
    /// <param name="crawlerCount">Number of crawlers to create; must be at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="crawlerCount"/> is zero or negative. Without any crawler every
    /// requested URL would only ever be queued and never crawled.
    /// </exception>
    public Monitor(int crawlerCount)
    {
        if (crawlerCount <= 0)
        {
            throw new ArgumentOutOfRangeException(
                "crawlerCount", crawlerCount, "At least one crawler is required.");
        }

        this.m_readyToCrawl = new Queue<string>();
        this.m_crawlers = new Stack<Crawler>(crawlerCount);
        for (int i = 0; i < crawlerCount; i++)
        {
            this.m_crawlers.Push(new Crawler());
        }
    }

    /// <summary>Runs the received message against this monitor.</summary>
    /// <param name="message">The delegate to invoke with this instance.</param>
    protected override void Receive(Action<ICrawlResponseHandler> message)
    {
        message(this);
    }

    #region ICrawlResponseHandler Members

    /// <summary>
    /// Called when a crawler has finished: gives it the next queued URL, or puts it
    /// back into the idle pool when nothing is waiting.
    /// </summary>
    void ICrawlResponseHandler.Crawled(Crawler crawler, string url, string content)
    {
        if (this.m_readyToCrawl.Count > 0)
        {
            // There is pending work: assign the next URL to the crawler that just finished.
            var nextUrl = this.m_readyToCrawl.Dequeue();
            crawler.Post(c => c.Crawl(this, nextUrl));
        }
        else
        {
            // Nothing to do: keep the crawler on standby.
            this.m_crawlers.Push(crawler);
        }
    }

    /// <summary>
    /// Called to request a crawl: dispatches the URL to an idle crawler, or queues
    /// it when all crawlers are busy.
    /// </summary>
    void ICrawlResponseHandler.Crawl(string url)
    {
        if (this.m_crawlers.Count > 0)
        {
            // An idle crawler is available: let it crawl the URL.
            this.m_crawlers.Pop().Post(c => c.Crawl(this, url));
        }
        else
        {
            // All crawlers are busy: remember the URL for later.
            this.m_readyToCrawl.Enqueue(url);
        }
    }

    #endregion
}
/// <summary>
/// Actor that downloads a URL and reports the result to a <see cref="Monitor"/>.
/// Each message is a delegate that is invoked with this instance as an
/// <see cref="ICrawlRequestHandler"/>.
/// </summary>
public class Crawler : Actor<Action<ICrawlRequestHandler>>, ICrawlRequestHandler
{
    /// <summary>Runs the received message against this crawler.</summary>
    /// <param name="message">The delegate to invoke with this instance.</param>
    protected override void Receive(Action<ICrawlRequestHandler> message)
    {
        message(this);
    }

    #region ICrawlRequestHandler Members

    /// <summary>
    /// Downloads <paramref name="url"/> as a string and posts the result to
    /// <paramref name="monitor"/>.
    /// </summary>
    /// <param name="monitor">The monitor that receives the <c>Crawled</c> message.</param>
    /// <param name="url">The URL to download.</param>
    /// <remarks>
    /// If the download fails with a <see cref="WebException"/>, the monitor is still
    /// notified, with <c>null</c> as the content, so that this crawler is not lost
    /// to the monitor after a failed request.
    /// </remarks>
    void ICrawlRequestHandler.Crawl(Monitor monitor, string url)
    {
        string content;
        try
        {
            // WebClient is disposable; release it as soon as the download is done.
            using (var client = new WebClient())
            {
                content = client.DownloadString(url);
            }
        }
        catch (WebException)
        {
            // Report the failure as "no content" instead of letting the exception
            // escape before the monitor has been told that this crawler is done.
            content = null;
        }

        // Send the result to the monitor.
        monitor.Post(m => m.Crawled(this, url, content));
    }

    #endregion
}
/// <summary>Entry point: creates a monitor and posts a batch of crawl requests to it.</summary>
static class Program
{
    /// <summary>Posts the same URL to a five-crawler monitor one thousand times.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        const int crawlerCount = 5;
        const int requestCount = 1000;
        const string targetUrl = "http://www.cnblogs.com/";

        var monitor = new Monitor(crawlerCount);

        int remaining = requestCount;
        while (remaining-- > 0)
        {
            monitor.Post(handler => handler.Crawl(targetUrl));
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment