Last active
November 2, 2016 18:13
-
-
Save mclosson/7ca1004dff86af8edf47f775373a25b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate hyper; | |
extern crate time; | |
use hyper::Url; | |
use hyper::client::*; | |
use std::fs::File; | |
use std::io::BufRead; | |
use std::io::BufReader; | |
use std::sync::{Arc, Mutex}; | |
use std::thread; | |
use time::{PreciseTime}; | |
fn main() { | |
let seedfile = "seedurls.txt"; | |
let thread_count = 4; | |
let mut threads = Vec::new(); | |
let file = File::open(seedfile).unwrap(); | |
let reader = BufReader::new(&file); | |
let seeds: Vec<String> = reader.lines().map(|line| line.unwrap()).collect(); | |
let total_urls = seeds.len(); | |
let silo = Arc::new(Mutex::new(seeds)); | |
let start = PreciseTime::now(); | |
for i in 0..thread_count { | |
let silo = silo.clone(); | |
threads.push(thread::spawn(move || { | |
let client = Client::new(); | |
let mut urls_processed = 0; | |
loop { | |
let url = match silo.lock().unwrap().pop() { | |
Some(url) => url, | |
None => break | |
}; | |
match Url::parse(&url) { | |
Ok(url) => url, | |
Err(url) => panic!("Malformed url: {}", url) | |
}; | |
match client.get(&url).send() { | |
Ok(response) => println!("Thread {}: {} - {}", i, response.status, url), | |
Err(r) => println!("Thread {}: Problem with request! - {}", i, r) | |
}; | |
urls_processed += 1; | |
} | |
urls_processed | |
})); | |
} | |
let mut thread_id = 0; | |
while let Some(thread) = threads.pop() { | |
let urls_processed = thread.join().unwrap(); | |
println!("Thread {} processed {} total URL's", thread_id, urls_processed); | |
thread_id += 1; | |
} | |
let end = PreciseTime::now(); | |
let seconds = start.to(end).num_milliseconds() as f64 / 1000.0; | |
println!("{} threads processed {} URL's in {} seconds.", thread_count, total_urls, seconds); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Compiling crawler v0.1.0 (file:///Users/matt/projects/rust/crawler) | |
Finished debug [unoptimized + debuginfo] target(s) in 0.88 secs | |
Running `target/debug/crawler` | |
Thread 1: Problem with request! - An error in the OpenSSL library: certificate verify failed | |
Thread 1: 200 OK - https://www.rust-lang.org | |
Thread 3: 200 OK - https://epic.org | |
Thread 1: 200 OK - http://www.example.com | |
Thread 2: 200 OK - https://aclu.org | |
Thread 0: 200 OK - https://eff.org | |
Thread 1: 200 OK - https://bing.com | |
Thread 2: 200 OK - https://yahoo.com | |
Thread 1: 200 OK - https://duckduckgo.com | |
Thread 0: 200 OK - https://google.com | |
Thread 2: 200 OK - https://brevarddiapers.com | |
Thread 1: 200 OK - https://mclosson.com | |
Thread 0: 200 OK - https://freebsd.org | |
Thread 3: 200 OK - http://www.floridatoday.com | |
Thread 0 processed 2 total URL's | |
Thread 1 processed 3 total URL's | |
Thread 2 processed 6 total URL's | |
Thread 3 processed 3 total URL's | |
4 threads processed 14 URL's in 3.875 seconds. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment