Last active
December 7, 2016 16:32
-
-
Save paulohrpinheiro/08e44d04d1c516efd50484d1d6f66916 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! Crawler — My own crawler in Rust!
extern crate hyper; // non-standard library (external crate)
use std::env; // command-line arguments via env::args
use std::io::{Read, Write}; // file/stream I/O traits
use std::fs::File; // creating files
use std::path::Path; // deriving the local file name
use std::thread; // concurrency
/// Value sent in the `User-Agent` header of every request.
const ROBOT_NAME: &'static str = "paulohrpinheiro-crawler";

/// Size, in bytes, of the buffer used to copy a response body to disk.
const BUFFER_SIZE: usize = 512;
/// Reasons a single download can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DownloadError {
    /// The HTTP request could not be sent or no response was obtained.
    CantGet,
    /// Reading the response body failed.
    CantRead,
    /// Writing the downloaded data to the local file failed.
    CantWrite,
    /// The local file could not be created.
    CantCreate,
    /// No usable file name could be derived from the URL.
    InvalidName,
}

impl std::fmt::Display for DownloadError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let message = match *self {
            DownloadError::CantGet => "could not fetch the URL",
            DownloadError::CantRead => "could not read the response body",
            DownloadError::CantWrite => "could not write to the local file",
            DownloadError::CantCreate => "could not create the local file",
            DownloadError::InvalidName => "could not derive a file name from the URL",
        };
        f.write_str(message)
    }
}

// Lets the type be boxed as a standard error and used with `?` in callers
// that return a generic error type.
impl std::error::Error for DownloadError {}
fn download_content(url: &str) -> Result<String, DownloadError> { | |
// Somos um respeitável e conhecido bot | |
let mut headers = hyper::header::Headers::new(); | |
headers.set(hyper::header::UserAgent(ROBOT_NAME.to_string())); | |
// Pega cabeçalhos (e possivelmente algum dado já) | |
let client = hyper::Client::new(); | |
let mut response = match client.get(url).headers(headers).send() { | |
Err(_) => return Err(DownloadError::CantGet), | |
Ok(r) => r, | |
}; | |
let local_filename = filename(url)?; | |
let mut localfile = create_localfile(&local_filename)?; | |
// pega conteúdo e salva em arquivo | |
loop { | |
let mut buffer = [0; BUFFER_SIZE]; | |
// conseguimos ler? | |
let bytes_read = match response.read(&mut buffer) { | |
Err(_) => return Err(DownloadError::CantRead), | |
Ok(b) => b, | |
}; | |
// não tem mais nada? | |
if bytes_read == 0 { | |
break; | |
} | |
// vamos tentar escrever o que pegamos | |
let bytes_write = match localfile.write(&buffer[0..bytes_read]) { | |
Err(_) => return Err(DownloadError::CantWrite), | |
Ok(b) => b, | |
}; | |
// conseguiu escrever o que leu? | |
if bytes_write != bytes_read { | |
return Err(DownloadError::CantWrite); | |
} | |
} | |
return Ok(local_filename); | |
} | |
fn filename(url: &str) -> Result<String, DownloadError> { | |
match Path::new(&url).file_name() { | |
None => Err(DownloadError::InvalidName), | |
Some(name) => { | |
match name.to_str() { | |
None => Err(DownloadError::InvalidName), | |
Some(r) => Ok(String::from(r)), | |
} | |
} | |
} | |
} | |
fn create_localfile(name: &str) -> Result<File, DownloadError> { | |
match File::create(&name) { | |
Err(_) => Err(DownloadError::CantCreate), | |
Ok(f) => Ok(f), | |
} | |
} | |
fn main() { | |
// Pega os argumentos, mas ignorando o primeiro | |
// que é o nome do programa. | |
let mut args = env::args(); | |
args.next(); | |
// Vetor para as threads que serão criadas | |
let mut workers = vec![]; | |
// Pega o conteúdo de cada URL | |
for url in args { | |
// Cria thread para cada URL | |
workers.push(thread::spawn(move || { | |
print!("{} - ", url); | |
match download_content(&url) { | |
Err(e) => println!("ERR: {:?}", e), | |
Ok(f) => println!("OK: saved as {:?}", f), | |
} | |
print!("\n\n"); | |
})); | |
} | |
// Espera as threads terminarem suas tarefas | |
for worker in workers { | |
let _ = worker.join(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment