Last active
September 25, 2021 14:31
-
-
Save raibima/8f5a667616c7179fc9fc994f38c2eb5d to your computer and use it in GitHub Desktop.
A Rust program to get the list of Traveloka's marketing job vacancies, compile and send the result via email.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use failure::Fallible; | |
| use headless_chrome::Browser; | |
| use lettre::Transport; | |
| use lettre::{smtp::authentication::Credentials, SmtpClient}; | |
| use lettre_email::EmailBuilder; | |
| use serde::{Deserialize, Serialize}; | |
| use std::env; | |
| use std::time::{SystemTime, UNIX_EPOCH}; | |
| fn main() -> Fallible<()> { | |
| scrape() | |
| } | |
/// Careers page that lists the marketing vacancies to scrape. The department,
/// page, limit, location and job-type filters are fixed in the query string.
const URL: &str = "https://www.traveloka.com/en-id/careers/vacancies?department_id=4010566003&page=1&limit=20&location=1&job_type=Full-time";
/// CSS selector for the `ul` whose class contains `style_result_list__`;
/// `scrape` runs `JS_FUNC` on the element it matches.
const LIST_SELECTOR: &str = "ul[class*=style_result_list__]";
/// CSS selector for an `h3` whose class contains `style_vacancy_title__`;
/// `scrape` waits for it to appear before reading the list.
const ITEM_SELECTOR: &str = "h3[class*=style_vacancy_title__]";
/// Prefix that `scrape` prepends to each scraped job id to build the vacancy link.
const BASE_URL: &str = "https://boards.greenhouse.io/traveloka/jobs/";
/// JavaScript function declaration passed to `call_js_fn` on the list element.
/// It maps every child of `this` to a JSON string with two keys: `title`
/// (the `h3` text) and `link` (the part of the anchor's `href` captured after
/// `jobs/`).
// NOTE(review): the JS throws if a child has no `h3`/`a`, or if the `href`
// does not match the regex (`match` returns null) — confirm the page markup.
const JS_FUNC: &str = "
function() {
const children = Array.from(this.children);
return children.map(child => JSON.stringify({
title: child.querySelector('h3').innerText,
link: child.querySelector('a').getAttribute('href').match(/jobs\\/(.\\d*)/)[1],
}));
}
";
// NOTE(review): the five values below look like placeholders that must be
// replaced with real addresses / host names before the program can send mail
// — confirm.
/// Sender address passed to `EmailBuilder::from`.
const EMAIL_FROM: &str = "EMAIL_FROM";
/// Sender display name passed alongside `EMAIL_FROM`.
const EMAIL_FROM_NAME: &str = "EMAIL_FROM_NAME";
/// Recipient address passed to `EmailBuilder::to`.
const EMAIL_TO: &str = "EMAIL_TO";
/// Subject line of the outgoing email.
const EMAIL_SUBJECT: &str = "EMAIL_SUBJECT";
/// SMTP server domain passed to `SmtpClient::new_simple`.
const SMTP_SERVER: &str = "SMTP_SERVER";
/// Number of retries allowed after the first attempt at loading the vacancy page.
const MAX_RETRIES: u8 = 5;
/// One job vacancy, deserialized from a JSON string produced by `JS_FUNC`.
#[derive(Serialize, Deserialize, Debug)]
struct Vacancy {
    /// Vacancy title, taken from the list item's `h3` text.
    title: String,
    /// Job id captured from the item's link when first deserialized; `scrape`
    /// then rewrites it to the full URL by prefixing `BASE_URL`.
    link: String,
}
| fn scrape() -> Fallible<()> { | |
| println!("Begin scraping..."); | |
| let start = now(); | |
| // CLI flag handling: | |
| // --no-send-email: scrape without sending email | |
| let args: Vec<String> = env::args().collect(); | |
| let flag = args.get(1); | |
| let no_send_email = flag.is_some() && flag.unwrap() == "--no-send-email"; | |
| // Init headless chrome | |
| let browser = Browser::default()?; | |
| let tab = browser.wait_for_initial_tab()?; | |
| let mut num_retries: u8 = 0; | |
| while num_retries <= MAX_RETRIES { | |
| // Navigate to the marketing career page | |
| // and wait till the vacancy list is loaded. | |
| // If error happens, retry till MAX_RETRIES. | |
| let element = tab.navigate_to(URL)?.wait_for_element(ITEM_SELECTOR); | |
| if element.is_ok() { | |
| break; | |
| } | |
| num_retries += 1; | |
| } | |
| // Run some JS code to get the vacancy data | |
| // (using the DOM API directly) | |
| let js_preview = tab | |
| .find_element(LIST_SELECTOR)? | |
| .call_js_fn(JS_FUNC, false)? | |
| .preview; | |
| let mut vacancies: Vec<Vacancy> = Vec::new(); | |
| // Here we try to read / deserialize values returned | |
| // from the JS code and covert it to a Vec<Vacancy> | |
| match js_preview { | |
| None => println!("No data found"), | |
| Some(preview) => { | |
| for prop in preview.properties { | |
| match prop.value { | |
| None => continue, | |
| Some(value) => { | |
| let mut vacancy: Vacancy = serde_json::from_str(&value)?; | |
| vacancy.link = format!("{}{}", BASE_URL, vacancy.link); | |
| vacancies.push(vacancy); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| println!("Scraping done in {} ms", now() - start); | |
| // if --no-send-email is given | |
| // exit the program without sending email | |
| if no_send_email { | |
| return Ok(()) | |
| } | |
| println!("Sending mail..."); | |
| let start = now(); | |
| let mail_content = vacancies | |
| .iter() | |
| .enumerate() | |
| .map(|(i, vac)| format!("{}. {} - {}", i + 1, vac.title, vac.link)) | |
| .collect::<Vec<String>>() | |
| .join("\n"); | |
| // Send email | |
| let email = EmailBuilder::new() | |
| .from((EMAIL_FROM, EMAIL_FROM_NAME)) | |
| .to(EMAIL_TO) | |
| .subject(EMAIL_SUBJECT) | |
| .body(mail_content) | |
| .build() | |
| .unwrap(); | |
| let creds = Credentials::new( | |
| env::var("SMTP_MAIL_USERNAME").unwrap(), | |
| env::var("SMTP_MAIL_PASSWORD").unwrap(), | |
| ); | |
| let mut mailer = SmtpClient::new_simple(SMTP_SERVER) | |
| .unwrap() | |
| .credentials(creds) | |
| .transport(); | |
| match mailer.send(email.into()) { | |
| Ok(_) => { | |
| println!("Email sent in {} ms", now() - start); | |
| } | |
| Err(e) => println!("Error: {}", e), | |
| } | |
| Ok(()) | |
| } | |
/// Returns the current wall-clock time in milliseconds since the Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reports a time earlier than the Unix epoch.
fn now() -> u128 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_millis()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment