Skip to content

Instantly share code, notes, and snippets.

@raibima
Last active September 25, 2021 14:31
Show Gist options
  • Select an option

  • Save raibima/8f5a667616c7179fc9fc994f38c2eb5d to your computer and use it in GitHub Desktop.

Select an option

Save raibima/8f5a667616c7179fc9fc994f38c2eb5d to your computer and use it in GitHub Desktop.
A Rust program that scrapes the list of Traveloka's marketing job vacancies, compiles it, and sends the result via email.
use failure::Fallible;
use headless_chrome::Browser;
use lettre::Transport;
use lettre::{smtp::authentication::Credentials, SmtpClient};
use lettre_email::EmailBuilder;
use serde::{Deserialize, Serialize};
use std::env;
use std::time::{SystemTime, UNIX_EPOCH};
/// Program entry point: runs the scrape job and forwards its outcome as the
/// process result.
fn main() -> Fallible<()> {
    scrape()?;
    Ok(())
}
// Careers listing page that `scrape` navigates to (marketing department,
// per the comments in `scrape`).
const URL: &str = "https://www.traveloka.com/en-id/careers/vacancies?department_id=4010566003&page=1&limit=20&location=1&job_type=Full-time";
// CSS selector for the vacancy list element that JS_FUNC is called on.
const LIST_SELECTOR: &str = "ul[class*=style_result_list__]";
// CSS selector waited on after navigation to detect that vacancies have loaded.
const ITEM_SELECTOR: &str = "h3[class*=style_vacancy_title__]";
// Prefix prepended to each scraped link fragment to form the final vacancy link.
const BASE_URL: &str = "https://boards.greenhouse.io/traveloka/jobs/";
// JavaScript function called on the element matched by LIST_SELECTOR.
// It walks `this.children` and returns one JSON string per child with:
//   - title: the inner text of the child's first <h3>
//   - link:  the regex capture taken from the first <a>'s href, i.e. the
//            character after "jobs/" plus any digits that follow it
//            (presumably the job id; verify against the live markup)
// The doubled backslashes are Rust string escapes; the JS engine sees
// the regex /jobs\/(.\d*)/.
const JS_FUNC: &str = "
function() {
const children = Array.from(this.children);
return children.map(child => JSON.stringify({
title: child.querySelector('h3').innerText,
link: child.querySelector('a').getAttribute('href').match(/jobs\\/(.\\d*)/)[1],
}));
}
";
// Email
// NOTE(review): the values below look like placeholders that must be replaced
// with real addresses / subject / server before running; confirm.
// Sender address passed to the email builder.
const EMAIL_FROM: &str = "EMAIL_FROM";
// Sender display name passed alongside EMAIL_FROM.
const EMAIL_FROM_NAME: &str = "EMAIL_FROM_NAME";
// Recipient address.
const EMAIL_TO: &str = "EMAIL_TO";
// Subject line of the outgoing email.
const EMAIL_SUBJECT: &str = "EMAIL_SUBJECT";
// Server name handed to `SmtpClient::new_simple`.
const SMTP_SERVER: &str = "SMTP_SERVER";
// Upper bound for the retry counter in `scrape`'s page-load loop; the loop
// runs while the counter is <= this value.
const MAX_RETRIES: u8 = 5;
/// A single job vacancy, deserialized from the JSON strings produced by
/// `JS_FUNC`.
#[derive(Serialize, Deserialize, Debug)]
struct Vacancy {
    /// Vacancy title (inner text of the entry's `<h3>`).
    title: String,
    /// Initially the link fragment captured by `JS_FUNC`; `scrape` rewrites
    /// it to `BASE_URL` followed by that fragment.
    link: String,
}
fn scrape() -> Fallible<()> {
println!("Begin scraping...");
let start = now();
// CLI flag handling:
// --no-send-email: scrape without sending email
let args: Vec<String> = env::args().collect();
let flag = args.get(1);
let no_send_email = flag.is_some() && flag.unwrap() == "--no-send-email";
// Init headless chrome
let browser = Browser::default()?;
let tab = browser.wait_for_initial_tab()?;
let mut num_retries: u8 = 0;
while num_retries <= MAX_RETRIES {
// Navigate to the marketing career page
// and wait till the vacancy list is loaded.
// If error happens, retry till MAX_RETRIES.
let element = tab.navigate_to(URL)?.wait_for_element(ITEM_SELECTOR);
if element.is_ok() {
break;
}
num_retries += 1;
}
// Run some JS code to get the vacancy data
// (using the DOM API directly)
let js_preview = tab
.find_element(LIST_SELECTOR)?
.call_js_fn(JS_FUNC, false)?
.preview;
let mut vacancies: Vec<Vacancy> = Vec::new();
// Here we try to read / deserialize values returned
// from the JS code and covert it to a Vec<Vacancy>
match js_preview {
None => println!("No data found"),
Some(preview) => {
for prop in preview.properties {
match prop.value {
None => continue,
Some(value) => {
let mut vacancy: Vacancy = serde_json::from_str(&value)?;
vacancy.link = format!("{}{}", BASE_URL, vacancy.link);
vacancies.push(vacancy);
}
}
}
}
}
println!("Scraping done in {} ms", now() - start);
// if --no-send-email is given
// exit the program without sending email
if no_send_email {
return Ok(())
}
println!("Sending mail...");
let start = now();
let mail_content = vacancies
.iter()
.enumerate()
.map(|(i, vac)| format!("{}. {} - {}", i + 1, vac.title, vac.link))
.collect::<Vec<String>>()
.join("\n");
// Send email
let email = EmailBuilder::new()
.from((EMAIL_FROM, EMAIL_FROM_NAME))
.to(EMAIL_TO)
.subject(EMAIL_SUBJECT)
.body(mail_content)
.build()
.unwrap();
let creds = Credentials::new(
env::var("SMTP_MAIL_USERNAME").unwrap(),
env::var("SMTP_MAIL_PASSWORD").unwrap(),
);
let mut mailer = SmtpClient::new_simple(SMTP_SERVER)
.unwrap()
.credentials(creds)
.transport();
match mailer.send(email.into()) {
Ok(_) => {
println!("Email sent in {} ms", now() - start);
}
Err(e) => println!("Error: {}", e),
}
Ok(())
}
/// Returns the current wall-clock time as whole milliseconds elapsed since
/// the Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reports a time earlier than the Unix epoch.
fn now() -> u128 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_millis()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment