Skip to content

Instantly share code, notes, and snippets.

@jayhuang75
Last active September 13, 2020 22:26
Show Gist options
  • Save jayhuang75/15dbeea7ef03f5fff0d355e1d7dbefe7 to your computer and use it in GitHub Desktop.
Save jayhuang75/15dbeea7ef03f5fff0d355e1d7dbefe7 to your computer and use it in GitHub Desktop.
Rust: parsing MapReduce and Hive logs
// Regexes used to pick out candidate metric lines. `lazy_static!` compiles each one on first
// access; the `unwrap()` calls would panic at that point if a pattern failed to compile.
lazy_static! {
// Matches counter-style `name=digits` text anywhere in a line: one or more ASCII letters or
// parentheses, then `=`, then one or more digits (e.g. `Foo(bar)=123`).
static ref MAP_REDUCE: Regex = Regex::new(r"[a-zA-Z\(\)]+=[0-9]+").unwrap();
// `(?x)` turns on verbose mode (whitespace in the pattern is ignored); the pattern has no
// whitespace, so the flag has no effect here. The `*` applies only to the final `y`, so this
// matches `entit` followed by zero or more `y` — i.e. any line containing `entit`.
// NOTE(review): a plain `entity` was presumably intended — confirm before changing, since it
// would alter which lines are selected (e.g. lines containing `entities`).
static ref HIVE: Regex = Regex::new(r"(?x)entity*").unwrap();
}
/// Dumps the path `app.app_id` with `hdfs dfs -cat`, extracts the metric lines from the output
/// and sends them over `tx` as a `BatchCTL` together with a copy of `app`'s fields.
///
/// A line is kept when it matches `HIVE` or `MAP_REDUCE` and contains none of the noise
/// markers listed in `NOISE_MARKERS`. Every `U+0001` character is then removed from the line.
/// If the cleaned line contains `entity` it is stored verbatim; otherwise it is trimmed, split
/// on `=` and stored as the JSON array of the resulting pieces.
///
/// # Errors
///
/// Returns an error if the `hdfs` process cannot be spawned, if a line cannot be serialised
/// to JSON, or if sending on `tx` fails.
#[allow(dead_code)]
pub fn run(tx: channel::Sender<BatchCTL>, app: &NewApp) -> Result<(), Box<dyn Error>> {
    // Substrings that mark a line as noise even when one of the regexes matches it.
    const NOISE_MARKERS: [&str; 5] = [
        "java.class.path",
        "[WARN]",
        "INFO",
        "PSYoungGen",
        "fontsize",
    ];

    let output = Command::new("hdfs")
        .arg("dfs")
        .arg("-cat")
        .arg(&app.app_id)
        .output()?;
    // NOTE(review): the exit status of `hdfs` is not inspected, so a failed `-cat` yields an
    // empty metrics list rather than an error. Kept as-is to preserve existing behaviour.
    let stdout = String::from_utf8_lossy(&output.stdout);

    let mut metrics_value: Vec<String> = Vec::new();
    for raw_line in stdout.lines() {
        // The regexes are checked against the raw line, before U+0001 is stripped.
        let is_candidate = HIVE.is_match(raw_line) || MAP_REDUCE.is_match(raw_line);
        if !is_candidate || NOISE_MARKERS.iter().any(|&marker| raw_line.contains(marker)) {
            continue;
        }

        let line = raw_line.replace("\u{1}", "");
        if line.contains("entity") {
            metrics_value.push(line);
        } else {
            // Every `=`-separated piece is kept, so `a=b=c` serialises as ["a","b","c"].
            let parts: Vec<&str> = line.trim().split('=').collect();
            metrics_value.push(serde_json::to_string(&parts)?);
        }
    }

    let res = BatchCTL {
        app: NewApp {
            app_id: app.app_id.to_string(),
            mtime: app.mtime.to_string(),
            user_id: app.user_id.to_string(),
        },
        metrics: metrics_value,
    };
    tx.send(res)?;
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment