Created
August 17, 2018 11:32
-
-
Save alastairparagas/7cf0a606ef1404694d470d8d1e7cad58 to your computer and use it in GitHub Desktop.
Clustering and Ranking Insurgency Attacks in Afghanistan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #![feature(proc_macro, wasm_custom_section, wasm_import_module)] | |
| #![feature(use_extern_macros)] | |
| extern crate wasm_bindgen; | |
| extern crate regex; | |
| extern crate serde_json; | |
| use wasm_bindgen::prelude::*; | |
| use std::collections::HashMap; | |
| use regex::Regex; | |
| use std::u32::MAX; | |
| #[wasm_bindgen] | |
| pub fn clusterer_and_rankerer(file_contents: &str) -> String { | |
| let mut file_contents_iterator = file_contents.lines(); | |
| let normalized_city_regex = Regex::new(r"/^[A-Za-z]+$/").unwrap(); | |
| let row_titles: Vec<&str> = match file_contents_iterator.next() { | |
| Some(line) => line.split(',').collect(), | |
| None => vec![] | |
| }; | |
| let afghan_cities_incidents: HashMap<String, Vec<HashMap<&str, &str>>> = match row_titles.len() { | |
| 0 => HashMap::new(), | |
| _ => file_contents_iterator | |
| .map(|line| | |
| row_titles | |
| .iter() | |
| .zip(line.split(',')) | |
| .map(|(x, y)| (*x, y)) | |
| .collect::<HashMap<&str, &str>>() | |
| ) | |
| .filter(|incident_record| match incident_record.get("country") { | |
| Some(country) => *country == "4", | |
| None => false | |
| }) | |
| .fold(HashMap::new(), |mut city_incidents, incident_record| match incident_record.get("city") { | |
| Some(city) => { | |
| let normalized_city = normalized_city_regex.replace_all(city, "").into_owned().to_lowercase(); | |
| city_incidents | |
| .entry(normalized_city) | |
| .or_insert(vec![]) | |
| .push(incident_record.clone()); | |
| return city_incidents; | |
| }, | |
| None => { | |
| city_incidents.insert(String::from("unclassified"), vec![]); | |
| return city_incidents; | |
| } | |
| }) | |
| }; | |
| let starting_tuplet = (MAX, 0); | |
| let afghan_city_minmaxincident_pair: (u32, u32) = afghan_cities_incidents | |
| .iter() | |
| .fold(starting_tuplet, |(least_incidents_sofar, greatest_incidents_sofar), (_, incidents_list)| { | |
| let current_city_incidents = incidents_list.len() as u32; | |
| let mut true_least_incidents = least_incidents_sofar; | |
| let mut true_greatest_incidents = greatest_incidents_sofar; | |
| if current_city_incidents > greatest_incidents_sofar { | |
| true_greatest_incidents = current_city_incidents; | |
| } | |
| if current_city_incidents < least_incidents_sofar { | |
| true_least_incidents = current_city_incidents; | |
| } | |
| return (true_least_incidents, true_greatest_incidents); | |
| }); | |
| if afghan_city_minmaxincident_pair == starting_tuplet { | |
| return String::from("null"); | |
| } | |
| let mut rank_step_size = (afghan_city_minmaxincident_pair.1 - afghan_city_minmaxincident_pair.0) / 5; | |
| if rank_step_size == 0 { | |
| rank_step_size = 1; | |
| } | |
| let clustered_and_ranked_incidents = afghan_cities_incidents | |
| .iter() | |
| .map(|(city, incidents_list)| (city.to_owned(), incidents_list.len() as u32 / rank_step_size)) | |
| .collect::<HashMap<String, u32>>(); | |
| return match serde_json::to_string(&clustered_and_ranked_incidents) { | |
| Ok(json) => json, | |
| Err(_) => String::from("null") | |
| }; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment