Created
December 15, 2017 17:10
-
-
Save nightpool/b471cafff0216d822df2ba50d3fa4d07 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashSet; | |
use std::fmt; | |
use std::fs::File; | |
use std::io::BufReader; | |
use std::io::prelude::*; | |
#[derive(Clone, Debug)] | |
enum Edit { | |
None, | |
Add, | |
Del, | |
Sub | |
} | |
struct Problem<'a> { | |
needle: &'a [u8], | |
haystack: &'a [u8], | |
max_edit: usize | |
} | |
impl <'a> Problem<'a> { | |
fn last_index(&self) -> usize { | |
return self.needle.len() - 1; | |
} | |
} | |
type Table = Vec<Vec<(usize, Edit)>>; | |
fn main() { | |
let file = File::open("rosalind_ksim.txt").unwrap(); | |
let mut lines = BufReader::new(file).lines().map(|l| l.unwrap()); | |
let max_edit_str = lines.next().expect("should be 3 lines!"); | |
let needle = lines.next().expect("should be 3 lines!"); | |
let haystack = lines.next().expect("should be 3 lines!"); | |
let max_edit = max_edit_str.parse::<usize>().expect("max_edit should be a string"); | |
let problem = Problem { | |
needle: needle.as_bytes(), | |
haystack: haystack.as_bytes(), | |
max_edit: max_edit | |
}; | |
let mut results = HashSet::new(); | |
for result in search(&problem, false) { | |
results.insert(result); | |
}; | |
let rev_needle: String = needle.chars().rev().collect(); | |
let rev_haystack: String = haystack.chars().rev().collect(); | |
let rev_problem = Problem { | |
needle: rev_needle.as_bytes(), | |
haystack: rev_haystack.as_bytes(), | |
max_edit: max_edit | |
}; | |
search(&rev_problem, true).iter() | |
.map(|&(start, len)| (haystack.len() - (start - 1) - len + 1, len)) | |
.for_each(|i| {results.insert(i);}); | |
for (start, len) in results { | |
println!("{} {}", start, len); | |
} | |
} | |
fn search(problem: &Problem, _debug: bool) -> Vec<(usize, usize)> { | |
let mut table: Table = vec![vec![(0, Edit::None); problem.haystack.len()]; problem.needle.len()]; | |
for (n_index, n) in problem.needle.iter().enumerate() { | |
for (h_index, h) in problem.haystack.iter().enumerate() { | |
let (add_cost, del_cost, sub_cost); | |
{ | |
let prev_row = table.get(n_index - 1).map(|r| r[h_index].0); | |
let prev_col = table[n_index].get(h_index - 1).map(|i| i.0); | |
let prev_row_col = table.get(n_index - 1).map(|i| i.get(h_index - 1).map(|i| i.0)); | |
let match_cost = if n == h { 0 } else { 1 }; | |
add_cost = prev_row.unwrap_or(0) + 1; | |
del_cost = prev_col.unwrap_or(n_index + 1) + 1; | |
sub_cost = prev_row_col.map_or(0, |i| i.unwrap_or(n_index + 1)) + match_cost; | |
} | |
if add_cost < sub_cost && add_cost < del_cost { | |
table[n_index][h_index] = (add_cost, Edit::Add); | |
} else if del_cost < sub_cost { | |
table[n_index][h_index] = (del_cost, Edit::Del); | |
} else { | |
table[n_index][h_index] = (sub_cost, Edit::Sub); | |
} | |
} | |
} | |
let mut result = Vec::new(); | |
for (h_index, &(value, _)) in table[problem.last_index()].iter().enumerate() { | |
if value <= problem.max_edit { | |
let len = reconstruct_len( | |
&table, &problem, | |
problem.last_index(), h_index | |
); | |
result.push((h_index + 2 - len, len)); | |
// let (needle_sub, haystack_sub) = reconstruct_strings(&table, problem, problem.last_index(), h_index); | |
// println!("{:?}", (h_index + 2 - len, len)); | |
// println!("{}/{}\n{}\n{}\n", value.0, len, needle_sub, haystack_sub); | |
} | |
} | |
result | |
} | |
fn reconstruct_len(table: &Table, problem: &Problem, | |
n_index: usize, h_index: usize) -> usize { | |
let value = table.get(n_index) | |
.and_then(|i| i.get(h_index)) | |
.unwrap_or(&(0, Edit::None)); | |
match value.1 { | |
Edit::None => 0, | |
Edit::Add => 0 + reconstruct_len(table, problem, n_index - 1, h_index), | |
Edit::Del => 1 + reconstruct_len(table, problem, n_index, h_index - 1), | |
Edit::Sub => 1 + reconstruct_len(table, problem, n_index - 1, h_index - 1) | |
} | |
} | |
/*** | |
*** Some debugging code | |
***/ | |
#[allow(dead_code)] | |
fn reconstruct_strings(table: &Table, problem: &Problem, n_index: usize, h_index: usize) -> (String, String) { | |
// note-- this is subtly broken because we "fake" h_index = 0 to be (n_index, Edit::Add) | |
// so reconstructions won't look exactly right even though they have the right cost/length | |
let value = table.get(n_index).and_then(|i| i.get(h_index)).unwrap_or(&(0, Edit::None)); | |
// println!("{:?}", value.1); | |
match value.1 { | |
Edit::None => (String::new(), String::new()), | |
Edit::Add => { | |
let (mut top, mut bottom) = reconstruct_strings(table, problem, n_index - 1, h_index); | |
top.push(char::from(problem.needle[n_index])); | |
bottom.push('-'); | |
(top, bottom) | |
}, | |
Edit::Del => { | |
let (mut top, mut bottom) = reconstruct_strings(table, problem, n_index, h_index - 1); | |
top.push('-'); | |
bottom.push(char::from(problem.haystack[h_index])); | |
(top, bottom) | |
}, | |
Edit::Sub => { | |
let (mut top, mut bottom) = reconstruct_strings(table, problem, n_index - 1, h_index - 1); | |
top.push(char::from(problem.needle[n_index])); | |
bottom.push(char::from(problem.haystack[h_index])); | |
(top, bottom) | |
} | |
} | |
} | |
impl fmt::Display for Edit { | |
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
write!(f, "{}", match self { | |
&Edit::Add => "^", | |
&Edit::Del => "<", | |
&Edit::Sub => "\\", | |
&Edit::None => "*", | |
}) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment