mirror of https://github.com/encounter/objdiff.git
Simplify Affix::find (#24)
* Rewrite `Affix::find` to be much simpler
* Rename `Affix::find` parameters so they no longer imply string inputs
* Remove unused `LevMatchingBlock` struct
* Make `Affix` type simpler
This commit is contained in:
parent
6afc535fad
commit
3406c76973
|
@ -40,25 +40,18 @@ pub struct LevEditOp {
|
||||||
pub second_start: usize, /* destination position */
|
pub second_start: usize, /* destination position */
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
pub struct LevMatchingBlock {
|
|
||||||
pub first_start: usize,
|
|
||||||
pub second_start: usize,
|
|
||||||
pub len: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn editops_find<T>(query: &[T], choice: &[T]) -> Vec<LevEditOp>
|
pub fn editops_find<T>(query: &[T], choice: &[T]) -> Vec<LevEditOp>
|
||||||
where T: PartialEq {
|
where T: PartialEq {
|
||||||
let string_affix = Affix::find(query, choice);
|
let Affix {
|
||||||
|
prefix_len,
|
||||||
|
suffix_len,
|
||||||
|
} = Affix::find(query, choice);
|
||||||
|
|
||||||
let first_string_len = string_affix.first_string_len;
|
let first_string = &query[prefix_len..query.len() - suffix_len];
|
||||||
let second_string_len = string_affix.second_string_len;
|
let second_string = &choice[prefix_len..choice.len() - suffix_len];
|
||||||
let prefix_len = string_affix.prefix_len;
|
|
||||||
let first_string = &query[prefix_len..prefix_len + first_string_len];
|
|
||||||
let second_string = &choice[prefix_len..prefix_len + second_string_len];
|
|
||||||
|
|
||||||
let matrix_columns = first_string_len + 1;
|
let matrix_columns = first_string.len() + 1;
|
||||||
let matrix_rows = second_string_len + 1;
|
let matrix_rows = second_string.len() + 1;
|
||||||
|
|
||||||
// TODO maybe use an actual matrix for readability
|
// TODO maybe use an actual matrix for readability
|
||||||
let mut cache_matrix: Vec<usize> = vec![0; matrix_rows * matrix_columns];
|
let mut cache_matrix: Vec<usize> = vec![0; matrix_rows * matrix_columns];
|
||||||
|
@ -186,73 +179,25 @@ where
|
||||||
|
|
||||||
pub struct Affix {
|
pub struct Affix {
|
||||||
pub prefix_len: usize,
|
pub prefix_len: usize,
|
||||||
pub first_string_len: usize,
|
pub suffix_len: usize,
|
||||||
pub second_string_len: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Affix {
|
impl Affix {
|
||||||
pub fn find<T>(first_string: &[T], second_string: &[T]) -> Affix
|
pub fn find<T>(s1: &[T], s2: &[T]) -> Affix
|
||||||
where T: PartialEq {
|
where T: PartialEq {
|
||||||
// remove common prefix and suffix (linear vs square runtime for levensthein)
|
let prefix_len = s1.iter()
|
||||||
let mut first_iter = first_string.iter();
|
.zip(s2.iter())
|
||||||
let mut second_iter = second_string.iter();
|
.take_while(|t| t.0 == t.1)
|
||||||
|
.count();
|
||||||
|
let suffix_len = s1[prefix_len..].iter()
|
||||||
|
.rev()
|
||||||
|
.zip(s2[prefix_len..].iter().rev())
|
||||||
|
.take_while(|t| t.0 == t.1)
|
||||||
|
.count();
|
||||||
|
|
||||||
let mut limit_start = 0;
|
Affix {
|
||||||
|
prefix_len,
|
||||||
let mut first_iter_char = first_iter.next();
|
suffix_len,
|
||||||
let mut second_iter_char = second_iter.next();
|
|
||||||
while first_iter_char.is_some() && first_iter_char == second_iter_char {
|
|
||||||
first_iter_char = first_iter.next();
|
|
||||||
second_iter_char = second_iter.next();
|
|
||||||
limit_start += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// save char since the iterator was already consumed
|
|
||||||
let first_iter_cache = first_iter_char;
|
|
||||||
let second_iter_cache = second_iter_char;
|
|
||||||
|
|
||||||
if second_iter_char.is_some() && first_iter_char.is_some() {
|
|
||||||
first_iter_char = first_iter.next_back();
|
|
||||||
second_iter_char = second_iter.next_back();
|
|
||||||
while first_iter_char.is_some() && first_iter_char == second_iter_char {
|
|
||||||
first_iter_char = first_iter.next_back();
|
|
||||||
second_iter_char = second_iter.next_back();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
match (first_iter_char, second_iter_char) {
|
|
||||||
(None, None) => {
|
|
||||||
// characters might not match even though they were consumed
|
|
||||||
let remaining_char = (first_iter_cache != second_iter_cache) as usize;
|
|
||||||
Affix {
|
|
||||||
prefix_len: limit_start,
|
|
||||||
first_string_len: remaining_char,
|
|
||||||
second_string_len: remaining_char,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(None, _) => {
|
|
||||||
let remaining_char =
|
|
||||||
(first_iter_cache.is_some() && first_iter_cache != second_iter_char) as usize;
|
|
||||||
Affix {
|
|
||||||
prefix_len: limit_start,
|
|
||||||
first_string_len: remaining_char,
|
|
||||||
second_string_len: second_iter.count() + 1 + remaining_char,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(_, None) => {
|
|
||||||
let remaining_char =
|
|
||||||
(second_iter_cache.is_some() && second_iter_cache != first_iter_char) as usize;
|
|
||||||
Affix {
|
|
||||||
prefix_len: limit_start,
|
|
||||||
first_string_len: first_iter.count() + 1 + remaining_char,
|
|
||||||
second_string_len: remaining_char,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => Affix {
|
|
||||||
prefix_len: limit_start,
|
|
||||||
first_string_len: first_iter.count() + 2,
|
|
||||||
second_string_len: second_iter.count() + 2,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue