Simplify Affix::find (#24)

* Rewrite Affix::find to be much simpler

* Rename Affix::find parameters to not be string

* Remove unused `LevMatchingBlock` struct

* Make `Affix` type simpler
This commit is contained in:
Nick Condron 2023-01-21 01:28:33 -05:00 committed by GitHub
parent 6afc535fad
commit 3406c76973
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 22 additions and 77 deletions

View File

@ -40,25 +40,18 @@ pub struct LevEditOp {
pub second_start: usize, /* destination position */
}
#[derive(Debug, PartialEq, Eq)]
pub struct LevMatchingBlock {
pub first_start: usize,
pub second_start: usize,
pub len: usize,
}
pub fn editops_find<T>(query: &[T], choice: &[T]) -> Vec<LevEditOp>
where T: PartialEq {
let string_affix = Affix::find(query, choice);
let Affix {
prefix_len,
suffix_len,
} = Affix::find(query, choice);
let first_string_len = string_affix.first_string_len;
let second_string_len = string_affix.second_string_len;
let prefix_len = string_affix.prefix_len;
let first_string = &query[prefix_len..prefix_len + first_string_len];
let second_string = &choice[prefix_len..prefix_len + second_string_len];
let first_string = &query[prefix_len..query.len() - suffix_len];
let second_string = &choice[prefix_len..choice.len() - suffix_len];
let matrix_columns = first_string_len + 1;
let matrix_rows = second_string_len + 1;
let matrix_columns = first_string.len() + 1;
let matrix_rows = second_string.len() + 1;
// TODO maybe use an actual matrix for readability
let mut cache_matrix: Vec<usize> = vec![0; matrix_rows * matrix_columns];
@ -186,73 +179,25 @@ where
pub struct Affix {
pub prefix_len: usize,
pub first_string_len: usize,
pub second_string_len: usize,
pub suffix_len: usize,
}
impl Affix {
pub fn find<T>(first_string: &[T], second_string: &[T]) -> Affix
pub fn find<T>(s1: &[T], s2: &[T]) -> Affix
where T: PartialEq {
// remove common prefix and suffix (linear vs square runtime for levensthein)
let mut first_iter = first_string.iter();
let mut second_iter = second_string.iter();
let prefix_len = s1.iter()
.zip(s2.iter())
.take_while(|t| t.0 == t.1)
.count();
let suffix_len = s1[prefix_len..].iter()
.rev()
.zip(s2[prefix_len..].iter().rev())
.take_while(|t| t.0 == t.1)
.count();
let mut limit_start = 0;
let mut first_iter_char = first_iter.next();
let mut second_iter_char = second_iter.next();
while first_iter_char.is_some() && first_iter_char == second_iter_char {
first_iter_char = first_iter.next();
second_iter_char = second_iter.next();
limit_start += 1;
}
// save char since the iterator was already consumed
let first_iter_cache = first_iter_char;
let second_iter_cache = second_iter_char;
if second_iter_char.is_some() && first_iter_char.is_some() {
first_iter_char = first_iter.next_back();
second_iter_char = second_iter.next_back();
while first_iter_char.is_some() && first_iter_char == second_iter_char {
first_iter_char = first_iter.next_back();
second_iter_char = second_iter.next_back();
}
}
match (first_iter_char, second_iter_char) {
(None, None) => {
// characters might not match even though they were consumed
let remaining_char = (first_iter_cache != second_iter_cache) as usize;
Affix {
prefix_len: limit_start,
first_string_len: remaining_char,
second_string_len: remaining_char,
}
}
(None, _) => {
let remaining_char =
(first_iter_cache.is_some() && first_iter_cache != second_iter_char) as usize;
Affix {
prefix_len: limit_start,
first_string_len: remaining_char,
second_string_len: second_iter.count() + 1 + remaining_char,
}
}
(_, None) => {
let remaining_char =
(second_iter_cache.is_some() && second_iter_cache != first_iter_char) as usize;
Affix {
prefix_len: limit_start,
first_string_len: first_iter.count() + 1 + remaining_char,
second_string_len: remaining_char,
}
}
_ => Affix {
prefix_len: limit_start,
first_string_len: first_iter.count() + 2,
second_string_len: second_iter.count() + 2,
},
Affix {
prefix_len,
suffix_len,
}
}
}