mirror of https://github.com/encounter/objdiff.git
163 lines
5.2 KiB
Rust
163 lines
5.2 KiB
Rust
/// Adapted from https://crates.io/crates/rapidfuzz
|
|
// Copyright 2020 maxbachmann
|
|
//
|
|
// Permission is hereby granted, free of charge, to any
|
|
// person obtaining a copy of this software and associated
|
|
// documentation files (the "Software"), to deal in the
|
|
// Software without restriction, including without
|
|
// limitation the rights to use, copy, modify, merge,
|
|
// publish, distribute, sublicense, and/or sell copies of
|
|
// the Software, and to permit persons to whom the Software
|
|
// is furnished to do so, subject to the following
|
|
// conditions:
|
|
//
|
|
// The above copyright notice and this permission notice
|
|
// shall be included in all copies or substantial portions
|
|
// of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
|
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
|
// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
|
// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
|
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS IN THE SOFTWARE.
|
|
|
|
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
|
|
pub enum LevEditType {
|
|
Replace,
|
|
Insert,
|
|
Delete,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
pub struct LevEditOp {
|
|
pub op_type: LevEditType, /* editing operation type */
|
|
pub first_start: usize, /* source block position */
|
|
pub second_start: usize, /* destination position */
|
|
}
|
|
|
|
pub fn editops_find<T>(query: &[T], choice: &[T]) -> Vec<LevEditOp>
|
|
where T: PartialEq {
|
|
let Affix { prefix_len, suffix_len } = Affix::find(query, choice);
|
|
|
|
let first_string = &query[prefix_len..query.len() - suffix_len];
|
|
let second_string = &choice[prefix_len..choice.len() - suffix_len];
|
|
|
|
let matrix_columns = first_string.len() + 1;
|
|
let matrix_rows = second_string.len() + 1;
|
|
|
|
// TODO maybe use an actual matrix for readability
|
|
let mut cache_matrix: Vec<usize> = vec![0; matrix_rows * matrix_columns];
|
|
for (i, elem) in cache_matrix.iter_mut().enumerate().take(matrix_rows) {
|
|
*elem = i;
|
|
}
|
|
for i in 1..matrix_columns {
|
|
cache_matrix[matrix_rows * i] = i;
|
|
}
|
|
|
|
for (i, char1) in first_string.iter().enumerate() {
|
|
let mut prev = i * matrix_rows;
|
|
let current = prev + matrix_rows;
|
|
let mut x = i + 1;
|
|
for (p, char2p) in second_string.iter().enumerate() {
|
|
let mut c3 = cache_matrix[prev] + (char1 != char2p) as usize;
|
|
prev += 1;
|
|
x += 1;
|
|
if x >= c3 {
|
|
x = c3;
|
|
}
|
|
c3 = cache_matrix[prev] + 1;
|
|
if x > c3 {
|
|
x = c3;
|
|
}
|
|
cache_matrix[current + 1 + p] = x;
|
|
}
|
|
}
|
|
editops_from_cost_matrix(matrix_columns, matrix_rows, prefix_len, cache_matrix)
|
|
}
|
|
|
|
fn editops_from_cost_matrix(
|
|
len1: usize,
|
|
len2: usize,
|
|
prefix_len: usize,
|
|
cache_matrix: Vec<usize>,
|
|
) -> Vec<LevEditOp> {
|
|
let mut ops = Vec::with_capacity(cache_matrix[len1 * len2 - 1]);
|
|
let mut dir = 0;
|
|
let mut i = len1 - 1;
|
|
let mut j = len2 - 1;
|
|
let mut p = len1 * len2 - 1;
|
|
|
|
//TODO this is still pretty ugly
|
|
while i > 0 || j > 0 {
|
|
let current_value = cache_matrix[p];
|
|
|
|
// More than one operation can be possible at a time. We use `dir` to
|
|
// decide when ambiguous.
|
|
let is_insert = j > 0 && current_value == cache_matrix[p - 1] + 1;
|
|
let is_delete = i > 0 && current_value == cache_matrix[p - len2] + 1;
|
|
let is_replace = i > 0 && j > 0 && current_value == cache_matrix[p - len2 - 1] + 1;
|
|
|
|
let (op_type, new_dir) = match (dir, is_insert, is_delete, is_replace) {
|
|
(_, false, false, false) => (None, 0),
|
|
(-1, true, _, _) => (Some(LevEditType::Insert), -1),
|
|
(1, _, true, _) => (Some(LevEditType::Delete), 1),
|
|
(_, _, _, true) => (Some(LevEditType::Replace), 0),
|
|
(0, true, _, _) => (Some(LevEditType::Insert), -1),
|
|
(0, _, true, _) => (Some(LevEditType::Delete), 1),
|
|
_ => panic!("something went terribly wrong"),
|
|
};
|
|
|
|
match new_dir {
|
|
-1 => {
|
|
j -= 1;
|
|
p -= 1;
|
|
}
|
|
1 => {
|
|
i -= 1;
|
|
p -= len2;
|
|
}
|
|
0 => {
|
|
i -= 1;
|
|
j -= 1;
|
|
p -= len2 + 1;
|
|
}
|
|
_ => panic!("something went terribly wrong"),
|
|
};
|
|
dir = new_dir;
|
|
|
|
if let Some(op_type) = op_type {
|
|
ops.insert(0, LevEditOp {
|
|
op_type,
|
|
first_start: i + prefix_len,
|
|
second_start: j + prefix_len,
|
|
});
|
|
}
|
|
}
|
|
|
|
ops
|
|
}
|
|
|
|
pub struct Affix {
|
|
pub prefix_len: usize,
|
|
pub suffix_len: usize,
|
|
}
|
|
|
|
impl Affix {
|
|
pub fn find<T>(s1: &[T], s2: &[T]) -> Affix
|
|
where T: PartialEq {
|
|
let prefix_len = s1.iter().zip(s2.iter()).take_while(|t| t.0 == t.1).count();
|
|
let suffix_len = s1[prefix_len..]
|
|
.iter()
|
|
.rev()
|
|
.zip(s2[prefix_len..].iter().rev())
|
|
.take_while(|t| t.0 == t.1)
|
|
.count();
|
|
|
|
Affix { prefix_len, suffix_len }
|
|
}
|
|
}
|