From 5a5a07c7fad31641d032a743e4e87ffb58ade17d Mon Sep 17 00:00:00 2001 From: 魏曹先生 <1992414357@qq.com> Date: Fri, 22 May 2026 22:10:19 +0800 Subject: Initial commit --- rola-utils/functions/src/levenshtein_distance.rs | 117 +++++++++++++++++++++++ rola-utils/functions/src/lib.rs | 2 + 2 files changed, 119 insertions(+) create mode 100644 rola-utils/functions/src/levenshtein_distance.rs create mode 100644 rola-utils/functions/src/lib.rs (limited to 'rola-utils/functions/src') diff --git a/rola-utils/functions/src/levenshtein_distance.rs b/rola-utils/functions/src/levenshtein_distance.rs new file mode 100644 index 0000000..7b8be6c --- /dev/null +++ b/rola-utils/functions/src/levenshtein_distance.rs @@ -0,0 +1,117 @@ +pub struct LevenshteinDistance; + +impl LevenshteinDistance { + pub fn filter_similar<'a>(str: &str, slice: &[&'a str], threshold: usize) -> Vec<&'a str> { + slice + .iter() + .filter(|s| levenshtein_distance(str, s) <= threshold) + .copied() + .collect() + } + + pub fn compare(a: impl AsRef, b: impl AsRef) -> usize { + let a = a.as_ref(); + let b = b.as_ref(); + levenshtein_distance(a, b) + } +} + +fn levenshtein_distance(a: &str, b: &str) -> usize { + let a_chars: Vec = a.chars().collect(); + let b_chars: Vec = b.chars().collect(); + let m = a_chars.len(); + let n = b_chars.len(); + if m == 0 { + return n; + } + if n == 0 { + return m; + } + + let mut prev: Vec = (0..=n).collect(); + let mut curr = vec![0; n + 1]; + for (i, ca) in a_chars.iter().enumerate() { + curr[0] = i + 1; + for (j, cb) in b_chars.iter().enumerate() { + let cost = if ca == cb { 0 } else { 1 }; + let del = prev[j + 1] + 1; + let ins = curr[j] + 1; + let rep = prev[j] + cost; + curr[j + 1] = del.min(ins).min(rep); + } + std::mem::swap(&mut prev, &mut curr); + } + prev[n] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_levenshtein_distance_identical_strings() { + assert_eq!(LevenshteinDistance::compare("hello", "hello"), 0); + } + + #[test] + fn test_levenshtein_distance_completely_different() { + let dist = LevenshteinDistance::compare("abc", "xyz"); + assert_eq!(dist, 3); + } + + #[test] + fn test_levenshtein_distance_one_insertion() { + assert_eq!(LevenshteinDistance::compare("cat", "cats"), 1); + } + + #[test] + fn test_levenshtein_distance_one_deletion() { + assert_eq!(LevenshteinDistance::compare("dogs", "dog"), 1); + } + + #[test] + fn test_levenshtein_distance_one_substitution() { + assert_eq!(LevenshteinDistance::compare("cat", "cut"), 1); + } + + #[test] + fn test_levenshtein_distance_empty_strings() { + assert_eq!(LevenshteinDistance::compare("", ""), 0); + } + + #[test] + fn test_levenshtein_distance_empty_vs_nonempty() { + assert_eq!(LevenshteinDistance::compare("", "abc"), 3); + assert_eq!(LevenshteinDistance::compare("xyz", ""), 3); + } + + #[test] + fn test_levenshtein_distance_unicode() { + // Chinese characters + assert_eq!(LevenshteinDistance::compare("你好", "你好"), 0); + assert_eq!(LevenshteinDistance::compare("你好", "您好"), 1); + assert_eq!(LevenshteinDistance::compare("你好", "您不好"), 2); + } + + #[test] + fn test_filter_similar_exact_threshold() { + let words = vec!["cat", "cart", "ca", "cats", "cut"]; + let result = LevenshteinDistance::filter_similar("cat", &words, 2); + // cat -> cat: 0, cat -> cart: 1, cat -> ca: 1, cat -> cats: 1, cat -> cut: 1 + assert_eq!(result.len(), 5); + } + + #[test] + fn test_filter_similar_empty_slice() { + let words: Vec<&str> = vec![]; + let result = LevenshteinDistance::filter_similar("hello", &words, 3); + assert!(result.is_empty()); + } + + #[test] + fn test_filter_similar_threshold_zero() { + let words = vec!["hello", "hallo", "hello!"]; + let result = LevenshteinDistance::filter_similar("hello", &words, 0); + assert_eq!(result, vec!["hello"]); + } +} diff --git a/rola-utils/functions/src/lib.rs b/rola-utils/functions/src/lib.rs new file mode 100644 index 0000000..ff2ee7e --- /dev/null +++ b/rola-utils/functions/src/lib.rs @@ -0,0 +1,2 @@ +mod levenshtein_distance; +pub use levenshtein_distance::*; -- cgit