diff options
| author | 魏曹先生 <1992414357@qq.com> | 2026-05-22 22:10:19 +0800 |
|---|---|---|
| committer | 魏曹先生 <1992414357@qq.com> | 2026-06-17 21:02:05 +0800 |
| commit | 5a5a07c7fad31641d032a743e4e87ffb58ade17d (patch) | |
| tree | b6fbd33e8de82e5f9d9e6b99e3cb2102e47fe3ee /rola-utils/functions/src/levenshtein_distance.rs | |
Initial commitmain
Diffstat (limited to 'rola-utils/functions/src/levenshtein_distance.rs')
| -rw-r--r-- | rola-utils/functions/src/levenshtein_distance.rs | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/rola-utils/functions/src/levenshtein_distance.rs b/rola-utils/functions/src/levenshtein_distance.rs new file mode 100644 index 0000000..7b8be6c --- /dev/null +++ b/rola-utils/functions/src/levenshtein_distance.rs @@ -0,0 +1,117 @@ +pub struct LevenshteinDistance; + +impl LevenshteinDistance { + pub fn filter_similar<'a>(str: &str, slice: &[&'a str], threshold: usize) -> Vec<&'a str> { + slice + .iter() + .filter(|s| levenshtein_distance(str, s) <= threshold) + .copied() + .collect() + } + + pub fn compare(a: impl AsRef<str>, b: impl AsRef<str>) -> usize { + let a = a.as_ref(); + let b = b.as_ref(); + levenshtein_distance(a, b) + } +} + +fn levenshtein_distance(a: &str, b: &str) -> usize { + let a_chars: Vec<char> = a.chars().collect(); + let b_chars: Vec<char> = b.chars().collect(); + let m = a_chars.len(); + let n = b_chars.len(); + if m == 0 { + return n; + } + if n == 0 { + return m; + } + + let mut prev: Vec<usize> = (0..=n).collect(); + let mut curr = vec![0; n + 1]; + for (i, ca) in a_chars.iter().enumerate() { + curr[0] = i + 1; + for (j, cb) in b_chars.iter().enumerate() { + let cost = if ca == cb { 0 } else { 1 }; + let del = prev[j + 1] + 1; + let ins = curr[j] + 1; + let rep = prev[j] + cost; + curr[j + 1] = del.min(ins).min(rep); + } + std::mem::swap(&mut prev, &mut curr); + } + prev[n] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_levenshtein_distance_identical_strings() { + assert_eq!(LevenshteinDistance::compare("hello", "hello"), 0); + } + + #[test] + fn test_levenshtein_distance_completely_different() { + let dist = LevenshteinDistance::compare("abc", "xyz"); + assert_eq!(dist, 3); + } + + #[test] + fn test_levenshtein_distance_one_insertion() { + assert_eq!(LevenshteinDistance::compare("cat", "cats"), 1); + } + + #[test] + fn test_levenshtein_distance_one_deletion() { + assert_eq!(LevenshteinDistance::compare("dogs", "dog"), 1); + } + + #[test] + fn test_levenshtein_distance_one_substitution() { + assert_eq!(LevenshteinDistance::compare("cat", "cut"), 1); + } + + #[test] + fn test_levenshtein_distance_empty_strings() { + assert_eq!(LevenshteinDistance::compare("", ""), 0); + } + + #[test] + fn test_levenshtein_distance_empty_vs_nonempty() { + assert_eq!(LevenshteinDistance::compare("", "abc"), 3); + assert_eq!(LevenshteinDistance::compare("xyz", ""), 3); + } + + #[test] + fn test_levenshtein_distance_unicode() { + // Chinese characters + assert_eq!(LevenshteinDistance::compare("你好", "你好"), 0); + assert_eq!(LevenshteinDistance::compare("你好", "您好"), 1); + assert_eq!(LevenshteinDistance::compare("你好", "您不好"), 2); + } + + #[test] + fn test_filter_similar_exact_threshold() { + let words = vec!["cat", "cart", "ca", "cats", "cut"]; + let result = LevenshteinDistance::filter_similar("cat", &words, 2); + // cat -> cat: 0, cat -> cart: 1, cat -> ca: 1, cat -> cats: 1, cat -> cut: 1 + assert_eq!(result.len(), 5); + } + + #[test] + fn test_filter_similar_empty_slice() { + let words: Vec<&str> = vec![]; + let result = LevenshteinDistance::filter_similar("hello", &words, 3); + assert!(result.is_empty()); + } + + #[test] + fn test_filter_similar_threshold_zero() { + let words = vec!["hello", "hallo", "hello!"]; + let result = LevenshteinDistance::filter_similar("hello", &words, 0); + assert_eq!(result, vec!["hello"]); + } +} |
