diff options
| author | 魏曹先生 <1992414357@qq.com> | 2026-06-17 21:33:40 +0800 |
|---|---|---|
| committer | 魏曹先生 <1992414357@qq.com> | 2026-06-17 21:57:13 +0800 |
| commit | ad0943f88ba9f5ed6eae198ecb4835c0f44701de (patch) | |
| tree | 6a8eb1fee42e7ee173e74788dafeb27c52429683 /rola-utils/functions | |
chore: initialize project structure and add core modules
Diffstat (limited to 'rola-utils/functions')
| -rw-r--r-- | rola-utils/functions/Cargo.toml | 8 | ||||
| -rw-r--r-- | rola-utils/functions/src/levenshtein_distance.rs | 117 | ||||
| -rw-r--r-- | rola-utils/functions/src/lib.rs | 2 |
3 files changed, 127 insertions, 0 deletions
diff --git a/rola-utils/functions/Cargo.toml b/rola-utils/functions/Cargo.toml new file mode 100644 index 0000000..dd482c4 --- /dev/null +++ b/rola-utils/functions/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "shared_functions" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] diff --git a/rola-utils/functions/src/levenshtein_distance.rs b/rola-utils/functions/src/levenshtein_distance.rs new file mode 100644 index 0000000..7b8be6c --- /dev/null +++ b/rola-utils/functions/src/levenshtein_distance.rs @@ -0,0 +1,117 @@ +pub struct LevenshteinDistance; + +impl LevenshteinDistance { + pub fn filter_similar<'a>(str: &str, slice: &[&'a str], threshold: usize) -> Vec<&'a str> { + slice + .iter() + .filter(|s| levenshtein_distance(str, s) <= threshold) + .copied() + .collect() + } + + pub fn compare(a: impl AsRef<str>, b: impl AsRef<str>) -> usize { + let a = a.as_ref(); + let b = b.as_ref(); + levenshtein_distance(a, b) + } +} + +fn levenshtein_distance(a: &str, b: &str) -> usize { + let a_chars: Vec<char> = a.chars().collect(); + let b_chars: Vec<char> = b.chars().collect(); + let m = a_chars.len(); + let n = b_chars.len(); + if m == 0 { + return n; + } + if n == 0 { + return m; + } + + let mut prev: Vec<usize> = (0..=n).collect(); + let mut curr = vec![0; n + 1]; + for (i, ca) in a_chars.iter().enumerate() { + curr[0] = i + 1; + for (j, cb) in b_chars.iter().enumerate() { + let cost = if ca == cb { 0 } else { 1 }; + let del = prev[j + 1] + 1; + let ins = curr[j] + 1; + let rep = prev[j] + cost; + curr[j + 1] = del.min(ins).min(rep); + } + std::mem::swap(&mut prev, &mut curr); + } + prev[n] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_levenshtein_distance_identical_strings() { + assert_eq!(LevenshteinDistance::compare("hello", "hello"), 0); + } + + #[test] + fn test_levenshtein_distance_completely_different() { + let dist = LevenshteinDistance::compare("abc", "xyz"); + assert_eq!(dist, 3); + } + + #[test] + fn test_levenshtein_distance_one_insertion() { + assert_eq!(LevenshteinDistance::compare("cat", "cats"), 1); + } + + #[test] + fn test_levenshtein_distance_one_deletion() { + assert_eq!(LevenshteinDistance::compare("dogs", "dog"), 1); + } + + #[test] + fn test_levenshtein_distance_one_substitution() { + assert_eq!(LevenshteinDistance::compare("cat", "cut"), 1); + } + + #[test] + fn test_levenshtein_distance_empty_strings() { + assert_eq!(LevenshteinDistance::compare("", ""), 0); + } + + #[test] + fn test_levenshtein_distance_empty_vs_nonempty() { + assert_eq!(LevenshteinDistance::compare("", "abc"), 3); + assert_eq!(LevenshteinDistance::compare("xyz", ""), 3); + } + + #[test] + fn test_levenshtein_distance_unicode() { + // Chinese characters + assert_eq!(LevenshteinDistance::compare("你好", "你好"), 0); + assert_eq!(LevenshteinDistance::compare("你好", "您好"), 1); + assert_eq!(LevenshteinDistance::compare("你好", "您不好"), 2); + } + + #[test] + fn test_filter_similar_exact_threshold() { + let words = vec!["cat", "cart", "ca", "cats", "cut"]; + let result = LevenshteinDistance::filter_similar("cat", &words, 2); + // cat -> cat: 0, cat -> cart: 1, cat -> ca: 1, cat -> cats: 1, cat -> cut: 1 + assert_eq!(result.len(), 5); + } + + #[test] + fn test_filter_similar_empty_slice() { + let words: Vec<&str> = vec![]; + let result = LevenshteinDistance::filter_similar("hello", &words, 3); + assert!(result.is_empty()); + } + + #[test] + fn test_filter_similar_threshold_zero() { + let words = vec!["hello", "hallo", "hello!"]; + let result = LevenshteinDistance::filter_similar("hello", &words, 0); + assert_eq!(result, vec!["hello"]); + } +} diff --git a/rola-utils/functions/src/lib.rs b/rola-utils/functions/src/lib.rs new file mode 100644 index 0000000..ff2ee7e --- /dev/null +++ b/rola-utils/functions/src/lib.rs @@ -0,0 +1,2 @@ +mod levenshtein_distance; +pub use levenshtein_distance::*; |
