summaryrefslogtreecommitdiff
path: root/rola-utils/functions
diff options
context:
space:
mode:
author魏曹先生 <1992414357@qq.com>2026-05-22 22:10:19 +0800
committer魏曹先生 <1992414357@qq.com>2026-06-17 21:02:05 +0800
commit5a5a07c7fad31641d032a743e4e87ffb58ade17d (patch)
treeb6fbd33e8de82e5f9d9e6b99e3cb2102e47fe3ee /rola-utils/functions
Initial commitmain
Diffstat (limited to 'rola-utils/functions')
-rw-r--r--rola-utils/functions/Cargo.toml8
-rw-r--r--rola-utils/functions/src/levenshtein_distance.rs117
-rw-r--r--rola-utils/functions/src/lib.rs2
3 files changed, 127 insertions, 0 deletions
diff --git a/rola-utils/functions/Cargo.toml b/rola-utils/functions/Cargo.toml
new file mode 100644
index 0000000..dd482c4
--- /dev/null
+++ b/rola-utils/functions/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "shared_functions"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[dependencies]
diff --git a/rola-utils/functions/src/levenshtein_distance.rs b/rola-utils/functions/src/levenshtein_distance.rs
new file mode 100644
index 0000000..7b8be6c
--- /dev/null
+++ b/rola-utils/functions/src/levenshtein_distance.rs
@@ -0,0 +1,117 @@
+pub struct LevenshteinDistance;
+
+impl LevenshteinDistance {
+ pub fn filter_similar<'a>(str: &str, slice: &[&'a str], threshold: usize) -> Vec<&'a str> {
+ slice
+ .iter()
+ .filter(|s| levenshtein_distance(str, s) <= threshold)
+ .copied()
+ .collect()
+ }
+
+ pub fn compare(a: impl AsRef<str>, b: impl AsRef<str>) -> usize {
+ let a = a.as_ref();
+ let b = b.as_ref();
+ levenshtein_distance(a, b)
+ }
+}
+
+fn levenshtein_distance(a: &str, b: &str) -> usize {
+ let a_chars: Vec<char> = a.chars().collect();
+ let b_chars: Vec<char> = b.chars().collect();
+ let m = a_chars.len();
+ let n = b_chars.len();
+ if m == 0 {
+ return n;
+ }
+ if n == 0 {
+ return m;
+ }
+
+ let mut prev: Vec<usize> = (0..=n).collect();
+ let mut curr = vec![0; n + 1];
+ for (i, ca) in a_chars.iter().enumerate() {
+ curr[0] = i + 1;
+ for (j, cb) in b_chars.iter().enumerate() {
+ let cost = if ca == cb { 0 } else { 1 };
+ let del = prev[j + 1] + 1;
+ let ins = curr[j] + 1;
+ let rep = prev[j] + cost;
+ curr[j + 1] = del.min(ins).min(rep);
+ }
+ std::mem::swap(&mut prev, &mut curr);
+ }
+ prev[n]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_levenshtein_distance_identical_strings() {
+ assert_eq!(LevenshteinDistance::compare("hello", "hello"), 0);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_completely_different() {
+ let dist = LevenshteinDistance::compare("abc", "xyz");
+ assert_eq!(dist, 3);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_one_insertion() {
+ assert_eq!(LevenshteinDistance::compare("cat", "cats"), 1);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_one_deletion() {
+ assert_eq!(LevenshteinDistance::compare("dogs", "dog"), 1);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_one_substitution() {
+ assert_eq!(LevenshteinDistance::compare("cat", "cut"), 1);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_empty_strings() {
+ assert_eq!(LevenshteinDistance::compare("", ""), 0);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_empty_vs_nonempty() {
+ assert_eq!(LevenshteinDistance::compare("", "abc"), 3);
+ assert_eq!(LevenshteinDistance::compare("xyz", ""), 3);
+ }
+
+ #[test]
+ fn test_levenshtein_distance_unicode() {
+ // Chinese characters
+ assert_eq!(LevenshteinDistance::compare("你好", "你好"), 0);
+ assert_eq!(LevenshteinDistance::compare("你好", "您好"), 1);
+ assert_eq!(LevenshteinDistance::compare("你好", "您不好"), 2);
+ }
+
+ #[test]
+ fn test_filter_similar_exact_threshold() {
+ let words = vec!["cat", "cart", "ca", "cats", "cut"];
+ let result = LevenshteinDistance::filter_similar("cat", &words, 2);
+ // cat -> cat: 0, cat -> cart: 1, cat -> ca: 1, cat -> cats: 1, cat -> cut: 1
+ assert_eq!(result.len(), 5);
+ }
+
+ #[test]
+ fn test_filter_similar_empty_slice() {
+ let words: Vec<&str> = vec![];
+ let result = LevenshteinDistance::filter_similar("hello", &words, 3);
+ assert!(result.is_empty());
+ }
+
+ #[test]
+ fn test_filter_similar_threshold_zero() {
+ let words = vec!["hello", "hallo", "hello!"];
+ let result = LevenshteinDistance::filter_similar("hello", &words, 0);
+ assert_eq!(result, vec!["hello"]);
+ }
+}
diff --git a/rola-utils/functions/src/lib.rs b/rola-utils/functions/src/lib.rs
new file mode 100644
index 0000000..ff2ee7e
--- /dev/null
+++ b/rola-utils/functions/src/lib.rs
@@ -0,0 +1,2 @@
+mod levenshtein_distance;
+pub use levenshtein_distance::*;