summaryrefslogtreecommitdiff
path: root/systems/sheet
diff options
context:
space:
mode:
author魏曹先生 <1992414357@qq.com>2026-03-14 16:26:26 +0800
committer魏曹先生 <1992414357@qq.com>2026-03-14 16:26:26 +0800
commit62d72b6e5f97b8b1eb1f98e3d98272cb0fc9aec7 (patch)
tree0ec4eb5993e8feb65dfa47382206a49f48089439 /systems/sheet
parent923e1752662ed381fabc0012f3d9078832a8c7c4 (diff)
Extract custom string comparison logic into separate module
Diffstat (limited to 'systems/sheet')
-rw-r--r--systems/sheet/src/compare.rs157
-rw-r--r--systems/sheet/src/lib.rs1
-rw-r--r--systems/sheet/src/mapping.rs160
3 files changed, 161 insertions, 157 deletions
diff --git a/systems/sheet/src/compare.rs b/systems/sheet/src/compare.rs
new file mode 100644
index 0000000..b5b44b4
--- /dev/null
+++ b/systems/sheet/src/compare.rs
@@ -0,0 +1,157 @@
+use std::cmp::Ordering;
+
+/// Compare two `Vec<String>` element by element; each pair goes through `compare_string`, which ranks characters as:
+/// 1. ASCII symbols (punctuation; excludes letters and digits)
+/// 2. ASCII letters (all of A-Z first, then a-z; case-sensitive)
+/// 3. ASCII digits (0-9); digit runs present on both sides are compared numerically
+/// 4. All other characters (non-ASCII, ASCII whitespace and control), by code point
+///
+/// The comparison is lexicographic: the first pair of elements that is not `Equal` decides the order.
+pub fn compare_vec_string(a: &Vec<String>, b: &Vec<String>) -> std::cmp::Ordering {
+ use std::cmp::Ordering; // local import; the file-level `use` already brings `Ordering` into scope
+
+ for (left, right) in a.iter().zip(b.iter()) {
+ match compare_string(left, right) {
+ Ordering::Equal => continue,
+ ord => return ord,
+ }
+ }
+ // `zip` stops at the shorter vector; if that whole common prefix ties, the shorter vector comes first.
+ a.len().cmp(&b.len())
+}
+
+/// Compare two strings left to right: digit runs on both sides compare as numbers (so `"01"` equals `"1"`), other positions via `compare_char`; a string exhausted first sorts first.
+pub fn compare_string(a: &str, b: &str) -> std::cmp::Ordering {
+ let mut a_chars = a.chars().peekable();
+ let mut b_chars = b.chars().peekable();
+
+ loop {
+ match (a_chars.peek(), b_chars.peek()) {
+ (Some(&ca), Some(&cb)) => {
+ if ca.is_ascii_digit() && cb.is_ascii_digit() {
+ // Both sides start a digit run: consume each whole run and compare the values (peek has not advanced, so no digit is lost)
+ let a_num = parse_number(&mut a_chars);
+ let b_num = parse_number(&mut b_chars);
+ match a_num.cmp(&b_num) {
+ Ordering::Equal => continue, // equal values: keep scanning after both digit runs
+ ord => return ord,
+ }
+ } else {
+ // At least one side is not a digit: rank the two characters, advancing both iterators only on a tie
+ let ord = compare_char(ca, cb);
+ if ord != Ordering::Equal {
+ return ord;
+ }
+ a_chars.next();
+ b_chars.next();
+ }
+ }
+ (None, Some(_)) => return Ordering::Less, // `a` ran out first
+ (Some(_), None) => return Ordering::Greater, // `b` ran out first
+ (None, None) => return Ordering::Equal, // both exhausted with no difference found
+ }
+ }
+}
+
+/// Parse a decimal number from the front of the character iterator.
+/// Consumes consecutive ASCII digits, leaving the first non-digit unconsumed, and returns their value as a `u64`.
+/// Returns 0 and consumes nothing if the next character is not a digit. NOTE(review): no overflow guard for digit runs beyond `u64::MAX`.
+pub fn parse_number<I: Iterator<Item = char>>(chars: &mut std::iter::Peekable<I>) -> u64 {
+ let mut num = 0;
+ while let Some(&c) = chars.peek() {
+ if c.is_ascii_digit() {
+ num = num * 10 + (c as u64 - '0' as u64); // shift the accumulated value one decimal place, then add this digit
+ chars.next();
+ } else {
+ break; // the non-digit stays in the iterator for the caller
+ }
+ }
+ num
+}
+
+/// Compare two characters: first by group, then by code point within the same group. Group order:
+/// 1. ASCII symbols (non-letter, non-digit punctuation)
+/// 2. ASCII letters (A-Z then a-z)
+/// 3. ASCII digits (0-9) - note: `compare_string` handles digit-vs-digit itself via `parse_number`
+/// 4. Everything else
+pub fn compare_char(a: char, b: char) -> std::cmp::Ordering {
+ let group_a = char_group(a);
+ let group_b = char_group(b);
+
+ if group_a != group_b {
+ return group_a.cmp(&group_b);
+ }
+
+ // Same group: order within the group.
+ match group_a {
+ CharGroup::AsciiSymbol => a.cmp(&b), // ASCII symbols by code point
+ CharGroup::Letter => {
+ // Uppercase (A-Z) before lowercase (a-z); same-case pairs fall through to code-point order
+ if a.is_ascii_uppercase() && b.is_ascii_lowercase() {
+ Ordering::Less
+ } else if a.is_ascii_lowercase() && b.is_ascii_uppercase() {
+ Ordering::Greater
+ } else {
+ a.cmp(&b)
+ }
+ }
+ CharGroup::Digit => {
+ // Both characters are ASCII digits. `compare_string` never gets here (it sends digit-vs-digit
+ // to `parse_number`), so this only serves direct callers; single digits order by code point.
+ a.cmp(&b)
+ }
+ CharGroup::Other => a.cmp(&b), // everything else by code point
+ }
+}
+
+/// Classification of a character for ordering; the derived `Ord` ranks the variants by discriminant, so a lower group sorts first.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum CharGroup {
+ AsciiSymbol = 0, // !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
+ Letter = 1, // ASCII letters: A-Z, a-z
+ Digit = 2, // ASCII digits: 0-9
+ Other = 3, // anything else: non-ASCII characters, ASCII whitespace and control characters
+}
+
+pub fn char_group(c: char) -> CharGroup { // classify `c` into its ordering group; the checks run top to bottom
+ if c.is_ascii_punctuation() {
+ CharGroup::AsciiSymbol
+ } else if c.is_ascii_alphabetic() {
+ CharGroup::Letter
+ } else if c.is_ascii_digit() {
+ CharGroup::Digit
+ } else {
+ CharGroup::Other // non-ASCII characters, plus ASCII whitespace and control characters
+ }
+}
+
+#[test]
+pub fn test_compare_char_groups() { // one assertion per boundary between ordering groups
+ assert!(compare_string("!", "A") == Ordering::Less); // ASCII symbol < letter
+ assert!(compare_string("A", "a") == Ordering::Less); // uppercase < lowercase within the letter group
+ assert!(compare_string("a", "0") == Ordering::Less); // letter < digit
+ assert!(compare_string("9", "你") == Ordering::Less); // digit < other Unicode
+}
+
+#[test]
+pub fn test_numeric_ordering() {
+ // Pure digit strings compare by numeric value, regardless of their length
+ assert!(compare_string("0", "1") == Ordering::Less);
+ assert!(compare_string("1", "0") == Ordering::Greater);
+ assert!(compare_string("9", "10") == Ordering::Less);
+ assert!(compare_string("10", "9") == Ordering::Greater);
+ assert!(compare_string("99", "100") == Ordering::Less);
+ assert!(compare_string("100", "99") == Ordering::Greater);
+ assert!(compare_string("001", "1") == Ordering::Equal); // leading zeros are ignored: "001" equals "1"
+ assert!(compare_string("01", "1") == Ordering::Equal); // leading zeros are ignored: "01" equals "1"
+
+ // Mixed strings: the shared non-digit prefix ties, then the trailing digit runs compare numerically
+ assert!(compare_string("Frame-9", "Frame-10") == Ordering::Less);
+ assert!(compare_string("Frame-10", "Frame-9") == Ordering::Greater);
+ assert!(compare_string("Frame-99", "Frame-100") == Ordering::Less);
+ assert!(compare_string("Frame-100", "Frame-99") == Ordering::Greater);
+
+ // Digit runs are compared as whole numbers, not digit by digit ('1' < '2' would otherwise decide)
+ assert!(compare_string("123", "23") == Ordering::Greater); // 123 > 23
+ assert!(compare_string("23", "123") == Ordering::Less); // 23 < 123
+}
diff --git a/systems/sheet/src/lib.rs b/systems/sheet/src/lib.rs
index 84abbc9..2b6e38e 100644
--- a/systems/sheet/src/lib.rs
+++ b/systems/sheet/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod compare;
pub mod index_source;
pub mod mapping;
pub mod mapping_pattern;
diff --git a/systems/sheet/src/mapping.rs b/systems/sheet/src/mapping.rs
index 3dfb67e..2e6645e 100644
--- a/systems/sheet/src/mapping.rs
+++ b/systems/sheet/src/mapping.rs
@@ -3,7 +3,9 @@ use std::cmp::Ordering;
use just_fmt::fmt_path::{PathFormatConfig, fmt_path_str, fmt_path_str_custom};
use serde::{Deserialize, Serialize};
-use crate::{index_source::IndexSource, mapping::error::ParseMappingError};
+use crate::{
+ compare::compare_vec_string, index_source::IndexSource, mapping::error::ParseMappingError,
+};
pub mod error;
pub mod parse;
@@ -651,159 +653,3 @@ impl Ord for LocalMapping {
compare_vec_string(&self.val, &other.val)
}
}
-
-/// Compare two `Vec<String>` according to a specific ordering:
-/// 1. ASCII symbols (excluding letters and digits)
-/// 2. Letters (Aa-Zz, case‑sensitive, uppercase before lowercase)
-/// 3. Digits (0‑9) - compared numerically
-/// 4. All other Unicode characters (in their natural order)
-///
-/// The comparison is lexicographic: the first differing element determines the order.
-fn compare_vec_string(a: &Vec<String>, b: &Vec<String>) -> std::cmp::Ordering {
- use std::cmp::Ordering;
-
- for (left, right) in a.iter().zip(b.iter()) {
- match compare_string(left, right) {
- Ordering::Equal => continue,
- ord => return ord,
- }
- }
- // If all compared elements are equal, the shorter vector comes first.
- a.len().cmp(&b.len())
-}
-
-/// Compare two individual strings with the same ordering rules.
-fn compare_string(a: &str, b: &str) -> std::cmp::Ordering {
- let mut a_chars = a.chars().peekable();
- let mut b_chars = b.chars().peekable();
-
- loop {
- match (a_chars.peek(), b_chars.peek()) {
- (Some(&ca), Some(&cb)) => {
- if ca.is_ascii_digit() && cb.is_ascii_digit() {
- // Parse both numbers and compare numerically
- let a_num = parse_number(&mut a_chars);
- let b_num = parse_number(&mut b_chars);
- match a_num.cmp(&b_num) {
- Ordering::Equal => continue,
- ord => return ord,
- }
- } else {
- // Non-digit comparison
- let ord = compare_char(ca, cb);
- if ord != Ordering::Equal {
- return ord;
- }
- a_chars.next();
- b_chars.next();
- }
- }
- (None, Some(_)) => return Ordering::Less,
- (Some(_), None) => return Ordering::Greater,
- (None, None) => return Ordering::Equal,
- }
- }
-}
-
-/// Parse a number from the character iterator.
-/// Consumes consecutive ASCII digits and returns the parsed u64.
-/// Assumes the first character is already verified to be a digit.
-fn parse_number<I: Iterator<Item = char>>(chars: &mut std::iter::Peekable<I>) -> u64 {
- let mut num = 0;
- while let Some(&c) = chars.peek() {
- if c.is_ascii_digit() {
- num = num * 10 + (c as u64 - '0' as u64);
- chars.next();
- } else {
- break;
- }
- }
- num
-}
-
-/// Compare two characters according to the ordering:
-/// 1. ASCII symbols (non‑letter, non‑digit)
-/// 2. Letters (A‑Z then a‑z)
-/// 3. Digits (0‑9) - note: digits are handled specially in compare_string
-/// 4. Other Unicode
-fn compare_char(a: char, b: char) -> std::cmp::Ordering {
- let group_a = char_group(a);
- let group_b = char_group(b);
-
- if group_a != group_b {
- return group_a.cmp(&group_b);
- }
-
- // Same group: compare within the group.
- match group_a {
- CharGroup::AsciiSymbol => a.cmp(&b), // ASCII symbols in natural order
- CharGroup::Letter => {
- // Uppercase letters (A‑Z) come before lowercase (a‑z)
- if a.is_ascii_uppercase() && b.is_ascii_lowercase() {
- Ordering::Less
- } else if a.is_ascii_lowercase() && b.is_ascii_uppercase() {
- Ordering::Greater
- } else {
- a.cmp(&b)
- }
- }
- CharGroup::Digit => {
- // Digits should be compared numerically, but this is only reached
- // when comparing single digits (not part of a longer number).
- a.cmp(&b)
- }
- CharGroup::Other => a.cmp(&b), // Other Unicode (natural order)
- }
-}
-
-/// Classification of a character for ordering.
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-enum CharGroup {
- AsciiSymbol = 0, // !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
- Letter = 1, // A‑Z, a‑z
- Digit = 2, // 0‑9
- Other = 3, // Other
-}
-
-fn char_group(c: char) -> CharGroup {
- if c.is_ascii_punctuation() {
- CharGroup::AsciiSymbol
- } else if c.is_ascii_alphabetic() {
- CharGroup::Letter
- } else if c.is_ascii_digit() {
- CharGroup::Digit
- } else {
- CharGroup::Other
- }
-}
-
-#[test]
-fn test_compare_char_groups() {
- assert!(compare_string("!", "A") == Ordering::Less);
- assert!(compare_string("A", "a") == Ordering::Less);
- assert!(compare_string("a", "0") == Ordering::Less);
- assert!(compare_string("9", "你") == Ordering::Less);
-}
-
-#[test]
-fn test_numeric_ordering() {
- // Test numeric ordering
- assert!(compare_string("0", "1") == Ordering::Less);
- assert!(compare_string("1", "0") == Ordering::Greater);
- assert!(compare_string("9", "10") == Ordering::Less);
- assert!(compare_string("10", "9") == Ordering::Greater);
- assert!(compare_string("99", "100") == Ordering::Less);
- assert!(compare_string("100", "99") == Ordering::Greater);
- assert!(compare_string("001", "1") == Ordering::Equal); // "001" numerically equals "1"
- assert!(compare_string("01", "1") == Ordering::Equal); // "01" numerically equals "1"
-
- // Test mixed strings
- assert!(compare_string("Frame-9", "Frame-10") == Ordering::Less);
- assert!(compare_string("Frame-10", "Frame-9") == Ordering::Greater);
- assert!(compare_string("Frame-99", "Frame-100") == Ordering::Less);
- assert!(compare_string("Frame-100", "Frame-99") == Ordering::Greater);
-
- // Test that numbers are compared as whole numbers, not digit-by-digit
- assert!(compare_string("123", "23") == Ordering::Greater); // 123 > 23
- assert!(compare_string("23", "123") == Ordering::Less); // 23 < 123
-}