diff options
Diffstat (limited to 'systems/sheet')
| -rw-r--r-- | systems/sheet/Cargo.toml | 11 | ||||
| -rw-r--r-- | systems/sheet/macros/src/lib.rs | 49 | ||||
| -rw-r--r-- | systems/sheet/src/index_source.rs | 114 | ||||
| -rw-r--r-- | systems/sheet/src/lib.rs | 1 | ||||
| -rw-r--r-- | systems/sheet/src/mapping.rs | 391 | ||||
| -rw-r--r-- | systems/sheet/src/mapping/error.rs | 5 | ||||
| -rw-r--r-- | systems/sheet/src/mapping_pattern.rs | 18 | ||||
| -rw-r--r-- | systems/sheet/src/sheet.rs | 375 | ||||
| -rw-r--r-- | systems/sheet/src/sheet/constants.rs | 55 | ||||
| -rw-r--r-- | systems/sheet/src/sheet/error.rs | 16 | ||||
| -rw-r--r-- | systems/sheet/src/sheet/reader.rs | 627 | ||||
| -rw-r--r-- | systems/sheet/src/sheet/test.rs | 460 | ||||
| -rw-r--r-- | systems/sheet/src/sheet/writer.rs | 264 |
13 files changed, 2248 insertions, 138 deletions
diff --git a/systems/sheet/Cargo.toml b/systems/sheet/Cargo.toml index 99cc7c7..074f511 100644 --- a/systems/sheet/Cargo.toml +++ b/systems/sheet/Cargo.toml @@ -4,5 +4,16 @@ edition = "2024" version.workspace = true [dependencies] +hex_display = { path = "../../utils/hex_display" } + +constants = { path = "../_constants" } sheet_system_macros = { path = "macros" } asset_system = { path = "../_asset" } + +tokio = { version = "1.48", features = ["full"] } + +thiserror = "1.0.69" +just_fmt = "0.1" + +memmap2 = "0.9" +sha2 = "0.10.8" diff --git a/systems/sheet/macros/src/lib.rs b/systems/sheet/macros/src/lib.rs index c0e936c..b485a82 100644 --- a/systems/sheet/macros/src/lib.rs +++ b/systems/sheet/macros/src/lib.rs @@ -3,9 +3,15 @@ use proc_macro2::{Span, TokenStream as TokenStream2}; use quote::quote; use syn::parse_str; +const INDEX_SOURCE_BUF: &str = + "just_enough_vcs::system::sheet_system::index_source::IndexSourceBuf"; +const INDEX_SOURCE: &str = "just_enough_vcs::system::sheet_system::index_source::IndexSource"; + const LOCAL_MAPPING_PATH: &str = "just_enough_vcs::system::sheet_system::mapping::LocalMapping"; + const MAPPING_BUF_PATH: &str = "just_enough_vcs::system::sheet_system::mapping::MappingBuf"; const MAPPING_PATH: &str = "just_enough_vcs::system::sheet_system::mapping::Mapping"; + const LOCAL_MAPPING_FORWARD_PATH: &str = "just_enough_vcs::system::sheet_system::mapping::LocalMappingForward"; @@ -115,13 +121,14 @@ pub fn mapping_buf(input: TokenStream) -> TokenStream { let mapping_buf_path: syn::Path = parse_str(MAPPING_BUF_PATH).expect("Failed to parse MAPPING_BUF_PATH"); + let index_source_buf_path: syn::Path = + parse_str(INDEX_SOURCE_BUF).expect("Failed to parse INDEX_SOURCE_BUF"); let expanded = quote! { #mapping_buf_path::new( #sheet.to_string(), #path_vec_tokens, - #id.to_string(), - #ver.to_string() + #index_source_buf_path::new(#id.to_string(), #ver.to_string()) ) }; @@ -135,7 +142,7 @@ pub fn mapping_buf(input: TokenStream) -> TokenStream { /// let mapping = mapping!( /// // Map the `version` of index `index_id` /// // to `your_dir/your_file.suffix` in `your_sheet` -/// "your_sheet:/your_dir/your_file.suffix" => "index_id/version" +/// "your_sheet:/your_dir/your_file.suffix" => "id/ver" /// ); /// ``` #[proc_macro] @@ -176,13 +183,14 @@ pub fn mapping(input: TokenStream) -> TokenStream { let path = path_vec.join("/"); let mapping_path: syn::Path = parse_str(MAPPING_PATH).expect("Failed to parse MAPPING_PATH"); + let index_source_path: syn::Path = + parse_str(INDEX_SOURCE).expect("Failed to parse INDEX_SOURCE"); let expanded = quote! { #mapping_path::new( #sheet, #path, - #id, - #ver + #index_source_path::new(#id, #ver) ) }; @@ -305,21 +313,22 @@ pub fn local_mapping(input: TokenStream) -> TokenStream { Err(err) => return err.to_compile_error().into(), }; + let local_mapping_path: syn::Path = + parse_str(LOCAL_MAPPING_PATH).expect("Failed to parse LOCAL_MAPPING_PATH"); + let local_mapping_forward_path: syn::Path = + parse_str(LOCAL_MAPPING_FORWARD_PATH).expect("Failed to parse LOCAL_MAPPING_FORWARD_PATH"); + let index_source_buf_path: syn::Path = + parse_str(INDEX_SOURCE_BUF).expect("Failed to parse INDEX_SOURCE_BUF"); + match parts { LocalMappingParts::Latest(path_str, id, ver) => { let path_vec = parse_path_string(&path_str); let path_vec_tokens = path_vec_to_tokens(&path_vec); - let local_mapping_path: syn::Path = - parse_str(LOCAL_MAPPING_PATH).expect("Failed to parse LOCAL_MAPPING_PATH"); - let local_mapping_forward_path: syn::Path = parse_str(LOCAL_MAPPING_FORWARD_PATH) - .expect("Failed to parse LOCAL_MAPPING_FORWARD_PATH"); - let expanded = quote! { #local_mapping_path::new( #path_vec_tokens, - #id.to_string(), - #ver.to_string(), + #index_source_buf_path::new(#id.to_string(), #ver.to_string()), #local_mapping_forward_path::Latest ) }; @@ -330,16 +339,10 @@ pub fn local_mapping(input: TokenStream) -> TokenStream { let path_vec = parse_path_string(&path_str); let path_vec_tokens = path_vec_to_tokens(&path_vec); - let local_mapping_path: syn::Path = - parse_str(LOCAL_MAPPING_PATH).expect("Failed to parse LOCAL_MAPPING_PATH"); - let local_mapping_forward_path: syn::Path = parse_str(LOCAL_MAPPING_FORWARD_PATH) - .expect("Failed to parse LOCAL_MAPPING_FORWARD_PATH"); - let expanded = quote! { #local_mapping_path::new( #path_vec_tokens, - #id.to_string(), - #ver.to_string(), + #index_source_buf_path::new(#id.to_string(), #ver.to_string()), #local_mapping_forward_path::Version { version_name: #ver.to_string() } @@ -352,16 +355,10 @@ pub fn local_mapping(input: TokenStream) -> TokenStream { let path_vec = parse_path_string(&path_str); let path_vec_tokens = path_vec_to_tokens(&path_vec); - let local_mapping_path: syn::Path = - parse_str(LOCAL_MAPPING_PATH).expect("Failed to parse LOCAL_MAPPING_PATH"); - let local_mapping_forward_path: syn::Path = parse_str(LOCAL_MAPPING_FORWARD_PATH) - .expect("Failed to parse LOCAL_MAPPING_FORWARD_PATH"); - let expanded = quote! { #local_mapping_path::new( #path_vec_tokens, - #id.to_string(), - #ver.to_string(), + #index_source_buf_path::new(#id.to_string(), #ver.to_string()), #local_mapping_forward_path::Ref { sheet_name: #ref_name.to_string() } diff --git a/systems/sheet/src/index_source.rs b/systems/sheet/src/index_source.rs new file mode 100644 index 0000000..a2fc43d --- /dev/null +++ b/systems/sheet/src/index_source.rs @@ -0,0 +1,114 @@ +/// IndexSource +/// Points to a unique resource address in Vault +#[derive(Debug, Clone, Copy)] +pub struct IndexSource { + /// The index ID of the resource + id: u32, + + /// The index version of the resource + ver: u16, +} + +// Implement construction and querying for IndexSource + +impl IndexSource { + /// Create IndexSource + pub fn new(id: u32, ver: u16) -> Self { + IndexSource { id, ver } + } + + /// Get index ID from IndexSource + pub fn id(&self) -> u32 { + self.id + } + + /// Get index version from IndexSource + pub fn version(&self) -> u16 { + self.ver + } +} + +// Implement comparison for IndexSource + +impl PartialEq for IndexSource { + fn eq(&self, other: &Self) -> bool { + &self.id == &other.id && &self.ver == &other.ver + } +} + +impl Eq for IndexSource {} + +// Implement hashing for IndexSource and IndexSourceBuf + +impl std::hash::Hash for IndexSource { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.id.hash(state); + self.ver.hash(state); + } +} + +// Implement construction of IndexSource from strings + +impl<'a> TryFrom<&'a str> for IndexSource { + type Error = &'static str; + + fn try_from(value: &'a str) -> Result<Self, Self::Error> { + let parts: Vec<&str> = value.split('/').collect(); + if parts.len() != 2 { + return Err("Invalid format: expected 'id/version'"); + } + + let id_str = parts[0].trim(); + let ver_str = parts[1].trim(); + + if id_str.is_empty() || ver_str.is_empty() { + return Err("ID or version cannot be empty"); + } + + let id = id_str + .parse::<u32>() + .map_err(|_| "ID must be a valid u32")?; + let ver = ver_str + .parse::<u16>() + .map_err(|_| "Version must be a valid u16")?; + + // Check for overflow (though parsing already validates range) + // Additional bounds checks can be added here if needed + Ok(Self { id, ver }) + } +} + +impl TryFrom<String> for IndexSource { + type Error = &'static str; + + fn try_from(value: String) -> Result<Self, Self::Error> { + Self::try_from(value.as_str()) + } +} + +impl From<IndexSource> for (u32, u16) { + fn from(src: IndexSource) -> Self { + (src.id, src.ver) + } +} + +// Implement modifications for IndexSource +impl IndexSource { + /// Set the index ID of IndexSource + pub fn set_id(&mut self, index_id: u32) { + self.id = index_id; + } + + /// Set the index version of IndexSource + pub fn set_version(&mut self, version: u16) { + self.ver = version; + } +} + +// Implement Display for IndexSourceBuf and IndexSource + +impl std::fmt::Display for IndexSource { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}/{}", self.id.to_string(), self.ver.to_string()) + } +} diff --git a/systems/sheet/src/lib.rs b/systems/sheet/src/lib.rs index 94e84c5..84abbc9 100644 --- a/systems/sheet/src/lib.rs +++ b/systems/sheet/src/lib.rs @@ -1,3 +1,4 @@ +pub mod index_source; pub mod mapping; pub mod mapping_pattern; pub mod sheet; diff --git a/systems/sheet/src/mapping.rs b/systems/sheet/src/mapping.rs index b31315d..0a72a69 100644 --- a/systems/sheet/src/mapping.rs +++ b/systems/sheet/src/mapping.rs @@ -1,20 +1,23 @@ -use string_proc::{ - format_path::{PathFormatConfig, format_path_str, format_path_str_with_config}, - snake_case, -}; +use just_fmt::fmt_path::{PathFormatConfig, fmt_path_str, fmt_path_str_custom}; + +use crate::{index_source::IndexSource, mapping::error::ParseMappingError}; + +pub mod error; + +// Validation rules for LocalMapping +// LocalMapping is a key component for writing and reading SheetData +// According to the SheetData protocol specification, all variable-length fields store length information using the u8 type +// Therefore, the lengths of the index_id, version, mapping_value, and forward fields must not exceed `u8::MAX` /// Local mapping /// It is stored inside a Sheet and will be exposed externally as Mapping or MappingBuf -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, Clone)] pub struct LocalMapping { /// The value of the local mapping val: Vec<String>, - /// The ID of the local mapping - id: String, - - /// The version of the local mapping - ver: String, + /// Index source + source: IndexSource, /// The version direction of the local mapping forward: LocalMappingForward, @@ -22,9 +25,10 @@ pub struct LocalMapping { /// The forward direction of the current Mapping /// It indicates the expected asset update method for the current Mapping -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Default, Debug, PartialEq, Eq, Clone)] pub enum LocalMappingForward { /// Expect the current index version to be the latest + #[default] Latest, /// Expect the current index version to point to a specific Ref @@ -33,28 +37,78 @@ pub enum LocalMappingForward { Ref { sheet_name: String }, /// Expect the current index version to point to a specific version - Version { version_name: String }, + Version { version: u16 }, } /// Mapping /// It stores basic mapping information and only participates in comparison and parsing -#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy)] pub struct Mapping<'a> { sheet_name: &'a str, val: &'a str, - id: &'a str, - ver: &'a str, + source: IndexSource, } /// MappingBuf /// It stores complete mapping information and participates in complex mapping editing operations like storage and modification -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct MappingBuf { sheet_name: String, val: Vec<String>, val_joined: String, - id: String, - ver: String, + source: IndexSource, +} + +// Implement conversions for LocalMappingForward + +impl LocalMappingForward { + /// Check if the forward direction length is valid + pub fn is_len_valid(&self) -> bool { + match self { + LocalMappingForward::Latest => true, + LocalMappingForward::Ref { sheet_name } => sheet_name.len() <= u8::MAX as usize, + LocalMappingForward::Version { version: _ } => true, + } + } + + /// Get the forward direction type ID + pub fn forward_type_id(&self) -> u8 { + match self { + LocalMappingForward::Latest => 0, + LocalMappingForward::Ref { sheet_name: _ } => 1, + LocalMappingForward::Version { version: _ } => 2, + } + } + + /// Unpack into raw information + /// (id, len, bytes) + pub fn unpack(&self) -> (u8, u8, Vec<u8>) { + let b = match self { + LocalMappingForward::Latest => vec![], + LocalMappingForward::Ref { sheet_name } => sheet_name.as_bytes().to_vec(), + LocalMappingForward::Version { + version: version_name, + } => version_name.to_be_bytes().to_vec(), + }; + (self.forward_type_id(), b.len() as u8, b) + } + + /// Reconstruct into a forward direction + pub fn pack(id: u8, bytes: &[u8]) -> Option<Self> { + if bytes.len() > u8::MAX as usize { + return None; + } + match id { + 0 => Some(Self::Latest), + 1 => Some(Self::Ref { + sheet_name: String::from_utf8(bytes.to_vec()).ok()?, + }), + 2 => Some(Self::Version { + version: u16::from_be_bytes(bytes.try_into().ok()?), + }), + _ => None, + } + } } // Implement creation and mutual conversion for MappingBuf, LocalMapping and Mapping @@ -63,15 +117,23 @@ impl LocalMapping { /// Create a new LocalMapping pub fn new( val: Vec<String>, - id: impl Into<String>, - ver: impl Into<String>, + source: IndexSource, forward: LocalMappingForward, - ) -> Self { - Self { - val, - id: id.into(), - ver: ver.into(), - forward, + ) -> Option<Self> { + // Note: + // LocalMapping will be stored in SheetData + // Strict validity checks are required; the lengths of forward, and even val must not exceed limits + // Otherwise, errors will occur that prevent correct writing to SheetData + let valid = forward.is_len_valid() && val.join("/").len() <= u8::MAX as usize; + + if valid { + Some(Self { + val, + source, + forward, + }) + } else { + None } } @@ -80,14 +142,19 @@ impl LocalMapping { &self.val } + /// Get the IndexSource of LocalMapping + pub fn index_source(&self) -> IndexSource { + self.source + } + /// Get the mapped index ID of LocalMapping - pub fn mapped_id(&self) -> &String { - &self.id + pub fn mapped_id(&self) -> u32 { + self.source.id() } /// Get the mapped index version of LocalMapping - pub fn mapped_version(&self) -> &String { - &self.ver + pub fn mapped_version(&self) -> u16 { + self.source.version() } /// Get the forward direction of LocalMapping @@ -97,35 +164,24 @@ impl LocalMapping { /// Clone and generate a MappingBuf from LocalMapping pub fn to_mapping_buf_cloned(&self, sheet_name: impl Into<String>) -> MappingBuf { - MappingBuf::new( - sheet_name.into(), - self.val.clone(), - self.id.clone(), - self.ver.clone(), - ) + MappingBuf::new(sheet_name.into(), self.val.clone(), self.source.clone()) } /// Generate a MappingBuf from LocalMapping pub fn to_mapping_buf(self, sheet_name: impl Into<String>) -> MappingBuf { - MappingBuf::new(sheet_name.into(), self.val, self.id, self.ver) + MappingBuf::new(sheet_name.into(), self.val, self.source) } } impl MappingBuf { /// Create a new MappingBuf - pub fn new( - sheet_name: impl Into<String>, - val: Vec<String>, - id: impl Into<String>, - ver: impl Into<String>, - ) -> Self { + pub fn new(sheet_name: impl Into<String>, val: Vec<String>, source: IndexSource) -> Self { let val_joined = val.join("/"); Self { sheet_name: sheet_name.into(), val, val_joined, - id: id.into(), - ver: ver.into(), + source, } } @@ -144,45 +200,56 @@ impl MappingBuf { &self.val_joined } + /// Get the IndexSource of MappingBuf + pub fn index_source(&self) -> IndexSource { + self.source + } + /// Get the mapped index ID of MappingBuf - pub fn mapped_id(&self) -> &String { - &self.id + pub fn mapped_id(&self) -> u32 { + self.source.id() } /// Get the mapped index version of MappingBuf - pub fn mapped_version(&self) -> &String { - &self.ver + pub fn mapped_version(&self) -> u16 { + self.source.version() } /// Generate a Mapping from MappingBuf pub fn as_mapping(&self) -> Mapping<'_> { - Mapping::new(&self.sheet_name, &self.val_joined, &self.id, &self.ver) + Mapping::new(&self.sheet_name, &self.val_joined, self.source) } /// Clone and generate a LocalMapping from MappingBuf - pub fn to_local_mapping_cloned(&self, forward: &LocalMappingForward) -> LocalMapping { - LocalMapping::new( - self.val.clone(), - self.id.clone(), - self.ver.clone(), - forward.clone(), - ) + /// + /// If any of the following conditions exist in MappingBuf or its members, + /// the conversion will be invalid and return None + /// - The final length of the recorded mapping value exceeds `u8::MAX` + /// + /// Additionally, if the length of the given forward value exceeds `u8::MAX`, it will also return None + pub fn to_local_mapping_cloned(&self, forward: &LocalMappingForward) -> Option<LocalMapping> { + LocalMapping::new(self.val.clone(), self.source.clone(), forward.clone()) } /// Generate a LocalMapping from MappingBuf - pub fn to_local_mapping(self, forward: LocalMappingForward) -> LocalMapping { - LocalMapping::new(self.val, self.id, self.ver, forward) + /// + /// If any of the following conditions exist in MappingBuf or its members, + /// the conversion will be invalid and return None + /// - The final length of the recorded mapping value exceeds `u8::MAX` + /// + /// Additionally, if the length of the given forward value exceeds `u8::MAX`, it will also return None + pub fn to_local_mapping(self, forward: LocalMappingForward) -> Option<LocalMapping> { + LocalMapping::new(self.val, self.source, forward) } } impl<'a> Mapping<'a> { /// Create a new Mapping - pub fn new(sheet_name: &'a str, val: &'a str, id: &'a str, ver: &'a str) -> Self { + pub fn new(sheet_name: &'a str, val: &'a str, source: IndexSource) -> Self { Self { sheet_name, val, - id, - ver, + source, } } @@ -193,7 +260,7 @@ impl<'a> Mapping<'a> { /// Build a Vec of Mapping values from the stored address pub fn value(&self) -> Vec<String> { - format_path_str(self.val.to_string()) + fmt_path_str(self.val.to_string()) .unwrap_or_default() .split("/") .map(|s| s.to_string()) @@ -205,53 +272,90 @@ impl<'a> Mapping<'a> { &self.val } + /// Get the IndexSource of Mapping + pub fn index_source(&self) -> IndexSource { + self.source + } + /// Get the mapped index ID of Mapping - pub fn mapped_id(&self) -> &str { - &self.id + pub fn mapped_id(&self) -> u32 { + self.source.id() } /// Get the mapped index version of Mapping - pub fn mapped_version(&self) -> &str { - &self.ver + pub fn mapped_version(&self) -> u16 { + self.source.version() } /// Generate a MappingBuf from Mapping pub fn to_mapping_buf(&self) -> MappingBuf { MappingBuf::new( self.sheet_name.to_string(), - format_path_str(self.val) + fmt_path_str(self.val) .unwrap_or_default() .split('/') .into_iter() .map(|s| s.to_string()) .collect(), - self.id.to_string(), - self.ver.to_string(), + self.source, ) } - /// Generate a LocalMapping from MappingBuf - pub fn to_local_mapping(self, forward: LocalMappingForward) -> LocalMapping { + /// Generate a LocalMapping from Mapping + /// + /// If any of the following conditions exist in Mapping or its members, + /// the conversion will be invalid and return None + /// - The final length of the recorded mapping value exceeds `u8::MAX` + /// + /// Additionally, if the length of the given forward value exceeds `u8::MAX`, it will also return None + pub fn to_local_mapping(self, forward: LocalMappingForward) -> Option<LocalMapping> { LocalMapping::new( - format_path_str(self.val) + fmt_path_str(self.val) .unwrap_or_default() .split("/") .into_iter() .map(|s| s.to_string()) .collect(), - self.id.to_string(), - self.ver.to_string(), + self.source, forward, ) } } +impl<'a> From<Mapping<'a>> for Vec<String> { + fn from(mapping: Mapping<'a>) -> Vec<String> { + mapping.value() + } +} + impl<'a> From<Mapping<'a>> for MappingBuf { fn from(mapping: Mapping<'a>) -> Self { mapping.to_mapping_buf() } } +impl<'a> TryFrom<Mapping<'a>> for LocalMapping { + type Error = ParseMappingError; + + fn try_from(value: Mapping<'a>) -> Result<Self, Self::Error> { + match value.to_local_mapping(LocalMappingForward::Latest) { + Some(m) => Ok(m), + None => Err(ParseMappingError::InvalidMapping), + } + } +} + +impl TryFrom<MappingBuf> for LocalMapping { + type Error = ParseMappingError; + + fn try_from(value: MappingBuf) -> Result<Self, Self::Error> { + match value.to_local_mapping(LocalMappingForward::Latest) { + Some(m) => Ok(m), + None => Err(ParseMappingError::InvalidMapping), + } + } +} + // Implement the Display trait for Mapping, LocalMapping and MappingBuf for formatted output. // // The Display implementation only shows path information, not the complete structure information. @@ -261,31 +365,73 @@ impl<'a> From<Mapping<'a>> for MappingBuf { // When presenting, only the snake_case converted sheet_name and the path formed by joining val are shown. macro_rules! fmt_mapping { - ($f:expr, $sheet_name:expr, $val:expr) => { + ($f:expr, $sheet_name:expr, $val:expr, $source:expr) => { write!( $f, - "{}:/{}", - snake_case!($sheet_name), - format_path_str($val).unwrap_or_default() + "\"{}:/{}\" => \"{}\"", + just_fmt::snake_case!($sheet_name), + just_fmt::fmt_path::fmt_path_str($val).unwrap_or_default(), + $source ) }; } impl<'a> std::fmt::Display for Mapping<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fmt_mapping!(f, self.sheet_name, self.val) + fmt_mapping!(f, self.sheet_name, self.val, self.source.to_string()) } } impl std::fmt::Display for MappingBuf { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fmt_mapping!(f, self.sheet_name.to_string(), &self.val.join("/")) + fmt_mapping!( + f, + self.sheet_name.to_string(), + &self.val.join("/"), + self.source.to_string() + ) } } impl std::fmt::Display for LocalMapping { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.val.join("/")) + match &self.forward { + LocalMappingForward::Latest => { + write!( + f, + "\"{}\" => \"{}\"", + self.val.join("/"), + self.source.to_string() + ) + } + LocalMappingForward::Ref { sheet_name } => { + write!( + f, + "\"{}\" => \"{}\" => \"{}\"", + self.val.join("/"), + self.source.to_string(), + sheet_name + ) + } + LocalMappingForward::Version { version } => { + if &self.mapped_version() == version { + write!( + f, + "\"{}\" == \"{}\"", + self.val.join("/"), + self.source.to_string(), + ) + } else { + write!( + f, + "\"{}\" => \"{}\" == \"{}\"", + self.val.join("/"), + self.source.to_string(), + version + ) + } + } + } } } @@ -312,14 +458,19 @@ impl MappingBuf { self.val_joined = self.val.join("/"); } + /// Replace the current IndexSource + pub fn replace_source(&mut self, new_source: IndexSource) { + self.source = new_source; + } + /// Set the mapped index ID of the current MappingBuf - pub fn set_mapped_id(&mut self, id: impl Into<String>) { - self.id = id.into(); + pub fn set_mapped_id(&mut self, id: u32) { + self.source.set_id(id); } /// Set the mapped index version of the current MappingBuf - pub fn set_mapped_version(&mut self, version: impl Into<String>) { - self.ver = version.into(); + pub fn set_mapped_version(&mut self, version: u16) { + self.source.set_version(version); } } @@ -337,14 +488,19 @@ impl LocalMapping { self.val = val; } + /// Replace the current IndexSource + pub fn replace_source(&mut self, new_source: IndexSource) { + self.source = new_source; + } + /// Set the mapped index ID of the current LocalMapping - pub fn set_mapped_id(&mut self, id: impl Into<String>) { - self.id = id.into(); + pub fn set_mapped_id(&mut self, id: u32) { + self.source.set_id(id); } /// Set the mapped index version of the current LocalMapping - pub fn set_mapped_version(&mut self, version: impl Into<String>) { - self.ver = version.into(); + pub fn set_mapped_version(&mut self, version: u16) { + self.source.set_version(version); } /// Set the forward direction of the current LocalMapping @@ -355,7 +511,7 @@ impl LocalMapping { #[inline(always)] fn join_helper(nodes: String, mut mapping_buf_val: Vec<String>) -> Vec<String> { - let formatted = format_path_str_with_config( + let formatted = fmt_path_str_custom( nodes, &PathFormatConfig { // Do not process ".." because it is used to go up one level @@ -383,40 +539,55 @@ fn join_helper(nodes: String, mut mapping_buf_val: Vec<String>) -> Vec<String> { return mapping_buf_val; } -// Implement mutual comparison for LocalMapping, MappingBuf, and Mapping +// Implement mutual comparison for MappingBuf and Mapping +// +// Note: +// When either side's ID or Version is None, it indicates an invalid Source +// The comparison result is false -impl<'a> PartialEq<Mapping<'a>> for LocalMapping { - fn eq(&self, other: &Mapping<'a>) -> bool { - self.val.join("/") == other.val && self.id == other.id && self.ver == other.ver +impl<'a> PartialEq<MappingBuf> for Mapping<'a> { + fn eq(&self, other: &MappingBuf) -> bool { + self.val == other.val_joined + && self.source.id() == other.source.id() + && self.source.version() == other.source.version() } } -impl<'a> PartialEq<LocalMapping> for Mapping<'a> { - fn eq(&self, other: &LocalMapping) -> bool { - other == self +// Implement comparison between LocalMappings +// +// Note: +// LocalMappings are considered equal as long as their val (Node) values are the same + +impl PartialEq for LocalMapping { + fn eq(&self, other: &Self) -> bool { + self.val == other.val } } -impl PartialEq<MappingBuf> for LocalMapping { - fn eq(&self, other: &MappingBuf) -> bool { - self.val == other.val && self.id == other.id && self.ver == other.ver +impl PartialEq<Vec<String>> for LocalMapping { + fn eq(&self, other: &Vec<String>) -> bool { + &self.val == other } } -impl PartialEq<LocalMapping> for MappingBuf { - fn eq(&self, other: &LocalMapping) -> bool { - other == self +impl Eq for LocalMapping {} + +impl std::hash::Hash for LocalMapping { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.val.hash(state); } } -impl<'a> PartialEq<MappingBuf> for Mapping<'a> { - fn eq(&self, other: &MappingBuf) -> bool { - self.val == other.val_joined && self.id == other.id && self.ver == other.ver +// Implement borrowing for LocalMapping and MappingBuf + +impl std::borrow::Borrow<Vec<String>> for LocalMapping { + fn borrow(&self) -> &Vec<String> { + &self.val } } -impl<'a> PartialEq<Mapping<'a>> for MappingBuf { - fn eq(&self, other: &Mapping<'a>) -> bool { - other == self +impl std::borrow::Borrow<Vec<String>> for MappingBuf { + fn borrow(&self) -> &Vec<String> { + &self.val } } diff --git a/systems/sheet/src/mapping/error.rs b/systems/sheet/src/mapping/error.rs new file mode 100644 index 0000000..4fb3550 --- /dev/null +++ b/systems/sheet/src/mapping/error.rs @@ -0,0 +1,5 @@ +#[derive(Debug, thiserror::Error)] +pub enum ParseMappingError { + #[error("Mapping information is invalid and cannot be safely converted to LocalMapping")] + InvalidMapping, +} diff --git a/systems/sheet/src/mapping_pattern.rs b/systems/sheet/src/mapping_pattern.rs index 2b30c0d..7aba502 100644 --- a/systems/sheet/src/mapping_pattern.rs +++ b/systems/sheet/src/mapping_pattern.rs @@ -45,21 +45,24 @@ use crate::mapping::MappingBuf; pub struct MappingPattern {} -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum MappingPatternResult { Single(MappingBuf), Multi(Vec<MappingBuf>), } impl MappingPatternResult { + /// Create a new single mapping result pub fn new_single(mapping: MappingBuf) -> Self { Self::Single(mapping) } + /// Create a new multi mapping result pub fn new_multi(mappings: Vec<MappingBuf>) -> Self { Self::Multi(mappings) } + /// Check if the current result is a single mapping pub fn is_single(&self) -> bool { match self { MappingPatternResult::Single(_) => true, @@ -67,6 +70,7 @@ impl MappingPatternResult { } } + /// Check if the current result is a multi mapping pub fn is_multi(&self) -> bool { match self { MappingPatternResult::Single(_) => false, @@ -74,6 +78,7 @@ impl MappingPatternResult { } } + /// Extract the single mapping from the current result pub fn single(self) -> Option<MappingBuf> { match self { MappingPatternResult::Single(mapping) => Some(mapping), @@ -81,6 +86,7 @@ impl MappingPatternResult { } } + /// Extract the multi mapping from the current result pub fn multi(self) -> Option<Vec<MappingBuf>> { match self { MappingPatternResult::Single(_) => None, @@ -88,6 +94,7 @@ impl MappingPatternResult { } } + /// Ensure the current result is a multi mapping pub fn ensure_multi(self) -> Vec<MappingBuf> { match self { MappingPatternResult::Single(mapping) => vec![mapping], @@ -95,6 +102,7 @@ impl MappingPatternResult { } } + /// Unwrap as Single pub fn unwrap_single(self) -> MappingBuf { match self { MappingPatternResult::Single(mapping) => mapping, @@ -102,6 +110,7 @@ impl MappingPatternResult { } } + /// Unwrap as Multi pub fn unwrap_multi(self) -> Vec<MappingBuf> { match self { MappingPatternResult::Single(_) => { @@ -111,6 +120,7 @@ impl MappingPatternResult { } } + /// Unwrap as Single or return the provided single mapping pub fn unwrap_single_or(self, or: MappingBuf) -> MappingBuf { match self { MappingPatternResult::Single(mapping) => mapping, @@ -118,6 +128,7 @@ impl MappingPatternResult { } } + /// Unwrap as Multi or return the provided multi mapping pub fn unwrap_multi_or(self, or: Vec<MappingBuf>) -> Vec<MappingBuf> { match self { MappingPatternResult::Single(_) => or, @@ -125,6 +136,7 @@ impl MappingPatternResult { } } + /// Unwrap as Single or execute the provided function pub fn unwrap_single_or_else<F>(self, or: F) -> MappingBuf where F: FnOnce() -> MappingBuf, @@ -135,6 +147,7 @@ impl MappingPatternResult { } } + /// Unwrap as Multi or execute the provided function pub fn unwrap_multi_or_else<F>(self, or: F) -> Vec<MappingBuf> where F: FnOnce() -> Vec<MappingBuf>, @@ -145,6 +158,7 @@ impl MappingPatternResult { } } + /// Get the length of the current Result pub fn len(&self) -> usize { match self { MappingPatternResult::Single(_) => 1, @@ -152,6 +166,8 @@ impl MappingPatternResult { } } + /// Check if the current Result is empty + /// Only possible to be empty in Multi mode pub fn is_empty(&self) -> bool { match self { MappingPatternResult::Single(_) => false, diff --git a/systems/sheet/src/sheet.rs b/systems/sheet/src/sheet.rs index 54420ab..68c4c78 100644 --- a/systems/sheet/src/sheet.rs +++ b/systems/sheet/src/sheet.rs @@ -1 +1,374 @@ -pub struct Sheet {} +use std::{ + collections::HashSet, + fs::File, + path::{Path, PathBuf}, +}; + +use memmap2::Mmap; +use tokio::fs; + +use crate::{ + index_source::IndexSource, + mapping::{LocalMapping, LocalMappingForward, Mapping, MappingBuf}, + sheet::{ + error::{ReadSheetDataError, SheetEditError}, + reader::{read_mapping, read_sheet_data}, + writer::convert_sheet_data_to_bytes, + }, +}; + +pub mod constants; +pub mod error; +pub mod reader; +pub mod writer; + +#[cfg(test)] +pub mod test; + +#[derive(Default, Debug, Clone, PartialEq)] +pub struct Sheet { + /// Sheet Name + name: String, + + /// Data in the sheet + data: SheetData, + + /// Edit information + edit: SheetEdit, +} + +/// Full Sheet information +/// +/// Used to wrap as a Sheet object for editing and persistence +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct SheetData { + /// All local mappings + mappings: HashSet<LocalMapping>, +} + +/// Mmap for SheetData +pub struct SheetDataMmap { + mmap: Mmap, +} + +/// Editing state of the Sheet +/// +/// Stored in the Sheet, records the editing operations that **will** be performed on its SheetData +/// The content will be cleared after the edits are applied +#[derive(Default, Debug, Clone, PartialEq)] +pub struct SheetEdit { + /// Edit history + list: Vec<SheetEditItem>, +} + +#[derive(Default, Debug, Clone, PartialEq)] +pub enum SheetEditItem { + /// Do nothing, this entry is not included in checksum and audit + #[default] + DoNothing, + + /// Move a Mapping's Node to another Node + Move { + from_node: Vec<String>, + to_node: Vec<String>, + }, + + /// Swap the positions of two Mapping Nodes + Swap { + node_a: Vec<String>, + node_b: Vec<String>, + }, + + /// Erase the Mapping pointed to by a Node + EraseMapping { node: Vec<String> }, + + /// Insert a new Mapping + InsertMapping { mapping: LocalMapping }, + + /// Replace the IndexSource of a Mapping pointed to by a Node + ReplaceSource { + node: Vec<String>, + source: IndexSource, + }, + + /// Update the LocalMappingForward of a Mapping pointed to by a Node + UpdateForward { + node: Vec<String>, + forward: LocalMappingForward, + }, +} + +impl std::fmt::Display for SheetEditItem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SheetEditItem::DoNothing => write!(f, ""), + SheetEditItem::Move { from_node, to_node } => { + write!( + f, + "Move \"{}\" -> \"{}\"", + display_node_helper(from_node), + display_node_helper(to_node), + ) + } + SheetEditItem::Swap { node_a, node_b } => { + write!( + f, + "Swap \"{}\" <-> \"{}\"", + display_node_helper(node_a), + display_node_helper(node_b), + ) + } + SheetEditItem::EraseMapping { node } => { + write!(f, "Earse \"{}\"", display_node_helper(node),) + } + SheetEditItem::InsertMapping { mapping } => { + write!(f, "Insert {}", mapping.to_string()) + } + SheetEditItem::ReplaceSource { node, source } => { + write!( + f, + "Replace \"{}\" => \"{}\"", + display_node_helper(node), + source.to_string() + ) + } + SheetEditItem::UpdateForward { node, forward } => match forward { + LocalMappingForward::Latest => { + write!(f, "Update \"{}\" => Latest", display_node_helper(node)) + } + LocalMappingForward::Ref { sheet_name } => { + write!( + f, + "Update \"{}\" => Ref(\"{}\")", + display_node_helper(node), + sheet_name + ) + } + LocalMappingForward::Version { + version: version_name, + } => { + write!( + f, + "Update \"{}\" => Ver(\"{}\")", + display_node_helper(node), + version_name + ) + } + }, + } + } +} + +#[inline(always)] +fn display_node_helper(n: &Vec<String>) -> String { + n.join("/") +} + +impl SheetData { + /// Create an empty SheetData + pub fn empty() -> Self { + Self { + mappings: HashSet::new(), + } + } + + /// Read SheetData completely from the workspace + pub async fn full_read( + &mut self, + sheet_file: impl Into<PathBuf>, + ) -> Result<(), ReadSheetDataError> { + let file_data = fs::read(sheet_file.into()).await?; + let sheet_data = read_sheet_data(file_data.as_slice())?; + self.mappings = sheet_data.mappings; + Ok(()) + } + + /// Load MMAP from a Sheet file + pub fn mmap<'a>(sheet_file: impl AsRef<Path>) -> std::io::Result<SheetDataMmap> { + let file = File::open(sheet_file.as_ref())?; + + // SAFETY: The file has been successfully opened and is managed by the SheetDataMmap wrapper + let mmap = unsafe { Mmap::map(&file)? }; + Ok(SheetDataMmap { mmap }) + } + + /// Check if a mapping exists in SheetData + pub fn contains_mapping(&self, value: &Vec<String>) -> bool { + self.mappings.contains(value) + } + + /// Read local mapping information from SheetData + pub fn read_local_mapping(&self, value: &Vec<String>) -> Option<&LocalMapping> { + self.mappings.get(value) + } + + /// Wrap SheetData into a Sheet + pub fn pack(self, sheet_name: impl Into<String>) -> Sheet { + Sheet { + name: sheet_name.into(), + data: self, + edit: SheetEdit { list: Vec::new() }, + } + } +} + +impl SheetDataMmap { + /// Load mapping information from MMAP at high speed + pub fn mp<'a>( + &'a self, + node: &[&str], + ) -> Result<Option<(Mapping<'a>, LocalMappingForward)>, ReadSheetDataError> { + read_mapping(&self.mmap[..], node) + } + + /// Load mapping information from Sheet file at high speed and copy into LocalMapping + pub fn mp_c<'a>(&self, node: &[&str]) -> Result<Option<LocalMapping>, ReadSheetDataError> { + match self.mp(node)? { + Some((mapping, forward)) => { + // Note: + // Regarding the `unwrap()` here: + // Data is read from the original SheetData, it cannot produce values longer than `u8::MAX` + // It cannot trigger local_mapping's validity check, so it can be safely unwrapped + let local_mapping = mapping.to_local_mapping(forward).unwrap(); + + Ok(Some(local_mapping)) + } + None => Ok(None), + } + } +} + +impl Sheet { + /// Unpack Sheet into pure data + pub fn unpack(self) -> SheetData { + self.data + } + + /// Check if a mapping exists in the Sheet + pub fn contains_mapping(&self, value: &Vec<String>) -> bool { + self.data.contains_mapping(value) + } + + /// Read local mapping information from Sheet data + pub fn read_local_mapping(&self, value: &Vec<String>) -> Option<&LocalMapping> { + self.data.read_local_mapping(value) + } + + /// Read from Sheet data and clone into MappingBuf + pub fn read_mapping_buf(&self, value: &Vec<String>) -> Option<MappingBuf> { + match self.read_local_mapping(value) { + Some(v) => Some(v.to_mapping_buf_cloned(&self.name)), + None => None, + } + } + + /// Insert mapping modification + pub fn insert_mapping( + &mut self, + mapping: impl Into<LocalMapping>, + ) -> Result<(), SheetEditError> { + self.edit.list.push(SheetEditItem::InsertMapping { + mapping: mapping.into(), + }); + Ok(()) + } + + /// Insert mapping erasure + pub fn earse_mapping(&mut self, node: Vec<String>) -> Result<(), SheetEditError> { + self.edit.list.push(SheetEditItem::EraseMapping { node }); + Ok(()) + } + + /// Insert mapping swap + pub fn swap_mapping( + &mut self, + node_a: Vec<String>, + node_b: Vec<String>, + ) -> Result<(), SheetEditError> { + self.edit.list.push(SheetEditItem::Swap { node_a, node_b }); + Ok(()) + } + + /// Insert mapping move + pub fn move_mapping( + &mut self, + from_node: Vec<String>, + to_node: Vec<String>, + ) -> Result<(), SheetEditError> { + self.edit + .list + .push(SheetEditItem::Move { from_node, to_node }); + Ok(()) + } + + /// Replace source + pub fn replace_source( + &mut self, + node: Vec<String>, + source: IndexSource, + ) -> Result<(), SheetEditError> { + self.edit + .list + .push(SheetEditItem::ReplaceSource { node, source }); + Ok(()) + } + + /// Update forward + pub fn update_forward( + &mut self, + node: Vec<String>, + forward: LocalMappingForward, + ) -> Result<(), SheetEditError> { + self.edit + .list + .push(SheetEditItem::UpdateForward { node, forward }); + Ok(()) + } + + /// Apply changes + pub fn apply(&mut self) { + // Logic for applying changes + todo!(); + + // Clear the edit list + #[allow(unreachable_code)] // Note: Remove after todo!() is completed + self.edit.list.clear(); + } +} + +// Implement the as_bytes function for SheetData + +impl SheetData { + /// Convert SheetData to byte data for storage in the file system + pub fn as_bytes(self) -> Vec<u8> { + convert_sheet_data_to_bytes(self) + } +} + +impl From<SheetData> for Vec<u8> { + fn from(value: SheetData) -> Self { + value.as_bytes() + } +} + +impl From<&SheetData> for Vec<u8> { + fn from(value: &SheetData) -> Self { + value.clone().as_bytes() + } +} + +impl TryFrom<Vec<u8>> for SheetData { + type Error = ReadSheetDataError; + + fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> { + read_sheet_data(value.as_slice()) + } +} + +impl TryFrom<&[u8]> for SheetData { + type Error = ReadSheetDataError; + + fn try_from(value: &[u8]) -> Result<Self, Self::Error> { + read_sheet_data(value) + } +} diff --git a/systems/sheet/src/sheet/constants.rs b/systems/sheet/src/sheet/constants.rs new file mode 100644 index 0000000..69714bb --- /dev/null +++ b/systems/sheet/src/sheet/constants.rs @@ -0,0 +1,55 @@ +// Header (15: 1 + 2 + 4 + 4 + 4) +// +// [SHEET_VERSION: u8] +// [MAPPING_BUCKET_COUNT: u16] +// [INDEX_COUNT: u32] +// [OFFSET_MAPPING_DIR: u32] +// [OFFSET_INDEX_TABLE: u32] + +pub const CURRENT_SHEET_VERSION: u8 = 1; +pub const HEADER_SIZE: usize = 0 + + 1 // SHEET_VERSION + + 2 // MAPPING_BUCKET_COUNT + + 4 // INDEX_COUNT + + 4 // OFFSET_MAPPING_DIR + + 4 // OFFSET_INDEX_TABLE +; + +// Mapping Directory (12: 4 + 4 + 4) +// +// [BUCKET_HASH_PREFIX: u32] +// [BUCKET_OFFSET: u32] +// [BUCKET_LENGTH: u32] + +pub const MAPPING_DIR_ENTRY_SIZE: usize = 0 + + 4 // BUCKET_HASH_PREFIX + + 4 // BUCKET_OFFSET + + 4 // BUCKET_LENGTH +; + +// Mapping Buckets (6 + 1b + N) +// +// [KEY_LEN: u8] +// [FORWARD_TYPE: byte] +// [FORWARD_INFO_LEN: u8] +// [KEY_BYTES: ?] +// [FORWARD_INFO_BYTES: ?] +// [INDEX_OFFSET: u32] + +pub const MAPPING_BUCKET_MIN_SIZE: usize = 0 + + 1 // KEY_LEN + + 1 // FORWARD_TYPE + + 1 // FORWARD_INFO_LEN + + 2 // KEY_BYTES (MIN:1) + FORWARD_INFO_BYTES (MIN:1) + + 2 // INDEX_OFFSET +; + +// Index Table (6: 4 + 2) +// +// [INDEX_ID: u32] +// [INDEX_VERSION: u16] + +pub const INDEX_ENTRY_SIZE: usize = 0 + + 4 // INDEX_ID + + 2 // INDEX_VERSION +; diff --git a/systems/sheet/src/sheet/error.rs b/systems/sheet/src/sheet/error.rs new file mode 100644 index 0000000..79f7214 --- /dev/null +++ b/systems/sheet/src/sheet/error.rs @@ -0,0 +1,16 @@ +use crate::sheet::SheetEditItem; + +#[derive(Debug, thiserror::Error)] +pub enum SheetEditError { + #[error("Edit `{0}` Failed: Node already exists: `{1}`")] + NodeAlreadyExist(SheetEditItem, String), + + #[error("Edit `{0}` Failed: Node not found: `{1}`")] + NodeNotFound(SheetEditItem, String), +} + +#[derive(Debug, thiserror::Error)] +pub enum ReadSheetDataError { + #[error("IO error: {0}")] + IOErr(#[from] std::io::Error), +} diff --git a/systems/sheet/src/sheet/reader.rs b/systems/sheet/src/sheet/reader.rs new file mode 100644 index 0000000..d86b097 --- /dev/null +++ b/systems/sheet/src/sheet/reader.rs @@ -0,0 +1,627 @@ +use crate::{ + index_source::IndexSource, + mapping::{LocalMapping, LocalMappingForward, Mapping}, + sheet::{ + SheetData, + constants::{ + CURRENT_SHEET_VERSION, HEADER_SIZE, INDEX_ENTRY_SIZE, MAPPING_BUCKET_MIN_SIZE, + MAPPING_DIR_ENTRY_SIZE, + }, + error::ReadSheetDataError, + }, +}; +use std::collections::HashSet; + +/// Reconstruct complete SheetData from full sheet data +pub fn read_sheet_data(full_sheet_data: &[u8]) -> Result<SheetData, ReadSheetDataError> { + if full_sheet_data.len() < HEADER_SIZE { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Sheet data too small for header", + ) + .into()); + } + + // Read file header + let version = full_sheet_data[0]; + if version != CURRENT_SHEET_VERSION { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported sheet version: {}", version), + ) + .into()); + } + + let bucket_count = u16::from_le_bytes([full_sheet_data[1], full_sheet_data[2]]) as usize; + let index_count = u32::from_le_bytes([ + full_sheet_data[3], + full_sheet_data[4], + full_sheet_data[5], + full_sheet_data[6], + ]) as usize; + + let mapping_dir_offset = u32::from_le_bytes([ + full_sheet_data[7], + full_sheet_data[8], + full_sheet_data[9], + full_sheet_data[10], + ]) as usize; + + let index_table_offset = u32::from_le_bytes([ + full_sheet_data[11], + full_sheet_data[12], + full_sheet_data[13], + full_sheet_data[14], + ]) as usize; + + // Validate offsets + if mapping_dir_offset > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Mapping directory offset out of bounds", + ) + .into()); + } + + if index_table_offset > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Index table offset out of bounds", + ) + .into()); + } + + // Read index table + let index_sources = read_index_table(full_sheet_data, index_table_offset, index_count)?; + + // Read mapping directory and build all mappings + let mut mappings = HashSet::new(); + let mapping_dir_end = mapping_dir_offset + bucket_count * MAPPING_DIR_ENTRY_SIZE; + + if mapping_dir_end > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Mapping directory exceeds buffer", + ) + .into()); + } + + // Iterate through all buckets + for i in 0..bucket_count { + let dir_entry_offset = mapping_dir_offset + i * MAPPING_DIR_ENTRY_SIZE; + + // Skip BUCKET_HASH_PREFIX, directly read BUCKET_OFFSET and BUCKET_LENGTH + let bucket_offset = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 4], + full_sheet_data[dir_entry_offset + 5], + full_sheet_data[dir_entry_offset + 6], + full_sheet_data[dir_entry_offset + 7], + ]) as usize; + + let bucket_length = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 8], + full_sheet_data[dir_entry_offset + 9], + full_sheet_data[dir_entry_offset + 10], + full_sheet_data[dir_entry_offset + 11], + ]) as usize; + + // Read bucket data + if bucket_offset + bucket_length > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + format!("Bucket data exceeds buffer (bucket {})", i), + ) + .into()); + } + + let bucket_data = &full_sheet_data[bucket_offset..bucket_offset + bucket_length]; + let bucket_mappings = read_bucket_data(bucket_data, &index_sources)?; + + for mapping in bucket_mappings { + mappings.insert(mapping); + } + } + + Ok(SheetData { mappings }) +} + +/// Read mapping information for a specific node from complete sheet data +pub fn read_mapping<'a>( + full_sheet_data: &'a [u8], + node: &[&str], +) -> Result<Option<(Mapping<'a>, LocalMappingForward)>, ReadSheetDataError> { + if full_sheet_data.len() < HEADER_SIZE { + return Ok(None); + } + + // Read file header + let version = full_sheet_data[0]; + if version != CURRENT_SHEET_VERSION { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported sheet version: {}", version), + ) + .into()); + } + + let bucket_count = u16::from_le_bytes([full_sheet_data[1], full_sheet_data[2]]) as usize; + let index_count = u32::from_le_bytes([ + full_sheet_data[3], + full_sheet_data[4], + full_sheet_data[5], + full_sheet_data[6], + ]) as usize; + + let mapping_dir_offset = u32::from_le_bytes([ + full_sheet_data[7], + full_sheet_data[8], + full_sheet_data[9], + full_sheet_data[10], + ]) as usize; + + let index_table_offset = u32::from_le_bytes([ + full_sheet_data[11], + full_sheet_data[12], + full_sheet_data[13], + full_sheet_data[14], + ]) as usize; + + // Validate offsets + if mapping_dir_offset > full_sheet_data.len() || index_table_offset > full_sheet_data.len() { + return Ok(None); + } + + // Read index table + let index_sources = read_index_table(full_sheet_data, index_table_offset, index_count)?; + + // Calculate hash prefix for target node + let node_path: Vec<String> = node.iter().map(|s| s.to_string()).collect(); + let target_hash = crate::sheet::writer::calculate_path_hash(&node_path); + let target_bucket_key = target_hash >> 24; // Take high 8 bits as bucket key + + // Find corresponding bucket in mapping directory using binary search + let mapping_dir_end = mapping_dir_offset + bucket_count * MAPPING_DIR_ENTRY_SIZE; + if mapping_dir_end > full_sheet_data.len() { + return Ok(None); + } + + // Binary search for the bucket with matching hash prefix + let mut left = 0; + let mut right = bucket_count; + + while left < right { + let mid = left + (right - left) / 2; + let dir_entry_offset = mapping_dir_offset + mid * MAPPING_DIR_ENTRY_SIZE; + + let bucket_hash_prefix = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset], + full_sheet_data[dir_entry_offset + 1], + full_sheet_data[dir_entry_offset + 2], + full_sheet_data[dir_entry_offset + 3], + ]); + + if bucket_hash_prefix < target_bucket_key { + left = mid + 1; + } else if bucket_hash_prefix > target_bucket_key { + right = mid; + } else { + // Found matching bucket + let bucket_offset = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 4], + full_sheet_data[dir_entry_offset + 5], + full_sheet_data[dir_entry_offset + 6], + full_sheet_data[dir_entry_offset + 7], + ]) as usize; + + let bucket_length = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 8], + full_sheet_data[dir_entry_offset + 9], + full_sheet_data[dir_entry_offset + 10], + full_sheet_data[dir_entry_offset + 11], + ]) as usize; + + // Read bucket data and find target node + if bucket_offset + bucket_length > full_sheet_data.len() { + break; + } + + let bucket_data = &full_sheet_data[bucket_offset..bucket_offset + bucket_length]; + return find_mapping_in_bucket(bucket_data, node, &index_sources); + } + } + + Ok(None) +} + +/// Read index table +fn read_index_table( + data: &[u8], + offset: usize, + count: usize, +) -> Result<Vec<IndexSource>, ReadSheetDataError> { + let table_size = count * INDEX_ENTRY_SIZE; + if offset + table_size > data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Index table exceeds buffer", + ) + .into()); + } + + let mut sources = Vec::with_capacity(count); + let mut pos = offset; + + for _ in 0..count { + if pos + INDEX_ENTRY_SIZE > data.len() { + break; + } + + let id = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]); + let ver = u16::from_le_bytes([data[pos + 4], data[pos + 5]]); + + sources.push(IndexSource::new(id, ver)); + pos += INDEX_ENTRY_SIZE; + } + + Ok(sources) +} + +/// Read all mappings in bucket data +fn read_bucket_data( + bucket_data: &[u8], + index_sources: &[IndexSource], +) -> Result<Vec<LocalMapping>, ReadSheetDataError> { + let mut mappings = Vec::new(); + let mut pos = 0; + + while pos < bucket_data.len() { + if pos + MAPPING_BUCKET_MIN_SIZE > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Incomplete mapping bucket entry", + ) + .into()); + } + + // Read mapping bucket entry header + let key_len = bucket_data[pos] as usize; + let forward_type = bucket_data[pos + 1]; + let forward_info_len = bucket_data[pos + 2] as usize; + + pos += 3; // KEY_LEN + FORWARD_TYPE + FORWARD_INFO_LEN + + // Check bounds + if pos + key_len > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Key data exceeds buffer", + ) + .into()); + } + + // Read key data (path) + let key_bytes = &bucket_data[pos..pos + key_len]; + let path = deserialize_path(key_bytes)?; + pos += key_len; + + // Read forward info data + if pos + forward_info_len > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Forward info data exceeds buffer", + ) + .into()); + } + + let forward_bytes = &bucket_data[pos..pos + forward_info_len]; + pos += forward_info_len; + + // Read index offset + if pos + 4 > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Index offset exceeds buffer", + ) + .into()); + } + + let index_offset = u32::from_le_bytes([ + bucket_data[pos], + bucket_data[pos + 1], + bucket_data[pos + 2], + bucket_data[pos + 3], + ]) as usize; + pos += 4; + + // Get index source + if index_offset >= index_sources.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid index offset: {}", index_offset), + ) + .into()); + } + + let source = index_sources[index_offset]; + + // Build forward info + let forward = LocalMappingForward::pack(forward_type, forward_bytes).ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Failed to unpack forward info", + ) + })?; + + // Create LocalMapping + let mapping = LocalMapping::new(path, source, forward).ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "Failed to create mapping") + })?; + + mappings.push(mapping); + } + + Ok(mappings) +} + +/// Find mapping for specific node in bucket data +fn find_mapping_in_bucket<'a>( + bucket_data: &'a [u8], + node: &[&str], + index_sources: &[IndexSource], +) -> Result<Option<(Mapping<'a>, LocalMappingForward)>, ReadSheetDataError> { + let mut pos = 0; + + while pos < bucket_data.len() { + if pos + MAPPING_BUCKET_MIN_SIZE > bucket_data.len() { + break; + } + + // Read mapping bucket entry header + let key_len = bucket_data[pos] as usize; + let forward_type = bucket_data[pos + 1]; + let forward_info_len = bucket_data[pos + 2] as usize; + + let header_end = pos + 3; // KEY_LEN + FORWARD_TYPE + FORWARD_INFO_LEN + + // Check bounds + if header_end + key_len > bucket_data.len() { + break; + } + + // Read key data (path) + let key_bytes = &bucket_data[header_end..header_end + key_len]; + let current_path = deserialize_path(key_bytes)?; + + // Check if matches target node + if paths_match(¤t_path, node) { + // Read forward info data + let forward_start = header_end + key_len; + if forward_start + forward_info_len > bucket_data.len() { + break; + } + + let forward_bytes = &bucket_data[forward_start..forward_start + forward_info_len]; + + // Read index offset + let index_offset_pos = forward_start + forward_info_len; + if index_offset_pos + 4 > bucket_data.len() { + break; + } + + let index_offset = u32::from_le_bytes([ + bucket_data[index_offset_pos], + bucket_data[index_offset_pos + 1], + bucket_data[index_offset_pos + 2], + bucket_data[index_offset_pos + 3], + ]) as usize; + + // Get index source + if index_offset >= index_sources.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid index offset: {}", index_offset), + ) + .into()); + } + + let source = index_sources[index_offset]; + + // Build forward info + let forward = + LocalMappingForward::pack(forward_type, forward_bytes).ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Failed to unpack forward info", + ) + })?; + + // Create Mapping + let path_str = std::str::from_utf8(key_bytes).map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid UTF-8 in path: {}", e), + ) + })?; + let mapping = Mapping::new("", path_str, source); + + return Ok(Some((mapping, forward))); + } + + // Move to next mapping entry + // Entry size = 3 (header) + key_len + forward_info_len + 4 (index offset) + pos = header_end + key_len + forward_info_len + 4; + } + + Ok(None) +} + +/// Deserialize path +fn deserialize_path(bytes: &[u8]) -> Result<Vec<String>, ReadSheetDataError> { + let path_str = std::str::from_utf8(bytes).map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid UTF-8 in path: {}", e), + ) + })?; + + if path_str.is_empty() { + return Ok(Vec::new()); + } + + let segments: Vec<String> = path_str.split('/').map(|s| s.to_string()).collect(); + Ok(segments) +} + +/// Check if paths match +fn paths_match(path: &[String], node: &[&str]) -> bool { + if path.len() != node.len() { + return false; + } + + for (i, segment) in path.iter().enumerate() { + if segment != node[i] { + return false; + } + } + + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deserialize_path() { + let bytes = b"dir/subdir/file.txt"; + let path = deserialize_path(bytes).unwrap(); + assert_eq!(path, vec!["dir", "subdir", "file.txt"]); + } + + #[test] + fn test_paths_match() { + let path = vec!["dir".to_string(), "file.txt".to_string()]; + let node = &["dir", "file.txt"]; + assert!(paths_match(&path, node)); + + let node2 = &["dir", "other.txt"]; + assert!(!paths_match(&path, node2)); + } + + #[test] + fn test_read_index_table() { + let mut data = Vec::new(); + data.extend_from_slice(&123u32.to_le_bytes()); + data.extend_from_slice(&456u16.to_le_bytes()); + data.extend_from_slice(&789u32.to_le_bytes()); + data.extend_from_slice(&1011u16.to_le_bytes()); + + let sources = read_index_table(&data, 0, 2).unwrap(); + assert_eq!(sources.len(), 2); + assert_eq!(sources[0].id(), 123); + assert_eq!(sources[0].version(), 456); + assert_eq!(sources[1].id(), 789); + assert_eq!(sources[1].version(), 1011); + } + + #[test] + fn test_read_bucket_data() { + // Create simple bucket data + let mut bucket_data = Vec::new(); + + // First mapping + let path1 = b"dir/file.txt"; + bucket_data.push(path1.len() as u8); // KEY_LEN + bucket_data.push(0); // FORWARD_TYPE (Latest) + bucket_data.push(0); // FORWARD_INFO_LEN + bucket_data.extend_from_slice(path1); // KEY_BYTES + bucket_data.extend_from_slice(&0u32.to_le_bytes()); // INDEX_OFFSET + + // Second mapping + let path2 = b"other/test.txt"; + bucket_data.push(path2.len() as u8); // KEY_LEN + bucket_data.push(0); // FORWARD_TYPE (Latest) + bucket_data.push(0); // FORWARD_INFO_LEN + bucket_data.extend_from_slice(path2); // KEY_BYTES + bucket_data.extend_from_slice(&1u32.to_le_bytes()); // INDEX_OFFSET + + let index_sources = vec![IndexSource::new(1, 1), IndexSource::new(2, 1)]; + + let mappings = read_bucket_data(&bucket_data, &index_sources).unwrap(); + assert_eq!(mappings.len(), 2); + + // Verify first mapping + assert_eq!( + mappings[0].value(), + &["dir".to_string(), "file.txt".to_string()] + ); + assert_eq!(mappings[0].index_source().id(), 1); + + // Verify second mapping + assert_eq!( + mappings[1].value(), + &["other".to_string(), "test.txt".to_string()] + ); + assert_eq!(mappings[1].index_source().id(), 2); + } + + #[test] + fn test_binary_search_bucket_lookup() { + use crate::sheet::writer::convert_sheet_data_to_bytes; + + // Create test sheet data with multiple buckets + let mut sheet_data = crate::sheet::SheetData::empty(); + + // Add mappings that will go to different buckets + let mapping1 = crate::mapping::LocalMapping::new( + vec!["aaa".to_string(), "file1.txt".to_string()], + crate::index_source::IndexSource::new(1, 1), + crate::mapping::LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = crate::mapping::LocalMapping::new( + vec!["mmm".to_string(), "file2.txt".to_string()], + crate::index_source::IndexSource::new(2, 2), + crate::mapping::LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping3 = crate::mapping::LocalMapping::new( + vec!["zzz".to_string(), "file3.txt".to_string()], + crate::index_source::IndexSource::new(3, 3), + crate::mapping::LocalMappingForward::Latest, + ) + .unwrap(); + + sheet_data.mappings.insert(mapping1.clone()); + sheet_data.mappings.insert(mapping2.clone()); + sheet_data.mappings.insert(mapping3.clone()); + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data); + + // Test finding each mapping using binary search + let node1 = &["aaa", "file1.txt"]; + let result1 = read_mapping(&bytes, node1).unwrap(); + assert!(result1.is_some(), "Should find mapping for aaa/file1.txt"); + + let node2 = &["mmm", "file2.txt"]; + let result2 = read_mapping(&bytes, node2).unwrap(); + assert!(result2.is_some(), "Should find mapping for mmm/file2.txt"); + + let node3 = &["zzz", "file3.txt"]; + let result3 = read_mapping(&bytes, node3).unwrap(); + assert!(result3.is_some(), "Should find mapping for zzz/file3.txt"); + + // Test non-existent mapping + let node4 = &["xxx", "notfound.txt"]; + let result4 = read_mapping(&bytes, node4).unwrap(); + assert!(result4.is_none(), "Should not find non-existent mapping"); + + // Test that binary search handles empty data + let empty_bytes = convert_sheet_data_to_bytes(crate::sheet::SheetData::empty()); + let result5 = read_mapping(&empty_bytes, node1).unwrap(); + assert!(result5.is_none(), "Should not find anything in empty sheet"); + } +} diff --git a/systems/sheet/src/sheet/test.rs b/systems/sheet/src/sheet/test.rs new file mode 100644 index 0000000..ae20be5 --- /dev/null +++ b/systems/sheet/src/sheet/test.rs @@ -0,0 +1,460 @@ +use hex_display::hex_display_slice; + +use crate::{ + index_source::IndexSource, + mapping::{LocalMapping, LocalMappingForward}, + sheet::{ + SheetData, constants::HEADER_SIZE, reader::read_sheet_data, + writer::convert_sheet_data_to_bytes, + }, +}; +use std::collections::HashSet; +use std::fs; + +/// Test writing and re-reading sheet data +#[test] +fn test_sheet_data_roundtrip() { + // Create test data + let _sheet_data = SheetData::empty(); + + // Create some test mappings + let mapping1 = LocalMapping::new( + vec!["src".to_string(), "main.rs".to_string()], + IndexSource::new(1001, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = LocalMapping::new( + vec!["docs".to_string(), "README.md".to_string()], + IndexSource::new(1002, 2), + LocalMappingForward::Ref { + sheet_name: "reference".to_string(), + }, + ) + .unwrap(); + + let mapping3 = LocalMapping::new( + vec![ + "assets".to_string(), + "images".to_string(), + "logo.png".to_string(), + ], + IndexSource::new(1003, 3), + LocalMappingForward::Version { version: 12345 }, + ) + .unwrap(); + + // Add mappings to SheetData + // Note: Since the mappings field of SheetData is private, we need to create SheetData in another way + // Here we directly create a new HashSet + let mut mappings = HashSet::new(); + mappings.insert(mapping1.clone()); + mappings.insert(mapping2.clone()); + mappings.insert(mapping3.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert SheetData to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Verify byte data is not empty + assert!(!bytes.is_empty(), "Converted bytes should not be empty"); + + // Verify file header + assert_eq!(bytes[0], 1, "Sheet version should be 1"); + + // Re-read SheetData from bytes + let restored_sheet_data = + read_sheet_data(&bytes).expect("Failed to read sheet data from bytes"); + + // Verify mapping count + assert_eq!( + restored_sheet_data.mappings.len(), + sheet_data.mappings.len(), + "Restored sheet should have same number of mappings" + ); + + // Verify each mapping exists + for mapping in &sheet_data.mappings { + assert!( + restored_sheet_data.mappings.contains(mapping), + "Restored sheet should contain mapping: {:?}", + mapping + ); + } + + // Verify specific mapping content + for mapping in &restored_sheet_data.mappings { + // Find original mapping + let original_mapping = sheet_data.mappings.get(mapping.value()).unwrap(); + + // Verify path + assert_eq!( + mapping.value(), + original_mapping.value(), + "Path should match" + ); + + // Verify index source + assert_eq!( + mapping.index_source().id(), + original_mapping.index_source().id(), + "Index source ID should match" + ); + + assert_eq!( + mapping.index_source().version(), + original_mapping.index_source().version(), + "Index source version should match" + ); + + // Verify forward information + let (original_type, _, _) = original_mapping.forward().unpack(); + let (restored_type, _, _) = mapping.forward().unpack(); + assert_eq!(restored_type, original_type, "Forward type should match"); + } +} + +/// Test reading and writing empty sheet data +#[test] +fn test_empty_sheet_roundtrip() { + // Create empty SheetData + let sheet_data = SheetData::empty(); + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Verify file header + assert_eq!(bytes.len(), 15, "Empty sheet should have header size only"); + assert_eq!(bytes[0], 1, "Sheet version should be 1"); + + // Verify offsets - For empty sheet, mapping data offset and index table offset should be the same + let mapping_data_offset = + u32::from_le_bytes([bytes[7], bytes[8], bytes[9], bytes[10]]) as usize; + let index_table_offset = + u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]) as usize; + assert_eq!( + mapping_data_offset, index_table_offset, + "For empty sheet, both offsets should be the same" + ); + assert_eq!( + mapping_data_offset, HEADER_SIZE, + "Offsets should point to end of header" + ); + + // Mapping count should be 0 + let mapping_count = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]); + assert_eq!(mapping_count, 0, "Mapping count should be 0"); + + // Index source count should be 0 + let index_count = u16::from_le_bytes([bytes[5], bytes[6]]); + assert_eq!(index_count, 0, "Index count should be 0"); + + // Re-read + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read empty sheet data"); + + // Verify it's empty + assert!( + restored_sheet_data.mappings.is_empty(), + "Restored empty sheet should have no mappings" + ); +} + +/// Test reading and writing a single mapping +#[test] +fn test_single_mapping_roundtrip() { + // Create a single mapping + let mapping = LocalMapping::new( + vec!["test.txt".to_string()], + IndexSource::new(999, 42), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Re-read + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + + // Verify + assert_eq!(restored_sheet_data.mappings.len(), 1); + let restored_mapping = restored_sheet_data.mappings.iter().next().unwrap(); + + assert_eq!(restored_mapping.value(), &["test.txt".to_string()]); + assert_eq!(restored_mapping.index_source().id(), 999); + assert_eq!(restored_mapping.index_source().version(), 42); + + let (forward_type, _, _) = restored_mapping.forward().unpack(); + assert_eq!(forward_type, 0); // Latest type id is 0 +} + +/// Test file system read/write +#[test] +fn test_file_system_roundtrip() { + // Create test data + let mapping1 = LocalMapping::new( + vec!["file0.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = LocalMapping::new( + vec!["dir1".to_string(), "file1.txt".to_string()], + IndexSource::new(2, 2), + LocalMappingForward::Ref { + sheet_name: "other".to_string(), + }, + ) + .unwrap(); + + let mapping3 = LocalMapping::new( + vec!["dir2".to_string(), "file2.txt".to_string()], + IndexSource::new(3, 3), + LocalMappingForward::Version { version: 35 }, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping1.clone()); + mappings.insert(mapping2.clone()); + mappings.insert(mapping3.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Write to file + let test_file_path = ".temp/test.sheet"; + let test_file_path_hex = ".temp/test_hex.txt"; + + // Ensure directory exists + if let Some(parent) = std::path::Path::new(test_file_path).parent() { + fs::create_dir_all(parent).expect("Failed to create test directory"); + } + + fs::write(test_file_path, &bytes).expect("Failed to write test file"); + fs::write(test_file_path_hex, hex_display_slice(&bytes)).expect("Failed to write test file"); + + // Read file + let file_bytes = fs::read(test_file_path).expect("Failed to read test file"); + + // Verify file content matches original bytes + assert_eq!( + file_bytes, bytes, + "File content should match original bytes" + ); + + // Re-read SheetData from file bytes + let restored_from_file = read_sheet_data(&file_bytes).expect("Failed to read from file bytes"); + + // Use SheetData's Eq trait for direct comparison + assert_eq!( + restored_from_file, sheet_data, + "Restored sheet data should be equal to original" + ); + + // Verify mappings in SheetData read from file + // Check if each original mapping can be found in restored data + for original_mapping in &sheet_data.mappings { + let found = restored_from_file + .mappings + .iter() + .any(|m| m == original_mapping); + assert!( + found, + "Original mapping {:?} should be present in restored sheet data", + original_mapping + ); + } + + // Also check if each mapping in restored data can be found in original data + for restored_mapping in &restored_from_file.mappings { + let found = sheet_data.mappings.iter().any(|m| m == restored_mapping); + assert!( + found, + "Restored mapping {:?} should be present in original sheet data", + restored_mapping + ); + } + + // Test file remains in .temp/test.sheet for subsequent inspection + // Note: Need to manually clean up .temp directory before next test run +} + +/// Test reading and writing different forward types +#[test] +fn test_different_forward_types() { + // Test Latest type + let mapping_latest = LocalMapping::new( + vec!["latest.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + // Test Ref type + let mapping_ref = LocalMapping::new( + vec!["ref.txt".to_string()], + IndexSource::new(2, 2), + LocalMappingForward::Ref { + sheet_name: "reference_sheet".to_string(), + }, + ) + .unwrap(); + + // Test Version type + let mapping_version = LocalMapping::new( + vec!["version.txt".to_string()], + IndexSource::new(3, 3), + LocalMappingForward::Version { version: 54321 }, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping_latest.clone()); + mappings.insert(mapping_ref.clone()); + mappings.insert(mapping_version.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes and re-read + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + + // Verify all mappings exist + assert_eq!(restored_sheet_data.mappings.len(), 3); + + // Verify Latest type + let restored_latest = restored_sheet_data + .mappings + .get(&vec!["latest.txt".to_string()]) + .unwrap(); + let (latest_type, latest_len, _) = restored_latest.forward().unpack(); + assert_eq!(latest_type, 0); + assert_eq!(latest_len, 0); + + // Verify Ref type + let restored_ref = restored_sheet_data + .mappings + .get(&vec!["ref.txt".to_string()]) + .unwrap(); + let (ref_type, ref_len, ref_bytes) = restored_ref.forward().unpack(); + assert_eq!(ref_type, 1); + assert_eq!(ref_len as usize, "reference_sheet".len()); + assert_eq!(String::from_utf8(ref_bytes).unwrap(), "reference_sheet"); + + // Verify Version type + let restored_version = restored_sheet_data + .mappings + .get(&vec!["version.txt".to_string()]) + .unwrap(); + let (version_type, version_len, version_bytes) = restored_version.forward().unpack(); + assert_eq!(version_type, 2); + assert_eq!(version_len, 2); // u16 is 2 bytes + assert_eq!(u16::from_be_bytes(version_bytes.try_into().unwrap()), 54321); +} + +/// Test duplicate index source optimization +#[test] +fn test_duplicate_index_source_optimization() { + // Create multiple mappings sharing the same index source + let shared_source = IndexSource::new(777, 88); + + let mapping1 = LocalMapping::new( + vec!["file1.txt".to_string()], + shared_source, + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = LocalMapping::new( + vec!["file2.txt".to_string()], + shared_source, + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping3 = LocalMapping::new( + vec!["file3.txt".to_string()], + shared_source, + LocalMappingForward::Latest, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping1); + mappings.insert(mapping2); + mappings.insert(mapping3); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Verify index table should have only one entry + let index_count = u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]); + assert_eq!(index_count, 1, "Should have only one unique index source"); + + // Re-read and verify + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + assert_eq!(restored_sheet_data.mappings.len(), 3); + + // Verify all mappings use the same index source + for mapping in &restored_sheet_data.mappings { + assert_eq!(mapping.index_source().id(), 777); + assert_eq!(mapping.index_source().version(), 88); + } +} + +/// Test path serialization and deserialization +#[test] +fn test_path_serialization_deserialization() { + // Test various paths + let test_cases = vec![ + vec!["single".to_string()], + vec!["dir".to_string(), "file.txt".to_string()], + vec![ + "a".to_string(), + "b".to_string(), + "c".to_string(), + "d.txt".to_string(), + ], + vec!["with spaces".to_string(), "file name.txt".to_string()], + vec!["unicode".to_string(), "文件.txt".to_string()], + ]; + + for path in test_cases { + let mapping = LocalMapping::new( + path.clone(), + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes and re-read + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + + // Verify path + let restored_mapping = restored_sheet_data.mappings.iter().next().unwrap(); + assert_eq!( + restored_mapping.value(), + &path, + "Path should be preserved after roundtrip" + ); + } +} diff --git a/systems/sheet/src/sheet/writer.rs b/systems/sheet/src/sheet/writer.rs new file mode 100644 index 0000000..5d9b257 --- /dev/null +++ b/systems/sheet/src/sheet/writer.rs @@ -0,0 +1,264 @@ +use crate::index_source::IndexSource; +use crate::mapping::LocalMapping; +use crate::sheet::SheetData; +use crate::sheet::constants::{ + CURRENT_SHEET_VERSION, HEADER_SIZE, INDEX_ENTRY_SIZE, MAPPING_DIR_ENTRY_SIZE, +}; +use sha2::{Digest, Sha256}; +use std::collections::{BTreeMap, HashMap}; + +/// Convert SheetData to byte array +pub fn convert_sheet_data_to_bytes(sheet_data: SheetData) -> Vec<u8> { + // Collect all mappings + let mappings: Vec<LocalMapping> = sheet_data.mappings.into_iter().collect(); + + // Collect all unique index sources + let mut index_sources = Vec::new(); + let mut source_to_offset = HashMap::new(); + + for mapping in &mappings { + let source = mapping.index_source(); + let key = (source.id(), source.version()); + if !source_to_offset.contains_key(&key) { + let offset = index_sources.len() as u32; + source_to_offset.insert(key, offset); + index_sources.push(IndexSource::new(source.id(), source.version())); + } + } + + let index_count = index_sources.len() as u32; + + // 1. Organize mappings into hash buckets + let mut buckets: BTreeMap<u32, Vec<LocalMapping>> = BTreeMap::new(); + for mapping in mappings { + let hash = calculate_path_hash(mapping.value()); + let bucket_key = hash >> 24; // Take high 8 bits as bucket key + buckets + .entry(bucket_key) + .or_insert_with(Vec::new) + .push(mapping); + } + + let bucket_count = buckets.len() as u16; + + // 2. Calculate offsets for each section + let header_size = HEADER_SIZE; + let mapping_dir_offset = header_size; + let mapping_dir_size = bucket_count as usize * MAPPING_DIR_ENTRY_SIZE; + let index_table_offset = mapping_dir_offset + mapping_dir_size; + let index_table_size = index_count as usize * INDEX_ENTRY_SIZE; + + // 3. Calculate bucket data offsets + let mut bucket_data_offset = index_table_offset + index_table_size; + let mut bucket_entries = Vec::new(); + + // Prepare data for each bucket + for (&bucket_key, bucket_mappings) in &buckets { + // Calculate bucket data size + let mut bucket_data = Vec::new(); + for mapping in bucket_mappings { + write_mapping_bucket(&mut bucket_data, mapping, &source_to_offset); + } + + let bucket_length = bucket_data.len() as u32; + bucket_entries.push((bucket_key, bucket_data_offset, bucket_length, bucket_data)); + bucket_data_offset += bucket_length as usize; + } + + // 4. Build result + let total_size = bucket_data_offset; + let mut result = Vec::with_capacity(total_size); + + // 5. File header + result.push(CURRENT_SHEET_VERSION); // Version (1 byte) + result.extend_from_slice(&bucket_count.to_le_bytes()); // Mapping bucket count (2 bytes) + result.extend_from_slice(&index_count.to_le_bytes()); // Index count (4 bytes) + result.extend_from_slice(&(mapping_dir_offset as u32).to_le_bytes()); // Mapping directory offset (4 bytes) + result.extend_from_slice(&(index_table_offset as u32).to_le_bytes()); // Index table offset (4 bytes) + + // 6. Mapping directory + for (bucket_key, bucket_offset, bucket_length, _) in &bucket_entries { + result.extend_from_slice(&bucket_key.to_le_bytes()); // Bucket hash prefix (4 bytes) + result.extend_from_slice(&(*bucket_offset as u32).to_le_bytes()); // Bucket offset (4 bytes) + result.extend_from_slice(&bucket_length.to_le_bytes()); // Bucket length (4 bytes) + } + + // 7. Index table + for source in &index_sources { + result.extend_from_slice(&source.id().to_le_bytes()); // Index ID (4 bytes) + result.extend_from_slice(&source.version().to_le_bytes()); // Index version (2 bytes) + } + + // 8. Bucket data + for (_, _, _, bucket_data) in bucket_entries { + result.extend_from_slice(&bucket_data); + } + + result +} + +/// Calculate path hash (SHA256, take first 4 bytes) +pub fn calculate_path_hash(path: &[String]) -> u32 { + let mut hasher = Sha256::new(); + for segment in path { + hasher.update(segment.as_bytes()); + hasher.update(b"/"); + } + let result = hasher.finalize(); + u32::from_le_bytes([result[0], result[1], result[2], result[3]]) +} + +/// Write single mapping to bucket data +fn write_mapping_bucket( + result: &mut Vec<u8>, + mapping: &LocalMapping, + source_to_offset: &HashMap<(u32, u16), u32>, +) { + // Serialize path + let path_bytes = serialize_path(mapping.value()); + let path_len = path_bytes.len(); + + // Get forward information + let (forward_type, forward_info_len, forward_bytes) = mapping.forward().unpack(); + + // Get index offset + let source = mapping.index_source(); + let key = (source.id(), source.version()); + let index_offset = source_to_offset.get(&key).unwrap(); + + // Write mapping bucket entry + result.push(path_len as u8); // Key length (1 byte) + result.push(forward_type); // Forward type (1 byte) + result.push(forward_info_len); // Forward info length (1 byte) + + // Write key data (path) + result.extend_from_slice(&path_bytes); + + // Write forward info data + result.extend_from_slice(&forward_bytes); + + // Write index offset + result.extend_from_slice(&index_offset.to_le_bytes()); // Index offset (4 bytes) +} + +/// Serialize path to byte array +fn serialize_path(path: &[String]) -> Vec<u8> { + let mut result = Vec::new(); + for (i, segment) in path.iter().enumerate() { + result.extend_from_slice(segment.as_bytes()); + if i < path.len() - 1 { + result.push(b'/'); + } + } + result +} + +/// Test only: Calculate single mapping bucket entry size +#[cfg(test)] +fn calculate_mapping_bucket_size(mapping: &LocalMapping) -> usize { + use crate::sheet::constants::MAPPING_BUCKET_MIN_SIZE; + + let path_size = serialize_path(mapping.value()).len(); + let (_, forward_info_len, _) = mapping.forward().unpack(); + + MAPPING_BUCKET_MIN_SIZE + path_size + forward_info_len as usize +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{mapping::LocalMappingForward, sheet::constants::MAPPING_BUCKET_MIN_SIZE}; + + #[test] + fn test_serialize_path() { + let path = vec![ + "dir".to_string(), + "subdir".to_string(), + "file.txt".to_string(), + ]; + let bytes = serialize_path(&path); + assert_eq!(bytes, b"dir/subdir/file.txt"); + } + + #[test] + fn test_calculate_path_hash() { + let path1 = vec!["test".to_string(), "file.txt".to_string()]; + let path2 = vec!["test".to_string(), "file.txt".to_string()]; + let path3 = vec!["other".to_string(), "file.txt".to_string()]; + + let hash1 = calculate_path_hash(&path1); + let hash2 = calculate_path_hash(&path2); + let hash3 = calculate_path_hash(&path3); + + assert_eq!(hash1, hash2); + assert_ne!(hash1, hash3); + } + + #[test] + fn test_calculate_mapping_bucket_size() { + let mapping = LocalMapping::new( + vec!["test".to_string(), "file.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let size = calculate_mapping_bucket_size(&mapping); + // 13 == "test/file.txt".len() + assert_eq!(size, MAPPING_BUCKET_MIN_SIZE + 13); + } + + #[test] + fn test_convert_empty_sheet() { + let sheet_data = SheetData::empty(); + let bytes = convert_sheet_data_to_bytes(sheet_data); + + // Verify file header + assert_eq!(bytes[0], CURRENT_SHEET_VERSION); // Version + assert_eq!(u16::from_le_bytes([bytes[1], bytes[2]]), 0); // Mapping bucket count + assert_eq!( + u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]), + 0 + ); // Index count + + // Total size should be HEADER_SIZE + assert_eq!(bytes.len(), HEADER_SIZE); + } + + #[test] + fn test_convert_sheet_with_one_mapping() { + let mut sheet_data = SheetData::empty(); + let mapping = LocalMapping::new( + vec!["dir".to_string(), "file.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + sheet_data.mappings.insert(mapping); + + let bytes = convert_sheet_data_to_bytes(sheet_data); + + // Verify file header + assert_eq!(bytes[0], CURRENT_SHEET_VERSION); // Version + assert_eq!(u16::from_le_bytes([bytes[1], bytes[2]]), 1); // Should have 1 bucket + assert_eq!( + u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]), + 1 + ); // 1 index source + + // Verify mapping directory + let mapping_dir_offset = HEADER_SIZE; + + // Bucket offset should point after the index table + let index_table_offset = + u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]) as usize; + let bucket_offset = u32::from_le_bytes([ + bytes[mapping_dir_offset + 4], + bytes[mapping_dir_offset + 5], + bytes[mapping_dir_offset + 6], + bytes[mapping_dir_offset + 7], + ]) as usize; + + assert!(bucket_offset >= index_table_offset + INDEX_ENTRY_SIZE); + } +} |
