From 00aa123a7cd74ee0307157fdbb31679c01f1db33 Mon Sep 17 00:00:00 2001 From: 魏曹先生 <1992414357@qq.com> Date: Tue, 24 Feb 2026 18:22:42 +0800 Subject: Move sheet reader and writer to v1 subdirectory --- systems/sheet/src/sheet/constants.rs | 55 --- systems/sheet/src/sheet/reader.rs | 627 -------------------------------- systems/sheet/src/sheet/test.rs | 460 ----------------------- systems/sheet/src/sheet/v1/constants.rs | 55 +++ systems/sheet/src/sheet/v1/reader.rs | 627 ++++++++++++++++++++++++++++++++ systems/sheet/src/sheet/v1/test.rs | 460 +++++++++++++++++++++++ systems/sheet/src/sheet/v1/writer.rs | 264 ++++++++++++++ systems/sheet/src/sheet/writer.rs | 264 -------------- 8 files changed, 1406 insertions(+), 1406 deletions(-) delete mode 100644 systems/sheet/src/sheet/constants.rs delete mode 100644 systems/sheet/src/sheet/reader.rs delete mode 100644 systems/sheet/src/sheet/test.rs create mode 100644 systems/sheet/src/sheet/v1/constants.rs create mode 100644 systems/sheet/src/sheet/v1/reader.rs create mode 100644 systems/sheet/src/sheet/v1/test.rs create mode 100644 systems/sheet/src/sheet/v1/writer.rs delete mode 100644 systems/sheet/src/sheet/writer.rs (limited to 'systems/sheet') diff --git a/systems/sheet/src/sheet/constants.rs b/systems/sheet/src/sheet/constants.rs deleted file mode 100644 index 69714bb..0000000 --- a/systems/sheet/src/sheet/constants.rs +++ /dev/null @@ -1,55 +0,0 @@ -// Header (15: 1 + 2 + 4 + 4 + 4) -// -// [SHEET_VERSION: u8] -// [MAPPING_BUCKET_COUNT: u16] -// [INDEX_COUNT: u32] -// [OFFSET_MAPPING_DIR: u32] -// [OFFSET_INDEX_TABLE: u32] - -pub const CURRENT_SHEET_VERSION: u8 = 1; -pub const HEADER_SIZE: usize = 0 - + 1 // SHEET_VERSION - + 2 // MAPPING_BUCKET_COUNT - + 4 // INDEX_COUNT - + 4 // OFFSET_MAPPING_DIR - + 4 // OFFSET_INDEX_TABLE -; - -// Mapping Directory (12: 4 + 4 + 4) -// -// [BUCKET_HASH_PREFIX: u32] -// [BUCKET_OFFSET: u32] -// [BUCKET_LENGTH: u32] - -pub const MAPPING_DIR_ENTRY_SIZE: usize = 0 - + 4 // 
BUCKET_HASH_PREFIX - + 4 // BUCKET_OFFSET - + 4 // BUCKET_LENGTH -; - -// Mapping Buckets (6 + 1b + N) -// -// [KEY_LEN: u8] -// [FORWARD_TYPE: byte] -// [FORWARD_INFO_LEN: u8] -// [KEY_BYTES: ?] -// [FORWARD_INFO_BYTES: ?] -// [INDEX_OFFSET: u32] - -pub const MAPPING_BUCKET_MIN_SIZE: usize = 0 - + 1 // KEY_LEN - + 1 // FORWARD_TYPE - + 1 // FORWARD_INFO_LEN - + 2 // KEY_BYTES (MIN:1) + FORWARD_INFO_BYTES (MIN:1) - + 2 // INDEX_OFFSET -; - -// Index Table (6: 4 + 2) -// -// [INDEX_ID: u32] -// [INDEX_VERSION: u16] - -pub const INDEX_ENTRY_SIZE: usize = 0 - + 4 // INDEX_ID - + 2 // INDEX_VERSION -; diff --git a/systems/sheet/src/sheet/reader.rs b/systems/sheet/src/sheet/reader.rs deleted file mode 100644 index d86b097..0000000 --- a/systems/sheet/src/sheet/reader.rs +++ /dev/null @@ -1,627 +0,0 @@ -use crate::{ - index_source::IndexSource, - mapping::{LocalMapping, LocalMappingForward, Mapping}, - sheet::{ - SheetData, - constants::{ - CURRENT_SHEET_VERSION, HEADER_SIZE, INDEX_ENTRY_SIZE, MAPPING_BUCKET_MIN_SIZE, - MAPPING_DIR_ENTRY_SIZE, - }, - error::ReadSheetDataError, - }, -}; -use std::collections::HashSet; - -/// Reconstruct complete SheetData from full sheet data -pub fn read_sheet_data(full_sheet_data: &[u8]) -> Result<SheetData, ReadSheetDataError> { - if full_sheet_data.len() < HEADER_SIZE { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Sheet data too small for header", - ) - .into()); - } - - // Read file header - let version = full_sheet_data[0]; - if version != CURRENT_SHEET_VERSION { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Unsupported sheet version: {}", version), - ) - .into()); - } - - let bucket_count = u16::from_le_bytes([full_sheet_data[1], full_sheet_data[2]]) as usize; - let index_count = u32::from_le_bytes([ - full_sheet_data[3], - full_sheet_data[4], - full_sheet_data[5], - full_sheet_data[6], - ]) as usize; - - let mapping_dir_offset = u32::from_le_bytes([ - full_sheet_data[7], - full_sheet_data[8], -
full_sheet_data[9], - full_sheet_data[10], - ]) as usize; - - let index_table_offset = u32::from_le_bytes([ - full_sheet_data[11], - full_sheet_data[12], - full_sheet_data[13], - full_sheet_data[14], - ]) as usize; - - // Validate offsets - if mapping_dir_offset > full_sheet_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Mapping directory offset out of bounds", - ) - .into()); - } - - if index_table_offset > full_sheet_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Index table offset out of bounds", - ) - .into()); - } - - // Read index table - let index_sources = read_index_table(full_sheet_data, index_table_offset, index_count)?; - - // Read mapping directory and build all mappings - let mut mappings = HashSet::new(); - let mapping_dir_end = mapping_dir_offset + bucket_count * MAPPING_DIR_ENTRY_SIZE; - - if mapping_dir_end > full_sheet_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Mapping directory exceeds buffer", - ) - .into()); - } - - // Iterate through all buckets - for i in 0..bucket_count { - let dir_entry_offset = mapping_dir_offset + i * MAPPING_DIR_ENTRY_SIZE; - - // Skip BUCKET_HASH_PREFIX, directly read BUCKET_OFFSET and BUCKET_LENGTH - let bucket_offset = u32::from_le_bytes([ - full_sheet_data[dir_entry_offset + 4], - full_sheet_data[dir_entry_offset + 5], - full_sheet_data[dir_entry_offset + 6], - full_sheet_data[dir_entry_offset + 7], - ]) as usize; - - let bucket_length = u32::from_le_bytes([ - full_sheet_data[dir_entry_offset + 8], - full_sheet_data[dir_entry_offset + 9], - full_sheet_data[dir_entry_offset + 10], - full_sheet_data[dir_entry_offset + 11], - ]) as usize; - - // Read bucket data - if bucket_offset + bucket_length > full_sheet_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - format!("Bucket data exceeds buffer (bucket {})", i), - ) - .into()); - } - - let bucket_data = 
&full_sheet_data[bucket_offset..bucket_offset + bucket_length]; - let bucket_mappings = read_bucket_data(bucket_data, &index_sources)?; - - for mapping in bucket_mappings { - mappings.insert(mapping); - } - } - - Ok(SheetData { mappings }) -} - -/// Read mapping information for a specific node from complete sheet data -pub fn read_mapping<'a>( - full_sheet_data: &'a [u8], - node: &[&str], -) -> Result<Option<(Mapping<'a>, LocalMappingForward)>, ReadSheetDataError> { - if full_sheet_data.len() < HEADER_SIZE { - return Ok(None); - } - - // Read file header - let version = full_sheet_data[0]; - if version != CURRENT_SHEET_VERSION { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Unsupported sheet version: {}", version), - ) - .into()); - } - - let bucket_count = u16::from_le_bytes([full_sheet_data[1], full_sheet_data[2]]) as usize; - let index_count = u32::from_le_bytes([ - full_sheet_data[3], - full_sheet_data[4], - full_sheet_data[5], - full_sheet_data[6], - ]) as usize; - - let mapping_dir_offset = u32::from_le_bytes([ - full_sheet_data[7], - full_sheet_data[8], - full_sheet_data[9], - full_sheet_data[10], - ]) as usize; - - let index_table_offset = u32::from_le_bytes([ - full_sheet_data[11], - full_sheet_data[12], - full_sheet_data[13], - full_sheet_data[14], - ]) as usize; - - // Validate offsets - if mapping_dir_offset > full_sheet_data.len() || index_table_offset > full_sheet_data.len() { - return Ok(None); - } - - // Read index table - let index_sources = read_index_table(full_sheet_data, index_table_offset, index_count)?; - - // Calculate hash prefix for target node - let node_path: Vec<String> = node.iter().map(|s| s.to_string()).collect(); - let target_hash = crate::sheet::writer::calculate_path_hash(&node_path); - let target_bucket_key = target_hash >> 24; // Take high 8 bits as bucket key - - // Find corresponding bucket in mapping directory using binary search - let mapping_dir_end = mapping_dir_offset + bucket_count * MAPPING_DIR_ENTRY_SIZE; - if
mapping_dir_end > full_sheet_data.len() { - return Ok(None); - } - - // Binary search for the bucket with matching hash prefix - let mut left = 0; - let mut right = bucket_count; - - while left < right { - let mid = left + (right - left) / 2; - let dir_entry_offset = mapping_dir_offset + mid * MAPPING_DIR_ENTRY_SIZE; - - let bucket_hash_prefix = u32::from_le_bytes([ - full_sheet_data[dir_entry_offset], - full_sheet_data[dir_entry_offset + 1], - full_sheet_data[dir_entry_offset + 2], - full_sheet_data[dir_entry_offset + 3], - ]); - - if bucket_hash_prefix < target_bucket_key { - left = mid + 1; - } else if bucket_hash_prefix > target_bucket_key { - right = mid; - } else { - // Found matching bucket - let bucket_offset = u32::from_le_bytes([ - full_sheet_data[dir_entry_offset + 4], - full_sheet_data[dir_entry_offset + 5], - full_sheet_data[dir_entry_offset + 6], - full_sheet_data[dir_entry_offset + 7], - ]) as usize; - - let bucket_length = u32::from_le_bytes([ - full_sheet_data[dir_entry_offset + 8], - full_sheet_data[dir_entry_offset + 9], - full_sheet_data[dir_entry_offset + 10], - full_sheet_data[dir_entry_offset + 11], - ]) as usize; - - // Read bucket data and find target node - if bucket_offset + bucket_length > full_sheet_data.len() { - break; - } - - let bucket_data = &full_sheet_data[bucket_offset..bucket_offset + bucket_length]; - return find_mapping_in_bucket(bucket_data, node, &index_sources); - } - } - - Ok(None) -} - -/// Read index table -fn read_index_table( - data: &[u8], - offset: usize, - count: usize, -) -> Result<Vec<IndexSource>, ReadSheetDataError> { - let table_size = count * INDEX_ENTRY_SIZE; - if offset + table_size > data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Index table exceeds buffer", - ) - .into()); - } - - let mut sources = Vec::with_capacity(count); - let mut pos = offset; - - for _ in 0..count { - if pos + INDEX_ENTRY_SIZE > data.len() { - break; - } - - let id = u32::from_le_bytes([data[pos], data[pos +
1], data[pos + 2], data[pos + 3]]); - let ver = u16::from_le_bytes([data[pos + 4], data[pos + 5]]); - - sources.push(IndexSource::new(id, ver)); - pos += INDEX_ENTRY_SIZE; - } - - Ok(sources) -} - -/// Read all mappings in bucket data -fn read_bucket_data( - bucket_data: &[u8], - index_sources: &[IndexSource], -) -> Result<Vec<LocalMapping>, ReadSheetDataError> { - let mut mappings = Vec::new(); - let mut pos = 0; - - while pos < bucket_data.len() { - if pos + MAPPING_BUCKET_MIN_SIZE > bucket_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Incomplete mapping bucket entry", - ) - .into()); - } - - // Read mapping bucket entry header - let key_len = bucket_data[pos] as usize; - let forward_type = bucket_data[pos + 1]; - let forward_info_len = bucket_data[pos + 2] as usize; - - pos += 3; // KEY_LEN + FORWARD_TYPE + FORWARD_INFO_LEN - - // Check bounds - if pos + key_len > bucket_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Key data exceeds buffer", - ) - .into()); - } - - // Read key data (path) - let key_bytes = &bucket_data[pos..pos + key_len]; - let path = deserialize_path(key_bytes)?; - pos += key_len; - - // Read forward info data - if pos + forward_info_len > bucket_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Forward info data exceeds buffer", - ) - .into()); - } - - let forward_bytes = &bucket_data[pos..pos + forward_info_len]; - pos += forward_info_len; - - // Read index offset - if pos + 4 > bucket_data.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Index offset exceeds buffer", - ) - .into()); - } - - let index_offset = u32::from_le_bytes([ - bucket_data[pos], - bucket_data[pos + 1], - bucket_data[pos + 2], - bucket_data[pos + 3], - ]) as usize; - pos += 4; - - // Get index source - if index_offset >= index_sources.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid index
offset: {}", index_offset), - ) - .into()); - } - - let source = index_sources[index_offset]; - - // Build forward info - let forward = LocalMappingForward::pack(forward_type, forward_bytes).ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Failed to unpack forward info", - ) - })?; - - // Create LocalMapping - let mapping = LocalMapping::new(path, source, forward).ok_or_else(|| { - std::io::Error::new(std::io::ErrorKind::InvalidData, "Failed to create mapping") - })?; - - mappings.push(mapping); - } - - Ok(mappings) -} - -/// Find mapping for specific node in bucket data -fn find_mapping_in_bucket<'a>( - bucket_data: &'a [u8], - node: &[&str], - index_sources: &[IndexSource], -) -> Result<Option<(Mapping<'a>, LocalMappingForward)>, ReadSheetDataError> { - let mut pos = 0; - - while pos < bucket_data.len() { - if pos + MAPPING_BUCKET_MIN_SIZE > bucket_data.len() { - break; - } - - // Read mapping bucket entry header - let key_len = bucket_data[pos] as usize; - let forward_type = bucket_data[pos + 1]; - let forward_info_len = bucket_data[pos + 2] as usize; - - let header_end = pos + 3; // KEY_LEN + FORWARD_TYPE + FORWARD_INFO_LEN - - // Check bounds - if header_end + key_len > bucket_data.len() { - break; - } - - // Read key data (path) - let key_bytes = &bucket_data[header_end..header_end + key_len]; - let current_path = deserialize_path(key_bytes)?; - - // Check if matches target node - if paths_match(&current_path, node) { - // Read forward info data - let forward_start = header_end + key_len; - if forward_start + forward_info_len > bucket_data.len() { - break; - } - - let forward_bytes = &bucket_data[forward_start..forward_start + forward_info_len]; - - // Read index offset - let index_offset_pos = forward_start + forward_info_len; - if index_offset_pos + 4 > bucket_data.len() { - break; - } - - let index_offset = u32::from_le_bytes([ - bucket_data[index_offset_pos], - bucket_data[index_offset_pos + 1], - bucket_data[index_offset_pos + 2], -
bucket_data[index_offset_pos + 3], - ]) as usize; - - // Get index source - if index_offset >= index_sources.len() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid index offset: {}", index_offset), - ) - .into()); - } - - let source = index_sources[index_offset]; - - // Build forward info - let forward = - LocalMappingForward::pack(forward_type, forward_bytes).ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - "Failed to unpack forward info", - ) - })?; - - // Create Mapping - let path_str = std::str::from_utf8(key_bytes).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid UTF-8 in path: {}", e), - ) - })?; - let mapping = Mapping::new("", path_str, source); - - return Ok(Some((mapping, forward))); - } - - // Move to next mapping entry - // Entry size = 3 (header) + key_len + forward_info_len + 4 (index offset) - pos = header_end + key_len + forward_info_len + 4; - } - - Ok(None) -} - -/// Deserialize path -fn deserialize_path(bytes: &[u8]) -> Result<Vec<String>, ReadSheetDataError> { - let path_str = std::str::from_utf8(bytes).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid UTF-8 in path: {}", e), - ) - })?; - - if path_str.is_empty() { - return Ok(Vec::new()); - } - - let segments: Vec<String> = path_str.split('/').map(|s| s.to_string()).collect(); - Ok(segments) -} - -/// Check if paths match -fn paths_match(path: &[String], node: &[&str]) -> bool { - if path.len() != node.len() { - return false; - } - - for (i, segment) in path.iter().enumerate() { - if segment != node[i] { - return false; - } - } - - true -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_deserialize_path() { - let bytes = b"dir/subdir/file.txt"; - let path = deserialize_path(bytes).unwrap(); - assert_eq!(path, vec!["dir", "subdir", "file.txt"]); - } - - #[test] - fn test_paths_match() { - let path = vec!["dir".to_string(), "file.txt".to_string()];
- let node = &["dir", "file.txt"]; - assert!(paths_match(&path, node)); - - let node2 = &["dir", "other.txt"]; - assert!(!paths_match(&path, node2)); - } - - #[test] - fn test_read_index_table() { - let mut data = Vec::new(); - data.extend_from_slice(&123u32.to_le_bytes()); - data.extend_from_slice(&456u16.to_le_bytes()); - data.extend_from_slice(&789u32.to_le_bytes()); - data.extend_from_slice(&1011u16.to_le_bytes()); - - let sources = read_index_table(&data, 0, 2).unwrap(); - assert_eq!(sources.len(), 2); - assert_eq!(sources[0].id(), 123); - assert_eq!(sources[0].version(), 456); - assert_eq!(sources[1].id(), 789); - assert_eq!(sources[1].version(), 1011); - } - - #[test] - fn test_read_bucket_data() { - // Create simple bucket data - let mut bucket_data = Vec::new(); - - // First mapping - let path1 = b"dir/file.txt"; - bucket_data.push(path1.len() as u8); // KEY_LEN - bucket_data.push(0); // FORWARD_TYPE (Latest) - bucket_data.push(0); // FORWARD_INFO_LEN - bucket_data.extend_from_slice(path1); // KEY_BYTES - bucket_data.extend_from_slice(&0u32.to_le_bytes()); // INDEX_OFFSET - - // Second mapping - let path2 = b"other/test.txt"; - bucket_data.push(path2.len() as u8); // KEY_LEN - bucket_data.push(0); // FORWARD_TYPE (Latest) - bucket_data.push(0); // FORWARD_INFO_LEN - bucket_data.extend_from_slice(path2); // KEY_BYTES - bucket_data.extend_from_slice(&1u32.to_le_bytes()); // INDEX_OFFSET - - let index_sources = vec![IndexSource::new(1, 1), IndexSource::new(2, 1)]; - - let mappings = read_bucket_data(&bucket_data, &index_sources).unwrap(); - assert_eq!(mappings.len(), 2); - - // Verify first mapping - assert_eq!( - mappings[0].value(), - &["dir".to_string(), "file.txt".to_string()] - ); - assert_eq!(mappings[0].index_source().id(), 1); - - // Verify second mapping - assert_eq!( - mappings[1].value(), - &["other".to_string(), "test.txt".to_string()] - ); - assert_eq!(mappings[1].index_source().id(), 2); - } - - #[test] - fn test_binary_search_bucket_lookup() { 
- use crate::sheet::writer::convert_sheet_data_to_bytes; - - // Create test sheet data with multiple buckets - let mut sheet_data = crate::sheet::SheetData::empty(); - - // Add mappings that will go to different buckets - let mapping1 = crate::mapping::LocalMapping::new( - vec!["aaa".to_string(), "file1.txt".to_string()], - crate::index_source::IndexSource::new(1, 1), - crate::mapping::LocalMappingForward::Latest, - ) - .unwrap(); - - let mapping2 = crate::mapping::LocalMapping::new( - vec!["mmm".to_string(), "file2.txt".to_string()], - crate::index_source::IndexSource::new(2, 2), - crate::mapping::LocalMappingForward::Latest, - ) - .unwrap(); - - let mapping3 = crate::mapping::LocalMapping::new( - vec!["zzz".to_string(), "file3.txt".to_string()], - crate::index_source::IndexSource::new(3, 3), - crate::mapping::LocalMappingForward::Latest, - ) - .unwrap(); - - sheet_data.mappings.insert(mapping1.clone()); - sheet_data.mappings.insert(mapping2.clone()); - sheet_data.mappings.insert(mapping3.clone()); - - // Convert to bytes - let bytes = convert_sheet_data_to_bytes(sheet_data); - - // Test finding each mapping using binary search - let node1 = &["aaa", "file1.txt"]; - let result1 = read_mapping(&bytes, node1).unwrap(); - assert!(result1.is_some(), "Should find mapping for aaa/file1.txt"); - - let node2 = &["mmm", "file2.txt"]; - let result2 = read_mapping(&bytes, node2).unwrap(); - assert!(result2.is_some(), "Should find mapping for mmm/file2.txt"); - - let node3 = &["zzz", "file3.txt"]; - let result3 = read_mapping(&bytes, node3).unwrap(); - assert!(result3.is_some(), "Should find mapping for zzz/file3.txt"); - - // Test non-existent mapping - let node4 = &["xxx", "notfound.txt"]; - let result4 = read_mapping(&bytes, node4).unwrap(); - assert!(result4.is_none(), "Should not find non-existent mapping"); - - // Test that binary search handles empty data - let empty_bytes = convert_sheet_data_to_bytes(crate::sheet::SheetData::empty()); - let result5 = 
read_mapping(&empty_bytes, node1).unwrap(); - assert!(result5.is_none(), "Should not find anything in empty sheet"); - } -} diff --git a/systems/sheet/src/sheet/test.rs b/systems/sheet/src/sheet/test.rs deleted file mode 100644 index ae20be5..0000000 --- a/systems/sheet/src/sheet/test.rs +++ /dev/null @@ -1,460 +0,0 @@ -use hex_display::hex_display_slice; - -use crate::{ - index_source::IndexSource, - mapping::{LocalMapping, LocalMappingForward}, - sheet::{ - SheetData, constants::HEADER_SIZE, reader::read_sheet_data, - writer::convert_sheet_data_to_bytes, - }, -}; -use std::collections::HashSet; -use std::fs; - -/// Test writing and re-reading sheet data -#[test] -fn test_sheet_data_roundtrip() { - // Create test data - let _sheet_data = SheetData::empty(); - - // Create some test mappings - let mapping1 = LocalMapping::new( - vec!["src".to_string(), "main.rs".to_string()], - IndexSource::new(1001, 1), - LocalMappingForward::Latest, - ) - .unwrap(); - - let mapping2 = LocalMapping::new( - vec!["docs".to_string(), "README.md".to_string()], - IndexSource::new(1002, 2), - LocalMappingForward::Ref { - sheet_name: "reference".to_string(), - }, - ) - .unwrap(); - - let mapping3 = LocalMapping::new( - vec![ - "assets".to_string(), - "images".to_string(), - "logo.png".to_string(), - ], - IndexSource::new(1003, 3), - LocalMappingForward::Version { version: 12345 }, - ) - .unwrap(); - - // Add mappings to SheetData - // Note: Since the mappings field of SheetData is private, we need to create SheetData in another way - // Here we directly create a new HashSet - let mut mappings = HashSet::new(); - mappings.insert(mapping1.clone()); - mappings.insert(mapping2.clone()); - mappings.insert(mapping3.clone()); - - let sheet_data = SheetData { mappings }; - - // Convert SheetData to bytes - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - - // Verify byte data is not empty - assert!(!bytes.is_empty(), "Converted bytes should not be empty"); - - // Verify file header 
- assert_eq!(bytes[0], 1, "Sheet version should be 1"); - - // Re-read SheetData from bytes - let restored_sheet_data = - read_sheet_data(&bytes).expect("Failed to read sheet data from bytes"); - - // Verify mapping count - assert_eq!( - restored_sheet_data.mappings.len(), - sheet_data.mappings.len(), - "Restored sheet should have same number of mappings" - ); - - // Verify each mapping exists - for mapping in &sheet_data.mappings { - assert!( - restored_sheet_data.mappings.contains(mapping), - "Restored sheet should contain mapping: {:?}", - mapping - ); - } - - // Verify specific mapping content - for mapping in &restored_sheet_data.mappings { - // Find original mapping - let original_mapping = sheet_data.mappings.get(mapping.value()).unwrap(); - - // Verify path - assert_eq!( - mapping.value(), - original_mapping.value(), - "Path should match" - ); - - // Verify index source - assert_eq!( - mapping.index_source().id(), - original_mapping.index_source().id(), - "Index source ID should match" - ); - - assert_eq!( - mapping.index_source().version(), - original_mapping.index_source().version(), - "Index source version should match" - ); - - // Verify forward information - let (original_type, _, _) = original_mapping.forward().unpack(); - let (restored_type, _, _) = mapping.forward().unpack(); - assert_eq!(restored_type, original_type, "Forward type should match"); - } -} - -/// Test reading and writing empty sheet data -#[test] -fn test_empty_sheet_roundtrip() { - // Create empty SheetData - let sheet_data = SheetData::empty(); - - // Convert to bytes - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - - // Verify file header - assert_eq!(bytes.len(), 15, "Empty sheet should have header size only"); - assert_eq!(bytes[0], 1, "Sheet version should be 1"); - - // Verify offsets - For empty sheet, mapping data offset and index table offset should be the same - let mapping_data_offset = - u32::from_le_bytes([bytes[7], bytes[8], bytes[9], bytes[10]]) as 
usize; - let index_table_offset = - u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]) as usize; - assert_eq!( - mapping_data_offset, index_table_offset, - "For empty sheet, both offsets should be the same" - ); - assert_eq!( - mapping_data_offset, HEADER_SIZE, - "Offsets should point to end of header" - ); - - // Mapping count should be 0 - let mapping_count = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]); - assert_eq!(mapping_count, 0, "Mapping count should be 0"); - - // Index source count should be 0 - let index_count = u16::from_le_bytes([bytes[5], bytes[6]]); - assert_eq!(index_count, 0, "Index count should be 0"); - - // Re-read - let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read empty sheet data"); - - // Verify it's empty - assert!( - restored_sheet_data.mappings.is_empty(), - "Restored empty sheet should have no mappings" - ); -} - -/// Test reading and writing a single mapping -#[test] -fn test_single_mapping_roundtrip() { - // Create a single mapping - let mapping = LocalMapping::new( - vec!["test.txt".to_string()], - IndexSource::new(999, 42), - LocalMappingForward::Latest, - ) - .unwrap(); - - let mut mappings = HashSet::new(); - mappings.insert(mapping.clone()); - - let sheet_data = SheetData { mappings }; - - // Convert to bytes - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - - // Re-read - let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); - - // Verify - assert_eq!(restored_sheet_data.mappings.len(), 1); - let restored_mapping = restored_sheet_data.mappings.iter().next().unwrap(); - - assert_eq!(restored_mapping.value(), &["test.txt".to_string()]); - assert_eq!(restored_mapping.index_source().id(), 999); - assert_eq!(restored_mapping.index_source().version(), 42); - - let (forward_type, _, _) = restored_mapping.forward().unpack(); - assert_eq!(forward_type, 0); // Latest type id is 0 -} - -/// Test file system read/write -#[test] -fn 
test_file_system_roundtrip() { - // Create test data - let mapping1 = LocalMapping::new( - vec!["file0.txt".to_string()], - IndexSource::new(1, 1), - LocalMappingForward::Latest, - ) - .unwrap(); - - let mapping2 = LocalMapping::new( - vec!["dir1".to_string(), "file1.txt".to_string()], - IndexSource::new(2, 2), - LocalMappingForward::Ref { - sheet_name: "other".to_string(), - }, - ) - .unwrap(); - - let mapping3 = LocalMapping::new( - vec!["dir2".to_string(), "file2.txt".to_string()], - IndexSource::new(3, 3), - LocalMappingForward::Version { version: 35 }, - ) - .unwrap(); - - let mut mappings = HashSet::new(); - mappings.insert(mapping1.clone()); - mappings.insert(mapping2.clone()); - mappings.insert(mapping3.clone()); - - let sheet_data = SheetData { mappings }; - - // Convert to bytes - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - - // Write to file - let test_file_path = ".temp/test.sheet"; - let test_file_path_hex = ".temp/test_hex.txt"; - - // Ensure directory exists - if let Some(parent) = std::path::Path::new(test_file_path).parent() { - fs::create_dir_all(parent).expect("Failed to create test directory"); - } - - fs::write(test_file_path, &bytes).expect("Failed to write test file"); - fs::write(test_file_path_hex, hex_display_slice(&bytes)).expect("Failed to write test file"); - - // Read file - let file_bytes = fs::read(test_file_path).expect("Failed to read test file"); - - // Verify file content matches original bytes - assert_eq!( - file_bytes, bytes, - "File content should match original bytes" - ); - - // Re-read SheetData from file bytes - let restored_from_file = read_sheet_data(&file_bytes).expect("Failed to read from file bytes"); - - // Use SheetData's Eq trait for direct comparison - assert_eq!( - restored_from_file, sheet_data, - "Restored sheet data should be equal to original" - ); - - // Verify mappings in SheetData read from file - // Check if each original mapping can be found in restored data - for original_mapping in 
&sheet_data.mappings { - let found = restored_from_file - .mappings - .iter() - .any(|m| m == original_mapping); - assert!( - found, - "Original mapping {:?} should be present in restored sheet data", - original_mapping - ); - } - - // Also check if each mapping in restored data can be found in original data - for restored_mapping in &restored_from_file.mappings { - let found = sheet_data.mappings.iter().any(|m| m == restored_mapping); - assert!( - found, - "Restored mapping {:?} should be present in original sheet data", - restored_mapping - ); - } - - // Test file remains in .temp/test.sheet for subsequent inspection - // Note: Need to manually clean up .temp directory before next test run -} - -/// Test reading and writing different forward types -#[test] -fn test_different_forward_types() { - // Test Latest type - let mapping_latest = LocalMapping::new( - vec!["latest.txt".to_string()], - IndexSource::new(1, 1), - LocalMappingForward::Latest, - ) - .unwrap(); - - // Test Ref type - let mapping_ref = LocalMapping::new( - vec!["ref.txt".to_string()], - IndexSource::new(2, 2), - LocalMappingForward::Ref { - sheet_name: "reference_sheet".to_string(), - }, - ) - .unwrap(); - - // Test Version type - let mapping_version = LocalMapping::new( - vec!["version.txt".to_string()], - IndexSource::new(3, 3), - LocalMappingForward::Version { version: 54321 }, - ) - .unwrap(); - - let mut mappings = HashSet::new(); - mappings.insert(mapping_latest.clone()); - mappings.insert(mapping_ref.clone()); - mappings.insert(mapping_version.clone()); - - let sheet_data = SheetData { mappings }; - - // Convert to bytes and re-read - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); - - // Verify all mappings exist - assert_eq!(restored_sheet_data.mappings.len(), 3); - - // Verify Latest type - let restored_latest = restored_sheet_data - .mappings - .get(&vec!["latest.txt".to_string()]) - 
.unwrap(); - let (latest_type, latest_len, _) = restored_latest.forward().unpack(); - assert_eq!(latest_type, 0); - assert_eq!(latest_len, 0); - - // Verify Ref type - let restored_ref = restored_sheet_data - .mappings - .get(&vec!["ref.txt".to_string()]) - .unwrap(); - let (ref_type, ref_len, ref_bytes) = restored_ref.forward().unpack(); - assert_eq!(ref_type, 1); - assert_eq!(ref_len as usize, "reference_sheet".len()); - assert_eq!(String::from_utf8(ref_bytes).unwrap(), "reference_sheet"); - - // Verify Version type - let restored_version = restored_sheet_data - .mappings - .get(&vec!["version.txt".to_string()]) - .unwrap(); - let (version_type, version_len, version_bytes) = restored_version.forward().unpack(); - assert_eq!(version_type, 2); - assert_eq!(version_len, 2); // u16 is 2 bytes - assert_eq!(u16::from_be_bytes(version_bytes.try_into().unwrap()), 54321); -} - -/// Test duplicate index source optimization -#[test] -fn test_duplicate_index_source_optimization() { - // Create multiple mappings sharing the same index source - let shared_source = IndexSource::new(777, 88); - - let mapping1 = LocalMapping::new( - vec!["file1.txt".to_string()], - shared_source, - LocalMappingForward::Latest, - ) - .unwrap(); - - let mapping2 = LocalMapping::new( - vec!["file2.txt".to_string()], - shared_source, - LocalMappingForward::Latest, - ) - .unwrap(); - - let mapping3 = LocalMapping::new( - vec!["file3.txt".to_string()], - shared_source, - LocalMappingForward::Latest, - ) - .unwrap(); - - let mut mappings = HashSet::new(); - mappings.insert(mapping1); - mappings.insert(mapping2); - mappings.insert(mapping3); - - let sheet_data = SheetData { mappings }; - - // Convert to bytes - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - - // Verify index table should have only one entry - let index_count = u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]); - assert_eq!(index_count, 1, "Should have only one unique index source"); - - // Re-read and verify - 
let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); - assert_eq!(restored_sheet_data.mappings.len(), 3); - - // Verify all mappings use the same index source - for mapping in &restored_sheet_data.mappings { - assert_eq!(mapping.index_source().id(), 777); - assert_eq!(mapping.index_source().version(), 88); - } -} - -/// Test path serialization and deserialization -#[test] -fn test_path_serialization_deserialization() { - // Test various paths - let test_cases = vec![ - vec!["single".to_string()], - vec!["dir".to_string(), "file.txt".to_string()], - vec![ - "a".to_string(), - "b".to_string(), - "c".to_string(), - "d.txt".to_string(), - ], - vec!["with spaces".to_string(), "file name.txt".to_string()], - vec!["unicode".to_string(), "文件.txt".to_string()], - ]; - - for path in test_cases { - let mapping = LocalMapping::new( - path.clone(), - IndexSource::new(1, 1), - LocalMappingForward::Latest, - ) - .unwrap(); - - let mut mappings = HashSet::new(); - mappings.insert(mapping); - - let sheet_data = SheetData { mappings }; - - // Convert to bytes and re-read - let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); - let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); - - // Verify path - let restored_mapping = restored_sheet_data.mappings.iter().next().unwrap(); - assert_eq!( - restored_mapping.value(), - &path, - "Path should be preserved after roundtrip" - ); - } -} diff --git a/systems/sheet/src/sheet/v1/constants.rs b/systems/sheet/src/sheet/v1/constants.rs new file mode 100644 index 0000000..69714bb --- /dev/null +++ b/systems/sheet/src/sheet/v1/constants.rs @@ -0,0 +1,55 @@ +// Header (15: 1 + 2 + 4 + 4 + 4) +// +// [SHEET_VERSION: u8] +// [MAPPING_BUCKET_COUNT: u16] +// [INDEX_COUNT: u32] +// [OFFSET_MAPPING_DIR: u32] +// [OFFSET_INDEX_TABLE: u32] + +pub const CURRENT_SHEET_VERSION: u8 = 1; +pub const HEADER_SIZE: usize = 0 + + 1 // SHEET_VERSION + + 2 // MAPPING_BUCKET_COUNT + + 4 // 
INDEX_COUNT + + 4 // OFFSET_MAPPING_DIR + + 4 // OFFSET_INDEX_TABLE +; + +// Mapping Directory (12: 4 + 4 + 4) +// +// [BUCKET_HASH_PREFIX: u32] +// [BUCKET_OFFSET: u32] +// [BUCKET_LENGTH: u32] + +pub const MAPPING_DIR_ENTRY_SIZE: usize = 0 + + 4 // BUCKET_HASH_PREFIX + + 4 // BUCKET_OFFSET + + 4 // BUCKET_LENGTH +; + +// Mapping Buckets (6 + 1b + N) +// +// [KEY_LEN: u8] +// [FORWARD_TYPE: byte] +// [FORWARD_INFO_LEN: u8] +// [KEY_BYTES: ?] +// [FORWARD_INFO_BYTES: ?] +// [INDEX_OFFSET: u32] + +pub const MAPPING_BUCKET_MIN_SIZE: usize = 0 + + 1 // KEY_LEN + + 1 // FORWARD_TYPE + + 1 // FORWARD_INFO_LEN + + 2 // KEY_BYTES (MIN:1) + FORWARD_INFO_BYTES (MIN:1) + + 2 // INDEX_OFFSET +; + +// Index Table (6: 4 + 2) +// +// [INDEX_ID: u32] +// [INDEX_VERSION: u16] + +pub const INDEX_ENTRY_SIZE: usize = 0 + + 4 // INDEX_ID + + 2 // INDEX_VERSION +; diff --git a/systems/sheet/src/sheet/v1/reader.rs b/systems/sheet/src/sheet/v1/reader.rs new file mode 100644 index 0000000..d86b097 --- /dev/null +++ b/systems/sheet/src/sheet/v1/reader.rs @@ -0,0 +1,627 @@ +use crate::{ + index_source::IndexSource, + mapping::{LocalMapping, LocalMappingForward, Mapping}, + sheet::{ + SheetData, + constants::{ + CURRENT_SHEET_VERSION, HEADER_SIZE, INDEX_ENTRY_SIZE, MAPPING_BUCKET_MIN_SIZE, + MAPPING_DIR_ENTRY_SIZE, + }, + error::ReadSheetDataError, + }, +}; +use std::collections::HashSet; + +/// Reconstruct complete SheetData from full sheet data +pub fn read_sheet_data(full_sheet_data: &[u8]) -> Result { + if full_sheet_data.len() < HEADER_SIZE { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Sheet data too small for header", + ) + .into()); + } + + // Read file header + let version = full_sheet_data[0]; + if version != CURRENT_SHEET_VERSION { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported sheet version: {}", version), + ) + .into()); + } + + let bucket_count = u16::from_le_bytes([full_sheet_data[1], 
full_sheet_data[2]]) as usize; + let index_count = u32::from_le_bytes([ + full_sheet_data[3], + full_sheet_data[4], + full_sheet_data[5], + full_sheet_data[6], + ]) as usize; + + let mapping_dir_offset = u32::from_le_bytes([ + full_sheet_data[7], + full_sheet_data[8], + full_sheet_data[9], + full_sheet_data[10], + ]) as usize; + + let index_table_offset = u32::from_le_bytes([ + full_sheet_data[11], + full_sheet_data[12], + full_sheet_data[13], + full_sheet_data[14], + ]) as usize; + + // Validate offsets + if mapping_dir_offset > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Mapping directory offset out of bounds", + ) + .into()); + } + + if index_table_offset > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Index table offset out of bounds", + ) + .into()); + } + + // Read index table + let index_sources = read_index_table(full_sheet_data, index_table_offset, index_count)?; + + // Read mapping directory and build all mappings + let mut mappings = HashSet::new(); + let mapping_dir_end = mapping_dir_offset + bucket_count * MAPPING_DIR_ENTRY_SIZE; + + if mapping_dir_end > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Mapping directory exceeds buffer", + ) + .into()); + } + + // Iterate through all buckets + for i in 0..bucket_count { + let dir_entry_offset = mapping_dir_offset + i * MAPPING_DIR_ENTRY_SIZE; + + // Skip BUCKET_HASH_PREFIX, directly read BUCKET_OFFSET and BUCKET_LENGTH + let bucket_offset = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 4], + full_sheet_data[dir_entry_offset + 5], + full_sheet_data[dir_entry_offset + 6], + full_sheet_data[dir_entry_offset + 7], + ]) as usize; + + let bucket_length = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 8], + full_sheet_data[dir_entry_offset + 9], + full_sheet_data[dir_entry_offset + 10], + full_sheet_data[dir_entry_offset + 11], + ]) as 
usize; + + // Read bucket data + if bucket_offset + bucket_length > full_sheet_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + format!("Bucket data exceeds buffer (bucket {})", i), + ) + .into()); + } + + let bucket_data = &full_sheet_data[bucket_offset..bucket_offset + bucket_length]; + let bucket_mappings = read_bucket_data(bucket_data, &index_sources)?; + + for mapping in bucket_mappings { + mappings.insert(mapping); + } + } + + Ok(SheetData { mappings }) +} + +/// Read mapping information for a specific node from complete sheet data +pub fn read_mapping<'a>( + full_sheet_data: &'a [u8], + node: &[&str], +) -> Result, LocalMappingForward)>, ReadSheetDataError> { + if full_sheet_data.len() < HEADER_SIZE { + return Ok(None); + } + + // Read file header + let version = full_sheet_data[0]; + if version != CURRENT_SHEET_VERSION { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported sheet version: {}", version), + ) + .into()); + } + + let bucket_count = u16::from_le_bytes([full_sheet_data[1], full_sheet_data[2]]) as usize; + let index_count = u32::from_le_bytes([ + full_sheet_data[3], + full_sheet_data[4], + full_sheet_data[5], + full_sheet_data[6], + ]) as usize; + + let mapping_dir_offset = u32::from_le_bytes([ + full_sheet_data[7], + full_sheet_data[8], + full_sheet_data[9], + full_sheet_data[10], + ]) as usize; + + let index_table_offset = u32::from_le_bytes([ + full_sheet_data[11], + full_sheet_data[12], + full_sheet_data[13], + full_sheet_data[14], + ]) as usize; + + // Validate offsets + if mapping_dir_offset > full_sheet_data.len() || index_table_offset > full_sheet_data.len() { + return Ok(None); + } + + // Read index table + let index_sources = read_index_table(full_sheet_data, index_table_offset, index_count)?; + + // Calculate hash prefix for target node + let node_path: Vec = node.iter().map(|s| s.to_string()).collect(); + let target_hash = 
crate::sheet::writer::calculate_path_hash(&node_path); + let target_bucket_key = target_hash >> 24; // Take high 8 bits as bucket key + + // Find corresponding bucket in mapping directory using binary search + let mapping_dir_end = mapping_dir_offset + bucket_count * MAPPING_DIR_ENTRY_SIZE; + if mapping_dir_end > full_sheet_data.len() { + return Ok(None); + } + + // Binary search for the bucket with matching hash prefix + let mut left = 0; + let mut right = bucket_count; + + while left < right { + let mid = left + (right - left) / 2; + let dir_entry_offset = mapping_dir_offset + mid * MAPPING_DIR_ENTRY_SIZE; + + let bucket_hash_prefix = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset], + full_sheet_data[dir_entry_offset + 1], + full_sheet_data[dir_entry_offset + 2], + full_sheet_data[dir_entry_offset + 3], + ]); + + if bucket_hash_prefix < target_bucket_key { + left = mid + 1; + } else if bucket_hash_prefix > target_bucket_key { + right = mid; + } else { + // Found matching bucket + let bucket_offset = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 4], + full_sheet_data[dir_entry_offset + 5], + full_sheet_data[dir_entry_offset + 6], + full_sheet_data[dir_entry_offset + 7], + ]) as usize; + + let bucket_length = u32::from_le_bytes([ + full_sheet_data[dir_entry_offset + 8], + full_sheet_data[dir_entry_offset + 9], + full_sheet_data[dir_entry_offset + 10], + full_sheet_data[dir_entry_offset + 11], + ]) as usize; + + // Read bucket data and find target node + if bucket_offset + bucket_length > full_sheet_data.len() { + break; + } + + let bucket_data = &full_sheet_data[bucket_offset..bucket_offset + bucket_length]; + return find_mapping_in_bucket(bucket_data, node, &index_sources); + } + } + + Ok(None) +} + +/// Read index table +fn read_index_table( + data: &[u8], + offset: usize, + count: usize, +) -> Result, ReadSheetDataError> { + let table_size = count * INDEX_ENTRY_SIZE; + if offset + table_size > data.len() { + return Err(std::io::Error::new( + 
std::io::ErrorKind::UnexpectedEof, + "Index table exceeds buffer", + ) + .into()); + } + + let mut sources = Vec::with_capacity(count); + let mut pos = offset; + + for _ in 0..count { + if pos + INDEX_ENTRY_SIZE > data.len() { + break; + } + + let id = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]); + let ver = u16::from_le_bytes([data[pos + 4], data[pos + 5]]); + + sources.push(IndexSource::new(id, ver)); + pos += INDEX_ENTRY_SIZE; + } + + Ok(sources) +} + +/// Read all mappings in bucket data +fn read_bucket_data( + bucket_data: &[u8], + index_sources: &[IndexSource], +) -> Result, ReadSheetDataError> { + let mut mappings = Vec::new(); + let mut pos = 0; + + while pos < bucket_data.len() { + if pos + MAPPING_BUCKET_MIN_SIZE > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Incomplete mapping bucket entry", + ) + .into()); + } + + // Read mapping bucket entry header + let key_len = bucket_data[pos] as usize; + let forward_type = bucket_data[pos + 1]; + let forward_info_len = bucket_data[pos + 2] as usize; + + pos += 3; // KEY_LEN + FORWARD_TYPE + FORWARD_INFO_LEN + + // Check bounds + if pos + key_len > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Key data exceeds buffer", + ) + .into()); + } + + // Read key data (path) + let key_bytes = &bucket_data[pos..pos + key_len]; + let path = deserialize_path(key_bytes)?; + pos += key_len; + + // Read forward info data + if pos + forward_info_len > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Forward info data exceeds buffer", + ) + .into()); + } + + let forward_bytes = &bucket_data[pos..pos + forward_info_len]; + pos += forward_info_len; + + // Read index offset + if pos + 4 > bucket_data.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Index offset exceeds buffer", + ) + .into()); + } + + let index_offset = 
u32::from_le_bytes([ + bucket_data[pos], + bucket_data[pos + 1], + bucket_data[pos + 2], + bucket_data[pos + 3], + ]) as usize; + pos += 4; + + // Get index source + if index_offset >= index_sources.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid index offset: {}", index_offset), + ) + .into()); + } + + let source = index_sources[index_offset]; + + // Build forward info + let forward = LocalMappingForward::pack(forward_type, forward_bytes).ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Failed to unpack forward info", + ) + })?; + + // Create LocalMapping + let mapping = LocalMapping::new(path, source, forward).ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "Failed to create mapping") + })?; + + mappings.push(mapping); + } + + Ok(mappings) +} + +/// Find mapping for specific node in bucket data +fn find_mapping_in_bucket<'a>( + bucket_data: &'a [u8], + node: &[&str], + index_sources: &[IndexSource], +) -> Result, LocalMappingForward)>, ReadSheetDataError> { + let mut pos = 0; + + while pos < bucket_data.len() { + if pos + MAPPING_BUCKET_MIN_SIZE > bucket_data.len() { + break; + } + + // Read mapping bucket entry header + let key_len = bucket_data[pos] as usize; + let forward_type = bucket_data[pos + 1]; + let forward_info_len = bucket_data[pos + 2] as usize; + + let header_end = pos + 3; // KEY_LEN + FORWARD_TYPE + FORWARD_INFO_LEN + + // Check bounds + if header_end + key_len > bucket_data.len() { + break; + } + + // Read key data (path) + let key_bytes = &bucket_data[header_end..header_end + key_len]; + let current_path = deserialize_path(key_bytes)?; + + // Check if matches target node + if paths_match(¤t_path, node) { + // Read forward info data + let forward_start = header_end + key_len; + if forward_start + forward_info_len > bucket_data.len() { + break; + } + + let forward_bytes = &bucket_data[forward_start..forward_start + forward_info_len]; + + // Read 
index offset + let index_offset_pos = forward_start + forward_info_len; + if index_offset_pos + 4 > bucket_data.len() { + break; + } + + let index_offset = u32::from_le_bytes([ + bucket_data[index_offset_pos], + bucket_data[index_offset_pos + 1], + bucket_data[index_offset_pos + 2], + bucket_data[index_offset_pos + 3], + ]) as usize; + + // Get index source + if index_offset >= index_sources.len() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid index offset: {}", index_offset), + ) + .into()); + } + + let source = index_sources[index_offset]; + + // Build forward info + let forward = + LocalMappingForward::pack(forward_type, forward_bytes).ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Failed to unpack forward info", + ) + })?; + + // Create Mapping + let path_str = std::str::from_utf8(key_bytes).map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid UTF-8 in path: {}", e), + ) + })?; + let mapping = Mapping::new("", path_str, source); + + return Ok(Some((mapping, forward))); + } + + // Move to next mapping entry + // Entry size = 3 (header) + key_len + forward_info_len + 4 (index offset) + pos = header_end + key_len + forward_info_len + 4; + } + + Ok(None) +} + +/// Deserialize path +fn deserialize_path(bytes: &[u8]) -> Result, ReadSheetDataError> { + let path_str = std::str::from_utf8(bytes).map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid UTF-8 in path: {}", e), + ) + })?; + + if path_str.is_empty() { + return Ok(Vec::new()); + } + + let segments: Vec = path_str.split('/').map(|s| s.to_string()).collect(); + Ok(segments) +} + +/// Check if paths match +fn paths_match(path: &[String], node: &[&str]) -> bool { + if path.len() != node.len() { + return false; + } + + for (i, segment) in path.iter().enumerate() { + if segment != node[i] { + return false; + } + } + + true +} + +#[cfg(test)] +mod tests { + use super::*; + 
+ #[test] + fn test_deserialize_path() { + let bytes = b"dir/subdir/file.txt"; + let path = deserialize_path(bytes).unwrap(); + assert_eq!(path, vec!["dir", "subdir", "file.txt"]); + } + + #[test] + fn test_paths_match() { + let path = vec!["dir".to_string(), "file.txt".to_string()]; + let node = &["dir", "file.txt"]; + assert!(paths_match(&path, node)); + + let node2 = &["dir", "other.txt"]; + assert!(!paths_match(&path, node2)); + } + + #[test] + fn test_read_index_table() { + let mut data = Vec::new(); + data.extend_from_slice(&123u32.to_le_bytes()); + data.extend_from_slice(&456u16.to_le_bytes()); + data.extend_from_slice(&789u32.to_le_bytes()); + data.extend_from_slice(&1011u16.to_le_bytes()); + + let sources = read_index_table(&data, 0, 2).unwrap(); + assert_eq!(sources.len(), 2); + assert_eq!(sources[0].id(), 123); + assert_eq!(sources[0].version(), 456); + assert_eq!(sources[1].id(), 789); + assert_eq!(sources[1].version(), 1011); + } + + #[test] + fn test_read_bucket_data() { + // Create simple bucket data + let mut bucket_data = Vec::new(); + + // First mapping + let path1 = b"dir/file.txt"; + bucket_data.push(path1.len() as u8); // KEY_LEN + bucket_data.push(0); // FORWARD_TYPE (Latest) + bucket_data.push(0); // FORWARD_INFO_LEN + bucket_data.extend_from_slice(path1); // KEY_BYTES + bucket_data.extend_from_slice(&0u32.to_le_bytes()); // INDEX_OFFSET + + // Second mapping + let path2 = b"other/test.txt"; + bucket_data.push(path2.len() as u8); // KEY_LEN + bucket_data.push(0); // FORWARD_TYPE (Latest) + bucket_data.push(0); // FORWARD_INFO_LEN + bucket_data.extend_from_slice(path2); // KEY_BYTES + bucket_data.extend_from_slice(&1u32.to_le_bytes()); // INDEX_OFFSET + + let index_sources = vec![IndexSource::new(1, 1), IndexSource::new(2, 1)]; + + let mappings = read_bucket_data(&bucket_data, &index_sources).unwrap(); + assert_eq!(mappings.len(), 2); + + // Verify first mapping + assert_eq!( + mappings[0].value(), + &["dir".to_string(), 
"file.txt".to_string()] + ); + assert_eq!(mappings[0].index_source().id(), 1); + + // Verify second mapping + assert_eq!( + mappings[1].value(), + &["other".to_string(), "test.txt".to_string()] + ); + assert_eq!(mappings[1].index_source().id(), 2); + } + + #[test] + fn test_binary_search_bucket_lookup() { + use crate::sheet::writer::convert_sheet_data_to_bytes; + + // Create test sheet data with multiple buckets + let mut sheet_data = crate::sheet::SheetData::empty(); + + // Add mappings that will go to different buckets + let mapping1 = crate::mapping::LocalMapping::new( + vec!["aaa".to_string(), "file1.txt".to_string()], + crate::index_source::IndexSource::new(1, 1), + crate::mapping::LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = crate::mapping::LocalMapping::new( + vec!["mmm".to_string(), "file2.txt".to_string()], + crate::index_source::IndexSource::new(2, 2), + crate::mapping::LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping3 = crate::mapping::LocalMapping::new( + vec!["zzz".to_string(), "file3.txt".to_string()], + crate::index_source::IndexSource::new(3, 3), + crate::mapping::LocalMappingForward::Latest, + ) + .unwrap(); + + sheet_data.mappings.insert(mapping1.clone()); + sheet_data.mappings.insert(mapping2.clone()); + sheet_data.mappings.insert(mapping3.clone()); + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data); + + // Test finding each mapping using binary search + let node1 = &["aaa", "file1.txt"]; + let result1 = read_mapping(&bytes, node1).unwrap(); + assert!(result1.is_some(), "Should find mapping for aaa/file1.txt"); + + let node2 = &["mmm", "file2.txt"]; + let result2 = read_mapping(&bytes, node2).unwrap(); + assert!(result2.is_some(), "Should find mapping for mmm/file2.txt"); + + let node3 = &["zzz", "file3.txt"]; + let result3 = read_mapping(&bytes, node3).unwrap(); + assert!(result3.is_some(), "Should find mapping for zzz/file3.txt"); + + // Test non-existent mapping + let node4 = &["xxx", 
"notfound.txt"]; + let result4 = read_mapping(&bytes, node4).unwrap(); + assert!(result4.is_none(), "Should not find non-existent mapping"); + + // Test that binary search handles empty data + let empty_bytes = convert_sheet_data_to_bytes(crate::sheet::SheetData::empty()); + let result5 = read_mapping(&empty_bytes, node1).unwrap(); + assert!(result5.is_none(), "Should not find anything in empty sheet"); + } +} diff --git a/systems/sheet/src/sheet/v1/test.rs b/systems/sheet/src/sheet/v1/test.rs new file mode 100644 index 0000000..ae20be5 --- /dev/null +++ b/systems/sheet/src/sheet/v1/test.rs @@ -0,0 +1,460 @@ +use hex_display::hex_display_slice; + +use crate::{ + index_source::IndexSource, + mapping::{LocalMapping, LocalMappingForward}, + sheet::{ + SheetData, constants::HEADER_SIZE, reader::read_sheet_data, + writer::convert_sheet_data_to_bytes, + }, +}; +use std::collections::HashSet; +use std::fs; + +/// Test writing and re-reading sheet data +#[test] +fn test_sheet_data_roundtrip() { + // Create test data + let _sheet_data = SheetData::empty(); + + // Create some test mappings + let mapping1 = LocalMapping::new( + vec!["src".to_string(), "main.rs".to_string()], + IndexSource::new(1001, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = LocalMapping::new( + vec!["docs".to_string(), "README.md".to_string()], + IndexSource::new(1002, 2), + LocalMappingForward::Ref { + sheet_name: "reference".to_string(), + }, + ) + .unwrap(); + + let mapping3 = LocalMapping::new( + vec![ + "assets".to_string(), + "images".to_string(), + "logo.png".to_string(), + ], + IndexSource::new(1003, 3), + LocalMappingForward::Version { version: 12345 }, + ) + .unwrap(); + + // Add mappings to SheetData + // Note: Since the mappings field of SheetData is private, we need to create SheetData in another way + // Here we directly create a new HashSet + let mut mappings = HashSet::new(); + mappings.insert(mapping1.clone()); + mappings.insert(mapping2.clone()); + 
mappings.insert(mapping3.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert SheetData to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Verify byte data is not empty + assert!(!bytes.is_empty(), "Converted bytes should not be empty"); + + // Verify file header + assert_eq!(bytes[0], 1, "Sheet version should be 1"); + + // Re-read SheetData from bytes + let restored_sheet_data = + read_sheet_data(&bytes).expect("Failed to read sheet data from bytes"); + + // Verify mapping count + assert_eq!( + restored_sheet_data.mappings.len(), + sheet_data.mappings.len(), + "Restored sheet should have same number of mappings" + ); + + // Verify each mapping exists + for mapping in &sheet_data.mappings { + assert!( + restored_sheet_data.mappings.contains(mapping), + "Restored sheet should contain mapping: {:?}", + mapping + ); + } + + // Verify specific mapping content + for mapping in &restored_sheet_data.mappings { + // Find original mapping + let original_mapping = sheet_data.mappings.get(mapping.value()).unwrap(); + + // Verify path + assert_eq!( + mapping.value(), + original_mapping.value(), + "Path should match" + ); + + // Verify index source + assert_eq!( + mapping.index_source().id(), + original_mapping.index_source().id(), + "Index source ID should match" + ); + + assert_eq!( + mapping.index_source().version(), + original_mapping.index_source().version(), + "Index source version should match" + ); + + // Verify forward information + let (original_type, _, _) = original_mapping.forward().unpack(); + let (restored_type, _, _) = mapping.forward().unpack(); + assert_eq!(restored_type, original_type, "Forward type should match"); + } +} + +/// Test reading and writing empty sheet data +#[test] +fn test_empty_sheet_roundtrip() { + // Create empty SheetData + let sheet_data = SheetData::empty(); + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Verify file header + 
assert_eq!(bytes.len(), 15, "Empty sheet should have header size only"); + assert_eq!(bytes[0], 1, "Sheet version should be 1"); + + // Verify offsets - For empty sheet, mapping data offset and index table offset should be the same + let mapping_data_offset = + u32::from_le_bytes([bytes[7], bytes[8], bytes[9], bytes[10]]) as usize; + let index_table_offset = + u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]) as usize; + assert_eq!( + mapping_data_offset, index_table_offset, + "For empty sheet, both offsets should be the same" + ); + assert_eq!( + mapping_data_offset, HEADER_SIZE, + "Offsets should point to end of header" + ); + + // Mapping count should be 0 + let mapping_count = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]); + assert_eq!(mapping_count, 0, "Mapping count should be 0"); + + // Index source count should be 0 + let index_count = u16::from_le_bytes([bytes[5], bytes[6]]); + assert_eq!(index_count, 0, "Index count should be 0"); + + // Re-read + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read empty sheet data"); + + // Verify it's empty + assert!( + restored_sheet_data.mappings.is_empty(), + "Restored empty sheet should have no mappings" + ); +} + +/// Test reading and writing a single mapping +#[test] +fn test_single_mapping_roundtrip() { + // Create a single mapping + let mapping = LocalMapping::new( + vec!["test.txt".to_string()], + IndexSource::new(999, 42), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Re-read + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + + // Verify + assert_eq!(restored_sheet_data.mappings.len(), 1); + let restored_mapping = restored_sheet_data.mappings.iter().next().unwrap(); + + assert_eq!(restored_mapping.value(), 
&["test.txt".to_string()]); + assert_eq!(restored_mapping.index_source().id(), 999); + assert_eq!(restored_mapping.index_source().version(), 42); + + let (forward_type, _, _) = restored_mapping.forward().unpack(); + assert_eq!(forward_type, 0); // Latest type id is 0 +} + +/// Test file system read/write +#[test] +fn test_file_system_roundtrip() { + // Create test data + let mapping1 = LocalMapping::new( + vec!["file0.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = LocalMapping::new( + vec!["dir1".to_string(), "file1.txt".to_string()], + IndexSource::new(2, 2), + LocalMappingForward::Ref { + sheet_name: "other".to_string(), + }, + ) + .unwrap(); + + let mapping3 = LocalMapping::new( + vec!["dir2".to_string(), "file2.txt".to_string()], + IndexSource::new(3, 3), + LocalMappingForward::Version { version: 35 }, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping1.clone()); + mappings.insert(mapping2.clone()); + mappings.insert(mapping3.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Write to file + let test_file_path = ".temp/test.sheet"; + let test_file_path_hex = ".temp/test_hex.txt"; + + // Ensure directory exists + if let Some(parent) = std::path::Path::new(test_file_path).parent() { + fs::create_dir_all(parent).expect("Failed to create test directory"); + } + + fs::write(test_file_path, &bytes).expect("Failed to write test file"); + fs::write(test_file_path_hex, hex_display_slice(&bytes)).expect("Failed to write test file"); + + // Read file + let file_bytes = fs::read(test_file_path).expect("Failed to read test file"); + + // Verify file content matches original bytes + assert_eq!( + file_bytes, bytes, + "File content should match original bytes" + ); + + // Re-read SheetData from file bytes + let restored_from_file = read_sheet_data(&file_bytes).expect("Failed to read 
from file bytes"); + + // Use SheetData's Eq trait for direct comparison + assert_eq!( + restored_from_file, sheet_data, + "Restored sheet data should be equal to original" + ); + + // Verify mappings in SheetData read from file + // Check if each original mapping can be found in restored data + for original_mapping in &sheet_data.mappings { + let found = restored_from_file + .mappings + .iter() + .any(|m| m == original_mapping); + assert!( + found, + "Original mapping {:?} should be present in restored sheet data", + original_mapping + ); + } + + // Also check if each mapping in restored data can be found in original data + for restored_mapping in &restored_from_file.mappings { + let found = sheet_data.mappings.iter().any(|m| m == restored_mapping); + assert!( + found, + "Restored mapping {:?} should be present in original sheet data", + restored_mapping + ); + } + + // Test file remains in .temp/test.sheet for subsequent inspection + // Note: Need to manually clean up .temp directory before next test run +} + +/// Test reading and writing different forward types +#[test] +fn test_different_forward_types() { + // Test Latest type + let mapping_latest = LocalMapping::new( + vec!["latest.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + // Test Ref type + let mapping_ref = LocalMapping::new( + vec!["ref.txt".to_string()], + IndexSource::new(2, 2), + LocalMappingForward::Ref { + sheet_name: "reference_sheet".to_string(), + }, + ) + .unwrap(); + + // Test Version type + let mapping_version = LocalMapping::new( + vec!["version.txt".to_string()], + IndexSource::new(3, 3), + LocalMappingForward::Version { version: 54321 }, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping_latest.clone()); + mappings.insert(mapping_ref.clone()); + mappings.insert(mapping_version.clone()); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes and re-read + let bytes = 
convert_sheet_data_to_bytes(sheet_data.clone()); + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + + // Verify all mappings exist + assert_eq!(restored_sheet_data.mappings.len(), 3); + + // Verify Latest type + let restored_latest = restored_sheet_data + .mappings + .get(&vec!["latest.txt".to_string()]) + .unwrap(); + let (latest_type, latest_len, _) = restored_latest.forward().unpack(); + assert_eq!(latest_type, 0); + assert_eq!(latest_len, 0); + + // Verify Ref type + let restored_ref = restored_sheet_data + .mappings + .get(&vec!["ref.txt".to_string()]) + .unwrap(); + let (ref_type, ref_len, ref_bytes) = restored_ref.forward().unpack(); + assert_eq!(ref_type, 1); + assert_eq!(ref_len as usize, "reference_sheet".len()); + assert_eq!(String::from_utf8(ref_bytes).unwrap(), "reference_sheet"); + + // Verify Version type + let restored_version = restored_sheet_data + .mappings + .get(&vec!["version.txt".to_string()]) + .unwrap(); + let (version_type, version_len, version_bytes) = restored_version.forward().unpack(); + assert_eq!(version_type, 2); + assert_eq!(version_len, 2); // u16 is 2 bytes + assert_eq!(u16::from_be_bytes(version_bytes.try_into().unwrap()), 54321); +} + +/// Test duplicate index source optimization +#[test] +fn test_duplicate_index_source_optimization() { + // Create multiple mappings sharing the same index source + let shared_source = IndexSource::new(777, 88); + + let mapping1 = LocalMapping::new( + vec!["file1.txt".to_string()], + shared_source, + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping2 = LocalMapping::new( + vec!["file2.txt".to_string()], + shared_source, + LocalMappingForward::Latest, + ) + .unwrap(); + + let mapping3 = LocalMapping::new( + vec!["file3.txt".to_string()], + shared_source, + LocalMappingForward::Latest, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping1); + mappings.insert(mapping2); + mappings.insert(mapping3); + + let sheet_data = 
SheetData { mappings }; + + // Convert to bytes + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + + // Verify index table should have only one entry + let index_count = u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]); + assert_eq!(index_count, 1, "Should have only one unique index source"); + + // Re-read and verify + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + assert_eq!(restored_sheet_data.mappings.len(), 3); + + // Verify all mappings use the same index source + for mapping in &restored_sheet_data.mappings { + assert_eq!(mapping.index_source().id(), 777); + assert_eq!(mapping.index_source().version(), 88); + } +} + +/// Test path serialization and deserialization +#[test] +fn test_path_serialization_deserialization() { + // Test various paths + let test_cases = vec![ + vec!["single".to_string()], + vec!["dir".to_string(), "file.txt".to_string()], + vec![ + "a".to_string(), + "b".to_string(), + "c".to_string(), + "d.txt".to_string(), + ], + vec!["with spaces".to_string(), "file name.txt".to_string()], + vec!["unicode".to_string(), "文件.txt".to_string()], + ]; + + for path in test_cases { + let mapping = LocalMapping::new( + path.clone(), + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let mut mappings = HashSet::new(); + mappings.insert(mapping); + + let sheet_data = SheetData { mappings }; + + // Convert to bytes and re-read + let bytes = convert_sheet_data_to_bytes(sheet_data.clone()); + let restored_sheet_data = read_sheet_data(&bytes).expect("Failed to read sheet data"); + + // Verify path + let restored_mapping = restored_sheet_data.mappings.iter().next().unwrap(); + assert_eq!( + restored_mapping.value(), + &path, + "Path should be preserved after roundtrip" + ); + } +} diff --git a/systems/sheet/src/sheet/v1/writer.rs b/systems/sheet/src/sheet/v1/writer.rs new file mode 100644 index 0000000..5d9b257 --- /dev/null +++ b/systems/sheet/src/sheet/v1/writer.rs 
@@ -0,0 +1,264 @@ +use crate::index_source::IndexSource; +use crate::mapping::LocalMapping; +use crate::sheet::SheetData; +use crate::sheet::constants::{ + CURRENT_SHEET_VERSION, HEADER_SIZE, INDEX_ENTRY_SIZE, MAPPING_DIR_ENTRY_SIZE, +}; +use sha2::{Digest, Sha256}; +use std::collections::{BTreeMap, HashMap}; + +/// Convert SheetData to byte array +pub fn convert_sheet_data_to_bytes(sheet_data: SheetData) -> Vec { + // Collect all mappings + let mappings: Vec = sheet_data.mappings.into_iter().collect(); + + // Collect all unique index sources + let mut index_sources = Vec::new(); + let mut source_to_offset = HashMap::new(); + + for mapping in &mappings { + let source = mapping.index_source(); + let key = (source.id(), source.version()); + if !source_to_offset.contains_key(&key) { + let offset = index_sources.len() as u32; + source_to_offset.insert(key, offset); + index_sources.push(IndexSource::new(source.id(), source.version())); + } + } + + let index_count = index_sources.len() as u32; + + // 1. Organize mappings into hash buckets + let mut buckets: BTreeMap> = BTreeMap::new(); + for mapping in mappings { + let hash = calculate_path_hash(mapping.value()); + let bucket_key = hash >> 24; // Take high 8 bits as bucket key + buckets + .entry(bucket_key) + .or_insert_with(Vec::new) + .push(mapping); + } + + let bucket_count = buckets.len() as u16; + + // 2. Calculate offsets for each section + let header_size = HEADER_SIZE; + let mapping_dir_offset = header_size; + let mapping_dir_size = bucket_count as usize * MAPPING_DIR_ENTRY_SIZE; + let index_table_offset = mapping_dir_offset + mapping_dir_size; + let index_table_size = index_count as usize * INDEX_ENTRY_SIZE; + + // 3. 
Calculate bucket data offsets + let mut bucket_data_offset = index_table_offset + index_table_size; + let mut bucket_entries = Vec::new(); + + // Prepare data for each bucket + for (&bucket_key, bucket_mappings) in &buckets { + // Calculate bucket data size + let mut bucket_data = Vec::new(); + for mapping in bucket_mappings { + write_mapping_bucket(&mut bucket_data, mapping, &source_to_offset); + } + + let bucket_length = bucket_data.len() as u32; + bucket_entries.push((bucket_key, bucket_data_offset, bucket_length, bucket_data)); + bucket_data_offset += bucket_length as usize; + } + + // 4. Build result + let total_size = bucket_data_offset; + let mut result = Vec::with_capacity(total_size); + + // 5. File header + result.push(CURRENT_SHEET_VERSION); // Version (1 byte) + result.extend_from_slice(&bucket_count.to_le_bytes()); // Mapping bucket count (2 bytes) + result.extend_from_slice(&index_count.to_le_bytes()); // Index count (4 bytes) + result.extend_from_slice(&(mapping_dir_offset as u32).to_le_bytes()); // Mapping directory offset (4 bytes) + result.extend_from_slice(&(index_table_offset as u32).to_le_bytes()); // Index table offset (4 bytes) + + // 6. Mapping directory + for (bucket_key, bucket_offset, bucket_length, _) in &bucket_entries { + result.extend_from_slice(&bucket_key.to_le_bytes()); // Bucket hash prefix (4 bytes) + result.extend_from_slice(&(*bucket_offset as u32).to_le_bytes()); // Bucket offset (4 bytes) + result.extend_from_slice(&bucket_length.to_le_bytes()); // Bucket length (4 bytes) + } + + // 7. Index table + for source in &index_sources { + result.extend_from_slice(&source.id().to_le_bytes()); // Index ID (4 bytes) + result.extend_from_slice(&source.version().to_le_bytes()); // Index version (2 bytes) + } + + // 8. 
Bucket data + for (_, _, _, bucket_data) in bucket_entries { + result.extend_from_slice(&bucket_data); + } + + result +} + +/// Calculate path hash (SHA256, take first 4 bytes) +pub fn calculate_path_hash(path: &[String]) -> u32 { + let mut hasher = Sha256::new(); + for segment in path { + hasher.update(segment.as_bytes()); + hasher.update(b"/"); + } + let result = hasher.finalize(); + u32::from_le_bytes([result[0], result[1], result[2], result[3]]) +} + +/// Write single mapping to bucket data +fn write_mapping_bucket( + result: &mut Vec, + mapping: &LocalMapping, + source_to_offset: &HashMap<(u32, u16), u32>, +) { + // Serialize path + let path_bytes = serialize_path(mapping.value()); + let path_len = path_bytes.len(); + + // Get forward information + let (forward_type, forward_info_len, forward_bytes) = mapping.forward().unpack(); + + // Get index offset + let source = mapping.index_source(); + let key = (source.id(), source.version()); + let index_offset = source_to_offset.get(&key).unwrap(); + + // Write mapping bucket entry + result.push(path_len as u8); // Key length (1 byte) + result.push(forward_type); // Forward type (1 byte) + result.push(forward_info_len); // Forward info length (1 byte) + + // Write key data (path) + result.extend_from_slice(&path_bytes); + + // Write forward info data + result.extend_from_slice(&forward_bytes); + + // Write index offset + result.extend_from_slice(&index_offset.to_le_bytes()); // Index offset (4 bytes) +} + +/// Serialize path to byte array +fn serialize_path(path: &[String]) -> Vec { + let mut result = Vec::new(); + for (i, segment) in path.iter().enumerate() { + result.extend_from_slice(segment.as_bytes()); + if i < path.len() - 1 { + result.push(b'/'); + } + } + result +} + +/// Test only: Calculate single mapping bucket entry size +#[cfg(test)] +fn calculate_mapping_bucket_size(mapping: &LocalMapping) -> usize { + use crate::sheet::constants::MAPPING_BUCKET_MIN_SIZE; + + let path_size = 
serialize_path(mapping.value()).len(); + let (_, forward_info_len, _) = mapping.forward().unpack(); + + MAPPING_BUCKET_MIN_SIZE + path_size + forward_info_len as usize +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{mapping::LocalMappingForward, sheet::constants::MAPPING_BUCKET_MIN_SIZE}; + + #[test] + fn test_serialize_path() { + let path = vec![ + "dir".to_string(), + "subdir".to_string(), + "file.txt".to_string(), + ]; + let bytes = serialize_path(&path); + assert_eq!(bytes, b"dir/subdir/file.txt"); + } + + #[test] + fn test_calculate_path_hash() { + let path1 = vec!["test".to_string(), "file.txt".to_string()]; + let path2 = vec!["test".to_string(), "file.txt".to_string()]; + let path3 = vec!["other".to_string(), "file.txt".to_string()]; + + let hash1 = calculate_path_hash(&path1); + let hash2 = calculate_path_hash(&path2); + let hash3 = calculate_path_hash(&path3); + + assert_eq!(hash1, hash2); + assert_ne!(hash1, hash3); + } + + #[test] + fn test_calculate_mapping_bucket_size() { + let mapping = LocalMapping::new( + vec!["test".to_string(), "file.txt".to_string()], + IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + + let size = calculate_mapping_bucket_size(&mapping); + // 13 == "test/file.txt".len() + assert_eq!(size, MAPPING_BUCKET_MIN_SIZE + 13); + } + + #[test] + fn test_convert_empty_sheet() { + let sheet_data = SheetData::empty(); + let bytes = convert_sheet_data_to_bytes(sheet_data); + + // Verify file header + assert_eq!(bytes[0], CURRENT_SHEET_VERSION); // Version + assert_eq!(u16::from_le_bytes([bytes[1], bytes[2]]), 0); // Mapping bucket count + assert_eq!( + u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]), + 0 + ); // Index count + + // Total size should be HEADER_SIZE + assert_eq!(bytes.len(), HEADER_SIZE); + } + + #[test] + fn test_convert_sheet_with_one_mapping() { + let mut sheet_data = SheetData::empty(); + let mapping = LocalMapping::new( + vec!["dir".to_string(), "file.txt".to_string()], + 
IndexSource::new(1, 1), + LocalMappingForward::Latest, + ) + .unwrap(); + sheet_data.mappings.insert(mapping); + + let bytes = convert_sheet_data_to_bytes(sheet_data); + + // Verify file header + assert_eq!(bytes[0], CURRENT_SHEET_VERSION); // Version + assert_eq!(u16::from_le_bytes([bytes[1], bytes[2]]), 1); // Should have 1 bucket + assert_eq!( + u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]), + 1 + ); // 1 index source + + // Verify mapping directory + let mapping_dir_offset = HEADER_SIZE; + + // Bucket offset should point after the index table + let index_table_offset = + u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]) as usize; + let bucket_offset = u32::from_le_bytes([ + bytes[mapping_dir_offset + 4], + bytes[mapping_dir_offset + 5], + bytes[mapping_dir_offset + 6], + bytes[mapping_dir_offset + 7], + ]) as usize; + + assert!(bucket_offset >= index_table_offset + INDEX_ENTRY_SIZE); + } +} diff --git a/systems/sheet/src/sheet/writer.rs b/systems/sheet/src/sheet/writer.rs deleted file mode 100644 index 5d9b257..0000000 --- a/systems/sheet/src/sheet/writer.rs +++ /dev/null @@ -1,264 +0,0 @@ -use crate::index_source::IndexSource; -use crate::mapping::LocalMapping; -use crate::sheet::SheetData; -use crate::sheet::constants::{ - CURRENT_SHEET_VERSION, HEADER_SIZE, INDEX_ENTRY_SIZE, MAPPING_DIR_ENTRY_SIZE, -}; -use sha2::{Digest, Sha256}; -use std::collections::{BTreeMap, HashMap}; - -/// Convert SheetData to byte array -pub fn convert_sheet_data_to_bytes(sheet_data: SheetData) -> Vec { - // Collect all mappings - let mappings: Vec = sheet_data.mappings.into_iter().collect(); - - // Collect all unique index sources - let mut index_sources = Vec::new(); - let mut source_to_offset = HashMap::new(); - - for mapping in &mappings { - let source = mapping.index_source(); - let key = (source.id(), source.version()); - if !source_to_offset.contains_key(&key) { - let offset = index_sources.len() as u32; - source_to_offset.insert(key, offset); - 
index_sources.push(IndexSource::new(source.id(), source.version())); - } - } - - let index_count = index_sources.len() as u32; - - // 1. Organize mappings into hash buckets - let mut buckets: BTreeMap> = BTreeMap::new(); - for mapping in mappings { - let hash = calculate_path_hash(mapping.value()); - let bucket_key = hash >> 24; // Take high 8 bits as bucket key - buckets - .entry(bucket_key) - .or_insert_with(Vec::new) - .push(mapping); - } - - let bucket_count = buckets.len() as u16; - - // 2. Calculate offsets for each section - let header_size = HEADER_SIZE; - let mapping_dir_offset = header_size; - let mapping_dir_size = bucket_count as usize * MAPPING_DIR_ENTRY_SIZE; - let index_table_offset = mapping_dir_offset + mapping_dir_size; - let index_table_size = index_count as usize * INDEX_ENTRY_SIZE; - - // 3. Calculate bucket data offsets - let mut bucket_data_offset = index_table_offset + index_table_size; - let mut bucket_entries = Vec::new(); - - // Prepare data for each bucket - for (&bucket_key, bucket_mappings) in &buckets { - // Calculate bucket data size - let mut bucket_data = Vec::new(); - for mapping in bucket_mappings { - write_mapping_bucket(&mut bucket_data, mapping, &source_to_offset); - } - - let bucket_length = bucket_data.len() as u32; - bucket_entries.push((bucket_key, bucket_data_offset, bucket_length, bucket_data)); - bucket_data_offset += bucket_length as usize; - } - - // 4. Build result - let total_size = bucket_data_offset; - let mut result = Vec::with_capacity(total_size); - - // 5. 
File header - result.push(CURRENT_SHEET_VERSION); // Version (1 byte) - result.extend_from_slice(&bucket_count.to_le_bytes()); // Mapping bucket count (2 bytes) - result.extend_from_slice(&index_count.to_le_bytes()); // Index count (4 bytes) - result.extend_from_slice(&(mapping_dir_offset as u32).to_le_bytes()); // Mapping directory offset (4 bytes) - result.extend_from_slice(&(index_table_offset as u32).to_le_bytes()); // Index table offset (4 bytes) - - // 6. Mapping directory - for (bucket_key, bucket_offset, bucket_length, _) in &bucket_entries { - result.extend_from_slice(&bucket_key.to_le_bytes()); // Bucket hash prefix (4 bytes) - result.extend_from_slice(&(*bucket_offset as u32).to_le_bytes()); // Bucket offset (4 bytes) - result.extend_from_slice(&bucket_length.to_le_bytes()); // Bucket length (4 bytes) - } - - // 7. Index table - for source in &index_sources { - result.extend_from_slice(&source.id().to_le_bytes()); // Index ID (4 bytes) - result.extend_from_slice(&source.version().to_le_bytes()); // Index version (2 bytes) - } - - // 8. 
Bucket data - for (_, _, _, bucket_data) in bucket_entries { - result.extend_from_slice(&bucket_data); - } - - result -} - -/// Calculate path hash (SHA256, take first 4 bytes) -pub fn calculate_path_hash(path: &[String]) -> u32 { - let mut hasher = Sha256::new(); - for segment in path { - hasher.update(segment.as_bytes()); - hasher.update(b"/"); - } - let result = hasher.finalize(); - u32::from_le_bytes([result[0], result[1], result[2], result[3]]) -} - -/// Write single mapping to bucket data -fn write_mapping_bucket( - result: &mut Vec, - mapping: &LocalMapping, - source_to_offset: &HashMap<(u32, u16), u32>, -) { - // Serialize path - let path_bytes = serialize_path(mapping.value()); - let path_len = path_bytes.len(); - - // Get forward information - let (forward_type, forward_info_len, forward_bytes) = mapping.forward().unpack(); - - // Get index offset - let source = mapping.index_source(); - let key = (source.id(), source.version()); - let index_offset = source_to_offset.get(&key).unwrap(); - - // Write mapping bucket entry - result.push(path_len as u8); // Key length (1 byte) - result.push(forward_type); // Forward type (1 byte) - result.push(forward_info_len); // Forward info length (1 byte) - - // Write key data (path) - result.extend_from_slice(&path_bytes); - - // Write forward info data - result.extend_from_slice(&forward_bytes); - - // Write index offset - result.extend_from_slice(&index_offset.to_le_bytes()); // Index offset (4 bytes) -} - -/// Serialize path to byte array -fn serialize_path(path: &[String]) -> Vec { - let mut result = Vec::new(); - for (i, segment) in path.iter().enumerate() { - result.extend_from_slice(segment.as_bytes()); - if i < path.len() - 1 { - result.push(b'/'); - } - } - result -} - -/// Test only: Calculate single mapping bucket entry size -#[cfg(test)] -fn calculate_mapping_bucket_size(mapping: &LocalMapping) -> usize { - use crate::sheet::constants::MAPPING_BUCKET_MIN_SIZE; - - let path_size = 
serialize_path(mapping.value()).len(); - let (_, forward_info_len, _) = mapping.forward().unpack(); - - MAPPING_BUCKET_MIN_SIZE + path_size + forward_info_len as usize -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{mapping::LocalMappingForward, sheet::constants::MAPPING_BUCKET_MIN_SIZE}; - - #[test] - fn test_serialize_path() { - let path = vec![ - "dir".to_string(), - "subdir".to_string(), - "file.txt".to_string(), - ]; - let bytes = serialize_path(&path); - assert_eq!(bytes, b"dir/subdir/file.txt"); - } - - #[test] - fn test_calculate_path_hash() { - let path1 = vec!["test".to_string(), "file.txt".to_string()]; - let path2 = vec!["test".to_string(), "file.txt".to_string()]; - let path3 = vec!["other".to_string(), "file.txt".to_string()]; - - let hash1 = calculate_path_hash(&path1); - let hash2 = calculate_path_hash(&path2); - let hash3 = calculate_path_hash(&path3); - - assert_eq!(hash1, hash2); - assert_ne!(hash1, hash3); - } - - #[test] - fn test_calculate_mapping_bucket_size() { - let mapping = LocalMapping::new( - vec!["test".to_string(), "file.txt".to_string()], - IndexSource::new(1, 1), - LocalMappingForward::Latest, - ) - .unwrap(); - - let size = calculate_mapping_bucket_size(&mapping); - // 13 == "test/file.txt".len() - assert_eq!(size, MAPPING_BUCKET_MIN_SIZE + 13); - } - - #[test] - fn test_convert_empty_sheet() { - let sheet_data = SheetData::empty(); - let bytes = convert_sheet_data_to_bytes(sheet_data); - - // Verify file header - assert_eq!(bytes[0], CURRENT_SHEET_VERSION); // Version - assert_eq!(u16::from_le_bytes([bytes[1], bytes[2]]), 0); // Mapping bucket count - assert_eq!( - u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]), - 0 - ); // Index count - - // Total size should be HEADER_SIZE - assert_eq!(bytes.len(), HEADER_SIZE); - } - - #[test] - fn test_convert_sheet_with_one_mapping() { - let mut sheet_data = SheetData::empty(); - let mapping = LocalMapping::new( - vec!["dir".to_string(), "file.txt".to_string()], - 
IndexSource::new(1, 1), - LocalMappingForward::Latest, - ) - .unwrap(); - sheet_data.mappings.insert(mapping); - - let bytes = convert_sheet_data_to_bytes(sheet_data); - - // Verify file header - assert_eq!(bytes[0], CURRENT_SHEET_VERSION); // Version - assert_eq!(u16::from_le_bytes([bytes[1], bytes[2]]), 1); // Should have 1 bucket - assert_eq!( - u32::from_le_bytes([bytes[3], bytes[4], bytes[5], bytes[6]]), - 1 - ); // 1 index source - - // Verify mapping directory - let mapping_dir_offset = HEADER_SIZE; - - // Bucket offset should point after the index table - let index_table_offset = - u32::from_le_bytes([bytes[11], bytes[12], bytes[13], bytes[14]]) as usize; - let bucket_offset = u32::from_le_bytes([ - bytes[mapping_dir_offset + 4], - bytes[mapping_dir_offset + 5], - bytes[mapping_dir_offset + 6], - bytes[mapping_dir_offset + 7], - ]) as usize; - - assert!(bucket_offset >= index_table_offset + INDEX_ENTRY_SIZE); - } -} -- cgit