diff options
| author | 魏曹先生 <1992414357@qq.com> | 2026-03-07 19:37:52 +0800 |
|---|---|---|
| committer | 魏曹先生 <1992414357@qq.com> | 2026-03-07 19:37:52 +0800 |
| commit | 9e7c0fd45e169929156bdb317b10d7bb3db65f8b (patch) | |
| tree | 94c1e0e6cafe996b7b7da8dfd6e1ff1a04539cda /src/chunker/rw/storage/bidx.rs | |
| parent | 22926ce29e3f8e040ec349401aeb6a77f32eae72 (diff) | |
Add callback support to chunk_stream_with and implement stream writing
Diffstat (limited to 'src/chunker/rw/storage/bidx.rs')
| -rw-r--r-- | src/chunker/rw/storage/bidx.rs | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/src/chunker/rw/storage/bidx.rs b/src/chunker/rw/storage/bidx.rs new file mode 100644 index 0000000..783ded6 --- /dev/null +++ b/src/chunker/rw/storage/bidx.rs @@ -0,0 +1,157 @@ +//! Bidx (Butchunker Index) file format utilities +//! +//! The bidx file format: +//! - Magic number: [u8; 4] = b"G00d" +//! - Original filename length: u16 (little-endian) +//! - Original filename: [u8] (UTF-8, no null terminator) +//! - Chunk hashes: [u8; 32][u8; 32][u8; 32]... (binary hashes, not hex strings) + +use std::io::{self, Write}; +use std::path::Path; + +use crate::chunker::constants::BUTCK_INDEX_MAGIC; +use crate::chunker::rw::storage::ChunkInfo; + +/// Write a bidx index file +pub fn write_bidx_file( + index_path: &Path, + chunk_infos: &[ChunkInfo], + original_file_path: &Path, +) -> io::Result<()> { + let file = std::fs::File::create(index_path)?; + let mut writer = io::BufWriter::new(file); + + // Magic bytes + writer.write_all(&BUTCK_INDEX_MAGIC)?; + + // Get original filename + let filename = original_file_path + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown"); + let filename_bytes = filename.as_bytes(); + + // Validate filename length + if filename_bytes.len() > u16::MAX as usize { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("Filename too long: {} bytes", filename_bytes.len()), + )); + } + + // Write filename length as u16 + let filename_len = filename_bytes.len() as u16; + writer.write_all(&filename_len.to_le_bytes())?; + + // Write filename bytes + writer.write_all(filename_bytes)?; + + // Write chunk hashes + for chunk_info in chunk_infos { + // Convert hex hash to 32-byte binary representation + let hash_bytes = match hex::decode(&chunk_info.hash) { + Ok(bytes) => bytes, + Err(e) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to decode hash hex '{}': {}", chunk_info.hash, e), + )); + } + }; + + // Ensure hash is exactly 32 bytes + if hash_bytes.len() != 32 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Hash must be 32 bytes, got {} bytes", hash_bytes.len()), + )); + } + + // Write hash + writer.write_all(&hash_bytes)?; + } + + writer.flush()?; + Ok(()) +} + +/// Read a bidx index file +pub fn read_bidx_file(index_path: &Path) -> io::Result<(String, Vec<ChunkInfo>)> { + use std::io::Read; + + let mut file = std::fs::File::open(index_path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + + if buffer.len() < 4 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File too short to contain magic number", + )); + } + + // Check magic number + if &buffer[0..4] != BUTCK_INDEX_MAGIC { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Invalid magic number", + )); + } + + let mut offset = 4; + + // Read filename length + if offset + 2 > buffer.len() { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File too short to contain filename length", + )); + } + let filename_len = u16::from_le_bytes([buffer[offset], buffer[offset + 1]]) as usize; + offset += 2; + + // Read filename + if offset + filename_len > buffer.len() { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File too short to contain filename", + )); + } + let filename_bytes = &buffer[offset..offset + filename_len]; + let filename = String::from_utf8(filename_bytes.to_vec()).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Filename is not valid UTF-8: {}", e), + ) + })?; + offset += filename_len; + + // Read chunk hashes + let mut chunk_infos = Vec::new(); + let hash_size = 32; + + while offset + hash_size <= buffer.len() { + // Read hash + let hash_bytes = &buffer[offset..offset + hash_size]; + let hash = hex::encode(hash_bytes); + offset += hash_size; + + chunk_infos.push(ChunkInfo { + index: chunk_infos.len(), + hash, + }); + } + + // Check if we read exactly all data + if offset != buffer.len() { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "File contains {} extra bytes after chunk hashes", + buffer.len() - offset + ), + )); + } + + Ok((filename, chunk_infos)) +} |
