//! Bidx (Butchunker Index) file format utilities //! //! The bidx file format: //! - Magic number: [u8; 4] = b"G00d" //! - Original filename length: u16 (little-endian) //! - Original filename: [u8] (UTF-8, no null terminator) //! - Chunk hashes: [u8; 32][u8; 32][u8; 32]... (binary hashes, not hex strings) use std::io::{self, Write}; use std::path::Path; use crate::chunker::constants::BUTCK_INDEX_MAGIC; use crate::chunker::rw::storage::ChunkInfo; /// Write a bidx index file pub fn write_bidx_file( index_path: &Path, chunk_infos: &[ChunkInfo], original_file_path: &Path, ) -> io::Result<()> { let file = std::fs::File::create(index_path)?; let mut writer = io::BufWriter::new(file); // Magic bytes writer.write_all(&BUTCK_INDEX_MAGIC)?; // Get original filename let filename = original_file_path .file_name() .and_then(|n| n.to_str()) .unwrap_or("unknown"); let filename_bytes = filename.as_bytes(); // Validate filename length if filename_bytes.len() > u16::MAX as usize { return Err(io::Error::new( io::ErrorKind::InvalidInput, format!("Filename too long: {} bytes", filename_bytes.len()), )); } // Write filename length as u16 let filename_len = filename_bytes.len() as u16; writer.write_all(&filename_len.to_le_bytes())?; // Write filename bytes writer.write_all(filename_bytes)?; // Write chunk hashes for chunk_info in chunk_infos { // Convert hex hash to 32-byte binary representation let hash_bytes = match hex::decode(&chunk_info.hash) { Ok(bytes) => bytes, Err(e) => { return Err(io::Error::new( io::ErrorKind::InvalidData, format!("Failed to decode hash hex '{}': {}", chunk_info.hash, e), )); } }; // Ensure hash is exactly 32 bytes if hash_bytes.len() != 32 { return Err(io::Error::new( io::ErrorKind::InvalidData, format!("Hash must be 32 bytes, got {} bytes", hash_bytes.len()), )); } // Write hash writer.write_all(&hash_bytes)?; } writer.flush()?; Ok(()) } /// Read a bidx index file pub fn read_bidx_file(index_path: &Path) -> io::Result<(String, Vec)> { use std::io::Read; let mut file = std::fs::File::open(index_path)?; let mut buffer = Vec::new(); file.read_to_end(&mut buffer)?; if buffer.len() < 4 { return Err(io::Error::new( io::ErrorKind::InvalidData, "File too short to contain magic number", )); } // Check magic number if &buffer[0..4] != BUTCK_INDEX_MAGIC { return Err(io::Error::new( io::ErrorKind::InvalidData, "Invalid magic number", )); } let mut offset = 4; // Read filename length if offset + 2 > buffer.len() { return Err(io::Error::new( io::ErrorKind::InvalidData, "File too short to contain filename length", )); } let filename_len = u16::from_le_bytes([buffer[offset], buffer[offset + 1]]) as usize; offset += 2; // Read filename if offset + filename_len > buffer.len() { return Err(io::Error::new( io::ErrorKind::InvalidData, "File too short to contain filename", )); } let filename_bytes = &buffer[offset..offset + filename_len]; let filename = String::from_utf8(filename_bytes.to_vec()).map_err(|e| { io::Error::new( io::ErrorKind::InvalidData, format!("Filename is not valid UTF-8: {}", e), ) })?; offset += filename_len; // Read chunk hashes let mut chunk_infos = Vec::new(); let hash_size = 32; while offset + hash_size <= buffer.len() { // Read hash let hash_bytes = &buffer[offset..offset + hash_size]; let hash = hex::encode(hash_bytes); offset += hash_size; chunk_infos.push(ChunkInfo { index: chunk_infos.len(), hash, }); } // Check if we read exactly all data if offset != buffer.len() { return Err(io::Error::new( io::ErrorKind::InvalidData, format!( "File contains {} extra bytes after chunk hashes", buffer.len() - offset ), )); } Ok((filename, chunk_infos)) }