summaryrefslogtreecommitdiff
path: root/src/chunker/rw/storage/bidx.rs
diff options
context:
space:
mode:
author魏曹先生 <1992414357@qq.com>2026-03-07 19:37:52 +0800
committer魏曹先生 <1992414357@qq.com>2026-03-07 19:37:52 +0800
commit9e7c0fd45e169929156bdb317b10d7bb3db65f8b (patch)
tree94c1e0e6cafe996b7b7da8dfd6e1ff1a04539cda /src/chunker/rw/storage/bidx.rs
parent22926ce29e3f8e040ec349401aeb6a77f32eae72 (diff)
Add callback support to chunk_stream_with and implement stream writing
Diffstat (limited to 'src/chunker/rw/storage/bidx.rs')
-rw-r--r--src/chunker/rw/storage/bidx.rs157
1 files changed, 157 insertions, 0 deletions
diff --git a/src/chunker/rw/storage/bidx.rs b/src/chunker/rw/storage/bidx.rs
new file mode 100644
index 0000000..783ded6
--- /dev/null
+++ b/src/chunker/rw/storage/bidx.rs
@@ -0,0 +1,157 @@
+//! Bidx (Butchunker Index) file format utilities
+//!
+//! The bidx file format:
+//! - Magic number: [u8; 4] = b"G00d"
+//! - Original filename length: u16 (little-endian)
+//! - Original filename: [u8] (UTF-8, no null terminator)
+//! - Chunk hashes: [u8; 32][u8; 32][u8; 32]... (binary hashes, not hex strings)
+
+use std::io::{self, Write};
+use std::path::Path;
+
+use crate::chunker::constants::BUTCK_INDEX_MAGIC;
+use crate::chunker::rw::storage::ChunkInfo;
+
+/// Write a bidx index file
+///
+/// Serializes, in order: the magic bytes (`BUTCK_INDEX_MAGIC`), the byte length
+/// of the original filename as a little-endian `u16`, the filename bytes
+/// themselves, and then the 32-byte binary form of every chunk hash in
+/// `chunk_infos`, in slice order.
+///
+/// All inputs are validated *before* `index_path` is created. `File::create`
+/// truncates an existing file, so validating first guarantees that a rejected
+/// filename or hash neither wipes a previously written index nor leaves a
+/// half-written one behind.
+///
+/// # Arguments
+///
+/// * `index_path` - Destination of the index file (created or truncated).
+/// * `chunk_infos` - Chunks whose `hash` field is a hex string that must decode
+///   to exactly 32 bytes.
+/// * `original_file_path` - Path whose final component is stored as the
+///   original filename. If the path has no final component, or that component
+///   is not valid UTF-8, the literal name `"unknown"` is stored instead.
+///
+/// # Errors
+///
+/// * `InvalidInput` if the filename is longer than `u16::MAX` bytes.
+/// * `InvalidData` if a hash is not valid hex or does not decode to 32 bytes.
+/// * Any I/O error raised while creating, writing or flushing the file.
+pub fn write_bidx_file(
+    index_path: &Path,
+    chunk_infos: &[ChunkInfo],
+    original_file_path: &Path,
+) -> io::Result<()> {
+    // Size in bytes of one binary chunk hash in the file.
+    const HASH_SIZE: usize = 32;
+
+    // Get original filename (falls back to "unknown" when unavailable).
+    let filename = original_file_path
+        .file_name()
+        .and_then(|n| n.to_str())
+        .unwrap_or("unknown");
+    let filename_bytes = filename.as_bytes();
+
+    // Validate filename length: the on-disk length field is only a u16.
+    if filename_bytes.len() > u16::MAX as usize {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidInput,
+            format!("Filename too long: {} bytes", filename_bytes.len()),
+        ));
+    }
+    // Cannot truncate: the bound was checked just above.
+    let filename_len = filename_bytes.len() as u16;
+
+    // Decode and validate every hash up front, collecting the binary form into
+    // one contiguous buffer (32 bytes per chunk) so the file is only touched
+    // once all input is known to be good.
+    let mut hash_bytes: Vec<u8> = Vec::with_capacity(chunk_infos.len() * HASH_SIZE);
+    for chunk_info in chunk_infos {
+        // Convert hex hash to its binary representation
+        let decoded = hex::decode(&chunk_info.hash).map_err(|e| {
+            io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("Failed to decode hash hex '{}': {}", chunk_info.hash, e),
+            )
+        })?;
+
+        // Ensure hash is exactly 32 bytes
+        if decoded.len() != HASH_SIZE {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("Hash must be 32 bytes, got {} bytes", decoded.len()),
+            ));
+        }
+
+        hash_bytes.extend_from_slice(&decoded);
+    }
+
+    // Input is valid: only now create (and thereby truncate) the index file.
+    let file = std::fs::File::create(index_path)?;
+    let mut writer = io::BufWriter::new(file);
+
+    // Magic bytes
+    writer.write_all(&BUTCK_INDEX_MAGIC)?;
+
+    // Filename length as little-endian u16, followed by the filename bytes
+    writer.write_all(&filename_len.to_le_bytes())?;
+    writer.write_all(filename_bytes)?;
+
+    // All chunk hashes, back to back
+    writer.write_all(&hash_bytes)?;
+
+    // Flush explicitly: dropping a BufWriter would swallow a flush error.
+    writer.flush()?;
+    Ok(())
+}
+
+/// Read a bidx index file
+///
+/// Loads the whole file into memory and parses it front to back: the magic
+/// bytes, a little-endian `u16` filename length, the UTF-8 filename, and then
+/// a sequence of 32-byte binary chunk hashes that must fill the rest of the
+/// file exactly.
+///
+/// # Returns
+///
+/// The stored original filename together with one `ChunkInfo` per hash. Each
+/// `index` is the hash's zero-based position in the file and each `hash` is
+/// the hex encoding of its 32 bytes.
+///
+/// # Errors
+///
+/// * Any I/O error raised while opening or reading `index_path`.
+/// * `InvalidData` if the file is too short for the magic, the length field or
+///   the filename, if the magic does not match `BUTCK_INDEX_MAGIC`, if the
+///   filename is not valid UTF-8, or if bytes remain after the last complete
+///   32-byte hash.
+pub fn read_bidx_file(index_path: &Path) -> io::Result<(String, Vec<ChunkInfo>)> {
+    const MAGIC_LEN: usize = 4;
+    const NAME_LEN_FIELD: usize = 2;
+    const HASH_SIZE: usize = 32;
+
+    // Shorthand for the fixed-message format errors below.
+    let invalid = |msg: &'static str| io::Error::new(io::ErrorKind::InvalidData, msg);
+
+    let data = std::fs::read(index_path)?;
+
+    // Magic number
+    if data.len() < MAGIC_LEN {
+        return Err(invalid("File too short to contain magic number"));
+    }
+    let (magic, rest) = data.split_at(MAGIC_LEN);
+    if magic != BUTCK_INDEX_MAGIC {
+        return Err(invalid("Invalid magic number"));
+    }
+
+    // Filename length (u16, little-endian)
+    if rest.len() < NAME_LEN_FIELD {
+        return Err(invalid("File too short to contain filename length"));
+    }
+    let (len_field, rest) = rest.split_at(NAME_LEN_FIELD);
+    let name_len = usize::from(u16::from_le_bytes([len_field[0], len_field[1]]));
+
+    // Filename
+    if rest.len() < name_len {
+        return Err(invalid("File too short to contain filename"));
+    }
+    let (name_bytes, hash_region) = rest.split_at(name_len);
+    let filename = match String::from_utf8(name_bytes.to_vec()) {
+        Ok(name) => name,
+        Err(e) => {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("Filename is not valid UTF-8: {}", e),
+            ));
+        }
+    };
+
+    // Chunk hashes: every complete 32-byte group becomes one entry, numbered
+    // by its position in the file.
+    let groups = hash_region.chunks_exact(HASH_SIZE);
+    let trailing = groups.remainder().len();
+    let chunk_infos: Vec<ChunkInfo> = groups
+        .enumerate()
+        .map(|(index, raw)| ChunkInfo {
+            index,
+            hash: hex::encode(raw),
+        })
+        .collect();
+
+    // Anything left over that is shorter than one hash means the file is malformed.
+    if trailing != 0 {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            format!(
+                "File contains {} extra bytes after chunk hashes",
+                trailing
+            ),
+        ));
+    }
+
+    Ok((filename, chunk_infos))
+}