Add callback support to chunk_stream_with and implement stream writing

author: 魏曹先生 <1992414357@qq.com> 2026-03-07 19:37:52 +0800
committer: 魏曹先生 <1992414357@qq.com> 2026-03-07 19:37:52 +0800
commit: 9e7c0fd45e169929156bdb317b10d7bb3db65f8b (patch)
tree: 94c1e0e6cafe996b7b7da8dfd6e1ff1a04539cda /src/chunker/rw/storage/bidx.rs
parent: 22926ce29e3f8e040ec349401aeb6a77f32eae72 (diff)
1 files changed, 157 insertions, 0 deletions
diff --git a/src/chunker/rw/storage/bidx.rs b/src/chunker/rw/storage/bidx.rs
new file mode 100644
index 0000000..783ded6
--- /dev/null
+++ b/src/chunker/rw/storage/bidx.rs
@@ -0,0 +1,157 @@
+//! Bidx (Butchunker Index) file format utilities
+//!
+//! The bidx file format:
+//! - Magic number: [u8; 4] = b"G00d"
+//! - Original filename length: u16 (little-endian)
+//! - Original filename: [u8] (UTF-8, no null terminator)
+//! - Chunk hashes: [u8; 32][u8; 32][u8; 32]... (binary hashes, not hex strings)
+
+use std::io::{self, Write};
+use std::path::Path;
+
+use crate::chunker::constants::BUTCK_INDEX_MAGIC;
+use crate::chunker::rw::storage::ChunkInfo;
+
+/// Write a bidx index file.
+///
+/// Layout written: magic bytes, file name length (u16, little-endian), the
+/// UTF-8 file name bytes, then one 32-byte binary hash per entry of
+/// `chunk_infos`, in slice order.
+///
+/// Every input is validated before `index_path` is created: `File::create`
+/// truncates, so failing afterwards would wipe a previously valid index.
+///
+/// # Errors
+///
+/// - `InvalidInput` if the file name is longer than `u16::MAX` bytes.
+/// - `InvalidData` if a chunk hash is not valid hex or is not 32 bytes long.
+/// - Any I/O error from creating, writing or flushing the file.
+pub fn write_bidx_file(
+    index_path: &Path,
+    chunk_infos: &[ChunkInfo],
+    original_file_path: &Path,
+) -> io::Result<()> {
+    // Fall back to "unknown" when the path has no final component or that
+    // component is not valid UTF-8.
+    let filename_bytes = original_file_path
+        .file_name()
+        .and_then(|name| name.to_str())
+        .unwrap_or("unknown")
+        .as_bytes();
+
+    // The on-disk length field is a u16, so longer names cannot be stored.
+    if filename_bytes.len() > u16::MAX as usize {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidInput,
+            format!("Filename too long: {} bytes", filename_bytes.len()),
+        ));
+    }
+    let filename_len = filename_bytes.len() as u16;
+
+    // Decode all hex hashes into one contiguous buffer up front.
+    let mut hash_blob: Vec<u8> = Vec::with_capacity(chunk_infos.len() * 32);
+    for chunk_info in chunk_infos {
+        let hash_bytes = hex::decode(&chunk_info.hash).map_err(|e| {
+            io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("Failed to decode hash hex '{}': {}", chunk_info.hash, e),
+            )
+        })?;
+        if hash_bytes.len() != 32 {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("Hash must be 32 bytes, got {} bytes", hash_bytes.len()),
+            ));
+        }
+        hash_blob.extend_from_slice(&hash_bytes);
+    }
+
+    // Nothing below can fail validation; only genuine I/O errors remain.
+    let mut writer = io::BufWriter::new(std::fs::File::create(index_path)?);
+    writer.write_all(&BUTCK_INDEX_MAGIC)?;
+    writer.write_all(&filename_len.to_le_bytes())?;
+    writer.write_all(filename_bytes)?;
+    writer.write_all(&hash_blob)?;
+    writer.flush()
+}
+
+/// Read a bidx index file.
+///
+/// Loads the whole file into memory and parses, in order: the magic bytes,
+/// the u16 little-endian file name length, the UTF-8 file name, and a run of
+/// 32-byte binary hashes extending to the end of the file.
+///
+/// The format has no chunk count field, so the number of chunks is derived
+/// from the size of the data following the file name.
+///
+/// Returns the stored file name together with one `ChunkInfo` per hash; each
+/// entry's `index` is its zero-based position and its `hash` is the hex
+/// encoding of the stored bytes.
+///
+/// # Errors
+///
+/// Besides I/O errors from opening or reading the file, `InvalidData` is
+/// returned when the file is truncated, the magic does not match, the file
+/// name is not valid UTF-8, or trailing bytes do not form a whole hash.
+pub fn read_bidx_file(index_path: &Path) -> io::Result<(String, Vec<ChunkInfo>)> {
+    use std::io::Read;
+
+    // Sizes of the fixed-width fields of the format.
+    const MAGIC_LEN: usize = 4;
+    const NAME_LEN_FIELD: usize = 2;
+    const HASH_SIZE: usize = 32;
+
+    // Every format violation is reported with the same error kind.
+    let invalid = |msg: &str| io::Error::new(io::ErrorKind::InvalidData, msg);
+
+    // Read the entire index into memory; parsing works on slices of it.
+    let mut data = Vec::new();
+    std::fs::File::open(index_path)?.read_to_end(&mut data)?;
+
+    // Magic number
+    if data.len() < MAGIC_LEN {
+        return Err(invalid("File too short to contain magic number"));
+    }
+    let (magic, rest) = data.split_at(MAGIC_LEN);
+    if magic != BUTCK_INDEX_MAGIC {
+        return Err(invalid("Invalid magic number"));
+    }
+
+    // Filename length (u16, little-endian)
+    if rest.len() < NAME_LEN_FIELD {
+        return Err(invalid("File too short to contain filename length"));
+    }
+    let (len_field, rest) = rest.split_at(NAME_LEN_FIELD);
+    let name_len = u16::from_le_bytes([len_field[0], len_field[1]]);
+    let name_len = usize::from(name_len);
+
+    // Filename
+    if rest.len() < name_len {
+        return Err(invalid("File too short to contain filename"));
+    }
+    let (name_bytes, hash_area) = rest.split_at(name_len);
+    let filename = String::from_utf8(name_bytes.to_vec())
+        .map_err(|e| invalid(&format!("Filename is not valid UTF-8: {}", e)))?;
+
+    // Everything after the name must split evenly into fixed-size hashes.
+    let hashes = hash_area.chunks_exact(HASH_SIZE);
+    // `remainder()` is what is left once all whole hashes are taken.
+    let leftover = hashes.remainder().len();
+    if leftover != 0 {
+        return Err(invalid(&format!(
+            "File contains {} extra bytes after chunk hashes",
+            leftover
+        )));
+    }
+
+    // Chunk indices follow storage order, starting at zero.
+    let chunk_infos: Vec<ChunkInfo> = hashes
+        .enumerate()
+        .map(|(index, raw)| ChunkInfo {
+            index,
+            hash: hex::encode(raw),
+        })
+        .collect();
+
+    Ok((filename, chunk_infos))
+}
author	魏曹先生 <1992414357@qq.com>	2026-03-07 19:37:52 +0800
committer	魏曹先生 <1992414357@qq.com>	2026-03-07 19:37:52 +0800
commit	9e7c0fd45e169929156bdb317b10d7bb3db65f8b (patch)
tree	94c1e0e6cafe996b7b7da8dfd6e1ff1a04539cda /src/chunker/rw/storage/bidx.rs
parent	22926ce29e3f8e040ec349401aeb6a77f32eae72 (diff)