summary | refs | log | tree | commit | diff
path: root/src/chunker/rw/storage/build.rs
diff options
context:
space:
mode:
author: 魏曹先生 <1992414357@qq.com> 2026-03-07 19:37:52 +0800
committer: 魏曹先生 <1992414357@qq.com> 2026-03-07 19:37:52 +0800
commit: 9e7c0fd45e169929156bdb317b10d7bb3db65f8b (patch)
tree: 94c1e0e6cafe996b7b7da8dfd6e1ff1a04539cda /src/chunker/rw/storage/build.rs
parent: 22926ce29e3f8e040ec349401aeb6a77f32eae72 (diff)
Add callback support to chunk_stream_with and implement stream writing
Diffstat (limited to 'src/chunker/rw/storage/build.rs')
-rw-r--r-- src/chunker/rw/storage/build.rs | 82
1 files changed, 32 insertions, 50 deletions
diff --git a/src/chunker/rw/storage/build.rs b/src/chunker/rw/storage/build.rs
index 7608b5c..51b5bf5 100644
--- a/src/chunker/rw/storage/build.rs
+++ b/src/chunker/rw/storage/build.rs
@@ -1,13 +1,12 @@
use futures::future::join_all;
use just_progress::progress;
use log::{error, info, trace};
-use memmap2::Mmap;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
use tokio::{fs::File, io::AsyncWriteExt};
use crate::{
chunker::{
- constants::{BUTCK_INDEX_FILE_SUFFIX, BUTCK_INDEX_MAGIC},
+ constants::BUTCK_INDEX_FILE_SUFFIX,
context::ButckContext,
rw::error::{ButckRWError, ButckRWErrorKind},
rw::storage,
@@ -52,64 +51,47 @@ async fn rebuild_from_bidx(
bidx_path: &PathBuf,
ctx: &ButckContext,
) -> Result<(), ButckRWErrorKind> {
- // Validate file extension
- if let Some(ext) = bidx_path.extension()
- && ext != BUTCK_INDEX_FILE_SUFFIX
+ // Validate file suffix
+ if let Some(suffix) = bidx_path.extension()
+ && suffix != BUTCK_INDEX_FILE_SUFFIX
{
return Err(ButckRWErrorKind::InvalidBidxFormat);
}
info!("Rebuilding from bidx file: {}", bidx_path.display());
- // Read bidx file content
- let bidx_content = if ctx.memmap_read {
- let file = File::open(bidx_path).await?;
- let mmap = unsafe { Mmap::map(&file)? };
- mmap.to_vec()
- } else {
- tokio::fs::read(bidx_path).await?
- };
-
- // Verify file size includes at least the header
- if bidx_content.len() < 6 {
- return Err(ButckRWErrorKind::InvalidBidxFormat);
- }
-
- // Validate MAGIC bytes
- if bidx_content[0..4] != BUTCK_INDEX_MAGIC {
- return Err(ButckRWErrorKind::InvalidBidxFormat);
- }
-
- // Read filename
- let filename_len = u16::from_le_bytes([bidx_content[4], bidx_content[5]]) as usize;
- if bidx_content.len() < 6 + filename_len {
- return Err(ButckRWErrorKind::InvalidBidxFormat);
- }
- let filename_bytes = &bidx_content[6..6 + filename_len];
- let original_filename = String::from_utf8(filename_bytes.to_vec())
- .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?;
+ // Use the unified bidx file reader
+ let (original_filename, chunk_infos) =
+ crate::chunker::rw::storage::bidx::read_bidx_file(bidx_path).map_err(|e| {
+ error!("Failed to read bidx file: {}", e);
+ ButckRWErrorKind::InvalidBidxFormat
+ })?;
trace!("Original filename from bidx: {}", original_filename);
- let hash_data_start = 6 + filename_len;
- let hash_data = &bidx_content[hash_data_start..];
-
- // Verify that hash data size is a multiple of 32 bytes
- if hash_data.len() % 32 != 0 {
- return Err(ButckRWErrorKind::InvalidBidxFormat);
- }
-
- let chunk_count = hash_data.len() / 32;
+ let chunk_count = chunk_infos.len();
info!("Found {} chunks in bidx file", chunk_count);
+ // Extract hash bytes from chunk infos
let mut chunk_hashes = Vec::with_capacity(chunk_count);
- for i in 0..chunk_count {
- let start = i * 32;
- let end = start + 32;
- let hash_bytes: [u8; 32] = hash_data[start..end]
- .try_into()
- .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?;
- chunk_hashes.push(hash_bytes);
+ for chunk_info in &chunk_infos {
+ match hex::decode(&chunk_info.hash) {
+ Ok(hash_bytes) => {
+ if hash_bytes.len() == 32 {
+ let hash_array: [u8; 32] = hash_bytes
+ .try_into()
+ .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?;
+ chunk_hashes.push(hash_array);
+ } else {
+ error!("Invalid hash length: {} bytes", hash_bytes.len());
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+ }
+ Err(e) => {
+ error!("Failed to decode hash hex: {}", e);
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+ }
}
trace!("Parsed {} chunk hashes", chunk_hashes.len());
@@ -210,7 +192,7 @@ async fn read_chunk(
progress_name: &str,
step: f64,
hash_hex: String,
- storage_dir: &PathBuf,
+ storage_dir: &Path,
chunk_index: usize,
) -> Result<Vec<u8>, ButckRWErrorKind> {
trace!("read_chunk[{}]: Starting, hash: {}", chunk_index, hash_hex);