diff options
Diffstat (limited to 'src/chunker/rw/storage/build.rs')
| -rw-r--r-- | src/chunker/rw/storage/build.rs | 82 |
1 files changed, 32 insertions, 50 deletions
diff --git a/src/chunker/rw/storage/build.rs b/src/chunker/rw/storage/build.rs index 7608b5c..51b5bf5 100644 --- a/src/chunker/rw/storage/build.rs +++ b/src/chunker/rw/storage/build.rs @@ -1,13 +1,12 @@ use futures::future::join_all; use just_progress::progress; use log::{error, info, trace}; -use memmap2::Mmap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use tokio::{fs::File, io::AsyncWriteExt}; use crate::{ chunker::{ - constants::{BUTCK_INDEX_FILE_SUFFIX, BUTCK_INDEX_MAGIC}, + constants::BUTCK_INDEX_FILE_SUFFIX, context::ButckContext, rw::error::{ButckRWError, ButckRWErrorKind}, rw::storage, @@ -52,64 +51,47 @@ async fn rebuild_from_bidx( bidx_path: &PathBuf, ctx: &ButckContext, ) -> Result<(), ButckRWErrorKind> { - // Validate file extension - if let Some(ext) = bidx_path.extension() - && ext != BUTCK_INDEX_FILE_SUFFIX + // Validate file suffix + if let Some(suffix) = bidx_path.extension() + && suffix != BUTCK_INDEX_FILE_SUFFIX { return Err(ButckRWErrorKind::InvalidBidxFormat); } info!("Rebuilding from bidx file: {}", bidx_path.display()); - // Read bidx file content - let bidx_content = if ctx.memmap_read { - let file = File::open(bidx_path).await?; - let mmap = unsafe { Mmap::map(&file)? }; - mmap.to_vec() - } else { - tokio::fs::read(bidx_path).await? - }; - - // Verify file size includes at least the header - if bidx_content.len() < 6 { - return Err(ButckRWErrorKind::InvalidBidxFormat); - } - - // Validate MAGIC bytes - if bidx_content[0..4] != BUTCK_INDEX_MAGIC { - return Err(ButckRWErrorKind::InvalidBidxFormat); - } - - // Read filename - let filename_len = u16::from_le_bytes([bidx_content[4], bidx_content[5]]) as usize; - if bidx_content.len() < 6 + filename_len { - return Err(ButckRWErrorKind::InvalidBidxFormat); - } - let filename_bytes = &bidx_content[6..6 + filename_len]; - let original_filename = String::from_utf8(filename_bytes.to_vec()) - .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?; + // Use the unified bidx file reader + let (original_filename, chunk_infos) = + crate::chunker::rw::storage::bidx::read_bidx_file(bidx_path).map_err(|e| { + error!("Failed to read bidx file: {}", e); + ButckRWErrorKind::InvalidBidxFormat + })?; trace!("Original filename from bidx: {}", original_filename); - let hash_data_start = 6 + filename_len; - let hash_data = &bidx_content[hash_data_start..]; - - // Verify that hash data size is a multiple of 32 bytes - if hash_data.len() % 32 != 0 { - return Err(ButckRWErrorKind::InvalidBidxFormat); - } - - let chunk_count = hash_data.len() / 32; + let chunk_count = chunk_infos.len(); info!("Found {} chunks in bidx file", chunk_count); + // Extract hash bytes from chunk infos let mut chunk_hashes = Vec::with_capacity(chunk_count); - for i in 0..chunk_count { - let start = i * 32; - let end = start + 32; - let hash_bytes: [u8; 32] = hash_data[start..end] - .try_into() - .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?; - chunk_hashes.push(hash_bytes); + for chunk_info in &chunk_infos { + match hex::decode(&chunk_info.hash) { + Ok(hash_bytes) => { + if hash_bytes.len() == 32 { + let hash_array: [u8; 32] = hash_bytes + .try_into() + .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?; + chunk_hashes.push(hash_array); + } else { + error!("Invalid hash length: {} bytes", hash_bytes.len()); + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + } + Err(e) => { + error!("Failed to decode hash hex: {}", e); + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + } } trace!("Parsed {} chunk hashes", chunk_hashes.len()); @@ -210,7 +192,7 @@ async fn read_chunk( progress_name: &str, step: f64, hash_hex: String, - storage_dir: &PathBuf, + storage_dir: &Path, chunk_index: usize, ) -> Result<Vec<u8>, ButckRWErrorKind> { trace!("read_chunk[{}]: Starting, hash: {}", chunk_index, hash_hex); |
