From 22926ce29e3f8e040ec349401aeb6a77f32eae72 Mon Sep 17 00:00:00 2001 From: 魏曹先生 <1992414357@qq.com> Date: Wed, 4 Mar 2026 21:26:04 +0800 Subject: Initialize Butchunker project structure and policy system --- src/chunker/context.rs | 226 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 src/chunker/context.rs (limited to 'src/chunker/context.rs') diff --git a/src/chunker/context.rs b/src/chunker/context.rs new file mode 100644 index 0000000..79254f5 --- /dev/null +++ b/src/chunker/context.rs @@ -0,0 +1,226 @@ +use std::{collections::HashMap, env::current_dir, path::PathBuf, process::exit, str::FromStr}; + +use log::{error, warn}; + +use crate::{ + chunker::constants::BUTCK_STORAGE_DIR_NAME, core::hash::ChunkWriteHash, special_argument, + special_flag, utils::file_input_solve::parse_path_input, +}; + +#[derive(Debug, Default)] +pub struct ButckContext { + /// All input files + pub file_paths: Vec, + + /// Path of Butck Storage + pub storage_path: Option, + + // Display chunk boundaries + pub display_boundaries: bool, + + /// Whether to read in stream mode + pub stream_read: Option, + + /// Whether to read files using memory mapping + pub memmap_read: bool, + + /// Register name + pub register_name: Option, + + /// Chunking policy name + pub policy_name: Option, + + /// Hash algorithm used for chunking + pub chunk_hash: ChunkWriteHash, + + /// Output directory + pub output_dir: PathBuf, + + /// Output file (not available for some commands) + pub output_file: Option, + + /// Override parameters + pub params: HashMap, +} + +impl ButckContext { + /// Apply the args of ChunkerContext to itself + pub fn from_args(mut args: Vec) -> Self { + let mut ctx = ButckContext::default(); + let recursive = ctx.read_recursive(&mut args); + ctx.apply_stream_read(&mut args); + ctx.apply_memmap_read(&mut args); + ctx.apply_register_name(&mut args); + ctx.apply_policy_name(&mut args); + ctx.apply_chunk_hash(&mut args); + ctx.apply_storage_dir(&mut args); + ctx.apply_output_paths(&mut args); + ctx.apply_params(&mut args); + ctx.apply_display_boundaries(&mut args); + + // Finally, parse path input + ctx.file_paths = parse_path_input(args, recursive, vec![BUTCK_STORAGE_DIR_NAME]); + ctx + } + + fn read_recursive(&mut self, args: &mut Vec) -> bool { + special_flag!(args, "-r", "--recursive") + } + + fn apply_stream_read(&mut self, args: &mut Vec) { + if let Some(size_str) = special_argument!(args, "-S", "--stream-read") + && let Ok(size) = size_str.parse::() { + self.stream_read = Some(size); + } + } + + fn apply_memmap_read(&mut self, args: &mut Vec) -> bool { + special_flag!(args, "-m", "--memmap-read") + } + + fn apply_register_name(&mut self, args: &mut Vec) { + self.register_name = special_argument!(args, "-R", "--register"); + } + + fn apply_policy_name(&mut self, args: &mut Vec) { + self.policy_name = special_argument!(args, "-p", "--policy"); + } + + fn apply_chunk_hash(&mut self, args: &mut Vec) { + let chunk_hash_str = special_argument!(args, "-H", "--chunk-hash"); + self.chunk_hash = match chunk_hash_str { + Some(ref s) => match s.as_str() { + "blake3" => ChunkWriteHash::Blake3, + "sha256" => ChunkWriteHash::Sha256, + _ => ChunkWriteHash::default(), + }, + None => ChunkWriteHash::default(), + }; + } + + fn apply_output_paths(&mut self, args: &mut Vec) { + let output_dir_str = special_argument!(args, "-o", "--output-dir"); + let output_file_str = special_argument!(args, "-O", "--output-file"); + + let current_dir = current_dir().unwrap(); + + let output_dir = if let Some(output_dir_str) = output_dir_str { + let path = PathBuf::from(output_dir_str); + if path.exists() { Some(path) } else { None } + } else { + None + }; + + self.output_dir = if let Some(output_dir) = output_dir { + output_dir + } else if let Some(storage_path) = &self.storage_path { + storage_path.clone() + } else { + current_dir + }; + + self.output_file = output_file_str.map(PathBuf::from) + } + + fn apply_params(&mut self, args: &mut Vec) { + while let Some(arg) = special_argument!(args, "+p", "+param") { + let split = arg.split('=').collect::>(); + if split.len() == 2 { + self.params + .insert(split[0].to_string(), split[1].to_string()); + } + } + } + + fn apply_storage_dir(&mut self, args: &mut Vec) { + self.storage_path = { + let storage_override = match special_argument!(args, "-s", "--storage") { + Some(o) => { + let path = PathBuf::from_str(o.as_str()); + if let Ok(p) = &path { + Self::init_butck_storage(p.clone()); + } + path.ok() + } + None => None, + }; + Self::find_butck_storage_dir(storage_override) + }; + } + + fn apply_display_boundaries(&mut self, args: &mut Vec) { + self.display_boundaries = special_flag!(args, "-D", "--display-boundaries"); + } + + fn init_butck_storage(path: PathBuf) -> Option { + if !path.exists() { + // If the path does not exist, create it and initialize Butck Storage here + if let Err(e) = std::fs::create_dir_all(&path) { + error!("Failed to create directory '{}': {}", path.display(), e); + exit(1); + } + let butck_dir = path.join(BUTCK_STORAGE_DIR_NAME); + if let Err(e) = std::fs::create_dir_all(&butck_dir) { + error!( + "Failed to create '{}' directory: {}", + BUTCK_STORAGE_DIR_NAME, e + ); + exit(1); + } + Some(path) + } else { + let butck_dir = path.join(BUTCK_STORAGE_DIR_NAME); + + // Check if Butck Storage already exists + if butck_dir.exists() { + // Butck Storage already exists, return the path + Some(path) + } else { + // Butck Storage doesn't exist, create it with a warning if directory is not empty + let is_empty = path + .read_dir() + .map(|mut entries| entries.next().is_none()) + .unwrap_or(false); + + if !is_empty { + // Warn about creating storage in non-empty directory + warn!( + "Creating '{}' storage in non-empty directory: {}", + BUTCK_STORAGE_DIR_NAME, + path.display() + ); + } + + // Create Butck Storage directory + if let Err(e) = std::fs::create_dir_all(&butck_dir) { + error!( + "Failed to create '{}' directory: {}", + BUTCK_STORAGE_DIR_NAME, e + ); + exit(1); + } + Some(path) + } + } + } + + // Get the ButckStorage directory based on context + fn find_butck_storage_dir(from: Option) -> Option { + let mut current_dir = match from { + Some(path) => path, + None => std::env::current_dir().ok()?, + }; + + loop { + let butck_dir = current_dir.join(BUTCK_STORAGE_DIR_NAME); + if butck_dir.is_dir() { + return Some(current_dir); + } + + if !current_dir.pop() { + break; + } + } + None + } +} -- cgit