use std::{collections::HashMap, env::current_dir, path::PathBuf, process::exit, str::FromStr}; use log::{error, warn}; use crate::{ chunker::{constants::BUTCK_METADATA_DIR_NAME, rw::storage::hash::ChunkWriteHash}, special_argument, special_flag, storage::{ButckRWError, build, write}, utils::file_input_solve::parse_path_input, }; pub struct Butck; impl Butck { pub fn write(files: Vec, storage: PathBuf) -> ButckContext { ButckContext::default() .with_file_paths(files) .with_storage_path(storage) .with_write_mode() } pub fn build(index_files: Vec, storage: PathBuf) -> ButckContext { ButckContext::default() .with_file_paths(index_files) .with_storage_path(storage) .with_build_mode() } } #[derive(Debug, Default)] enum ButckMethod { #[default] None, Write, Build, } #[derive(Debug, Default)] pub struct ButckContext { method: ButckMethod, /// All input files /// They will be processed by build / write operations /// For build operation, it expects *.bidx file collections /// For write operation, it expects file path collections without directories pub file_paths: Vec, /// Storage repository path /// It specifies the storage location for file chunks /// For build operation, chunks will be read from this directory /// For write operation, chunks will be output to this directory /// /// If set to None, the program cannot execute successfully pub storage_path: Option, /// Display chunk boundaries /// /// If set to true, no chunking or building logic will be executed /// Only boundary information will be output to stdio pub display_boundaries: bool, /// Stream reading /// If set to Some(size) /// Will load data of size each time and process it with streaming strategy pub stream_read: Option, /// Memory map reading /// /// If enabled, will use Memmap to map files to memory for access pub memmap_read: bool, /// Registration name /// When set to Some(name) /// After write operation completes, /// the corresponding bidx file will be registered to the storage directory's registry /// /// In build phase, registration name can be used directly to create files pub register_name: Option, /// Chunking policy /// For command line program, can use `butck lspolicy-all` to query /// It specifies the concrete chunking policy, /// reasonable policy settings can significantly improve file deduplication rate /// /// If set to None, the program cannot execute successfully pub policy_name: Option, /// Chunk hash /// Stores hash representation of chunks /// Currently supports Blake3 and SHA256 pub chunk_hash: ChunkWriteHash, /// Output path /// For build operation, built files will be output here /// For write operation, generated *.bidx files will be output here /// /// Defaults to current runtime directory pub output_dir: PathBuf, /// Output file /// Precisely specifies output file name /// /// If number of input files is greater than 1, program cannot execute successfully pub output_file: Option, /// Parameters /// Provides concrete parameters for the policy pub params: HashMap, } impl ButckContext { pub fn with_build_mode(mut self) -> Self { self.method = ButckMethod::Build; self } pub fn with_write_mode(mut self) -> Self { self.method = ButckMethod::Write; self } pub fn with_storage_path(mut self, path: PathBuf) -> Self { self.storage_path = Some(path); self } pub fn with_display_boundaries(mut self, display: bool) -> Self { self.display_boundaries = display; self } pub fn with_stream_read(mut self, size: Option) -> Self { self.stream_read = size; self } pub fn with_memmap_read(mut self, use_memmap: bool) -> Self { self.memmap_read = use_memmap; self } pub fn with_register_name(mut self, name: Option) -> Self { self.register_name = name; self } pub fn with_policy_name(mut self, name: Option) -> Self { self.policy_name = name; self } pub fn with_chunk_hash(mut self, hash: ChunkWriteHash) -> Self { self.chunk_hash = hash; self } pub fn with_output_dir(mut self, dir: PathBuf) -> Self { self.output_dir = dir; self } pub fn with_output_file(mut self, file: Option) -> Self { self.output_file = file; self } pub fn param(mut self, key: String, value: String) -> Self { self.params.insert(key, value); self } pub fn with_file_paths(mut self, paths: Vec) -> Self { self.file_paths = paths; self } pub fn add_file(mut self, path: PathBuf) -> Self { self.file_paths.push(path); self } pub async fn exec(self) -> Result<(), ButckRWError> { match self.method { ButckMethod::None => Ok(()), ButckMethod::Write => { write(self).await?; Ok(()) } ButckMethod::Build => { build(self).await?; Ok(()) } } } } impl ButckContext { /// Apply the args of ChunkerContext to itself pub fn from_args(mut args: Vec) -> Self { let mut ctx = ButckContext::default(); let recursive = ctx.read_recursive(&mut args); ctx.apply_stream_read(&mut args); ctx.apply_memmap_read(&mut args); ctx.apply_register_name(&mut args); ctx.apply_policy_name(&mut args); ctx.apply_chunk_hash(&mut args); ctx.apply_storage_dir(&mut args); ctx.apply_output_paths(&mut args); ctx.apply_params(&mut args); ctx.apply_display_boundaries(&mut args); // Finally, parse path input args.retain(|arg| !arg.starts_with("--") && !arg.starts_with('-')); ctx.file_paths = parse_path_input(args, recursive, vec![BUTCK_METADATA_DIR_NAME]); ctx } fn read_recursive(&mut self, args: &mut Vec) -> bool { special_flag!(args, "-r", "--recursive") } fn apply_stream_read(&mut self, args: &mut Vec) { if let Some(size_str) = special_argument!(args, "-S", "--stream") && let Ok(size) = size_str.parse::() { self.stream_read = Some(size); } } fn apply_memmap_read(&mut self, args: &mut Vec) -> bool { special_flag!(args, "-m", "--memmap-read") } fn apply_register_name(&mut self, args: &mut Vec) { self.register_name = special_argument!(args, "-R", "--register"); } fn apply_policy_name(&mut self, args: &mut Vec) { self.policy_name = special_argument!(args, "-p", "--policy"); } fn apply_chunk_hash(&mut self, args: &mut Vec) { let chunk_hash_str = special_argument!(args, "-H", "--chunk-hash"); self.chunk_hash = match chunk_hash_str { Some(ref s) => match s.as_str() { "blake3" => ChunkWriteHash::Blake3, "sha256" => ChunkWriteHash::Sha256, _ => ChunkWriteHash::default(), }, None => ChunkWriteHash::default(), }; } fn apply_output_paths(&mut self, args: &mut Vec) { let output_dir_str = special_argument!(args, "-o", "--output-dir"); let output_file_str = special_argument!(args, "-O", "--output-file"); let current_dir = current_dir().unwrap(); let output_dir = if let Some(output_dir_str) = output_dir_str { let path = PathBuf::from(output_dir_str); if path.exists() { Some(path) } else { None } } else { None }; self.output_dir = if let Some(output_dir) = output_dir { output_dir } else if let Some(storage_path) = &self.storage_path { storage_path.clone() } else { current_dir }; self.output_file = output_file_str.map(PathBuf::from) } fn apply_params(&mut self, args: &mut Vec) { while let Some(arg) = special_argument!(args, "+p", "+param") { let split = arg.split('=').collect::>(); if split.len() == 2 { self.params .insert(split[0].to_string(), split[1].to_string()); } } } fn apply_storage_dir(&mut self, args: &mut Vec) { self.storage_path = { let storage_override = match special_argument!(args, "-s", "--storage") { Some(o) => { let path = PathBuf::from_str(o.as_str()); if let Ok(p) = &path { Self::init_butck_storage(p.clone()); } path.ok() } None => None, }; Self::find_butck_storage_dir(storage_override) }; } fn apply_display_boundaries(&mut self, args: &mut Vec) { self.display_boundaries = special_flag!(args, "-D", "--display-boundaries"); } fn init_butck_storage(path: PathBuf) -> Option { if !path.exists() { // If the path does not exist, create it and initialize Butck Storage here if let Err(e) = std::fs::create_dir_all(&path) { error!("Failed to create directory '{}': {}", path.display(), e); exit(1); } let butck_dir = path.join(BUTCK_METADATA_DIR_NAME); if let Err(e) = std::fs::create_dir_all(&butck_dir) { error!( "Failed to create '{}' directory: {}", BUTCK_METADATA_DIR_NAME, e ); exit(1); } Some(path) } else { let butck_dir = path.join(BUTCK_METADATA_DIR_NAME); // Check if Butck Storage already exists if butck_dir.exists() { // Butck Storage already exists, return the path Some(path) } else { // Butck Storage doesn't exist, create it with a warning if directory is not empty let is_empty = path .read_dir() .map(|mut entries| entries.next().is_none()) .unwrap_or(false); if !is_empty { // Warn about creating storage in non-empty directory warn!( "Creating '{}' storage in non-empty directory: {}", BUTCK_METADATA_DIR_NAME, path.display() ); } // Create Butck Storage directory if let Err(e) = std::fs::create_dir_all(&butck_dir) { error!( "Failed to create '{}' directory: {}", BUTCK_METADATA_DIR_NAME, e ); exit(1); } Some(path) } } } // Get the ButckStorage directory based on context fn find_butck_storage_dir(from: Option) -> Option { let mut current_dir = match from { Some(path) => path, None => std::env::current_dir().ok()?, }; loop { let butck_dir = current_dir.join(BUTCK_METADATA_DIR_NAME); if butck_dir.is_dir() { return Some(current_dir); } if !current_dir.pop() { break; } } None } }