diff options
| author | Weicao-CatilGrass <1992414357@qq.com> | 2026-03-09 15:55:29 +0800 |
|---|---|---|
| committer | Weicao-CatilGrass <1992414357@qq.com> | 2026-03-09 15:55:29 +0800 |
| commit | 5bd8a0b8e98e0dc1c8ba693c6e1530e75c62b81f (patch) | |
| tree | df0a501e18b60a671d9e35127207ee61bbbeca85 | |
| parent | 2cddfb098f6cf54a54e2812e1070bf238bb4d20e (diff) | |
Add Butck struct and builder methods to ButckContext
| -rw-r--r-- | src/chunker/context.rs | 169 |
1 files changed, 159 insertions, 10 deletions
diff --git a/src/chunker/context.rs b/src/chunker/context.rs index d918937..45283bc 100644 --- a/src/chunker/context.rs +++ b/src/chunker/context.rs @@ -5,46 +5,195 @@ use log::{error, warn}; use crate::{ chunker::{constants::BUTCK_METADATA_DIR_NAME, rw::storage::hash::ChunkWriteHash}, special_argument, special_flag, + storage::{ButckRWError, build, write}, utils::file_input_solve::parse_path_input, }; +pub struct Butck; +impl Butck { + pub fn write(files: Vec<PathBuf>, storage: PathBuf) -> ButckContext { + ButckContext::default() + .with_file_paths(files) + .with_storage_path(storage) + .with_write_mode() + } + + pub fn build(index_files: Vec<PathBuf>, storage: PathBuf) -> ButckContext { + ButckContext::default() + .with_file_paths(index_files) + .with_storage_path(storage) + .with_build_mode() + } +} + +#[derive(Debug, Default)] +enum ButckMethod { + #[default] + None, + Write, + Build, +} + #[derive(Debug, Default)] pub struct ButckContext { + method: ButckMethod, + /// All input files + /// They will be processed by build / write operations + /// For build operation, it expects *.bidx file collections + /// For write operation, it expects file path collections without directories pub file_paths: Vec<PathBuf>, - /// Path of Butck Storage + /// Storage repository path + /// It specifies the storage location for file chunks + /// For build operation, chunks will be read from this directory + /// For write operation, chunks will be output to this directory + /// + /// If set to None, the program cannot execute successfully pub storage_path: Option<PathBuf>, - // Display chunk boundaries + /// Display chunk boundaries + /// + /// If set to true, no chunking or building logic will be executed + /// Only boundary information will be output to stdio pub display_boundaries: bool, - /// Whether to read in stream mode + /// Stream reading + /// If set to Some(size) + /// Will load data of size each time and process it with streaming strategy pub stream_read: Option<u32>, - /// Whether to read files using memory mapping + /// Memory map reading + /// + /// If enabled, will use Memmap to map files to memory for access pub memmap_read: bool, - /// Register name + /// Registration name + /// When set to Some(name) + /// After write operation completes, + /// the corresponding bidx file will be registered to the storage directory's registry + /// + /// In build phase, registration name can be used directly to create files pub register_name: Option<String>, - /// Chunking policy name + /// Chunking policy + /// For command line program, can use `butck lspolicy-all` to query + /// It specifies the concrete chunking policy, + /// reasonable policy settings can significantly improve file deduplication rate + /// + /// If set to None, the program cannot execute successfully pub policy_name: Option<String>, - /// Hash algorithm used for chunking + /// Chunk hash + /// Stores hash representation of chunks + /// Currently supports Blake3 and SHA256 pub chunk_hash: ChunkWriteHash, - /// Output directory + /// Output path + /// For build operation, built files will be output here + /// For write operation, generated *.bidx files will be output here + /// + /// Defaults to current runtime directory pub output_dir: PathBuf, - /// Output file (not available for some commands) + /// Output file + /// Precisely specifies output file name + /// + /// If number of input files is greater than 1, program cannot execute successfully pub output_file: Option<PathBuf>, - /// Override parameters + /// Parameters + /// Provides concrete parameters for the policy pub params: HashMap<String, String>, } impl ButckContext { + pub fn with_build_mode(mut self) -> Self { + self.method = ButckMethod::Build; + self + } + + pub fn with_write_mode(mut self) -> Self { + self.method = ButckMethod::Write; + self + } + + pub fn with_storage_path(mut self, path: PathBuf) -> Self { + self.storage_path = Some(path); + self + } + + pub fn with_display_boundaries(mut self, display: bool) -> Self { + self.display_boundaries = display; + self + } + + pub fn with_stream_read(mut self, size: Option<u32>) -> Self { + self.stream_read = size; + self + } + + pub fn with_memmap_read(mut self, use_memmap: bool) -> Self { + self.memmap_read = use_memmap; + self + } + + pub fn with_register_name(mut self, name: Option<String>) -> Self { + self.register_name = name; + self + } + + pub fn with_policy_name(mut self, name: Option<String>) -> Self { + self.policy_name = name; + self + } + + pub fn with_chunk_hash(mut self, hash: ChunkWriteHash) -> Self { + self.chunk_hash = hash; + self + } + + pub fn with_output_dir(mut self, dir: PathBuf) -> Self { + self.output_dir = dir; + self + } + + pub fn with_output_file(mut self, file: Option<PathBuf>) -> Self { + self.output_file = file; + self + } + + pub fn param(mut self, key: String, value: String) -> Self { + self.params.insert(key, value); + self + } + + pub fn with_file_paths(mut self, paths: Vec<PathBuf>) -> Self { + self.file_paths = paths; + self + } + + pub fn add_file(mut self, path: PathBuf) -> Self { + self.file_paths.push(path); + self + } + + pub async fn exec(self) -> Result<(), ButckRWError> { + match self.method { + ButckMethod::None => Ok(()), + ButckMethod::Write => { + write(self).await?; + Ok(()) + } + ButckMethod::Build => { + build(self).await?; + Ok(()) + } + } + } +} + +impl ButckContext { /// Apply the args of ChunkerContext to itself pub fn from_args(mut args: Vec<String>) -> Self { let mut ctx = ButckContext::default(); |
