summary | refs | log | tree | commit | diff
path: root/src/chunker/context.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/chunker/context.rs')
-rw-r--r-- src/chunker/context.rs 169
1 files changed, 159 insertions, 10 deletions
diff --git a/src/chunker/context.rs b/src/chunker/context.rs
index d918937..45283bc 100644
--- a/src/chunker/context.rs
+++ b/src/chunker/context.rs
@@ -5,46 +5,195 @@ use log::{error, warn};
use crate::{
chunker::{constants::BUTCK_METADATA_DIR_NAME, rw::storage::hash::ChunkWriteHash},
special_argument, special_flag,
+ storage::{ButckRWError, build, write},
utils::file_input_solve::parse_path_input,
};
+/// Namespace type offering shortcuts that produce a preconfigured [`ButckContext`].
+pub struct Butck;
+
+impl Butck {
+    /// Creates a context set up for the write operation.
+    ///
+    /// Starts from `ButckContext::default()`, selects write mode, and stores
+    /// `storage` as the storage path and `files` as the input file list.
+    /// Nothing is executed until `exec` is awaited on the returned context.
+    pub fn write(files: Vec<PathBuf>, storage: PathBuf) -> ButckContext {
+        let ctx = ButckContext::default().with_write_mode();
+        ctx.with_storage_path(storage).with_file_paths(files)
+    }
+
+    /// Creates a context set up for the build operation.
+    ///
+    /// Starts from `ButckContext::default()`, selects build mode, and stores
+    /// `storage` as the storage path and `index_files` as the input file list.
+    /// Nothing is executed until `exec` is awaited on the returned context.
+    pub fn build(index_files: Vec<PathBuf>, storage: PathBuf) -> ButckContext {
+        let ctx = ButckContext::default().with_build_mode();
+        ctx.with_storage_path(storage).with_file_paths(index_files)
+    }
+}
+
+#[derive(Debug, Default)]
+enum ButckMethod { // Operation a `ButckContext` runs when `exec` is called
+ #[default]
+ None, // Default: no operation selected; `exec` returns `Ok(())` without doing any work
+ Write, // Selected by `with_write_mode`; `exec` calls `write`
+ Build, // Selected by `with_build_mode`; `exec` calls `build`
+}
+
#[derive(Debug, Default)]
pub struct ButckContext {
+ method: ButckMethod, // Operation dispatched by `exec`; set through `with_write_mode` / `with_build_mode`
+
/// All input files
+ /// They are processed by the build / write operations.
+ /// The build operation expects a collection of *.bidx files.
+ /// The write operation expects a collection of file paths that contains no directories.
pub file_paths: Vec<PathBuf>,
- /// Path of Butck Storage
+ /// Storage repository path.
+ /// Specifies the storage location for file chunks.
+ /// For the build operation, chunks are read from this directory.
+ /// For the write operation, chunks are written to this directory.
+ ///
+ /// If set to `None`, the program cannot execute successfully.
pub storage_path: Option<PathBuf>,
- // Display chunk boundaries
+ /// Display chunk boundaries.
+ ///
+ /// If set to `true`, no chunking or building logic is executed;
+ /// only boundary information is output to stdio.
pub display_boundaries: bool,
- /// Whether to read in stream mode
+ /// Stream reading.
+ /// If set to `Some(size)`,
+ /// data is loaded `size` at a time and processed with a streaming strategy.
pub stream_read: Option<u32>,
- /// Whether to read files using memory mapping
+ /// Memory-mapped reading.
+ ///
+ /// If enabled, files are mapped into memory (memmap) for access.
pub memmap_read: bool,
- /// Register name
+ /// Registration name.
+ /// When set to `Some(name)`,
+ /// after the write operation completes,
+ /// the corresponding bidx file is registered in the storage directory's registry.
+ ///
+ /// In the build phase, the registration name can be used directly to create files.
pub register_name: Option<String>,
- /// Chunking policy name
+ /// Chunking policy.
+ /// From the command-line program, `butck lspolicy-all` can be used to query policies.
+ /// Specifies the concrete chunking policy;
+ /// a well-chosen policy can significantly improve the file deduplication rate.
+ ///
+ /// If set to `None`, the program cannot execute successfully.
pub policy_name: Option<String>,
- /// Hash algorithm used for chunking
+ /// Chunk hash.
+ /// Selects the hash representation stored for chunks.
+ /// Currently supports Blake3 and SHA256.
pub chunk_hash: ChunkWriteHash,
- /// Output directory
+ /// Output path.
+ /// For the build operation, built files are output here.
+ /// For the write operation, generated *.bidx files are output here.
+ ///
+ /// Defaults to the current runtime directory.
pub output_dir: PathBuf,
- /// Output file (not available for some commands)
+ /// Output file.
+ /// Specifies the exact output file name.
+ ///
+ /// If there is more than one input file, the program cannot execute successfully.
pub output_file: Option<PathBuf>,
- /// Override parameters
+ /// Parameters.
+ /// Provides the concrete parameters passed to the policy.
pub params: HashMap<String, String>,
}
impl ButckContext {
+    /// Selects the build operation to be run by [`ButckContext::exec`].
+    #[must_use]
+    pub fn with_build_mode(mut self) -> Self {
+        self.method = ButckMethod::Build;
+        self
+    }
+
+    /// Selects the write operation to be run by [`ButckContext::exec`].
+    #[must_use]
+    pub fn with_write_mode(mut self) -> Self {
+        self.method = ButckMethod::Write;
+        self
+    }
+
+    /// Sets the storage repository path (`storage_path`) to `Some(path)`.
+    #[must_use]
+    pub fn with_storage_path(mut self, path: PathBuf) -> Self {
+        self.storage_path = Some(path);
+        self
+    }
+
+    /// Sets the `display_boundaries` flag.
+    #[must_use]
+    pub fn with_display_boundaries(mut self, display: bool) -> Self {
+        self.display_boundaries = display;
+        self
+    }
+
+    /// Sets the stream-read setting (`stream_read`); `None` leaves streaming unset.
+    #[must_use]
+    pub fn with_stream_read(mut self, size: Option<u32>) -> Self {
+        self.stream_read = size;
+        self
+    }
+
+    /// Sets the `memmap_read` flag.
+    #[must_use]
+    pub fn with_memmap_read(mut self, use_memmap: bool) -> Self {
+        self.memmap_read = use_memmap;
+        self
+    }
+
+    /// Sets the registration name (`register_name`); `None` clears it.
+    #[must_use]
+    pub fn with_register_name(mut self, name: Option<String>) -> Self {
+        self.register_name = name;
+        self
+    }
+
+    /// Sets the chunking policy name (`policy_name`); `None` clears it.
+    #[must_use]
+    pub fn with_policy_name(mut self, name: Option<String>) -> Self {
+        self.policy_name = name;
+        self
+    }
+
+    /// Sets the chunk hash setting (`chunk_hash`).
+    #[must_use]
+    pub fn with_chunk_hash(mut self, hash: ChunkWriteHash) -> Self {
+        self.chunk_hash = hash;
+        self
+    }
+
+    /// Sets the output directory (`output_dir`).
+    #[must_use]
+    pub fn with_output_dir(mut self, dir: PathBuf) -> Self {
+        self.output_dir = dir;
+        self
+    }
+
+    /// Sets the explicit output file (`output_file`); `None` clears it.
+    #[must_use]
+    pub fn with_output_file(mut self, file: Option<PathBuf>) -> Self {
+        self.output_file = file;
+        self
+    }
+
+    /// Inserts one policy parameter into `params`.
+    ///
+    /// A value already stored under `key` is replaced.
+    #[must_use]
+    pub fn param(mut self, key: String, value: String) -> Self {
+        self.params.insert(key, value);
+        self
+    }
+
+    /// Replaces the whole input file list (`file_paths`) with `paths`.
+    #[must_use]
+    pub fn with_file_paths(mut self, paths: Vec<PathBuf>) -> Self {
+        self.file_paths = paths;
+        self
+    }
+
+    /// Appends a single path to the input file list (`file_paths`).
+    #[must_use]
+    pub fn add_file(mut self, path: PathBuf) -> Self {
+        self.file_paths.push(path);
+        self
+    }
+
+    /// Runs the operation selected on this context, consuming it.
+    ///
+    /// With write mode selected the context is handed to `write`; with build
+    /// mode it is handed to `build`. If no mode was selected, nothing is run
+    /// and `Ok(())` is returned.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error produced by `write` / `build` via `?`, as a
+    /// [`ButckRWError`].
+    pub async fn exec(self) -> Result<(), ButckRWError> {
+        match self.method {
+            // No operation selected: deliberately a no-op.
+            ButckMethod::None => {}
+            ButckMethod::Write => {
+                write(self).await?;
+            }
+            ButckMethod::Build => {
+                build(self).await?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl ButckContext {
/// Apply the args of ChunkerContext to itself
pub fn from_args(mut args: Vec<String>) -> Self {
let mut ctx = ButckContext::default();