summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author魏曹先生 <1992414357@qq.com>2026-03-04 21:26:04 +0800
committer魏曹先生 <1992414357@qq.com>2026-03-04 21:35:09 +0800
commit22926ce29e3f8e040ec349401aeb6a77f32eae72 (patch)
tree678753ec49a61fb9d3e2d8e869393dec90ea7ef4
Initialize Butchunker project structure and policy system
-rw-r--r--.cargo/config.toml2
-rw-r--r--.gitignore1
-rw-r--r--Cargo.lock889
-rw-r--r--Cargo.toml41
-rw-r--r--LICENSE-APACHE202
-rw-r--r--LICENSE-MIT21
-rw-r--r--policy/README.md96
-rw-r--r--policy/_policies/Cargo.lock7
-rw-r--r--policy/_policies/Cargo.toml15
-rw-r--r--policy/_policies/Cargo.toml.t19
-rw-r--r--policy/_policies/src/error.rs14
-rw-r--r--policy/_policies/src/lib.rs75
-rw-r--r--policy/_policies/src/lib.rs.t117
-rw-r--r--policy/_policies/src/stream_read.rs46
-rw-r--r--policy/butck/butck_fixed_size/Cargo.lock7
-rw-r--r--policy/butck/butck_fixed_size/Cargo.toml7
-rw-r--r--policy/butck/butck_fixed_size/src/lib.rs48
-rw-r--r--resources/helps/butck.txt20
-rw-r--r--resources/version_info.txt1
-rw-r--r--scripts/sh/comp_butck.sh56
-rw-r--r--src/bin/butck.rs109
-rw-r--r--src/bin/butckrepo-guide.rs13
-rw-r--r--src/bin/butckrepo-refresh.rs619
-rw-r--r--src/chunker.rs4
-rw-r--r--src/chunker/constants.rs3
-rw-r--r--src/chunker/context.rs226
-rw-r--r--src/chunker/entry.rs39
-rw-r--r--src/chunker/rw.rs2
-rw-r--r--src/chunker/rw/error.rs61
-rw-r--r--src/chunker/rw/storage.rs88
-rw-r--r--src/chunker/rw/storage/build.rs250
-rw-r--r--src/chunker/rw/storage/write.rs118
-rw-r--r--src/chunker/rw/storage/write/simple.rs368
-rw-r--r--src/chunker/rw/storage/write/stream.rs12
-rw-r--r--src/core.rs1
-rw-r--r--src/core/hash.rs38
-rw-r--r--src/lib.rs11
-rw-r--r--src/log.rs33
-rw-r--r--src/macros.rs47
-rw-r--r--src/utils.rs2
-rw-r--r--src/utils/file_input_solve.rs82
-rw-r--r--src/utils/size_display.rs14
42 files changed, 3824 insertions, 0 deletions
diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..e0fdf5b
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[build]
+target-dir = "./.temp/target/"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e0fdece
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.temp
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..eb87eab
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,889 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "arrayref"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
+
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
+[[package]]
+name = "bitflags"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
+
+[[package]]
+name = "blake3"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+ "cpufeatures",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "butchunker"
+version = "0.1.0"
+dependencies = [
+ "blake3",
+ "butck_policies",
+ "colored",
+ "env_logger",
+ "futures",
+ "hex",
+ "just_fmt",
+ "just_progress",
+ "just_template",
+ "log",
+ "memmap2",
+ "sha2",
+ "syn",
+ "thiserror",
+ "tokio",
+ "toml",
+]
+
+[[package]]
+name = "butck_fixed_size"
+version = "0.1.0"
+
+[[package]]
+name = "butck_policies"
+version = "0.1.0"
+dependencies = [
+ "butck_fixed_size",
+ "thiserror",
+ "tokio",
+]
+
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
+[[package]]
+name = "cc"
+version = "1.2.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
+dependencies = [
+ "find-msvc-tools",
+ "shlex",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+
+[[package]]
+name = "colored"
+version = "3.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "constant_time_eq"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crypto-common"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
+[[package]]
+name = "env_filter"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f"
+dependencies = [
+ "log",
+ "regex",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "env_filter",
+ "jiff",
+ "log",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "futures"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "indexmap"
+version = "2.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "jiff"
+version = "0.2.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2"
+dependencies = [
+ "jiff-static",
+ "log",
+ "portable-atomic",
+ "portable-atomic-util",
+ "serde_core",
+]
+
+[[package]]
+name = "jiff-static"
+version = "0.2.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "just_fmt"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5454cda0d57db59778608d7a47bff5b16c6705598265869fb052b657f66cf05e"
+
+[[package]]
+name = "just_progress"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef1a564328a5061a4828b4f82b7275a7f3dbc7d4ed5778da986f6ab48563c88"
+dependencies = [
+ "tokio",
+]
+
+[[package]]
+name = "just_template"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db3edb658c34b10b69c4b3b58f7ba989cd09c82c0621dee1eef51843c2327225"
+dependencies = [
+ "just_fmt",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.182"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+
+[[package]]
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "memmap2"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "mio"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
+[[package]]
+name = "portable-atomic-util"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5"
+dependencies = [
+ "portable-atomic",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_spanned"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
+dependencies = [
+ "errno",
+ "libc",
+]
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "socket2"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0"
+dependencies = [
+ "libc",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tokio"
+version = "1.49.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "toml"
+version = "1.0.3+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c"
+dependencies = [
+ "indexmap",
+ "serde_core",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_parser",
+ "toml_writer",
+ "winnow",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "1.0.0+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.0.9+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4"
+dependencies = [
+ "winnow",
+]
+
+[[package]]
+name = "toml_writer"
+version = "1.0.6+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
+
+[[package]]
+name = "typenum"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
+[[package]]
+name = "winnow"
+version = "0.7.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..cfab2b8
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,41 @@
+[package]
+name = "butchunker"
+version = "0.1.0"
+edition = "2024"
+default-run = "butckrepo-guide"
+
+[dependencies]
+butck_policies = { path = "policy/_policies" }
+
+# Code generate
+just_template = "0.1.3"
+
+# Memory mapping
+memmap2 = "0.9"
+
+# Error
+thiserror = "2"
+
+# Syntax
+just_fmt = "0.1.2"
+syn = { version = "2", features = ["full"] }
+
+# Async
+futures = "0.3"
+tokio = { version = "1", features = ["full"] }
+
+# Display
+colored = "3"
+just_progress = "0.1.3"
+
+# Serialize & Config
+toml = "1"
+
+# Logging
+log = "0.4"
+env_logger = "0.11"
+
+# Hashing
+blake3 = "1.8"
+sha2 = "0.10"
+hex = "0.4"
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..f630f06
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2026 Butchunker Team, Weicao-CatilGrass
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..f4814bc
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Butchunker Team, Weicao-CatilGrass
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/policy/README.md b/policy/README.md
new file mode 100644
index 0000000..c995506
--- /dev/null
+++ b/policy/README.md
@@ -0,0 +1,96 @@
+# Write your Policy!
+
+Welcome to the Butchunker Policy Development Guide. This guide explains how to create a custom chunking policy for Butchunker. A chunking policy defines how to split data streams or files into chunks. This is a core task for data deduplication, storage, and transfer.
+
+Before starting, you should know basic Rust and understand the Butchunker framework. Your policy will decide where to split the data based on its content and your settings.
+
+## Creating a Policy Crate
+
+First, create a new `Rust Crate` to host your chunking policy.
+
+### Writing `Cargo.toml`
+
+```toml
+[package]
+name = "butck_fixed_size" # Policy name
+authors = ["Butchunker"] # Author info
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+```
+
+## Implementing Policy Logic
+
+### Writing `src/lib.rs`
+
+In `src/lib.rs`, implement one or both of the following schemes:
+
+#### Scheme 1: Streaming Processing Scheme
+
+Suitable for processing large files where subsequent content cannot be predicted, but also does not require loading the entire file into memory.
+
+```rust
+use std::collections::HashMap;
+
+// Streaming policy struct (must implement the Default trait)
+#[derive(Default)]
+pub struct YourPolicyStream {
+ // Define your state fields here
+}
+
+// Streaming processing function
+pub async fn your_policy_stream(
+ current_data: &[u8], // Current data chunk
+ len: u32, // Data length
+    stream: &mut YourPolicyStream, // Streaming processing context
+ params: &HashMap<&str, &str>, // Configuration parameters
+) -> Option<u32> {
+ // Implement your chunking logic
+ // Return the split position (offset from the start of current_data), or None if no split
+ None
+}
+```
+
+#### Scheme 2: Simple Processing Scheme
+
+Suitable for processing small to medium-sized files that can be loaded entirely at once, allowing knowledge of subsequent data during chunking for better results.
+
+```rust
+use std::collections::HashMap;
+
+// Simple processing function
+pub async fn your_policy(
+ raw_data: &[u8], // Raw data
+ params: &HashMap<&str, &str>, // Configuration parameters
+) -> Vec<u32> {
+ // Implement your chunking logic
+ // Return a vector of all split positions (offsets from the start of raw_data)
+ vec![]
+}
+```
+
+## Registration and Usage
+
+### Deploying the Policy
+
+1. Place the completed policy `Crate` into the `./policy/` directory of the Butchunker repository.
+2. Use the `butckrepo-refresh` program to refresh the registry:
+ - If the program is not yet installed, you can execute the following in the root directory of the Butchunker repository:
+
+ ```bash
+ cargo install --path ./
+ ```
+3. After each policy library update, you must:
+ - Execute `butckrepo-refresh` to refresh the registry.
+ - Reinstall the `butck` binary: `cargo install --path ./`.
+
+### Calling the Policy
+
+- The policy will be automatically registered in Butchunker's registry.
+
+ Use the following command to call the policy:
+
+  ```bash
+  butck write <file> --policy <policy_name> --storage ./
+  ```
diff --git a/policy/_policies/Cargo.lock b/policy/_policies/Cargo.lock
new file mode 100644
index 0000000..8f7bc05
--- /dev/null
+++ b/policy/_policies/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "butck_policies"
+version = "0.1.0"
diff --git a/policy/_policies/Cargo.toml b/policy/_policies/Cargo.toml
new file mode 100644
index 0000000..d939dd2
--- /dev/null
+++ b/policy/_policies/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "butck_policies"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+thiserror = "2"
+tokio = { version = "1", features = ["fs"] }
+
+# Auto generated dependencies
+# If you find issues with the dependencies, please
+# 1. Delete all code after this comment
+# 2. Clear the file `policy/_policies/src/lib.rs`
+# 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory
+butck_fixed_size = { path = "../../policy/butck/butck_fixed_size" } \ No newline at end of file
diff --git a/policy/_policies/Cargo.toml.t b/policy/_policies/Cargo.toml.t
new file mode 100644
index 0000000..aab90b9
--- /dev/null
+++ b/policy/_policies/Cargo.toml.t
@@ -0,0 +1,19 @@
+[package]
+name = "butck_policies"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+thiserror = "2"
+tokio = { version = "1", features = ["fs"] }
+
+# Auto generated dependencies
+# If you find issues with the dependencies, please
+# 1. Delete all code after this comment
+# 2. Clear the file `policy/_policies/src/lib.rs`
+# 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory
+>>>>>>>>>> deps
+
+@@@ >>> deps
+<<<crate_name>>> = { path = "../../<<<path>>>" }
+@@@ <<<
diff --git a/policy/_policies/src/error.rs b/policy/_policies/src/error.rs
new file mode 100644
index 0000000..975749d
--- /dev/null
+++ b/policy/_policies/src/error.rs
@@ -0,0 +1,14 @@
+#[derive(Debug, thiserror::Error)]
+pub enum ChunkFailed {
+ #[error("IO error: {0}")]
+ Io(#[from] std::io::Error),
+
+ #[error("Target policy not found")]
+ PolicyNotFound,
+
+ #[error("File read failed: {0}")]
+ FileReadFailed(std::path::PathBuf),
+
+ #[error("File open failed: {0}")]
+ FileOpenFailed(std::path::PathBuf),
+}
diff --git a/policy/_policies/src/lib.rs b/policy/_policies/src/lib.rs
new file mode 100644
index 0000000..397579a
--- /dev/null
+++ b/policy/_policies/src/lib.rs
@@ -0,0 +1,75 @@
+// Auto generated dependencies
+// If you find issues with the dependencies, please
+// 1. Delete all code after this comment
+// 2. Clear the auto generated part in `policy/_policies/Cargo.toml`
+// 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory
+pub mod error;
+pub mod stream_read;
+
+use error::ChunkFailed;
+use std::{collections::HashMap, path::Path};
+
+use crate::stream_read::chunk_stream_process;
+
/// Chunks the specified raw data using the specified chunking policy
///
/// # Parameters
/// - `policy_name`: Chunking policy name, currently supports 1 policies
/// - `raw_data`: Raw data byte slice
/// - `params`: Hashmap of parameters required by the chunking policy
///
/// # Errors
/// Returns [`ChunkFailed::PolicyNotFound`] when `policy_name` does not
/// match any registered policy.
///
/// NOTE(review): this file is auto-generated from `lib.rs.t` by
/// `butckrepo-refresh`; manual edits will be overwritten on the next refresh.
pub async fn chunk_with(
    policy_name: &str,
    raw_data: &[u8],
    params: &HashMap<&str, &str>,
) -> Result<Vec<u32>, ChunkFailed> {
    match policy_name {
        // Dispatch to the wrapper module defined at the bottom of this file.
        "butck_fixed_size" => Ok(butck_fixed_size::chunk(raw_data, params).await),
        _ => Err(ChunkFailed::PolicyNotFound),
    }
}
+
/// Chunks the file at `path` with the specified policy, reading it as a
/// stream in buffers of `size` bytes instead of loading it whole.
///
/// # Parameters
/// - `policy_name`: Chunking policy name
/// - `size`: Read-buffer size in bytes handed to the stream reader
/// - `path`: File to chunk
/// - `params`: Hashmap of parameters required by the chunking policy
///
/// # Errors
/// Returns [`ChunkFailed::PolicyNotFound`] for unknown policies; file
/// open/read failures are surfaced by `chunk_stream_process`.
pub async fn chunk_stream_with(
    policy_name: &str,
    size: u32,
    path: &Path,
    params: &HashMap<&str, &str>,
) -> Result<Vec<u32>, ChunkFailed> {
    match policy_name {
        "butck_fixed_size" => {
            // Fresh per-call stream state; `chunk_stream_process` drives the
            // read loop and invokes the policy callback once per buffer.
            let mut stream = butck_fixed_size::FixedSizeStream::default();
            chunk_stream_process(
                path, &mut stream, size, params,
                async |current_data, len, stream, params| {
                    butck_fixed_size::chunk_stream(current_data, len, stream, params).await
                },
            )
            .await
        }
        _ => Err(ChunkFailed::PolicyNotFound),
    }
}
+
/// Returns the names of all chunking policies compiled into this registry.
pub fn policies() -> Vec<&'static str> {
    vec![
        // butck_fixed_size
        "butck_fixed_size",
    ]
}
+
/// Wrapper module exposing the external `butck_fixed_size` policy crate
/// behind the uniform `chunk` / `chunk_stream` interface used by the
/// dispatch functions in this file.
pub mod butck_fixed_size {
    pub use butck_fixed_size::FixedSizeStream;
    use std::collections::HashMap;

    /// Simple (whole-buffer) chunking entry point.
    pub async fn chunk(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> {
        butck_fixed_size::chunk_fixed_size(raw_data, params).await
    }

    /// Streaming chunking entry point; yields at most one split offset per
    /// buffer, relative to the start of `current_data`.
    pub async fn chunk_stream(
        current_data: &[u8],
        len: u32,
        stream: &mut butck_fixed_size::FixedSizeStream,
        params: &HashMap<&str, &str>,
    ) -> Option<u32> {
        butck_fixed_size::chunk_fixed_size_stream(current_data, len, stream, params).await
    }
}
diff --git a/policy/_policies/src/lib.rs.t b/policy/_policies/src/lib.rs.t
new file mode 100644
index 0000000..873a4cd
--- /dev/null
+++ b/policy/_policies/src/lib.rs.t
@@ -0,0 +1,117 @@
+// Auto generated dependencies
+// If you find issues with the dependencies, please
+// 1. Delete all code after this comment
+// 2. Clear the auto generated part in `policy/_policies/Cargo.toml`
+// 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory
+pub mod error;
+pub mod stream_read;
+
+use error::ChunkFailed;
+use std::{collections::HashMap, path::Path};
+
+use crate::stream_read::chunk_stream_process;
+
+/// Chunks the specified raw data using the specified chunking policy
+///
+/// # Parameters
+/// - `policy_name`: Chunking policy name, currently supports <<<policy_count>>> policies
+/// - `raw_data`: Raw data byte slice
+/// - `params`: Hashmap of parameters required by the chunking policy
+pub async fn chunk_with(
+ policy_name: &str,
+ raw_data: &[u8],
+ params: &HashMap<&str, &str>,
+) -> Result<Vec<u32>, ChunkFailed> {
+ match policy_name {
+>>>>>>>>>> match_arms
+ _ => Err(ChunkFailed::PolicyNotFound),
+ }
+}
+
+pub async fn chunk_stream_with(
+ policy_name: &str,
+ size: u32,
+ path: &Path,
+ params: &HashMap<&str, &str>,
+) -> Result<Vec<u32>, ChunkFailed> {
+ match policy_name {
+>>>>>>>>>> match_arms_stream
+ _ => Err(ChunkFailed::PolicyNotFound),
+ }
+}
+
+pub fn policies() -> Vec<&'static str> {
+ vec![
+>>>>>>>>>> policy_names
+ ]
+}
+
+>>>>>>>>>> exports_simple
+>>>>>>>>>> exports_stream
+>>>>>>>>>> exports_both
+
+@@@ >>> match_arms
+ "<<<crate_name>>>" => Ok(<<<crate_name>>>::chunk(raw_data, params).await),
+@@@ <<<
+
+@@@ >>> match_arms_stream
+ "<<<crate_name>>>" => {
+ let mut stream = <<<stream_struct_id>>>::default();
+ chunk_stream_process(
+ path, &mut stream, size, params,
+ async |current_data, len, stream, params| {
+ <<<crate_name>>>::chunk_stream(current_data, len, stream, params).await
+ },
+ )
+ .await
+ }
+@@@ <<<
+
+@@@ >>> policy_names
+ // <<<name>>>
+ "<<<name>>>",
+@@@ <<<
+
+@@@ >>> exports_simple
+pub mod <<<crate_name>>> {
+ use std::collections::HashMap;
+ pub async fn chunk(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> {
+ <<<crate_name>>>::<<<matched_func>>>(raw_data, params)<<<has_await>>>
+ }
+}
+@@@ <<<
+
+@@@ >>> exports_stream
+pub mod <<<crate_name>>> {
+ pub use <<<stream_struct_id>>>;
+
+ pub async fn chunk_stream(
+ current_data: &[u8],
+ len: u32,
+ stream: &mut <<<stream_struct_id>>>,
+ params: &std::collections::HashMap<&str, &str>,
+ ) -> Option<u32> {
+ <<<crate_name>>>::<<<matched_func_stream>>>(current_data, len, stream, params)<<<has_await_stream>>>
+ }
+}
+@@@ <<<
+
+@@@ >>> exports_both
+pub mod <<<crate_name>>> {
+ pub use <<<stream_struct_id>>>;
+ use std::collections::HashMap;
+
+ pub async fn chunk(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> {
+ <<<crate_name>>>::<<<matched_func>>>(raw_data, params)<<<has_await>>>
+ }
+
+ pub async fn chunk_stream(
+ current_data: &[u8],
+ len: u32,
+ stream: &mut <<<stream_struct_id>>>,
+ params: &HashMap<&str, &str>,
+ ) -> Option<u32> {
+ <<<crate_name>>>::<<<matched_func_stream>>>(current_data, len, stream, params)<<<has_await_stream>>>
+ }
+}
+@@@ <<<
diff --git a/policy/_policies/src/stream_read.rs b/policy/_policies/src/stream_read.rs
new file mode 100644
index 0000000..5cf7791
--- /dev/null
+++ b/policy/_policies/src/stream_read.rs
@@ -0,0 +1,46 @@
+use crate::error::ChunkFailed;
+use std::{collections::HashMap, path::Path};
+
+pub async fn chunk_stream_process<T, F>(
+ path: &Path,
+ stream_data: &mut T,
+ size: u32,
+ params: &HashMap<&str, &str>,
+ chunk_func: F,
+) -> Result<Vec<u32>, ChunkFailed>
+where
+ T: Default,
+ F: AsyncFn(&[u8], u32, &mut T, &HashMap<&str, &str>) -> Option<u32>,
+{
+ let mut file = tokio::fs::File::open(path)
+ .await
+ .map_err(|_| ChunkFailed::FileOpenFailed(path.to_path_buf()))?;
+ let mut buffer = vec![0u8; size as usize];
+ let mut splits = Vec::new();
+ let mut total_read = 0;
+
+ loop {
+ let bytes_read = tokio::io::AsyncReadExt::read(&mut file, &mut buffer)
+ .await
+ .map_err(|_| ChunkFailed::FileReadFailed(path.to_path_buf()))?;
+
+ if bytes_read == 0 {
+ break Ok(splits);
+ }
+
+ // Process chunking on the buffer slice
+ let chunk_result = chunk_func(
+ &buffer[..bytes_read],
+ bytes_read as u32,
+ stream_data,
+ params,
+ )
+ .await;
+
+ if let Some(offset) = chunk_result {
+ splits.push(total_read + offset);
+ }
+
+ total_read += bytes_read as u32;
+ }
+}
diff --git a/policy/butck/butck_fixed_size/Cargo.lock b/policy/butck/butck_fixed_size/Cargo.lock
new file mode 100644
index 0000000..c1e1873
--- /dev/null
+++ b/policy/butck/butck_fixed_size/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "butck_fixed_size"
+version = "0.1.0"
diff --git a/policy/butck/butck_fixed_size/Cargo.toml b/policy/butck/butck_fixed_size/Cargo.toml
new file mode 100644
index 0000000..1550cb9
--- /dev/null
+++ b/policy/butck/butck_fixed_size/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "butck_fixed_size"
+authors = ["Butchunker"]
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/policy/butck/butck_fixed_size/src/lib.rs b/policy/butck/butck_fixed_size/src/lib.rs
new file mode 100644
index 0000000..28cabff
--- /dev/null
+++ b/policy/butck/butck_fixed_size/src/lib.rs
@@ -0,0 +1,48 @@
use std::collections::HashMap;

/// Chunk size used when the `size` parameter is absent or invalid.
const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024; // 1MB

/// Reads the `size` parameter from `params`, falling back to
/// `DEFAULT_CHUNK_SIZE` when it is missing, unparsable, or zero.
/// A zero chunk size would cause a division by zero in the stream path,
/// so non-positive values are rejected here.
fn get_chunk_size(params: &HashMap<&str, &str>) -> usize {
    params
        .get("size")
        .and_then(|s| s.parse().ok())
        .filter(|&size| size > 0)
        .unwrap_or(DEFAULT_CHUNK_SIZE)
}

/// Fixed-size chunking over an in-memory buffer.
///
/// Returns every split position (offset from the start of `raw_data`) at
/// multiples of the chunk size, excluding 0 and `raw_data.len()`.
pub async fn chunk_fixed_size(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> {
    let chunk_size = get_chunk_size(params);
    (chunk_size..raw_data.len())
        .step_by(chunk_size)
        .map(|pos| pos as u32)
        .collect()
}

/// Streaming state: total number of bytes consumed so far.
#[derive(Default)]
pub struct FixedSizeStream {
    processed_bytes: usize,
}

/// Streaming fixed-size chunking. Called once per read buffer; returns at
/// most one split offset relative to the start of the current buffer, so
/// the read-buffer size must not exceed the chunk size.
///
/// A boundary falling exactly on the end of a buffer is reported as
/// offset 0 of the *next* buffer; consequently the end of the final
/// buffer (the file length) is never reported, matching
/// `chunk_fixed_size`, which excludes `raw_data.len()`.
pub async fn chunk_fixed_size_stream(
    _current_data: &[u8],
    len: u32,
    stream: &mut FixedSizeStream,
    params: &HashMap<&str, &str>,
) -> Option<u32> {
    let chunk_size = get_chunk_size(params);
    let valid_len = len as usize;

    // Always account for the full buffer. The previous implementation only
    // advanced `processed_bytes` by the boundary offset after a split,
    // dropping the rest of the buffer from the count and making every
    // subsequent boundary drift forward by the unaccounted remainder.
    let start = stream.processed_bytes;
    stream.processed_bytes = start + valid_len;

    // Offset of the next chunk boundary, relative to this buffer's start.
    let rem = start % chunk_size;
    let offset = if rem == 0 { 0 } else { chunk_size - rem };

    // Report the boundary only if it lies strictly inside this buffer and
    // is not position 0 of the whole stream.
    if offset < valid_len && start + offset > 0 {
        Some(offset as u32)
    } else {
        None
    }
}
diff --git a/resources/helps/butck.txt b/resources/helps/butck.txt
new file mode 100644
index 0000000..3ee666c
--- /dev/null
+++ b/resources/helps/butck.txt
@@ -0,0 +1,20 @@
+Usage: butck [-v | --version] [-h | --help] [-q | --quiet]
+ [-l | --log-level <trace/debug/info/warn/error>]
+ [-np | --no-progress] [-D | --display-boundaries]
+
+ [-s | --storage <path>] [-p | --policy <policy_name>]
+ [-H | --chunk-hash <blake3/sha256>]
+ [-o | --output-dir <output>] [-O | --output-file <file>]
+ [-r | --recursive] [-R | --register <name>]
+ [-S | --stream-read <size_byte>] [-m | --memmap-read]
+
+ [+p | +param key=value]
+
+Subcommands:
+ write <file> Write a file and output the index file
+ write <file> -R <name> Then, register the index
+ build <index/name> Input an index file and build the file from the storage
+ policies Output the available policies
+
+Butchunker 0.1.0
+Copyright (c) 2026 Weicao-CatilGrass
diff --git a/resources/version_info.txt b/resources/version_info.txt
new file mode 100644
index 0000000..2736fa7
--- /dev/null
+++ b/resources/version_info.txt
@@ -0,0 +1 @@
+Butchunker 0.1.0
diff --git a/scripts/sh/comp_butck.sh b/scripts/sh/comp_butck.sh
new file mode 100644
index 0000000..8cb31f0
--- /dev/null
+++ b/scripts/sh/comp_butck.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
# Bash programmable-completion handler for the `butck` CLI.
# Completes option values (log levels, hash algorithms, paths, live policy
# names) and the first-position subcommand.
_butck_completion() {
    local cur prev words cword
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    words=("${COMP_WORDS[@]}")
    cword=$COMP_CWORD

    # No suggestions while the user is still typing an option name.
    if [[ $cur == -* ]]; then
        return
    fi

    # Value completion for the option immediately before the cursor.
    case "$prev" in
        -l|--log-level)
            COMPREPLY=($(compgen -W "trace debug info warn error" -- "$cur"))
            return
            ;;
        -H|--chunk-hash)
            COMPREPLY=($(compgen -W "blake3 sha256" -- "$cur"))
            return
            ;;
        -o|--output-dir)
            # Directories only.
            COMPREPLY=($(compgen -d -- "$cur"))
            return
            ;;
        -O|--output-file)
            COMPREPLY=($(compgen -f -- "$cur"))
            return
            ;;
        -p|--policy)
            # Ask the installed binary for the live policy list; errors are
            # discarded so completion stays silent when butck is missing.
            local policies
            policies=$(butck policies 2>/dev/null)
            COMPREPLY=($(compgen -W "$policies" -- "$cur"))
            return
            ;;
        -R|--register)
            # Free-form registration name: nothing sensible to suggest.
            return
            ;;
    esac

    # First argument: subcommand.
    # NOTE(review): "state" is offered here but is not listed in
    # resources/helps/butck.txt — confirm which side is correct.
    if [[ $cword -eq 1 ]]; then
        COMPREPLY=($(compgen -W "write build state policies" -- "$cur"))
    fi

    # write/build take file-path arguments.
    if [[ $cword -ge 2 ]]; then
        local subcommand="${COMP_WORDS[1]}"
        case "$subcommand" in
            "build"|"write")
                COMPREPLY=($(compgen -f -- "$cur"))
                ;;
        esac
    fi
}

# Register the handler for the `butck` command.
complete -F _butck_completion butck
diff --git a/src/bin/butck.rs b/src/bin/butck.rs
new file mode 100644
index 0000000..6a81fbb
--- /dev/null
+++ b/src/bin/butck.rs
@@ -0,0 +1,109 @@
+use std::process::exit;
+
+use butchunker::{
+ chunker::{
+ context::ButckContext,
+ entry::{entry, print_help, print_version},
+ rw::error::{ButckRWError, ButckRWErrorKind},
+ },
+ log::init_logger,
+ special_argument, special_flag,
+};
+use just_progress::{progress, renderer};
+use log::error;
+use tokio::join;
+
#[tokio::main]
async fn main() {
    // Collect arguments
    let mut args: Vec<String> = std::env::args().skip(1).collect();

    // `special_flag!` removes the flag from `args` in place and reports
    // whether it was present, so later parsing only sees what remains.
    let version = special_flag!(args, "-v", "--version");
    let help = special_flag!(args, "-h", "--help");

    if version {
        print_version();
        exit(0)
    }

    // Special arguments, early return
    if help || args.is_empty() {
        print_help();
        exit(0)
    }

    // Init colored
    // Enables ANSI escape handling on Windows consoles.
    #[cfg(windows)]
    colored::control::set_virtual_terminal(true).unwrap();

    // Output control flags
    let quiet = special_flag!(args, "-q", "--quiet");
    let no_progress = special_flag!(args, "-np", "--no-progress");

    // Logger
    // `--quiet` suppresses logging entirely; otherwise an unknown or
    // missing `--log-level` value falls back to `info`.
    if !quiet {
        let logger_level = match special_argument!(args, "-l", "--log-level") {
            Some(level) => match level.trim().to_lowercase().as_str() {
                "trace" => log::LevelFilter::Trace,
                "debug" => log::LevelFilter::Debug,
                "info" => log::LevelFilter::Info,
                "warn" => log::LevelFilter::Warn,
                "error" => log::LevelFilter::Error,
                _ => log::LevelFilter::Info,
            },
            None => log::LevelFilter::Info,
        };
        init_logger(Some(logger_level));
    }

    // Remaining (stripped) arguments become the command context.
    let ctx = ButckContext::from_args(args.clone());

    // When `--no-progress` or `--quiet` is enabled,
    // the progress system will not be initialized
    if no_progress || quiet {
        handle_entry_result(entry(ctx, args).await);
    } else {
        // Run the command and the progress renderer concurrently; the
        // command task calls `progress::close()` when it finishes so the
        // renderer side of the `join!` can terminate.
        let progress = progress::init();
        let renderer = renderer::ProgressSimpleRenderer::new().with_subprogress(true);
        let bind = progress::bind(progress, move |name, state| renderer.update(name, state));
        join!(
            async {
                handle_entry_result(entry(ctx, args).await);
                progress::close();
            },
            bind
        );
    }
}
+
+fn handle_entry_result(r: Result<(), ButckRWError>) {
+ match r {
+ Ok(_) => {}
+ Err(e) => match e.kind() {
+ ButckRWErrorKind::NoButckStorageFound => {
+ error!("No butck storage found");
+ error!("Use `--storage <PATH>` to specify or init butck storage");
+ }
+ ButckRWErrorKind::ChunkingPolicyNotSpecified => {
+ error!("Chunking policy not specified");
+ error!("Use `--policy <policy_name>` to specify chunking policy");
+ error!("or use `butck policies` to output the available policies");
+ }
+ ButckRWErrorKind::ReadingMethodAmbiguous => error!("Reading method ambiguous"),
+ ButckRWErrorKind::OutputCountMismatch => {
+ error!("Output count mismatch");
+ error!("When processing a single file, use `--output-file` to specify output path");
+ error!(
+ "When processing multiple files, use `--output-dir` to specify output directory"
+ );
+ }
+ ButckRWErrorKind::ChunkNotFound(chunk_id) => {
+ error!("Chunk not found in storage: {}", chunk_id)
+ }
+ ButckRWErrorKind::RebuildFailed(reason) => error!("Failed to rebuild file: {}", reason),
+ ButckRWErrorKind::ChunkFailed(_chunk_failed) => error!("Chunk failed"),
+ ButckRWErrorKind::IOError(error) => error!("IO error: {}", error),
+ ButckRWErrorKind::InvalidBidxFormat => error!("Invalid bidx format"),
+ },
+ }
+}
diff --git a/src/bin/butckrepo-guide.rs b/src/bin/butckrepo-guide.rs
new file mode 100644
index 0000000..d694ba5
--- /dev/null
+++ b/src/bin/butckrepo-guide.rs
@@ -0,0 +1,13 @@
+use colored::Colorize;
+
+/// Print a short getting-started guide for policy authors.
+fn main() {
+    let policy_dir = "./policy/".bright_green();
+    let refresh_cmd = "cargo run --bin butckrepo-refresh".bright_green();
+
+    println!("Welcome to Butchunker!");
+    println!(
+        "Please add your policy crates to the `{}` directory",
+        policy_dir
+    );
+    println!(
+        "Then run `{}` to update the policy registry",
+        refresh_cmd
+    );
+}
diff --git a/src/bin/butckrepo-refresh.rs b/src/bin/butckrepo-refresh.rs
new file mode 100644
index 0000000..9184efb
--- /dev/null
+++ b/src/bin/butckrepo-refresh.rs
@@ -0,0 +1,619 @@
+use colored::Colorize;
+use just_fmt::fmt_path::fmt_path_str;
+use just_template::{Template, tmpl, tmpl_param};
+use std::{
+ env::current_dir,
+ path::{Path, PathBuf},
+};
+use tokio::fs;
+
+// Template sources (checked into the repo) and the generated files they are
+// expanded into. All paths are relative to the workspace root.
+const LIB_RS_TEMPLATE_PATH: &str = "policy/_policies/src/lib.rs.t";
+const CARGO_TOML_TEMPLATE_PATH: &str = "policy/_policies/Cargo.toml.t";
+const LIB_RS_PATH: &str = "./policy/_policies/src/lib.rs";
+const CARGO_TOML_PATH: &str = "./policy/_policies/Cargo.toml";
+
+/// Regenerate `policy/_policies/{src/lib.rs, Cargo.toml}` from their `.t`
+/// templates by scanning `policy/` for policy crates and registering every
+/// crate that exposes a recognizable chunking function.
+#[tokio::main]
+async fn main() {
+    /// Read a template file, exiting with a readable error when it is
+    /// missing or unreadable (a broken checkout — not recoverable).
+    async fn read_template(path: &Path) -> String {
+        fs::read_to_string(path).await.unwrap_or_else(|_| {
+            eprintln!(
+                "{}",
+                format!("Error: Failed to read template file: {}", path.display()).red()
+            );
+            std::process::exit(1);
+        })
+    }
+
+    let current_dir = current_dir().unwrap();
+    precheck(&current_dir).await;
+
+    println!("Updating policies ...");
+    // Use the path constants declared above instead of re-hardcoding the
+    // template locations (they previously appeared twice in this file).
+    let mut lib_rs_template =
+        Template::from(read_template(&current_dir.join(LIB_RS_TEMPLATE_PATH)).await);
+    let mut cargo_toml_template =
+        Template::from(read_template(&current_dir.join(CARGO_TOML_TEMPLATE_PATH)).await);
+
+    // Every directory under `policy/` with its own Cargo.toml is a candidate
+    // policy crate (directories starting with `_` are skipped by the walker).
+    let policy_crate_dirs = find_cargo_toml_dirs(&current_dir.join("policy")).await;
+    println!(
+        "Found {} crates, register to `{}`",
+        policy_crate_dirs.len(),
+        CARGO_TOML_PATH.bright_green()
+    );
+
+    tmpl_param!(lib_rs_template, policy_count = policy_crate_dirs.len());
+
+    // Inspect all crates concurrently; crates without a recognizable chunking
+    // function are silently skipped (`collect` returns `None`).
+    let collect_futures = policy_crate_dirs.iter().map(collect).collect::<Vec<_>>();
+
+    for policy in futures::future::join_all(collect_futures).await {
+        let Some(policy) = policy else { continue };
+        tmpl!(cargo_toml_template += {
+            deps { (crate_name = policy.crate_name, path = policy.path) }
+        });
+        // Determine which export template to use based on detected functions
+        if policy.matched_func_stream.is_some() {
+            let stream_struct_id = format!(
+                "{}::{}",
+                policy.crate_name,
+                policy.stream_struct_id.unwrap()
+            );
+            if policy.matched_func.is_empty() {
+                // Only stream function
+                tmpl!(lib_rs_template += {
+                    exports_stream { (
+                        crate_name = policy.crate_name,
+                        matched_func_stream = policy.matched_func_stream.unwrap(),
+                        has_await_stream =
+                            if policy.matched_func_stream_has_await { ".await" } else { "" },
+                        stream_struct_id = stream_struct_id
+                    ) },
+                    match_arms { (
+                        crate_name = policy.crate_name,
+                    ) },
+                    match_arms_stream { (
+                        crate_name = policy.crate_name,
+                        stream_struct_id = stream_struct_id
+                    ) },
+                    policy_names { (
+                        name = policy.crate_name,
+                    ) }
+                });
+            } else {
+                // Both simple and stream functions
+                tmpl!(lib_rs_template += {
+                    exports_both { (
+                        crate_name = policy.crate_name,
+                        matched_func = policy.matched_func,
+                        has_await =
+                            if policy.matched_func_has_await { ".await" } else { "" },
+                        matched_func_stream = policy.matched_func_stream.unwrap(),
+                        has_await_stream =
+                            if policy.matched_func_stream_has_await { ".await" } else { "" },
+                        stream_struct_id = stream_struct_id
+                    ) },
+                    match_arms { (
+                        crate_name = policy.crate_name,
+                    ) },
+                    match_arms_stream { (
+                        crate_name = policy.crate_name,
+                        stream_struct_id = stream_struct_id
+                    ) },
+                    policy_names { (
+                        name = policy.crate_name,
+                    ) }
+                });
+            }
+        } else {
+            // Only simple function
+            tmpl!(lib_rs_template += {
+                exports_simple { (
+                    crate_name = policy.crate_name,
+                    matched_func = policy.matched_func,
+                    has_await =
+                        if policy.matched_func_has_await { ".await" } else { "" }
+                ) },
+                match_arms { (
+                    crate_name = policy.crate_name,
+                ) },
+                policy_names { (
+                    name = policy.crate_name,
+                ) }
+            });
+        }
+    }
+
+    // Expand and write both generated files concurrently; any failure here
+    // (bad template, unwritable file) is fatal for a codegen tool.
+    let (write_cargo, write_lib) = tokio::join!(
+        fs::write(CARGO_TOML_PATH, cargo_toml_template.expand().unwrap()),
+        fs::write(LIB_RS_PATH, lib_rs_template.expand().unwrap())
+    );
+    write_cargo.unwrap();
+    write_lib.unwrap();
+}
+
+/// Everything the templates need to know about one registered policy crate.
+struct CollectedPolicy {
+    /// Package name from the crate's `Cargo.toml` (`package.name`).
+    crate_name: String,
+    /// Crate path relative to the workspace root (used as a `path` dependency).
+    path: String,
+    /// Name of the simple chunking function; empty string when the crate
+    /// only provides a stream function.
+    matched_func: String,
+    /// Whether the simple function is `async` (generated callers `.await` it).
+    matched_func_has_await: bool,
+    /// Name of the stream chunking function, if any.
+    matched_func_stream: Option<String>,
+    /// Whether the stream function is `async`.
+    matched_func_stream_has_await: bool,
+    /// Name of the stream state struct declared in the policy crate;
+    /// set iff `matched_func_stream` is set.
+    stream_struct_id: Option<String>,
+}
+
+/// Inspect one policy crate directory and gather everything the templates
+/// need to register it.
+///
+/// Returns `None` when the crate cannot be read, its `Cargo.toml` lacks
+/// `package.name`, or no matching chunk function is found in `src/lib.rs`.
+async fn collect(policy_crate_path: &PathBuf) -> Option<CollectedPolicy> {
+    let lib_rs_path = policy_crate_path.join("src").join("lib.rs");
+    let lib_rs_content = fs::read_to_string(&lib_rs_path).await.ok()?;
+
+    // The crate name comes from `package.name`, not the directory name.
+    let cargo_toml_content = fs::read_to_string(policy_crate_path.join("Cargo.toml"))
+        .await
+        .ok()?;
+    let cargo_toml: toml::Value = toml::from_str(&cargo_toml_content).ok()?;
+    let crate_name = cargo_toml
+        .get("package")?
+        .get("name")?
+        .as_str()?
+        .to_string();
+    // Store the path relative to the workspace root so the generated
+    // Cargo.toml is machine-independent.
+    let crate_path = fmt_path_str(
+        policy_crate_path
+            .strip_prefix(current_dir().unwrap())
+            .unwrap()
+            .to_string_lossy(),
+    )
+    .ok()?;
+
+    let (
+        matched_func,
+        matched_func_has_await,
+        matched_func_stream,
+        matched_func_stream_has_await,
+        stream_struct_id,
+    ) = collect_matched_func(lib_rs_content.as_str())?;
+
+    // Report the registration in pseudo-signature form,
+    // e.g. `pub async fn chunk(..)`.
+    println!(
+        "{} {} (at: `{}`) with func `{}{}{}{}(..)`",
+        "Register:".bright_blue().bold(),
+        crate_name,
+        crate_path.bright_green(),
+        "pub ".bright_magenta(),
+        if matched_func_has_await { "async " } else { "" }.bright_magenta(),
+        "fn ".bright_magenta(),
+        matched_func.bright_blue(),
+    );
+    if let Some(stream_func) = &matched_func_stream {
+        println!(
+            " and stream func `{}{}{}{}(..)`",
+            "pub ".bright_magenta(),
+            if matched_func_stream_has_await {
+                "async "
+            } else {
+                ""
+            }
+            .bright_magenta(),
+            "fn ".bright_magenta(),
+            stream_func.bright_blue()
+        );
+    }
+
+    Some(CollectedPolicy {
+        crate_name,
+        path: crate_path,
+        matched_func,
+        matched_func_has_await,
+        matched_func_stream,
+        matched_func_stream_has_await,
+        stream_struct_id,
+    })
+}
+
+/// Scan a policy crate's `lib.rs` source for public chunking entry points.
+///
+/// Returns `(simple_fn_name, simple_is_async, stream_fn_name,
+/// stream_is_async, stream_state_struct)`, or `None` when neither kind of
+/// function is present. `simple_fn_name` is the empty string when only a
+/// stream function exists.
+fn collect_matched_func(
+    lib_rs_content: &str,
+) -> Option<(String, bool, Option<String>, bool, Option<String>)> {
+    let syntax_tree = syn::parse_file(lib_rs_content).ok()?;
+
+    let mut simple: Option<(String, bool)> = None;
+    let mut stream: Option<(String, bool, String)> = None;
+
+    for item in &syntax_tree.items {
+        // Only free functions are candidates.
+        let syn::Item::Fn(func) = item else { continue };
+
+        // The generated registry calls these functions from another crate,
+        // so they must be `pub`.
+        if !matches!(func.vis, syn::Visibility::Public(_)) {
+            continue;
+        }
+
+        let sig = &func.sig;
+
+        // Check for simple chunk function (returns Vec<u32>)
+        if check_simple_chunk_function(sig) {
+            simple = Some((sig.ident.to_string(), sig.asyncness.is_some()));
+        }
+        // Check for stream chunk function (returns Option<u32>)
+        else if let Some(struct_id) = check_stream_chunk_function(sig, &syntax_tree) {
+            stream = Some((sig.ident.to_string(), sig.asyncness.is_some(), struct_id));
+        }
+    }
+
+    if simple.is_none() && stream.is_none() {
+        return None;
+    }
+
+    let (simple_name, simple_async) = simple.unwrap_or_default();
+    let (stream_name, stream_async, struct_id) = match stream {
+        Some((name, is_async, id)) => (Some(name), is_async, Some(id)),
+        None => (None, false, None),
+    };
+    Some((simple_name, simple_async, stream_name, stream_async, struct_id))
+}
+
+/// Returns `true` when `sig` is a "simple" (whole-buffer) chunking entry:
+///
+/// `pub [async] fn name(data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32>`
+///
+/// Matching is purely syntactic over single-segment type paths, so fully
+/// qualified spellings such as `std::vec::Vec<u32>` will NOT match — this
+/// assumes policy crates use the plain prelude names (TODO confirm that
+/// convention is documented for policy authors).
+///
+/// NOTE: the `let … else` guards inside the `match` arms return from the
+/// whole function, not just the arm.
+fn check_simple_chunk_function(sig: &syn::Signature) -> bool {
+    // Check if the return type is Vec<u32>
+    let return_type_matches = match &sig.output {
+        syn::ReturnType::Type(_, ty) => {
+            let syn::Type::Path(type_path) = &**ty else {
+                return false;
+            };
+            let segments = &type_path.path.segments;
+
+            segments.len() == 1
+                && segments[0].ident == "Vec"
+                && matches!(&segments[0].arguments, syn::PathArguments::AngleBracketed(args)
+                    if args.args.len() == 1 &&
+                        matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Path(inner_type))
+                            if inner_type.path.segments.len() == 1 &&
+                                inner_type.path.segments[0].ident == "u32"
+                        )
+                )
+        }
+        // `ReturnType::Default` (no `->` at all) can never be `Vec<u32>`.
+        _ => false,
+    };
+
+    if !return_type_matches {
+        return false;
+    }
+
+    // Check that there are exactly 2 parameters
+    if sig.inputs.len() != 2 {
+        return false;
+    }
+
+    // Check that the first parameter type is &[u8]
+    let first_param_matches = match &sig.inputs[0] {
+        syn::FnArg::Typed(pat_type) => {
+            let syn::Type::Reference(type_ref) = &*pat_type.ty else {
+                return false;
+            };
+            let syn::Type::Slice(slice_type) = &*type_ref.elem else {
+                return false;
+            };
+            let syn::Type::Path(type_path) = &*slice_type.elem else {
+                return false;
+            };
+
+            type_path.path.segments.len() == 1 && type_path.path.segments[0].ident == "u8"
+        }
+        // `FnArg::Receiver` (`self`) cannot occur for a matching free fn.
+        _ => false,
+    };
+
+    // Check that the second parameter type is &HashMap<&str, &str>
+    let second_param_matches = match &sig.inputs[1] {
+        syn::FnArg::Typed(pat_type) => {
+            let syn::Type::Reference(type_ref) = &*pat_type.ty else {
+                return false;
+            };
+            let syn::Type::Path(type_path) = &*type_ref.elem else {
+                return false;
+            };
+
+            // Both generic arguments must be `&str` references.
+            type_path.path.segments.len() == 1
+                && type_path.path.segments[0].ident == "HashMap"
+                && matches!(&type_path.path.segments[0].arguments, syn::PathArguments::AngleBracketed(args)
+                    if args.args.len() == 2 &&
+                        matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Reference(first_ref))
+                            if matches!(&*first_ref.elem, syn::Type::Path(first_path)
+                                if first_path.path.segments.len() == 1 &&
+                                    first_path.path.segments[0].ident == "str"
+                            )
+                        ) &&
+                        matches!(&args.args[1], syn::GenericArgument::Type(syn::Type::Reference(second_ref))
+                            if matches!(&*second_ref.elem, syn::Type::Path(second_path)
+                                if second_path.path.segments.len() == 1 &&
+                                    second_path.path.segments[0].ident == "str"
+                            )
+                        )
+                )
+        }
+        _ => false,
+    };
+
+    first_param_matches && second_param_matches
+}
+
+/// Returns `Some(state_struct_name)` when `sig` is a stream chunking entry:
+///
+/// `pub [async] fn name(buf: &[u8], n: u32, state: &mut S,
+///                      params: &HashMap<&str, &str>) -> Option<u32>`
+///
+/// where `S` is a struct declared in the same `lib.rs` that is
+/// default-constructible (see [`is_struct_defined_in_crate`]).
+///
+/// As in [`check_simple_chunk_function`], matching is syntactic over
+/// single-segment paths, and the `let … else` guards inside `match` arms
+/// return `None` from the whole function.
+fn check_stream_chunk_function(sig: &syn::Signature, syntax_tree: &syn::File) -> Option<String> {
+    // Check if the return type is Option<u32>
+    let return_type_matches = match &sig.output {
+        syn::ReturnType::Type(_, ty) => {
+            let syn::Type::Path(type_path) = &**ty else {
+                return None;
+            };
+            let segments = &type_path.path.segments;
+
+            segments.len() == 1
+                && segments[0].ident == "Option"
+                && matches!(&segments[0].arguments, syn::PathArguments::AngleBracketed(args)
+                    if args.args.len() == 1 &&
+                        matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Path(inner_type))
+                            if inner_type.path.segments.len() == 1 &&
+                                inner_type.path.segments[0].ident == "u32"
+                        )
+                )
+        }
+        _ => false,
+    };
+
+    if !return_type_matches {
+        return None;
+    }
+
+    // Check that there are exactly 4 parameters
+    if sig.inputs.len() != 4 {
+        return None;
+    }
+
+    // Check that the first parameter type is &[u8]
+    let first_param_matches = match &sig.inputs[0] {
+        syn::FnArg::Typed(pat_type) => {
+            let syn::Type::Reference(type_ref) = &*pat_type.ty else {
+                return None;
+            };
+            let syn::Type::Slice(slice_type) = &*type_ref.elem else {
+                return None;
+            };
+            let syn::Type::Path(type_path) = &*slice_type.elem else {
+                return None;
+            };
+
+            // Check it's u8
+            type_path.path.segments.len() == 1 && type_path.path.segments[0].ident == "u8"
+        }
+        _ => false,
+    };
+
+    // Check that the second parameter type is u32
+    let second_param_matches = match &sig.inputs[1] {
+        syn::FnArg::Typed(pat_type) => {
+            let syn::Type::Path(type_path) = &*pat_type.ty else {
+                return None;
+            };
+            type_path.path.segments.len() == 1 && type_path.path.segments[0].ident == "u32"
+        }
+        _ => false,
+    };
+
+    // Check that the third parameter type is &mut T where T is a struct
+    // defined in this crate — its name is the value this function returns.
+    let third_param_info = match &sig.inputs[2] {
+        syn::FnArg::Typed(pat_type) => {
+            let syn::Type::Reference(type_ref) = &*pat_type.ty else {
+                return None;
+            };
+
+            // Check it's mutable reference (`Option?` on the mutability
+            // token: bails out with `None` when the `mut` is absent).
+            type_ref.mutability?;
+
+            // Get the inner type
+            let syn::Type::Path(type_path) = &*type_ref.elem else {
+                return None;
+            };
+
+            // Get the struct identifier
+            if type_path.path.segments.len() != 1 {
+                return None;
+            }
+
+            let struct_ident = type_path.path.segments[0].ident.to_string();
+
+            // Check if this struct is defined in the current crate and implements Default
+            if is_struct_defined_in_crate(&struct_ident, syntax_tree) {
+                Some(struct_ident)
+            } else {
+                None
+            }
+        }
+        _ => None,
+    };
+
+    let struct_ident = third_param_info?;
+
+    // Check that the fourth parameter type is &HashMap<&str, &str>
+    let fourth_param_matches = match &sig.inputs[3] {
+        syn::FnArg::Typed(pat_type) => {
+            let syn::Type::Reference(type_ref) = &*pat_type.ty else {
+                return None;
+            };
+            let syn::Type::Path(type_path) = &*type_ref.elem else {
+                return None;
+            };
+
+            type_path.path.segments.len() == 1
+                && type_path.path.segments[0].ident == "HashMap"
+                && matches!(&type_path.path.segments[0].arguments, syn::PathArguments::AngleBracketed(args)
+                    if args.args.len() == 2 &&
+                        matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Reference(first_ref))
+                            if matches!(&*first_ref.elem, syn::Type::Path(first_path)
+                                if first_path.path.segments.len() == 1 &&
+                                    first_path.path.segments[0].ident == "str"
+                            )
+                        ) &&
+                        matches!(&args.args[1], syn::GenericArgument::Type(syn::Type::Reference(second_ref))
+                            if matches!(&*second_ref.elem, syn::Type::Path(second_path)
+                                if second_path.path.segments.len() == 1 &&
+                                    second_path.path.segments[0].ident == "str"
+                            )
+                        )
+                )
+        }
+        _ => false,
+    };
+
+    if first_param_matches && second_param_matches && fourth_param_matches {
+        Some(struct_ident)
+    } else {
+        None
+    }
+}
+
+/// Check whether `struct_ident` names a struct declared in this file that is
+/// default-constructible: either `#[derive(.., Default, ..)]` on the struct,
+/// or a `Default` bound on one of its generic parameters.
+fn is_struct_defined_in_crate(struct_ident: &str, syntax_tree: &syn::File) -> bool {
+    syntax_tree.items.iter().any(|item| {
+        let syn::Item::Struct(item_struct) = item else {
+            return false;
+        };
+        // Duplicate struct names cannot exist at module level in valid Rust,
+        // so matching on any item is equivalent to stopping at the first.
+        item_struct.ident == struct_ident
+            && (has_default_derive(&item_struct.attrs)
+                || has_default_trait_bound(&item_struct.generics))
+    })
+}
+
+/// Check whether a `#[derive(...)]` attribute list includes `Default`.
+///
+/// The derive arguments are compared entry-by-entry on the last path
+/// segment, so identifiers that merely *contain* "Default" — e.g.
+/// `derive(MyDefaultish)` — no longer produce a false positive as the
+/// previous raw substring search did.
+fn has_default_derive(attrs: &[syn::Attribute]) -> bool {
+    attrs.iter().any(|attr| {
+        if !attr.path().is_ident("derive") {
+            return false;
+        }
+        let syn::Meta::List(list) = &attr.meta else {
+            return false;
+        };
+        // Token-to-string output is comma separated (e.g. "Debug , Default"),
+        // possibly with qualified paths ("core :: default :: Default").
+        list.tokens.to_string().split(',').any(|entry| {
+            entry
+                .rsplit("::")
+                .next()
+                .map(str::trim)
+                .is_some_and(|segment| segment == "Default")
+        })
+    })
+}
+
+/// Check whether any generic type parameter carries a bare `Default` trait
+/// bound (single-segment path only; `std::default::Default` is not matched).
+fn has_default_trait_bound(generics: &syn::Generics) -> bool {
+    generics.params.iter().any(|param| {
+        let syn::GenericParam::Type(type_param) = param else {
+            return false;
+        };
+        type_param.bounds.iter().any(|bound| {
+            let syn::TypeParamBound::Trait(trait_bound) = bound else {
+                return false;
+            };
+            let segments = &trait_bound.path.segments;
+            segments.len() == 1 && segments[0].ident == "Default"
+        })
+    })
+}
+
+/// Walk `root` and collect every directory that directly contains a
+/// `Cargo.toml`. A crate directory is recorded without descending into it,
+/// and directories whose name starts with `_` (e.g. `_policies`) are
+/// skipped entirely.
+async fn find_cargo_toml_dirs(root: &Path) -> Vec<PathBuf> {
+    let mut found = Vec::new();
+    let mut pending = vec![root.to_path_buf()];
+
+    while let Some(dir) = pending.pop() {
+        // A directory with its own Cargo.toml is a crate root — record it
+        // and do not recurse further.
+        if fs::metadata(dir.join("Cargo.toml")).await.is_ok() {
+            found.push(dir);
+            continue;
+        }
+
+        // Unreadable directories are silently skipped.
+        let Ok(mut entries) = fs::read_dir(&dir).await else {
+            continue;
+        };
+        while let Ok(Some(entry)) = entries.next_entry().await {
+            let is_dir = matches!(entry.file_type().await, Ok(ft) if ft.is_dir());
+            if !is_dir {
+                continue;
+            }
+            let path = entry.path();
+            let is_underscored = path
+                .file_name()
+                .and_then(|name| name.to_str())
+                .is_some_and(|name| name.starts_with('_'));
+            if !is_underscored {
+                pending.push(path);
+            }
+        }
+    }
+
+    found
+}
+
+/// Sanity-check that we are running from the workspace root: the current
+/// directory must contain a parseable `Cargo.toml` whose `package.name` is
+/// exactly `butchunker`. Any violation aborts the process with a red error.
+async fn precheck(current_dir: &Path) {
+    /// Print `msg` in red and abort; precheck failures are unrecoverable.
+    fn die(msg: &str) -> ! {
+        eprintln!("{}", msg.red());
+        std::process::exit(1);
+    }
+
+    let cargo_toml_path = current_dir.join("Cargo.toml");
+    let cargo_toml_content = match fs::read_to_string(&cargo_toml_path).await {
+        Ok(content) => content,
+        Err(_) => die("Error: Cargo.toml not found in current directory"),
+    };
+    let cargo_toml: toml::Value = match toml::from_str(&cargo_toml_content) {
+        Ok(value) => value,
+        Err(_) => die("Error: Failed to parse Cargo.toml"),
+    };
+
+    let package = cargo_toml
+        .get("package")
+        .unwrap_or_else(|| die("Error: No package section in Cargo.toml"));
+    let package_name = package
+        .get("name")
+        .unwrap_or_else(|| die("Error: No package.name in Cargo.toml"))
+        .as_str()
+        .unwrap_or_else(|| die("Error: package.name is not a string"));
+
+    if package_name != "butchunker" {
+        die(&format!(
+            "Error: package.name must be 'butchunker', found '{}'",
+            package_name
+        ));
+    }
+}
diff --git a/src/chunker.rs b/src/chunker.rs
new file mode 100644
index 0000000..3143f68
--- /dev/null
+++ b/src/chunker.rs
@@ -0,0 +1,4 @@
+// Sub-modules of the chunker: compile-time constants, CLI context parsing,
+// the subcommand entry point, and the read/write (storage) layer.
+pub mod constants;
+pub mod context;
+pub mod entry;
+pub mod rw;
diff --git a/src/chunker/constants.rs b/src/chunker/constants.rs
new file mode 100644
index 0000000..5e4870e
--- /dev/null
+++ b/src/chunker/constants.rs
@@ -0,0 +1,3 @@
+/// Directory name that marks (and holds) a Butck storage, analogous to `.git`.
+pub const BUTCK_STORAGE_DIR_NAME: &str = ".butck";
+/// File extension used by Butck index files.
+pub const BUTCK_INDEX_FILE_SUFFIX: &str = "bidx";
+/// Magic bytes at the start of every index file.
+pub const BUTCK_INDEX_MAGIC: [u8; 4] = [0xfe, 0xe1, 0xf0, 0x0d];
diff --git a/src/chunker/context.rs b/src/chunker/context.rs
new file mode 100644
index 0000000..79254f5
--- /dev/null
+++ b/src/chunker/context.rs
@@ -0,0 +1,226 @@
+use std::{collections::HashMap, env::current_dir, path::PathBuf, process::exit, str::FromStr};
+
+use log::{error, warn};
+
+use crate::{
+ chunker::constants::BUTCK_STORAGE_DIR_NAME, core::hash::ChunkWriteHash, special_argument,
+ special_flag, utils::file_input_solve::parse_path_input,
+};
+
+/// Parsed CLI configuration for a single butck invocation,
+/// built by [`ButckContext::from_args`].
+#[derive(Debug, Default)]
+pub struct ButckContext {
+    /// All input files (whatever remains after flag/option extraction)
+    pub file_paths: Vec<PathBuf>,
+
+    /// Path of Butck Storage — the directory that CONTAINS `.butck`
+    pub storage_path: Option<PathBuf>,
+
+    /// Display chunk boundaries (`-D` / `--display-boundaries`)
+    pub display_boundaries: bool,
+
+    /// Whether to read in stream mode; the value comes from
+    /// `-S <SIZE>` / `--stream-read <SIZE>`
+    /// (NOTE(review): unit of SIZE presumed to be bytes — confirm)
+    pub stream_read: Option<u32>,
+
+    /// Whether to read files using memory mapping (`-m` / `--memmap-read`)
+    pub memmap_read: bool,
+
+    /// Register name (`-R` / `--register`)
+    pub register_name: Option<String>,
+
+    /// Chunking policy name (`-p` / `--policy`)
+    pub policy_name: Option<String>,
+
+    /// Hash algorithm used for chunking (`-H blake3|sha256`)
+    pub chunk_hash: ChunkWriteHash,
+
+    /// Output directory; defaults to the storage path, then the CWD
+    pub output_dir: PathBuf,
+
+    /// Output file (not available for some commands)
+    pub output_file: Option<PathBuf>,
+
+    /// Override parameters passed via `+p KEY=VALUE` / `+param KEY=VALUE`
+    pub params: HashMap<String, String>,
+}
+
+impl ButckContext {
+    /// Build a context by consuming all recognized flags/options from `args`;
+    /// whatever remains afterwards is treated as input paths.
+    pub fn from_args(mut args: Vec<String>) -> Self {
+        let mut ctx = ButckContext::default();
+        let recursive = ctx.read_recursive(&mut args);
+        ctx.apply_stream_read(&mut args);
+        ctx.apply_memmap_read(&mut args);
+        ctx.apply_register_name(&mut args);
+        ctx.apply_policy_name(&mut args);
+        ctx.apply_chunk_hash(&mut args);
+        ctx.apply_storage_dir(&mut args);
+        // Must run after `apply_storage_dir`: the default output directory
+        // falls back to the storage path.
+        ctx.apply_output_paths(&mut args);
+        ctx.apply_params(&mut args);
+        ctx.apply_display_boundaries(&mut args);
+
+        // Finally, parse path input (the storage dir itself is excluded)
+        ctx.file_paths = parse_path_input(args, recursive, vec![BUTCK_STORAGE_DIR_NAME]);
+        ctx
+    }
+
+    /// `-r` / `--recursive`: recurse into directories when collecting inputs.
+    fn read_recursive(&mut self, args: &mut Vec<String>) -> bool {
+        special_flag!(args, "-r", "--recursive")
+    }
+
+    /// `-S <SIZE>` / `--stream-read <SIZE>`: enable stream reading; silently
+    /// ignored when SIZE is not a valid u32.
+    fn apply_stream_read(&mut self, args: &mut Vec<String>) {
+        if let Some(size_str) = special_argument!(args, "-S", "--stream-read")
+            && let Ok(size) = size_str.parse::<u32>()
+        {
+            self.stream_read = Some(size);
+        }
+    }
+
+    /// `-m` / `--memmap-read`: read input files through memory mapping.
+    fn apply_memmap_read(&mut self, args: &mut Vec<String>) {
+        // BUG FIX: the flag was previously parsed and returned but never
+        // stored, so `self.memmap_read` always stayed `false`.
+        self.memmap_read = special_flag!(args, "-m", "--memmap-read");
+    }
+
+    /// `-R` / `--register`: register name.
+    fn apply_register_name(&mut self, args: &mut Vec<String>) {
+        self.register_name = special_argument!(args, "-R", "--register");
+    }
+
+    /// `-p` / `--policy`: chunking policy name.
+    fn apply_policy_name(&mut self, args: &mut Vec<String>) {
+        self.policy_name = special_argument!(args, "-p", "--policy");
+    }
+
+    /// `-H` / `--chunk-hash`: chunk hash algorithm. Unknown names (and a
+    /// missing option) silently fall back to the default algorithm.
+    fn apply_chunk_hash(&mut self, args: &mut Vec<String>) {
+        let chunk_hash_str = special_argument!(args, "-H", "--chunk-hash");
+        self.chunk_hash = match chunk_hash_str.as_deref() {
+            Some("blake3") => ChunkWriteHash::Blake3,
+            Some("sha256") => ChunkWriteHash::Sha256,
+            _ => ChunkWriteHash::default(),
+        };
+    }
+
+    /// `-o` / `--output-dir` and `-O` / `--output-file`.
+    ///
+    /// A non-existent `--output-dir` is ignored; the fallback chain is
+    /// output-dir → storage path → current directory.
+    fn apply_output_paths(&mut self, args: &mut Vec<String>) {
+        let output_dir_str = special_argument!(args, "-o", "--output-dir");
+        let output_file_str = special_argument!(args, "-O", "--output-file");
+
+        let current_dir = current_dir().unwrap();
+
+        let output_dir = output_dir_str
+            .map(PathBuf::from)
+            .filter(|path| path.exists());
+
+        self.output_dir = if let Some(output_dir) = output_dir {
+            output_dir
+        } else if let Some(storage_path) = &self.storage_path {
+            storage_path.clone()
+        } else {
+            current_dir
+        };
+
+        self.output_file = output_file_str.map(PathBuf::from)
+    }
+
+    /// `+p KEY=VALUE` / `+param KEY=VALUE`: policy override parameters.
+    /// May be passed multiple times.
+    fn apply_params(&mut self, args: &mut Vec<String>) {
+        while let Some(arg) = special_argument!(args, "+p", "+param") {
+            // Split on the FIRST `=` only, so values that themselves contain
+            // `=` (e.g. `+p expr=a=b`) are no longer silently dropped.
+            if let Some((key, value)) = arg.split_once('=') {
+                self.params.insert(key.to_string(), value.to_string());
+            }
+        }
+    }
+
+    /// `-s` / `--storage`: explicit storage location. The given path is
+    /// initialized as a storage if needed, then the usual upward search
+    /// starts from it (or from the CWD when the option is absent).
+    fn apply_storage_dir(&mut self, args: &mut Vec<String>) {
+        self.storage_path = {
+            let storage_override = match special_argument!(args, "-s", "--storage") {
+                Some(o) => {
+                    let path = PathBuf::from_str(o.as_str());
+                    if let Ok(p) = &path {
+                        Self::init_butck_storage(p.clone());
+                    }
+                    path.ok()
+                }
+                None => None,
+            };
+            Self::find_butck_storage_dir(storage_override)
+        };
+    }
+
+    /// `-D` / `--display-boundaries`: print chunk boundaries.
+    fn apply_display_boundaries(&mut self, args: &mut Vec<String>) {
+        self.display_boundaries = special_flag!(args, "-D", "--display-boundaries");
+    }
+
+    /// Ensure `path` is (or becomes) a Butck storage root, creating the
+    /// directory and its `.butck` subdirectory as needed. Any filesystem
+    /// failure is fatal. Returns the storage root.
+    /// (NOTE(review): the caller currently ignores the return value and
+    /// relies on the subsequent upward search — confirm this is intended.)
+    fn init_butck_storage(path: PathBuf) -> Option<PathBuf> {
+        if !path.exists() {
+            // If the path does not exist, create it and initialize Butck Storage here
+            if let Err(e) = std::fs::create_dir_all(&path) {
+                error!("Failed to create directory '{}': {}", path.display(), e);
+                exit(1);
+            }
+            let butck_dir = path.join(BUTCK_STORAGE_DIR_NAME);
+            if let Err(e) = std::fs::create_dir_all(&butck_dir) {
+                error!(
+                    "Failed to create '{}' directory: {}",
+                    BUTCK_STORAGE_DIR_NAME, e
+                );
+                exit(1);
+            }
+            Some(path)
+        } else {
+            let butck_dir = path.join(BUTCK_STORAGE_DIR_NAME);
+
+            // Check if Butck Storage already exists
+            if butck_dir.exists() {
+                // Butck Storage already exists, return the path
+                Some(path)
+            } else {
+                // Butck Storage doesn't exist, create it with a warning if directory is not empty
+                let is_empty = path
+                    .read_dir()
+                    .map(|mut entries| entries.next().is_none())
+                    .unwrap_or(false);
+
+                if !is_empty {
+                    // Warn about creating storage in non-empty directory
+                    warn!(
+                        "Creating '{}' storage in non-empty directory: {}",
+                        BUTCK_STORAGE_DIR_NAME,
+                        path.display()
+                    );
+                }
+
+                // Create Butck Storage directory
+                if let Err(e) = std::fs::create_dir_all(&butck_dir) {
+                    error!(
+                        "Failed to create '{}' directory: {}",
+                        BUTCK_STORAGE_DIR_NAME, e
+                    );
+                    exit(1);
+                }
+                Some(path)
+            }
+        }
+    }
+
+    /// Walk upward from `from` (or the CWD) until a directory containing a
+    /// `.butck` subdirectory is found; `None` when the root is reached
+    /// without a match.
+    fn find_butck_storage_dir(from: Option<PathBuf>) -> Option<PathBuf> {
+        let mut current_dir = match from {
+            Some(path) => path,
+            None => std::env::current_dir().ok()?,
+        };
+
+        loop {
+            let butck_dir = current_dir.join(BUTCK_STORAGE_DIR_NAME);
+            if butck_dir.is_dir() {
+                return Some(current_dir);
+            }
+
+            if !current_dir.pop() {
+                break;
+            }
+        }
+        None
+    }
+}
diff --git a/src/chunker/entry.rs b/src/chunker/entry.rs
new file mode 100644
index 0000000..4fdb1f8
--- /dev/null
+++ b/src/chunker/entry.rs
@@ -0,0 +1,39 @@
+use std::process::exit;
+
+use log::info;
+
+use crate::chunker::{
+ context::ButckContext,
+ rw::{self, error::ButckRWError},
+};
+
+/// Dispatch the first positional argument as a subcommand. With no argument
+/// at all the call is a silent no-op; an unknown subcommand prints the help
+/// text and exits with status 1.
+pub async fn entry(ctx: ButckContext, args: Vec<String>) -> Result<(), ButckRWError> {
+    let Some(subcommand) = args.first() else {
+        return Ok(());
+    };
+    match subcommand.as_str() {
+        "write" => rw::storage::write(ctx).await,
+        "build" => rw::storage::build(ctx).await,
+        "policies" => {
+            for policy in butck_policies::policies().iter() {
+                info!("{}", policy);
+            }
+            Ok(())
+        }
+        _ => {
+            print_help();
+            exit(1)
+        }
+    }
+}
+
+/// Print the help text bundled into the binary at compile time.
+pub fn print_help() {
+    let help_text = include_str!("../../resources/helps/butck.txt");
+    println!("{}", help_text.trim());
+}
+
+/// Print the version string bundled into the binary at compile time.
+pub fn print_version() {
+    let version_text = include_str!("../../resources/version_info.txt");
+    println!("{}", version_text.trim());
+}
diff --git a/src/chunker/rw.rs b/src/chunker/rw.rs
new file mode 100644
index 0000000..85e734e
--- /dev/null
+++ b/src/chunker/rw.rs
@@ -0,0 +1,2 @@
+// Read/write layer: error types and the storage backend implementation.
+pub mod error;
+pub mod storage;
diff --git a/src/chunker/rw/error.rs b/src/chunker/rw/error.rs
new file mode 100644
index 0000000..7f263a5
--- /dev/null
+++ b/src/chunker/rw/error.rs
@@ -0,0 +1,61 @@
+use butck_policies::error::ChunkFailed;
+
+use crate::chunker::context::ButckContext;
+
+/// An error from the read/write layer, bundled with the [`ButckContext`]
+/// that was active when it occurred so reporters can give context-aware
+/// hints. Construct via [`ButckRWErrorKind::pack`].
+#[derive(Debug)]
+pub struct ButckRWError {
+    // What went wrong; exposed through `kind()`.
+    kind: ButckRWErrorKind,
+    // The owning context; exposed through `ctx()`.
+    ctx: ButckContext,
+}
+
+/// Error kinds for the read/write layer. The `#[error("…")]` strings are
+/// the `Display` messages generated by `thiserror`.
+#[derive(thiserror::Error, Debug)]
+pub enum ButckRWErrorKind {
+    /// No `.butck` directory was found (explicitly or by upward search).
+    #[error("No butck storage found")]
+    NoButckStorageFound,
+
+    /// A chunking command was run without a `--policy`.
+    #[error("Chunking policy not specified")]
+    ChunkingPolicyNotSpecified,
+
+    /// `--memmap-read` and `--stream-read` were both requested.
+    #[error("Cannot enable both MemmapRead and StreamRead")]
+    ReadingMethodAmbiguous,
+
+    #[error("Multiple input files specified but only one output file allowed")]
+    OutputCountMismatch,
+
+    /// An index file failed extension/size/magic validation.
+    #[error("Invalid bidx file format")]
+    InvalidBidxFormat,
+
+    /// Payload: the chunk's identifier
+    /// (NOTE(review): presumably the hex hash — confirm at the read site).
+    #[error("Chunk not found in storage: {0}")]
+    ChunkNotFound(String),
+
+    /// Payload: human-readable reason the rebuild failed.
+    #[error("Failed to rebuild file: {0}")]
+    RebuildFailed(String),
+
+    /// Wraps a policy-reported chunking failure (auto-converted via `?`).
+    #[error("Chunking failed: {0}")]
+    ChunkFailed(#[from] ChunkFailed),
+
+    /// Wraps any underlying I/O error (auto-converted via `?`).
+    #[error("IO error: {0}")]
+    IOError(#[from] std::io::Error),
+}
+
+impl std::fmt::Display for ButckRWError {
+    /// Delegate to the `thiserror`-derived `Display` of the inner kind so
+    /// the human-readable `#[error("…")]` messages are shown; the previous
+    /// `{:?}` printed the `Debug` representation, defeating them.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.kind)
+    }
+}
+
+impl ButckRWError {
+    /// The context that was active when the error occurred.
+    pub fn ctx(&self) -> &ButckContext {
+        &self.ctx
+    }
+
+    /// The specific error kind (used by the CLI to pick a help message).
+    pub fn kind(&self) -> &ButckRWErrorKind {
+        &self.kind
+    }
+}
+
+impl ButckRWErrorKind {
+    /// Attach the active context to this kind, producing a full
+    /// [`ButckRWError`]. Consumes `ctx`, so callers typically `pack`
+    /// on their way out.
+    pub fn pack(self, ctx: ButckContext) -> ButckRWError {
+        ButckRWError { kind: self, ctx }
+    }
+}
diff --git a/src/chunker/rw/storage.rs b/src/chunker/rw/storage.rs
new file mode 100644
index 0000000..13452d0
--- /dev/null
+++ b/src/chunker/rw/storage.rs
@@ -0,0 +1,88 @@
+pub mod build;
+pub mod write;
+
+pub use build::build;
+pub use write::write;
+
+use std::path::{Path, PathBuf};
+
+/// Information about a chunk for index file
+// NOTE(review): presumably `size == end - start` — confirm at the write site.
+#[derive(Debug, Clone)]
+pub struct ChunkInfo {
+    /// Index of the chunk in the file
+    pub index: usize,
+    /// Hash of the chunk (hex string)
+    pub hash: String,
+    /// Size of the chunk in bytes
+    pub size: usize,
+    /// Start position in the original file
+    pub start: usize,
+    /// End position in the original file (exclusive)
+    pub end: usize,
+}
+
+/// Compute the on-disk path of a chunk from its hex-encoded hash.
+///
+/// Chunks are fanned out over two subdirectory levels taken from the first
+/// four hex characters (`ab/cd/abcd…`), keeping individual directories small.
+///
+/// # Arguments
+/// - `storage_dir`: storage root directory
+/// - `hash_hex`: the chunk's hash as a hex string
+///
+/// # Panics
+/// Panics if `hash_hex` is shorter than 4 characters.
+pub fn get_chunk_path(storage_dir: &Path, hash_hex: &str) -> PathBuf {
+    storage_dir
+        .join(&hash_hex[..2])
+        .join(&hash_hex[2..4])
+        .join(hash_hex)
+}
+
+/// Compute the on-disk path of a chunk from its raw 32-byte hash.
+///
+/// Convenience wrapper around [`get_chunk_path`] that hex-encodes first.
+pub fn get_chunk_path_from_bytes(storage_dir: &Path, hash_bytes: &[u8; 32]) -> PathBuf {
+    get_chunk_path(storage_dir, &hex::encode(hash_bytes))
+}
+
+/// Produce a path in `output_dir` for `desired_filename` that does not yet
+/// exist, appending `_1`, `_2`, … before the extension when needed.
+///
+/// # Arguments
+/// - `output_dir`: directory the file will be written into
+/// - `desired_filename`: the preferred file name
+///
+/// # Returns
+/// A path that did not exist at the time of the check (inherently racy
+/// against concurrent writers, as was the original implementation).
+pub fn generate_unique_path(output_dir: &Path, desired_filename: &str) -> PathBuf {
+    let desired_path = output_dir.join(desired_filename);
+    if !desired_path.exists() {
+        return desired_path;
+    }
+
+    let path_buf = PathBuf::from(desired_filename);
+    let stem = path_buf
+        .file_stem()
+        .map(|s| s.to_string_lossy().into_owned());
+    let ext = path_buf
+        .extension()
+        .map(|e| e.to_string_lossy().into_owned())
+        .filter(|e| !e.is_empty());
+
+    let mut counter = 1;
+    loop {
+        let candidate_name = match (&stem, &ext) {
+            // Keep the extension intact (`file_1.txt`) instead of mangling it
+            // into `file.txt_1` as the previous implementation did — that
+            // changed the effective extension of every deduplicated file.
+            (Some(stem), Some(ext)) => format!("{}_{}.{}", stem, counter, ext),
+            (Some(stem), None) => format!("{}_{}", stem, counter),
+            (None, _) => format!("{}_{}", desired_filename, counter),
+        };
+        let candidate = output_dir.join(candidate_name);
+        if !candidate.exists() {
+            return candidate;
+        }
+        counter += 1;
+    }
+}
diff --git a/src/chunker/rw/storage/build.rs b/src/chunker/rw/storage/build.rs
new file mode 100644
index 0000000..7608b5c
--- /dev/null
+++ b/src/chunker/rw/storage/build.rs
@@ -0,0 +1,250 @@
+use futures::future::join_all;
+use just_progress::progress;
+use log::{error, info, trace};
+use memmap2::Mmap;
+use std::path::PathBuf;
+use tokio::{fs::File, io::AsyncWriteExt};
+
+use crate::{
+ chunker::{
+ constants::{BUTCK_INDEX_FILE_SUFFIX, BUTCK_INDEX_MAGIC},
+ context::ButckContext,
+ rw::error::{ButckRWError, ButckRWErrorKind},
+ rw::storage,
+ },
+ utils::size_display::size_display,
+};
+
+pub async fn build(ctx: ButckContext) -> Result<(), ButckRWError> {
+ if ctx.storage_path.is_none() {
+ return Err(ButckRWErrorKind::NoButckStorageFound.pack(ctx));
+ }
+ if ctx.file_paths.is_empty() {
+ return Err(
+ ButckRWErrorKind::RebuildFailed("No bidx files specified".to_string()).pack(ctx),
+ );
+ }
+
+ let tasks: Vec<_> = ctx
+ .file_paths
+ .iter()
+ .map(|bidx_path| async {
+ trace!(
+ "Preparing to rebuild from bidx file `{}`",
+ bidx_path.display()
+ );
+ rebuild_from_bidx(bidx_path, &ctx).await
+ })
+ .collect();
+
+ let results = join_all(tasks).await;
+
+ for result in results {
+ if let Err(e) = result {
+ return Err(e.pack(ctx));
+ }
+ }
+
+ Ok(())
+}
+
+async fn rebuild_from_bidx(
+ bidx_path: &PathBuf,
+ ctx: &ButckContext,
+) -> Result<(), ButckRWErrorKind> {
+ // Validate file extension
+ if let Some(ext) = bidx_path.extension()
+ && ext != BUTCK_INDEX_FILE_SUFFIX
+ {
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+
+ info!("Rebuilding from bidx file: {}", bidx_path.display());
+
+ // Read bidx file content
+ let bidx_content = if ctx.memmap_read {
+ let file = File::open(bidx_path).await?;
+ let mmap = unsafe { Mmap::map(&file)? };
+ mmap.to_vec()
+ } else {
+ tokio::fs::read(bidx_path).await?
+ };
+
+ // Verify file size includes at least the header
+ if bidx_content.len() < 6 {
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+
+ // Validate MAGIC bytes
+ if bidx_content[0..4] != BUTCK_INDEX_MAGIC {
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+
+ // Read filename
+ let filename_len = u16::from_le_bytes([bidx_content[4], bidx_content[5]]) as usize;
+ if bidx_content.len() < 6 + filename_len {
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+ let filename_bytes = &bidx_content[6..6 + filename_len];
+ let original_filename = String::from_utf8(filename_bytes.to_vec())
+ .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?;
+
+ trace!("Original filename from bidx: {}", original_filename);
+
+ let hash_data_start = 6 + filename_len;
+ let hash_data = &bidx_content[hash_data_start..];
+
+ // Verify that hash data size is a multiple of 32 bytes
+ if hash_data.len() % 32 != 0 {
+ return Err(ButckRWErrorKind::InvalidBidxFormat);
+ }
+
+ let chunk_count = hash_data.len() / 32;
+ info!("Found {} chunks in bidx file", chunk_count);
+
+ let mut chunk_hashes = Vec::with_capacity(chunk_count);
+ for i in 0..chunk_count {
+ let start = i * 32;
+ let end = start + 32;
+ let hash_bytes: [u8; 32] = hash_data[start..end]
+ .try_into()
+ .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?;
+ chunk_hashes.push(hash_bytes);
+ }
+
+ trace!("Parsed {} chunk hashes", chunk_hashes.len());
+
+ // Determine output file path
+ let output_path = if let Some(output_file) = &ctx.output_file {
+ output_file.clone()
+ } else {
+ // Use the original filename read from the bidx file
+ storage::generate_unique_path(&ctx.output_dir, &original_filename)
+ };
+
+ info!("Rebuilding file to: {}", output_path.display());
+
+ let progress_name = format!("Rebuild `{}`", output_path.display());
+ progress::update_progress(progress_name.as_str(), 0.0);
+ let step = 1.0 / chunk_count as f64;
+
+ let mut tasks = Vec::with_capacity(chunk_count);
+
+ for (index, hash_bytes) in chunk_hashes.iter().enumerate() {
+ let hash_hex = hex::encode(hash_bytes);
+ tasks.push(read_chunk(
+ progress_name.as_str(),
+ step,
+ hash_hex,
+ &ctx.output_dir,
+ index,
+ ));
+ }
+
+ trace!("Starting parallel read of {} chunks", tasks.len());
+ let results = join_all(tasks).await;
+ trace!("All read tasks completed");
+
+ // Collect chunk data and verify order
+ let mut chunk_data_list = Vec::with_capacity(chunk_count);
+ let mut success_count = 0;
+
+ for (index, result) in results.into_iter().enumerate() {
+ match result {
+ Ok(chunk_data) => {
+ let chunk_size = chunk_data.len();
+ success_count += 1;
+ chunk_data_list.push((index, chunk_data));
+ trace!(
+ "Chunk {} read successfully, size: {} bytes",
+ index, chunk_size
+ );
+ }
+ Err(e) => {
+ error!("Failed to read chunk {}: {:?}", index, e);
+ return Err(e);
+ }
+ }
+ }
+
+ if success_count != chunk_count {
+ return Err(ButckRWErrorKind::ChunkNotFound(format!(
+ "Only {}/{} chunks found in storage",
+ success_count, chunk_count
+ )));
+ }
+
+ info!("All {} chunks read successfully", success_count);
+
+ // Sort by index and concatenate files
+ chunk_data_list.sort_by_key(|(index, _)| *index);
+
+ // Calculate total size
+ let total_size: usize = chunk_data_list.iter().map(|(_, data)| data.len()).sum();
+ let (total_value, total_unit) = size_display(total_size);
+ info!(
+ "Rebuilding file: {} chunks, total size: {:.2} {} ({} bytes)",
+ chunk_count, total_value, total_unit, total_size
+ );
+
+ // Write to output file
+ trace!("Writing to output file: {}", output_path.display());
+ let mut output_file = File::create(&output_path).await?;
+
+ for (index, chunk_data) in chunk_data_list {
+ trace!("Writing chunk {} ({} bytes)", index, chunk_data.len());
+ output_file.write_all(&chunk_data).await?;
+ progress::increase(progress_name.as_str(), step as f32);
+ }
+
+ output_file.flush().await?;
+
+ info!("File successfully rebuilt: {}", output_path.display());
+ progress::complete(progress_name.as_str());
+
+ Ok(())
+}
+
/// Read a single chunk from storage by its hex-encoded hash.
///
/// Looks the chunk up at `<storage_dir>/<h[0..2]>/<h[2..4]>/<hex>` (via
/// `storage::get_chunk_path`) and returns its bytes. Bumps the named
/// progress bar by `step` on success.
///
/// # Errors
/// - `ChunkNotFound` when the chunk file does not exist;
/// - `IOError` for any other read failure.
async fn read_chunk(
    progress_name: &str,
    step: f64,
    hash_hex: String,
    storage_dir: &PathBuf,
    chunk_index: usize,
) -> Result<Vec<u8>, ButckRWErrorKind> {
    trace!("read_chunk[{}]: Starting, hash: {}", chunk_index, hash_hex);

    // Build chunk file path
    let file_path = storage::get_chunk_path(storage_dir, &hash_hex);

    trace!(
        "read_chunk[{}]: Looking for file at: {}",
        chunk_index,
        file_path.display()
    );

    // Read chunk file
    match tokio::fs::read(&file_path).await {
        Ok(data) => {
            trace!(
                "read_chunk[{}]: Read {} bytes successfully",
                chunk_index,
                data.len()
            );
            progress::increase(progress_name, step as f32);
            Ok(data)
        }
        // A missing chunk file is reported as a dedicated error so the
        // caller can distinguish "storage incomplete" from I/O trouble.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            trace!("read_chunk[{}]: File not found", chunk_index);
            Err(ButckRWErrorKind::ChunkNotFound(format!(
                "Chunk {} (hash: {}) not found in storage",
                chunk_index, hash_hex
            )))
        }
        Err(e) => {
            trace!("read_chunk[{}]: Read failed: {:?}", chunk_index, e);
            Err(ButckRWErrorKind::IOError(e))
        }
    }
}
diff --git a/src/chunker/rw/storage/write.rs b/src/chunker/rw/storage/write.rs
new file mode 100644
index 0000000..8b3acc7
--- /dev/null
+++ b/src/chunker/rw/storage/write.rs
@@ -0,0 +1,118 @@
+use std::{collections::HashMap, path::PathBuf};
+
+use log::trace;
+
+use crate::{
+ chunker::{
+ constants::BUTCK_INDEX_FILE_SUFFIX,
+ context::ButckContext,
+ rw::{
+ error::{ButckRWError, ButckRWErrorKind},
+ storage::generate_unique_path,
+ },
+ },
+ storage::{simple::write_file_simple, stream::write_file_stream},
+};
+
+pub mod simple;
+pub mod stream;
+
+pub async fn write(ctx: ButckContext) -> Result<(), ButckRWError> {
+ if ctx.storage_path.is_none() {
+ return Err(ButckRWErrorKind::NoButckStorageFound.pack(ctx));
+ }
+ if ctx.policy_name.is_none() {
+ return Err(ButckRWErrorKind::ChunkingPolicyNotSpecified.pack(ctx));
+ }
+ if ctx.file_paths.len() > 1 && ctx.output_file.is_some() {
+ return Err(ButckRWErrorKind::OutputCountMismatch.pack(ctx));
+ }
+
+ // Cannot enable both memory-mapped and stream reading simultaneously.
+ // Stream reading uses butck_policies::chunk_stream_with,
+ // while memory-mapped or default reading uses butck_policies::chunk_with.
+ if ctx.memmap_read && ctx.stream_read.is_some() {
+ return Err(ButckRWErrorKind::ReadingMethodAmbiguous.pack(ctx));
+ }
+
+ let param_refs: HashMap<&str, &str> = ctx
+ .params
+ .iter()
+ .map(|(k, v)| (k.as_str(), v.as_str()))
+ .collect();
+
+ let tasks: Vec<_> = ctx
+ .file_paths
+ .iter()
+ .map(|path| async {
+ trace!("Preparing to write file `{}`", path.display());
+ write_file(path, &ctx, &param_refs).await
+ })
+ .collect();
+
+ let results = futures::future::join_all(tasks).await;
+
+ for result in results {
+ if let Err(e) = result {
+ return Err(e.pack(ctx));
+ }
+ }
+
+ Ok(())
+}
+
+async fn write_file(
+ path: &PathBuf,
+ ctx: &ButckContext,
+ params: &HashMap<&str, &str>,
+) -> Result<(), ButckRWErrorKind> {
+ if let Some(stream_read_size) = ctx.stream_read {
+ write_file_stream(path, stream_read_size, ctx, params).await
+ } else {
+ write_file_simple(path, ctx, params).await
+ }
+}
+
+pub fn get_index_file_name(path: &PathBuf, ctx: &ButckContext) -> PathBuf {
+ let output_file = if let Some(output_file) = &ctx.output_file {
+ return output_file.clone();
+ } else {
+ ctx.output_dir.join(path.file_name().unwrap_or_default())
+ };
+
+ // Append .bidx suffix directly to the original file name
+ let desired_filename = if let Some(ext) = output_file.extension() {
+ let ext_str = ext.to_string_lossy();
+ if ext_str.is_empty() {
+ format!(
+ "{}.{}",
+ output_file
+ .file_stem()
+ .unwrap_or_default()
+ .to_string_lossy(),
+ BUTCK_INDEX_FILE_SUFFIX
+ )
+ } else {
+ format!(
+ "{}.{}.{}",
+ output_file
+ .file_stem()
+ .unwrap_or_default()
+ .to_string_lossy(),
+ ext_str,
+ BUTCK_INDEX_FILE_SUFFIX
+ )
+ }
+ } else {
+ format!(
+ "{}.{}",
+ output_file
+ .file_name()
+ .unwrap_or_default()
+ .to_string_lossy(),
+ BUTCK_INDEX_FILE_SUFFIX
+ )
+ };
+
+ generate_unique_path(&ctx.output_dir, &desired_filename)
+}
diff --git a/src/chunker/rw/storage/write/simple.rs b/src/chunker/rw/storage/write/simple.rs
new file mode 100644
index 0000000..75b9bd7
--- /dev/null
+++ b/src/chunker/rw/storage/write/simple.rs
@@ -0,0 +1,368 @@
+use futures::future::join_all;
+use just_progress::progress;
+use log::{error, info, trace};
+use std::{collections::HashMap, path::PathBuf};
+use tokio::{fs::File, io::AsyncReadExt};
+
+use crate::{
+ chunker::{
+ context::ButckContext,
+ rw::{error::ButckRWErrorKind, storage},
+ },
+ core::hash::ChunkWriteHash,
+ storage::get_index_file_name,
+ utils::size_display::size_display,
+};
+
+pub async fn write_file_simple(
+ path: &PathBuf,
+ ctx: &ButckContext,
+ params: &HashMap<&str, &str>,
+) -> Result<(), ButckRWErrorKind> {
+ read_file(path, ctx, params).await?;
+ Ok(())
+}
+
+async fn read_file(
+ path: &PathBuf,
+ ctx: &ButckContext,
+ params: &HashMap<&str, &str>,
+) -> Result<(), ButckRWErrorKind> {
+ let mut file = File::open(path).await?;
+
+ // Use butck_policies::chunk_with to locate chunk boundaries in the file
+ if ctx.memmap_read {
+ let mmap = unsafe { memmap2::Mmap::map(&file)? };
+ let raw_data = &mmap[..];
+ let (chunk_boundaries, total_bytes) =
+ (get_boundaries(raw_data, ctx, params).await?, raw_data.len());
+
+ // If output boundaries, do not execute actual write logic
+ if ctx.display_boundaries {
+ display_boundaries(&chunk_boundaries, total_bytes).await;
+ return Ok(());
+ } else {
+ write_file_to_storage(path, ctx, chunk_boundaries, raw_data).await?;
+ }
+ } else {
+ let mut contents = Vec::new();
+ file.read_to_end(&mut contents).await?;
+ let raw_data = &contents[..];
+ let (chunk_boundaries, total_bytes) =
+ (get_boundaries(raw_data, ctx, params).await?, raw_data.len());
+
+ // If output boundaries, do not execute actual write logic
+ if ctx.display_boundaries {
+ display_boundaries(&chunk_boundaries, total_bytes).await;
+ return Ok(());
+ } else {
+ write_file_to_storage(path, ctx, chunk_boundaries, raw_data).await?;
+ }
+ };
+ progress::clear_all();
+ Ok(())
+}
+
/// Persist `raw_data` into content-addressed chunk storage and emit a
/// `.bidx` index file describing the chunks.
///
/// `chunk_boundaries` are exclusive end offsets produced by the chunking
/// policy; the final chunk (from the last boundary to EOF) is implicit,
/// hence `chunk_count = boundaries + 1`. Chunks are hashed and written
/// concurrently, then their hashes are recorded, in order, in the index.
///
/// NOTE(review): chunks are written under `ctx.output_dir`, which this
/// code treats as the storage root — confirm the relationship to
/// `ctx.storage_path` (checked by `write` but never used here).
async fn write_file_to_storage(
    path: &PathBuf,
    ctx: &ButckContext,
    chunk_boundaries: Vec<u32>,
    raw_data: &[u8],
) -> Result<(), ButckRWErrorKind> {
    let output_index_file = get_index_file_name(path, ctx);

    // +1: the tail chunk after the last boundary has no explicit entry.
    let chunk_count = chunk_boundaries.len() + 1;
    let progress_name = format!("Write `{}`", path.display());

    progress::update_progress(progress_name.as_str(), 0.0);
    let step = 1.0 / chunk_count as f64;

    trace!("chunks_count={}", chunk_count);
    trace!("chunk_hash={:?}", ctx.chunk_hash);
    trace!("file_size={}", raw_data.len());
    trace!("output_index_file={}", output_index_file.display());
    trace!("policy_name={:?}", ctx.policy_name);
    trace!("storage_dir={}", ctx.output_dir.display());

    info!(
        "{} chunks will be written to {}",
        chunk_count,
        ctx.output_dir.display()
    );

    tokio::fs::create_dir_all(&ctx.output_dir).await?;
    trace!("Output directory created or already exists");

    let mut tasks = Vec::new();
    let mut start = 0;
    let mut chunk_index = 0;

    trace!("Processing chunk boundaries:");

    // Walk the boundary list, slicing [start, end) chunks out of the
    // raw data. Boundaries that are out of range or non-increasing are
    // skipped (but still advance `start`).
    for &boundary in &chunk_boundaries {
        let end = boundary as usize;
        if start < end && end <= raw_data.len() {
            let chunk_data = &raw_data[start..end];
            trace!(
                "Chunk {}: bytes {}..{} (size: {} bytes)",
                chunk_index,
                start,
                end - 1,
                end - start
            );
            tasks.push(write_chunk(
                progress_name.as_str(),
                step,
                chunk_data,
                &ctx.output_dir,
                &ctx.chunk_hash,
                chunk_index,
                start,
                end,
            ));
            chunk_index += 1;
        } else {
            trace!(
                "Skipping invalid chunk boundary: start={}, end={}, data_len={}",
                start,
                end,
                raw_data.len()
            );
        }
        start = end;
    }

    // Implicit final chunk: everything after the last boundary (only
    // when there is actually data left).
    if start < raw_data.len() {
        let chunk_data = &raw_data[start..];
        trace!(
            "Chunk {}: bytes {}..{} (size: {} bytes) - final chunk",
            chunk_index,
            start,
            raw_data.len() - 1,
            raw_data.len() - start
        );
        tasks.push(write_chunk(
            progress_name.as_str(),
            step,
            chunk_data,
            &ctx.output_dir,
            &ctx.chunk_hash,
            chunk_index,
            start,
            raw_data.len(),
        ));
    }

    trace!("Total chunks prepared for writing: {}", tasks.len());

    trace!("Starting parallel write of {} chunks", tasks.len());
    let results = join_all(tasks).await;
    trace!("All write tasks completed");

    // Collect per-chunk metadata; fail fast on the first write error.
    let mut success_count = 0;
    let mut chunk_infos = Vec::new();

    for result in results {
        match result {
            Ok(chunk_info) => {
                success_count += 1;
                chunk_infos.push(chunk_info);
            }
            Err(e) => {
                trace!("Chunk write failed: {:?}", e);
                return Err(e);
            }
        }
    }

    info!("All {} chunks written successfully", success_count);

    // Write index file (magic + filename + ordered chunk hashes).
    trace!("Writing index file to: {}", output_index_file.display());
    if let Err(e) = write_index_file(&output_index_file, &chunk_infos, path).await {
        error!("Failed to write index file: {}", e);
        return Err(ButckRWErrorKind::IOError(e));
    }
    info!("Index file written to: {}", output_index_file.display());

    trace!("write_file_to_storage completed successfully");

    progress::complete(progress_name.as_str());

    Ok(())
}
+
/// Hash one chunk and write it into content-addressed storage.
///
/// The chunk lands at `<output_dir>/<h[0..2]>/<h[2..4]>/<hex>`; a file
/// that already exists under the same hash is treated as a dedup hit
/// and the write is skipped. Returns a `ChunkInfo` recording the
/// chunk's index, hex hash, size and byte range in the original file.
///
/// NOTE(review): the exists/write pair is racy when two concurrent
/// tasks share a hash, but both writers produce identical content, so
/// the outcome is benign.
async fn write_chunk(
    progress_name: &str,
    step: f64,
    chunk_data: &[u8],
    output_dir: &PathBuf,
    chunk_hash: &ChunkWriteHash,
    chunk_index: usize,
    start: usize,
    end: usize,
) -> Result<crate::chunker::rw::storage::ChunkInfo, ButckRWErrorKind> {
    trace!(
        "write_chunk[{}]: Starting, data size: {} bytes",
        chunk_index,
        chunk_data.len()
    );

    trace!(
        "write_chunk[{}]: Computing hash with algorithm: {:?}",
        chunk_index, chunk_hash
    );
    let hash_bytes = chunk_hash.hash(chunk_data);
    trace!(
        "write_chunk[{}]: Hash computed: {:?}",
        chunk_index, hash_bytes
    );

    let hash_hex = hex::encode(hash_bytes);
    trace!("write_chunk[{}]: Hash hex: {}", chunk_index, hash_hex);

    // Content-addressed location derived from the hash prefix.
    let file_path = storage::get_chunk_path(output_dir, &hash_hex);

    if let Some(parent_dir) = file_path.parent() {
        trace!(
            "write_chunk[{}]: Creating directory structure: {}",
            chunk_index,
            parent_dir.display()
        );
        tokio::fs::create_dir_all(parent_dir).await?;
        trace!("write_chunk[{}]: Directory created", chunk_index);
    }

    trace!(
        "write_chunk[{}]: File path: {}",
        chunk_index,
        file_path.display()
    );

    trace!(
        "write_chunk[{}]: Writing {} bytes to file",
        chunk_index,
        chunk_data.len()
    );
    // Identical content already stored under this hash — skip rewrite.
    if !file_path.exists() {
        tokio::fs::write(&file_path, chunk_data).await?;
    } else {
        trace!(
            "write_chunk[{}]: File already exists, skipping",
            chunk_index
        );
    }
    trace!("write_chunk[{}]: File written successfully", chunk_index);
    progress::increase(progress_name, step as f32);
    Ok(crate::chunker::rw::storage::ChunkInfo {
        index: chunk_index,
        hash: hash_hex,
        size: chunk_data.len(),
        start,
        end,
    })
}
+
+async fn get_boundaries<'a>(
+ raw_data: &[u8],
+ ctx: &ButckContext,
+ params: &HashMap<&str, &str>,
+) -> Result<Vec<u32>, ButckRWErrorKind> {
+ let policy_name = ctx.policy_name.as_ref().unwrap().as_str();
+ match butck_policies::chunk_with(policy_name, raw_data, params).await {
+ Ok(s) => Ok(s),
+ Err(e) => Err(ButckRWErrorKind::ChunkFailed(e)),
+ }
+}
+
+async fn write_index_file(
+ index_path: &PathBuf,
+ chunk_infos: &[crate::chunker::rw::storage::ChunkInfo],
+ original_file_path: &PathBuf,
+) -> Result<(), std::io::Error> {
+ use std::io::Write;
+
+ let file = std::fs::File::create(index_path)?;
+ let mut writer = std::io::BufWriter::new(file);
+
+ // Write header: [u8; 4] magic + [u16] filename length + [u8] filename bytes
+ use crate::chunker::constants::BUTCK_INDEX_MAGIC;
+
+ // Write magic bytes
+ writer.write_all(&BUTCK_INDEX_MAGIC)?;
+
+ // Get original filename as bytes
+ let filename = original_file_path
+ .file_name()
+ .and_then(|n| n.to_str())
+ .unwrap_or("unknown");
+ let filename_bytes = filename.as_bytes();
+
+ // Write filename length as u16 (little-endian)
+ if filename_bytes.len() > u16::MAX as usize {
+ return Err(std::io::Error::new(
+ std::io::ErrorKind::InvalidInput,
+ format!("Filename too long: {} bytes", filename_bytes.len()),
+ ));
+ }
+ let filename_len = filename_bytes.len() as u16;
+ writer.write_all(&filename_len.to_le_bytes())?;
+
+ // Write filename bytes
+ writer.write_all(filename_bytes)?;
+
+ // Write chunk hashes: [u8; 32][u8; 32][u8; 32]...
+ for chunk_info in chunk_infos {
+ // Convert hex hash to bytes
+ match hex::decode(&chunk_info.hash) {
+ Ok(hash_bytes) => {
+ if hash_bytes.len() == 32 {
+ writer.write_all(&hash_bytes)?;
+ } else {
+ // Pad or truncate to 32 bytes if needed
+ let mut fixed_hash = [0u8; 32];
+ let len = hash_bytes.len().min(32);
+ fixed_hash[..len].copy_from_slice(&hash_bytes[..len]);
+ writer.write_all(&fixed_hash)?;
+ }
+ }
+ Err(e) => {
+ return Err(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ format!("Failed to decode hash hex: {}", e),
+ ));
+ }
+ }
+ }
+
+ Ok(())
+}
+
+async fn display_boundaries(chunk_boundaries: &Vec<u32>, total_bytes: usize) {
+ let total_chunks = chunk_boundaries.len() + 1;
+ let (total_value, total_unit) = size_display(total_bytes);
+ info!(
+ "{} chunks, ({:.2} {}, {})",
+ total_chunks, total_value, total_unit, total_bytes
+ );
+ let mut start = 0;
+ chunk_boundaries.iter().for_each(|p| {
+ let next = *p as usize;
+ let (size_value, size_unit) = size_display(next - start);
+ info!(
+ "{} - {} (size: {:.2} {})",
+ start,
+ next - 1,
+ size_value,
+ size_unit
+ );
+ start = next;
+ });
+ let last = start;
+ let r#final = total_bytes;
+ let (size_value, size_unit) = size_display(total_bytes - start);
+ info!(
+ "{} - {} (size: {:.2} {})",
+ last, r#final, size_value, size_unit
+ );
+}
diff --git a/src/chunker/rw/storage/write/stream.rs b/src/chunker/rw/storage/write/stream.rs
new file mode 100644
index 0000000..020cfcd
--- /dev/null
+++ b/src/chunker/rw/storage/write/stream.rs
@@ -0,0 +1,12 @@
+use std::{collections::HashMap, path::PathBuf};
+
+use crate::chunker::{context::ButckContext, rw::error::ButckRWErrorKind};
+
/// Streaming write mode: intended to chunk `path` incrementally using
/// reads of `stream_read_size` bytes instead of loading the whole file
/// into memory (the simple path's behavior).
///
/// Not implemented yet — selecting stream reading currently panics via
/// `todo!()`.
pub async fn write_file_stream(
    path: &PathBuf,
    stream_read_size: u32,
    ctx: &ButckContext,
    params: &HashMap<&str, &str>,
) -> Result<(), ButckRWErrorKind> {
    todo!()
}
diff --git a/src/core.rs b/src/core.rs
new file mode 100644
index 0000000..ec5d33c
--- /dev/null
+++ b/src/core.rs
@@ -0,0 +1 @@
+pub mod hash;
diff --git a/src/core/hash.rs b/src/core/hash.rs
new file mode 100644
index 0000000..36a62b3
--- /dev/null
+++ b/src/core/hash.rs
@@ -0,0 +1,38 @@
+use blake3::Hasher as Blake3Hasher;
+use sha2::{Digest as Sha2Digest, Sha256};
+
+const SALT: &[u8] = b"Dude@";
+
+#[derive(Debug, Default)]
+pub enum ChunkWriteHash {
+ #[default]
+ Blake3,
+ Sha256,
+}
+
+impl ChunkWriteHash {
+ pub fn hash(&self, d: &[u8]) -> [u8; 32] {
+ match self {
+ ChunkWriteHash::Blake3 => hash_blake3(d),
+ ChunkWriteHash::Sha256 => hash_sha256(d),
+ }
+ }
+}
+
+/// Compute the Blake3 hash of the data with a salt
+/// Returns a 32-byte hash value
+pub fn hash_blake3(d: &[u8]) -> [u8; 32] {
+ let mut hasher = Blake3Hasher::new();
+ hasher.update(SALT);
+ hasher.update(d);
+ *hasher.finalize().as_bytes()
+}
+
+/// Compute the SHA-256 hash of the data with a salt
+/// Returns a 32-byte hash value
+pub fn hash_sha256(d: &[u8]) -> [u8; 32] {
+ let mut hasher = Sha256::new();
+ hasher.update(SALT);
+ hasher.update(d);
+ hasher.finalize().into()
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..e4a55c2
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,11 @@
// Crate module wiring.
pub mod chunker;
pub mod core;
pub mod log;
pub mod macros;
pub mod utils;

/// Flat re-exports of the storage read/write API so callers can use
/// `crate::storage::*` instead of the deep `chunker::rw::...` paths.
pub mod storage {
    pub use crate::chunker::rw::error::*;
    pub use crate::chunker::rw::storage::build::*;
    pub use crate::chunker::rw::storage::write::*;
}
diff --git a/src/log.rs b/src/log.rs
new file mode 100644
index 0000000..5fc6160
--- /dev/null
+++ b/src/log.rs
@@ -0,0 +1,33 @@
+use env_logger::Builder;
+use log::Level;
+use std::io::Write;
+
+pub fn init_logger(level_filter: Option<log::LevelFilter>) {
+ let mut builder = match level_filter {
+ Some(f) => {
+ let mut b = Builder::new();
+ b.filter_level(f);
+ b
+ }
+ None => return,
+ };
+
+ builder
+ .format(|buf, record| {
+ let level = record.level();
+ let args = record.args();
+
+ let (prefix, color_code) = match level {
+ Level::Error => ("error: ", "\x1b[1;31m"),
+ Level::Warn => ("warn: ", "\x1b[1;33m"),
+ Level::Info => ("", "\x1b[37m"),
+ Level::Debug => ("debug: ", "\x1b[90m"),
+ Level::Trace => ("trace: ", "\x1b[36m"),
+ };
+
+ let colored_prefix = format!("{}{}\x1b[0m", color_code, prefix);
+
+ writeln!(buf, "{}{}", colored_prefix, args)
+ })
+ .init();
+}
diff --git a/src/macros.rs b/src/macros.rs
new file mode 100644
index 0000000..11b8da4
--- /dev/null
+++ b/src/macros.rs
@@ -0,0 +1,47 @@
/// Remove every occurrence of the given flag string(s) from `$args`,
/// evaluating to `true` if at least one flag was present.
///
/// Typically used on a `Vec<String>` of CLI arguments with `&str`
/// flags; each flag expression is evaluated exactly once.
#[macro_export]
macro_rules! special_flag {
    ($args:expr, $($flag:expr),+) => {{
        let mut found = false;
        $(
            let flag = $flag;
            let before = $args.len();
            $args.retain(|arg| arg != flag);
            // Presence is detected by whether `retain` dropped anything.
            if $args.len() != before {
                found = true;
            }
        )+
        found
    }};
}
+
/// Extract the value following the first matching flag from `$args`,
/// removing both the flag and its value from the container.
///
/// Evaluates to `Some(value)` when a flag is followed by a value, and
/// `None` when no flag matches or the flag is the last argument (in
/// which case the dangling flag itself is still removed). Once one flag
/// has matched, the remaining flags in the list are ignored.
#[macro_export]
macro_rules! special_argument {
    ($args:expr, $($flag:expr),+) => {{
        let mut value: Option<String> = None;
        let mut found = false;
        $(
            let flag = $flag;
            if !found {
                if let Some(pos) = $args.iter().position(|arg| arg == flag) {
                    if pos + 1 < $args.len() {
                        // Take the value first so `pos` stays valid.
                        value = Some($args.remove(pos + 1));
                    } else {
                        value = None;
                    }
                    $args.remove(pos);
                    #[allow(unused_assignments)]
                    {
                        found = true;
                    }
                }
            }
        )+
        value
    }};
}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..b64c0c4
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,2 @@
+pub mod file_input_solve;
+pub mod size_display;
diff --git a/src/utils/file_input_solve.rs b/src/utils/file_input_solve.rs
new file mode 100644
index 0000000..30d5765
--- /dev/null
+++ b/src/utils/file_input_solve.rs
@@ -0,0 +1,82 @@
+use std::{
+ env::current_dir,
+ path::{Path, PathBuf},
+};
+
+use just_fmt::fmt_path::fmt_path;
+
+pub fn parse_path_input(
+ files: Vec<String>,
+ recursive: bool,
+ exclude_dir: Vec<&str>,
+) -> Vec<PathBuf> {
+ let current_dir = current_dir().unwrap();
+ let files = if recursive {
+ let mut result: Vec<PathBuf> = Vec::new();
+ for arg in files.iter().skip(1) {
+ if exclude_dir.contains(&arg.as_str()) {
+ continue;
+ }
+ let path = current_dir.join(arg);
+ if path.is_dir() {
+ if let Err(e) = collect_files_recursively(&path, &mut result) {
+ eprintln!("Error collecting files recursively: {}", e);
+ continue;
+ }
+ } else {
+ result.push(path);
+ }
+ }
+ result
+ } else {
+ let mut result = Vec::new();
+ for arg in files.iter().skip(1) {
+ if exclude_dir.contains(&arg.as_str()) {
+ continue;
+ }
+ let path = current_dir.join(arg);
+ if path.is_dir() {
+ if files.len() == 2 {
+ for entry in std::fs::read_dir(&path)
+ .unwrap_or_else(|e| {
+ eprintln!("Error reading directory: {}", e);
+ std::fs::read_dir(".").unwrap()
+ })
+ .flatten()
+ {
+ let entry_path = entry.path();
+ if !entry_path.is_dir() {
+ result.push(entry_path);
+ }
+ }
+ }
+ } else {
+ result.push(path);
+ }
+ }
+ result
+ };
+ files
+ .into_iter()
+ .filter_map(|path| match fmt_path(path) {
+ Ok(formatted_path) => Some(formatted_path),
+ Err(e) => {
+ eprintln!("Error formatting path: {}", e);
+ None
+ }
+ })
+ .collect()
+}
+
/// Depth-first walk of `dir`, appending every non-directory entry to
/// `files`. Subdirectories are descended into; `is_dir` follows
/// symlinks, so symlinked directories are traversed too (beware of
/// cycles).
fn collect_files_recursively(dir: &Path, files: &mut Vec<PathBuf>) -> std::io::Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let path = entry?.path();
        match path.is_dir() {
            true => collect_files_recursively(&path, files)?,
            false => files.push(path),
        }
    }
    Ok(())
}
diff --git a/src/utils/size_display.rs b/src/utils/size_display.rs
new file mode 100644
index 0000000..3e2bc29
--- /dev/null
+++ b/src/utils/size_display.rs
@@ -0,0 +1,14 @@
/// Convert a byte count into a scaled value plus unit suffix for
/// display, using 1024-based steps ("B", "KB", "MB", "GB", "TB").
///
/// Returns the scaled value (suitable for `{:.2}` formatting) and the
/// unit. The previous signature carried a spurious, unconstrained
/// lifetime parameter `<'a>` (clippy: needless_lifetimes); the unit
/// strings are `'static`.
///
/// NOTE(review): the labels are SI-style names for what are technically
/// binary (KiB/MiB/…) multiples — kept as-is for output compatibility.
pub fn size_display(total_bytes: usize) -> (f64, &'static str) {
    const KB: f64 = 1024.0;
    // Largest factor first so the first match wins.
    const TABLE: [(f64, &str); 4] = [
        (KB * KB * KB * KB, "TB"),
        (KB * KB * KB, "GB"),
        (KB * KB, "MB"),
        (KB, "KB"),
    ];

    let bytes = total_bytes as f64;
    for (factor, unit) in TABLE {
        if bytes >= factor {
            return (bytes / factor, unit);
        }
    }
    (bytes, "B")
}