diff options
| author | 魏曹先生 <1992414357@qq.com> | 2026-03-04 21:26:04 +0800 |
|---|---|---|
| committer | 魏曹先生 <1992414357@qq.com> | 2026-03-04 21:35:09 +0800 |
| commit | 22926ce29e3f8e040ec349401aeb6a77f32eae72 (patch) | |
| tree | 678753ec49a61fb9d3e2d8e869393dec90ea7ef4 | |
Initialize Butchunker project structure and policy system
42 files changed, 3824 insertions, 0 deletions
diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..e0fdf5b --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +target-dir = "./.temp/target/" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0fdece --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.temp diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..eb87eab --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,889 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "butchunker" +version = "0.1.0" +dependencies = [ + "blake3", + "butck_policies", + "colored", + "env_logger", + "futures", + "hex", + "just_fmt", + "just_progress", + "just_template", + "log", + "memmap2", + "sha2", + "syn", + "thiserror", + "tokio", + "toml", +] + +[[package]] +name = "butck_fixed_size" +version = "0.1.0" + +[[package]] +name = "butck_policies" +version = "0.1.0" +dependencies = [ + "butck_fixed_size", + "thiserror", + "tokio", +] + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" 
+version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "env_filter" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "jiff" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", +] + +[[package]] +name = "jiff-static" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "just_fmt" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5454cda0d57db59778608d7a47bff5b16c6705598265869fb052b657f66cf05e" + +[[package]] +name = "just_progress" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef1a564328a5061a4828b4f82b7275a7f3dbc7d4ed5778da986f6ab48563c88" +dependencies = [ + "tokio", +] + +[[package]] +name = "just_template" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3edb658c34b10b69c4b3b58f7ba989cd09c82c0621dee1eef51843c2327225" +dependencies = [ + "just_fmt", +] + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "lock_api" 
+version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" +dependencies = [ + "serde_core", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml" +version = "1.0.3+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c" +dependencies 
= [ + "indexmap", + "serde_core", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_datetime" +version = "1.0.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..cfab2b8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "butchunker" +version = "0.1.0" +edition = "2024" +default-run = "butckrepo-guide" + +[dependencies] +butck_policies = { path = "policy/_policies" } + +# Code generate +just_template = "0.1.3" + +# Memory mapping +memmap2 = "0.9" + +# Error +thiserror = "2" + +# Syntax +just_fmt = "0.1.2" +syn = { version = "2", features = ["full"] } + +# Async +futures = "0.3" +tokio = { version = "1", features = ["full"] } + +# Display +colored = "3" +just_progress = "0.1.3" + +# Serialize & Config +toml = "1" + +# Logging +log = "0.4" +env_logger = "0.11" + +# Hashing +blake3 = "1.8" +sha2 = "0.10" +hex = "0.4" diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..f630f06 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2026 Butchunker Team, Weicao-CatilGrass + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..f4814bc --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Butchunker Team, Weicao-CatilGrass + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/policy/README.md b/policy/README.md new file mode 100644 index 0000000..c995506 --- /dev/null +++ b/policy/README.md @@ -0,0 +1,96 @@ +# Write your Policy! + +Welcome to the Butchunker Policy Development Guide. This guide explains how to create a custom chunking policy for Butchunker. A chunking policy defines how to split data streams or files into chunks. This is a core task for data deduplication, storage, and transfer. + +Before starting, you should know basic Rust and understand the Butchunker framework. Your policy will decide where to split the data based on its content and your settings. + +## Creating a Policy Crate + +First, create a new `Rust Crate` to host your chunking policy. + +### Writing `Cargo.toml` + +```toml +[package] +name = "butck_fixed_size" # Policy name +authors = ["Butchunker"] # Author info +version = "0.1.0" +edition = "2024" + +[dependencies] +``` + +## Implementing Policy Logic + +### Writing `src/lib.rs` + +In `src/lib.rs`, implement one or both of the following schemes: + +#### Scheme 1: Streaming Processing Scheme + +Suitable for processing large files where subsequent content cannot be predicted, but also does not require loading the entire file into memory. 
+ +```rust +use std::collections::HashMap; + +// Streaming policy struct (must implement the Default trait) +#[derive(Default)] +pub struct YourPolicyStream { + // Define your state fields here +} + +// Streaming processing function +pub async fn your_policy_stream( + current_data: &[u8], // Current data chunk + len: u32, // Data length + stream: &mut YourPolicyStream, // Streaming processing context + params: &HashMap<&str, &str>, // Configuration parameters +) -> Option<u32> { + // Implement your chunking logic + // Return the split position (offset from the start of current_data), or None if no split + None +} +``` + +#### Scheme 2: Simple Processing Scheme + +Suitable for processing small to medium-sized files that can be loaded entirely at once, allowing knowledge of subsequent data during chunking for better results. + +```rust +use std::collections::HashMap; + +// Simple processing function +pub async fn your_policy( + raw_data: &[u8], // Raw data + params: &HashMap<&str, &str>, // Configuration parameters +) -> Vec<u32> { + // Implement your chunking logic + // Return a vector of all split positions (offsets from the start of raw_data) + vec![] +} +``` + +## Registration and Usage + +### Deploying the Policy + +1. Place the completed policy `Crate` into the `./policy/` directory of the Butchunker repository. +2. Use the `butckrepo-refresh` program to refresh the registry: + - If the program is not yet installed, you can execute the following in the root directory of the Butchunker repository: + + ```bash + cargo install --path ./ + ``` +3. After each policy library update, you must: + - Execute `butckrepo-refresh` to refresh the registry. + - Reinstall the `butck` binary: `cargo install --path ./`. + +### Calling the Policy + +- The policy will be automatically registered in Butchunker's registry.
+ + Use the following command to call the policy: + + ````bash + butck write <file> --policy <policy_name> --storage ./ + ```` diff --git a/policy/_policies/Cargo.lock b/policy/_policies/Cargo.lock new file mode 100644 index 0000000..8f7bc05 --- /dev/null +++ b/policy/_policies/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "butck_policies" +version = "0.1.0" diff --git a/policy/_policies/Cargo.toml b/policy/_policies/Cargo.toml new file mode 100644 index 0000000..d939dd2 --- /dev/null +++ b/policy/_policies/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "butck_policies" +version = "0.1.0" +edition = "2024" + +[dependencies] +thiserror = "2" +tokio = { version = "1", features = ["fs"] } + +# Auto generated dependencies +# If you find issues with the dependencies, please +# 1. Delete all code after this comment +# 2. Clear the file `policy/_policies/src/lib.rs` +# 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory +butck_fixed_size = { path = "../../policy/butck/butck_fixed_size" }
\ No newline at end of file diff --git a/policy/_policies/Cargo.toml.t b/policy/_policies/Cargo.toml.t new file mode 100644 index 0000000..aab90b9 --- /dev/null +++ b/policy/_policies/Cargo.toml.t @@ -0,0 +1,19 @@ +[package] +name = "butck_policies" +version = "0.1.0" +edition = "2024" + +[dependencies] +thiserror = "2" +tokio = { version = "1", features = ["fs"] } + +# Auto generated dependencies +# If you find issues with the dependencies, please +# 1. Delete all code after this comment +# 2. Clear the file `policy/_policies/src/lib.rs` +# 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory +>>>>>>>>>> deps + +@@@ >>> deps +<<<crate_name>>> = { path = "../../<<<path>>>" } +@@@ <<< diff --git a/policy/_policies/src/error.rs b/policy/_policies/src/error.rs new file mode 100644 index 0000000..975749d --- /dev/null +++ b/policy/_policies/src/error.rs @@ -0,0 +1,14 @@ +#[derive(Debug, thiserror::Error)] +pub enum ChunkFailed { + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("Target policy not found")] + PolicyNotFound, + + #[error("File read failed: {0}")] + FileReadFailed(std::path::PathBuf), + + #[error("File open failed: {0}")] + FileOpenFailed(std::path::PathBuf), +} diff --git a/policy/_policies/src/lib.rs b/policy/_policies/src/lib.rs new file mode 100644 index 0000000..397579a --- /dev/null +++ b/policy/_policies/src/lib.rs @@ -0,0 +1,75 @@ +// Auto generated dependencies +// If you find issues with the dependencies, please +// 1. Delete all code after this comment +// 2. Clear the auto generated part in `policy/_policies/Cargo.toml` +// 3. 
Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory +pub mod error; +pub mod stream_read; + +use error::ChunkFailed; +use std::{collections::HashMap, path::Path}; + +use crate::stream_read::chunk_stream_process; + +/// Chunks the specified raw data using the specified chunking policy +/// +/// # Parameters +/// - `policy_name`: Chunking policy name, currently supports 1 policies +/// - `raw_data`: Raw data byte slice +/// - `params`: Hashmap of parameters required by the chunking policy +pub async fn chunk_with( + policy_name: &str, + raw_data: &[u8], + params: &HashMap<&str, &str>, +) -> Result<Vec<u32>, ChunkFailed> { + match policy_name { + "butck_fixed_size" => Ok(butck_fixed_size::chunk(raw_data, params).await), + _ => Err(ChunkFailed::PolicyNotFound), + } +} + +pub async fn chunk_stream_with( + policy_name: &str, + size: u32, + path: &Path, + params: &HashMap<&str, &str>, +) -> Result<Vec<u32>, ChunkFailed> { + match policy_name { + "butck_fixed_size" => { + let mut stream = butck_fixed_size::FixedSizeStream::default(); + chunk_stream_process( + path, &mut stream, size, params, + async |current_data, len, stream, params| { + butck_fixed_size::chunk_stream(current_data, len, stream, params).await + }, + ) + .await + } + _ => Err(ChunkFailed::PolicyNotFound), + } +} + +pub fn policies() -> Vec<&'static str> { + vec![ + // butck_fixed_size + "butck_fixed_size", + ] +} + +pub mod butck_fixed_size { + pub use butck_fixed_size::FixedSizeStream; + use std::collections::HashMap; + + pub async fn chunk(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> { + butck_fixed_size::chunk_fixed_size(raw_data, params).await + } + + pub async fn chunk_stream( + current_data: &[u8], + len: u32, + stream: &mut butck_fixed_size::FixedSizeStream, + params: &HashMap<&str, &str>, + ) -> Option<u32> { + butck_fixed_size::chunk_fixed_size_stream(current_data, len, stream, params).await + } +}
\ No newline at end of file diff --git a/policy/_policies/src/lib.rs.t b/policy/_policies/src/lib.rs.t new file mode 100644 index 0000000..873a4cd --- /dev/null +++ b/policy/_policies/src/lib.rs.t @@ -0,0 +1,117 @@ +// Auto generated dependencies +// If you find issues with the dependencies, please +// 1. Delete all code after this comment +// 2. Clear the auto generated part in `policy/_policies/Cargo.toml` +// 3. Run `cargo run --bin butckrepo-refresh` in the Butchunker root directory +pub mod error; +pub mod stream_read; + +use error::ChunkFailed; +use std::{collections::HashMap, path::Path}; + +use crate::stream_read::chunk_stream_process; + +/// Chunks the specified raw data using the specified chunking policy +/// +/// # Parameters +/// - `policy_name`: Chunking policy name, currently supports <<<policy_count>>> policies +/// - `raw_data`: Raw data byte slice +/// - `params`: Hashmap of parameters required by the chunking policy +pub async fn chunk_with( + policy_name: &str, + raw_data: &[u8], + params: &HashMap<&str, &str>, +) -> Result<Vec<u32>, ChunkFailed> { + match policy_name { +>>>>>>>>>> match_arms + _ => Err(ChunkFailed::PolicyNotFound), + } +} + +pub async fn chunk_stream_with( + policy_name: &str, + size: u32, + path: &Path, + params: &HashMap<&str, &str>, +) -> Result<Vec<u32>, ChunkFailed> { + match policy_name { +>>>>>>>>>> match_arms_stream + _ => Err(ChunkFailed::PolicyNotFound), + } +} + +pub fn policies() -> Vec<&'static str> { + vec![ +>>>>>>>>>> policy_names + ] +} + +>>>>>>>>>> exports_simple +>>>>>>>>>> exports_stream +>>>>>>>>>> exports_both + +@@@ >>> match_arms + "<<<crate_name>>>" => Ok(<<<crate_name>>>::chunk(raw_data, params).await), +@@@ <<< + +@@@ >>> match_arms_stream + "<<<crate_name>>>" => { + let mut stream = <<<stream_struct_id>>>::default(); + chunk_stream_process( + path, &mut stream, size, params, + async |current_data, len, stream, params| { + <<<crate_name>>>::chunk_stream(current_data, len, stream, params).await + }, + 
) + .await + } +@@@ <<< + +@@@ >>> policy_names + // <<<name>>> + "<<<name>>>", +@@@ <<< + +@@@ >>> exports_simple +pub mod <<<crate_name>>> { + use std::collections::HashMap; + pub async fn chunk(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> { + <<<crate_name>>>::<<<matched_func>>>(raw_data, params)<<<has_await>>> + } +} +@@@ <<< + +@@@ >>> exports_stream +pub mod <<<crate_name>>> { + pub use <<<stream_struct_id>>>; + + pub async fn chunk_stream( + current_data: &[u8], + len: u32, + stream: &mut <<<stream_struct_id>>>, + params: &std::collections::HashMap<&str, &str>, + ) -> Option<u32> { + <<<crate_name>>>::<<<matched_func_stream>>>(current_data, len, stream, params)<<<has_await_stream>>> + } +} +@@@ <<< + +@@@ >>> exports_both +pub mod <<<crate_name>>> { + pub use <<<stream_struct_id>>>; + use std::collections::HashMap; + + pub async fn chunk(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> { + <<<crate_name>>>::<<<matched_func>>>(raw_data, params)<<<has_await>>> + } + + pub async fn chunk_stream( + current_data: &[u8], + len: u32, + stream: &mut <<<stream_struct_id>>>, + params: &HashMap<&str, &str>, + ) -> Option<u32> { + <<<crate_name>>>::<<<matched_func_stream>>>(current_data, len, stream, params)<<<has_await_stream>>> + } +} +@@@ <<< diff --git a/policy/_policies/src/stream_read.rs b/policy/_policies/src/stream_read.rs new file mode 100644 index 0000000..5cf7791 --- /dev/null +++ b/policy/_policies/src/stream_read.rs @@ -0,0 +1,46 @@ +use crate::error::ChunkFailed; +use std::{collections::HashMap, path::Path}; + +pub async fn chunk_stream_process<T, F>( + path: &Path, + stream_data: &mut T, + size: u32, + params: &HashMap<&str, &str>, + chunk_func: F, +) -> Result<Vec<u32>, ChunkFailed> +where + T: Default, + F: AsyncFn(&[u8], u32, &mut T, &HashMap<&str, &str>) -> Option<u32>, +{ + let mut file = tokio::fs::File::open(path) + .await + .map_err(|_| ChunkFailed::FileOpenFailed(path.to_path_buf()))?; + let mut buffer = vec![0u8; size as 
usize]; + let mut splits = Vec::new(); + let mut total_read = 0; + + loop { + let bytes_read = tokio::io::AsyncReadExt::read(&mut file, &mut buffer) + .await + .map_err(|_| ChunkFailed::FileReadFailed(path.to_path_buf()))?; + + if bytes_read == 0 { + break Ok(splits); + } + + // Process chunking on the buffer slice + let chunk_result = chunk_func( + &buffer[..bytes_read], + bytes_read as u32, + stream_data, + params, + ) + .await; + + if let Some(offset) = chunk_result { + splits.push(total_read + offset); + } + + total_read += bytes_read as u32; + } +} diff --git a/policy/butck/butck_fixed_size/Cargo.lock b/policy/butck/butck_fixed_size/Cargo.lock new file mode 100644 index 0000000..c1e1873 --- /dev/null +++ b/policy/butck/butck_fixed_size/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "butck_fixed_size" +version = "0.1.0" diff --git a/policy/butck/butck_fixed_size/Cargo.toml b/policy/butck/butck_fixed_size/Cargo.toml new file mode 100644 index 0000000..1550cb9 --- /dev/null +++ b/policy/butck/butck_fixed_size/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "butck_fixed_size" +authors = ["Butchunker"] +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/policy/butck/butck_fixed_size/src/lib.rs b/policy/butck/butck_fixed_size/src/lib.rs new file mode 100644 index 0000000..28cabff --- /dev/null +++ b/policy/butck/butck_fixed_size/src/lib.rs @@ -0,0 +1,48 @@ +use std::collections::HashMap; + +const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024; // 1MB + +fn get_chunk_size(params: &HashMap<&str, &str>) -> usize { + params + .get("size") + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_CHUNK_SIZE) +} + +pub async fn chunk_fixed_size(raw_data: &[u8], params: &HashMap<&str, &str>) -> Vec<u32> { + let chunk_size = get_chunk_size(params); + (chunk_size..raw_data.len()) + .step_by(chunk_size) + .map(|pos| pos as u32) + .collect() +} + +#[derive(Default)] 
+pub struct FixedSizeStream { + processed_bytes: usize, +} + +pub async fn chunk_fixed_size_stream( + _current_data: &[u8], + len: u32, + stream: &mut FixedSizeStream, + params: &HashMap<&str, &str>, +) -> Option<u32> { + let chunk_size = get_chunk_size(params); + let valid_len = len as usize; + + let prev_chunk = stream.processed_bytes / chunk_size; + let new_processed = stream.processed_bytes + valid_len; + let new_chunk = new_processed / chunk_size; + + if prev_chunk != new_chunk { + // Find chunk boundary in current data, update processed bytes, return position + let boundary_in_chunk = chunk_size - (stream.processed_bytes % chunk_size); + stream.processed_bytes += boundary_in_chunk; + Some(boundary_in_chunk.min(valid_len) as u32) + } else { + // Update bytes processed + stream.processed_bytes = new_processed; + None + } +} diff --git a/resources/helps/butck.txt b/resources/helps/butck.txt new file mode 100644 index 0000000..3ee666c --- /dev/null +++ b/resources/helps/butck.txt @@ -0,0 +1,20 @@ +Usage: butck [-v | --version] [-h | --help] [-q | --quiet] + [-l | --log-level <trace/debug/info/warn/error>] + [-np | --no-progress] [-D | --display-boundaries] + + [-s | --storage <path>] [-p | --policy <policy_name>] + [-H | --chunk-hash <blake3/sha256>] + [-o | --output-dir <output>] [-O | --output-file <file>] + [-r | --recursive] [-R | --register <name>] + [-S | --stream-read <size_byte>] [-m | --memmap-read] + + [+p | +param key=value] + +Subcommands: + write <file> Write a file and output the index file + write <file> -R <name> Then, register the index + build <index/name> Input an index file and build the file from the storage + policies Output the available policies + +Butchunker 0.1.0 +Copyright (c) 2026 Weicao-CatilGrass diff --git a/resources/version_info.txt b/resources/version_info.txt new file mode 100644 index 0000000..2736fa7 --- /dev/null +++ b/resources/version_info.txt @@ -0,0 +1 @@ +Butchunker 0.1.0 diff --git a/scripts/sh/comp_butck.sh 
b/scripts/sh/comp_butck.sh new file mode 100644 index 0000000..8cb31f0 --- /dev/null +++ b/scripts/sh/comp_butck.sh @@ -0,0 +1,56 @@ +#!/bin/bash +_butck_completion() { + local cur prev words cword + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + words=("${COMP_WORDS[@]}") + cword=$COMP_CWORD + + if [[ $cur == -* ]]; then + return + fi + + case "$prev" in + -l|--log-level) + COMPREPLY=($(compgen -W "trace debug info warn error" -- "$cur")) + return + ;; + -H|--chunk-hash) + COMPREPLY=($(compgen -W "blake3 sha256" -- "$cur")) + return + ;; + -o|--output-dir) + COMPREPLY=($(compgen -d -- "$cur")) + return + ;; + -O|--output-file) + COMPREPLY=($(compgen -f -- "$cur")) + return + ;; + -p|--policy) + local policies + policies=$(butck policies 2>/dev/null) + COMPREPLY=($(compgen -W "$policies" -- "$cur")) + return + ;; + -R|--register) + return + ;; + esac + + if [[ $cword -eq 1 ]]; then + COMPREPLY=($(compgen -W "write build state policies" -- "$cur")) + fi + + if [[ $cword -ge 2 ]]; then + local subcommand="${COMP_WORDS[1]}" + case "$subcommand" in + "build"|"write") + COMPREPLY=($(compgen -f -- "$cur")) + ;; + esac + fi +} + +complete -F _butck_completion butck diff --git a/src/bin/butck.rs b/src/bin/butck.rs new file mode 100644 index 0000000..6a81fbb --- /dev/null +++ b/src/bin/butck.rs @@ -0,0 +1,109 @@ +use std::process::exit; + +use butchunker::{ + chunker::{ + context::ButckContext, + entry::{entry, print_help, print_version}, + rw::error::{ButckRWError, ButckRWErrorKind}, + }, + log::init_logger, + special_argument, special_flag, +}; +use just_progress::{progress, renderer}; +use log::error; +use tokio::join; + +#[tokio::main] +async fn main() { + // Collect arguments + let mut args: Vec<String> = std::env::args().skip(1).collect(); + + let version = special_flag!(args, "-v", "--version"); + let help = special_flag!(args, "-h", "--help"); + + if version { + print_version(); + exit(0) + } + + // Special arguments, early 
return + if help || args.is_empty() { + print_help(); + exit(0) + } + + // Init colored + #[cfg(windows)] + colored::control::set_virtual_terminal(true).unwrap(); + + // Output control flags + let quiet = special_flag!(args, "-q", "--quiet"); + let no_progress = special_flag!(args, "-np", "--no-progress"); + + // Logger + if !quiet { + let logger_level = match special_argument!(args, "-l", "--log-level") { + Some(level) => match level.trim().to_lowercase().as_str() { + "trace" => log::LevelFilter::Trace, + "debug" => log::LevelFilter::Debug, + "info" => log::LevelFilter::Info, + "warn" => log::LevelFilter::Warn, + "error" => log::LevelFilter::Error, + _ => log::LevelFilter::Info, + }, + None => log::LevelFilter::Info, + }; + init_logger(Some(logger_level)); + } + + let ctx = ButckContext::from_args(args.clone()); + + // When `--no-progress` or `--quiet` is enabled, + // the progress system will not be initialized + if no_progress || quiet { + handle_entry_result(entry(ctx, args).await); + } else { + let progress = progress::init(); + let renderer = renderer::ProgressSimpleRenderer::new().with_subprogress(true); + let bind = progress::bind(progress, move |name, state| renderer.update(name, state)); + join!( + async { + handle_entry_result(entry(ctx, args).await); + progress::close(); + }, + bind + ); + } +} + +fn handle_entry_result(r: Result<(), ButckRWError>) { + match r { + Ok(_) => {} + Err(e) => match e.kind() { + ButckRWErrorKind::NoButckStorageFound => { + error!("No butck storage found"); + error!("Use `--storage <PATH>` to specify or init butck storage"); + } + ButckRWErrorKind::ChunkingPolicyNotSpecified => { + error!("Chunking policy not specified"); + error!("Use `--policy <policy_name>` to specify chunking policy"); + error!("or use `butck policies` to output the available policies"); + } + ButckRWErrorKind::ReadingMethodAmbiguous => error!("Reading method ambiguous"), + ButckRWErrorKind::OutputCountMismatch => { + error!("Output count mismatch"); + 
error!("When processing a single file, use `--output-file` to specify output path"); + error!( + "When processing multiple files, use `--output-dir` to specify output directory" + ); + } + ButckRWErrorKind::ChunkNotFound(chunk_id) => { + error!("Chunk not found in storage: {}", chunk_id) + } + ButckRWErrorKind::RebuildFailed(reason) => error!("Failed to rebuild file: {}", reason), + ButckRWErrorKind::ChunkFailed(_chunk_failed) => error!("Chunk failed"), + ButckRWErrorKind::IOError(error) => error!("IO error: {}", error), + ButckRWErrorKind::InvalidBidxFormat => error!("Invalid bidx format"), + }, + } +} diff --git a/src/bin/butckrepo-guide.rs b/src/bin/butckrepo-guide.rs new file mode 100644 index 0000000..d694ba5 --- /dev/null +++ b/src/bin/butckrepo-guide.rs @@ -0,0 +1,13 @@ +use colored::Colorize; + +fn main() { + println!("Welcome to Butchunker!"); + println!( + "Please add your policy crates to the `{}` directory", + "./policy/".bright_green() + ); + println!( + "Then run `{}` to update the policy registry", + "cargo run --bin butckrepo-refresh".bright_green() + ); +} diff --git a/src/bin/butckrepo-refresh.rs b/src/bin/butckrepo-refresh.rs new file mode 100644 index 0000000..9184efb --- /dev/null +++ b/src/bin/butckrepo-refresh.rs @@ -0,0 +1,619 @@ +use colored::Colorize; +use just_fmt::fmt_path::fmt_path_str; +use just_template::{Template, tmpl, tmpl_param}; +use std::{ + env::current_dir, + path::{Path, PathBuf}, +}; +use tokio::fs; + +const LIB_RS_TEMPLATE_PATH: &str = "policy/_policies/src/lib.rs.t"; +const CARGO_TOML_TEMPLATE_PATH: &str = "policy/_policies/Cargo.toml.t"; +const LIB_RS_PATH: &str = "./policy/_policies/src/lib.rs"; +const CARGO_TOML_PATH: &str = "./policy/_policies/Cargo.toml"; + +#[tokio::main] +async fn main() { + let current_dir = current_dir().unwrap(); + precheck(&current_dir).await; + + println!("Updating policies ..."); + let (mut lib_rs_template, mut cargo_toml_template) = { + let lib_rs_template_path =
current_dir.join("policy/_policies/src/lib.rs.t"); + let cargo_toml_template_path = current_dir.join("policy/_policies/Cargo.toml.t"); + + let lib_rs_content = fs::read_to_string(&lib_rs_template_path) + .await + .unwrap_or_else(|_| { + eprintln!( + "{}", + format!( + "Error: Failed to read template file: {}", + lib_rs_template_path.display() + ) + .red() + ); + std::process::exit(1); + }); + + let cargo_toml_content = fs::read_to_string(&cargo_toml_template_path) + .await + .unwrap_or_else(|_| { + eprintln!( + "{}", + format!( + "Error: Failed to read template file: {}", + cargo_toml_template_path.display() + ) + .red() + ); + std::process::exit(1); + }); + + ( + Template::from(lib_rs_content), + Template::from(cargo_toml_content), + ) + }; + + let cargo_toml_pathes = find_cargo_toml_dirs(&current_dir.join("policy")).await; + println!( + "Found {} crates, register to `{}`", + cargo_toml_pathes.len(), + CARGO_TOML_PATH.bright_green() + ); + + tmpl_param!(lib_rs_template, policy_count = cargo_toml_pathes.len()); + + let collect_futures = cargo_toml_pathes.iter().map(collect).collect::<Vec<_>>(); + + for policy in futures::future::join_all(collect_futures).await { + let Some(policy) = policy else { continue }; + tmpl!(cargo_toml_template += { + deps { (crate_name = policy.crate_name, path = policy.path) } + }); + // Determine which export template to use based on detected functions + if policy.matched_func_stream.is_some() { + let stream_struct_id = format!( + "{}::{}", + policy.crate_name, + policy.stream_struct_id.unwrap() + ); + if policy.matched_func.is_empty() { + // Only stream function + tmpl!(lib_rs_template += { + exports_stream { ( + crate_name = policy.crate_name, + matched_func_stream = policy.matched_func_stream.unwrap(), + has_await_stream = + if policy.matched_func_stream_has_await { ".await" } else { "" }, + stream_struct_id = stream_struct_id + ) }, + match_arms { ( + crate_name = policy.crate_name, + ) }, + match_arms_stream { ( + crate_name =
policy.crate_name, + stream_struct_id = stream_struct_id + ) }, + policy_names { ( + name = policy.crate_name, + ) } + }); + } else { + // Both simple and stream functions + tmpl!(lib_rs_template += { + exports_both { ( + crate_name = policy.crate_name, + matched_func = policy.matched_func, + has_await = + if policy.matched_func_has_await { ".await" } else { "" }, + matched_func_stream = policy.matched_func_stream.unwrap(), + has_await_stream = + if policy.matched_func_stream_has_await { ".await" } else { "" }, + stream_struct_id = stream_struct_id + ) }, + match_arms { ( + crate_name = policy.crate_name, + ) }, + match_arms_stream { ( + crate_name = policy.crate_name, + stream_struct_id = stream_struct_id + ) }, + policy_names { ( + name = policy.crate_name, + ) } + }); + } + } else { + // Only simple function + tmpl!(lib_rs_template += { + exports_simple { ( + crate_name = policy.crate_name, + matched_func = policy.matched_func, + has_await = + if policy.matched_func_has_await { ".await" } else { "" } + ) }, + match_arms { ( + crate_name = policy.crate_name, + ) }, + policy_names { ( + name = policy.crate_name, + ) } + }); + } + } + + let (write_cargo, write_lib) = tokio::join!( + fs::write(CARGO_TOML_PATH, cargo_toml_template.expand().unwrap()), + fs::write(LIB_RS_PATH, lib_rs_template.expand().unwrap()) + ); + write_cargo.unwrap(); + write_lib.unwrap(); +} + +struct CollectedPolicy { + crate_name: String, + path: String, + matched_func: String, + matched_func_has_await: bool, + matched_func_stream: Option<String>, + matched_func_stream_has_await: bool, + stream_struct_id: Option<String>, +} + +async fn collect(policy_crate_path: &PathBuf) -> Option<CollectedPolicy> { + let lib_rs_path = policy_crate_path.join("src").join("lib.rs"); + let lib_rs_content = fs::read_to_string(&lib_rs_path).await.ok()?; + + let cargo_toml_content = fs::read_to_string(policy_crate_path.join("Cargo.toml")) + .await + .ok()?; + let cargo_toml: toml::Value = 
toml::from_str(&cargo_toml_content).ok()?; + let crate_name = cargo_toml + .get("package")? + .get("name")? + .as_str()? + .to_string(); + let crate_path = fmt_path_str( + policy_crate_path + .strip_prefix(current_dir().unwrap()) + .unwrap() + .to_string_lossy(), + ) + .ok()?; + + let ( + matched_func, + matched_func_has_await, + matched_func_stream, + matched_func_stream_has_await, + stream_struct_id, + ) = collect_matched_func(lib_rs_content.as_str())?; + + println!( + "{} {} (at: `{}`) with func `{}{}{}{}(..)`", + "Register:".bright_blue().bold(), + crate_name, + crate_path.bright_green(), + "pub ".bright_magenta(), + if matched_func_has_await { "async " } else { "" }.bright_magenta(), + "fn ".bright_magenta(), + matched_func.bright_blue(), + ); + if let Some(stream_func) = &matched_func_stream { + println!( + " and stream func `{}{}{}{}(..)`", + "pub ".bright_magenta(), + if matched_func_stream_has_await { + "async " + } else { + "" + } + .bright_magenta(), + "fn ".bright_magenta(), + stream_func.bright_blue() + ); + } + + Some(CollectedPolicy { + crate_name, + path: crate_path, + matched_func, + matched_func_has_await, + matched_func_stream, + matched_func_stream_has_await, + stream_struct_id, + }) +} + +fn collect_matched_func( + lib_rs_content: &str, +) -> Option<(String, bool, Option<String>, bool, Option<String>)> { + let syntax_tree = syn::parse_file(lib_rs_content).ok()?; + + let mut matched_func = None; + let mut matched_func_has_await = false; + let mut matched_func_stream = None; + let mut matched_func_stream_has_await = false; + let mut stream_struct_id = None; + + // Iterate over all items, looking for functions that match the criteria + for item in &syntax_tree.items { + let syn::Item::Fn(func) = item else { continue }; + + // Check if the function visibility is pub + if !matches!(func.vis, syn::Visibility::Public(_)) { + continue; + } + + let sig = &func.sig; + + // Check for simple chunk function (returns Vec<u32>) + if 
check_simple_chunk_function(sig) { + matched_func = Some(sig.ident.to_string()); + matched_func_has_await = sig.asyncness.is_some(); + } + // Check for stream chunk function (returns Option<u8>) + else if let Some(struct_id) = check_stream_chunk_function(sig, &syntax_tree) { + matched_func_stream = Some(sig.ident.to_string()); + matched_func_stream_has_await = sig.asyncness.is_some(); + stream_struct_id = Some(struct_id); + } + } + + if matched_func.is_some() || matched_func_stream.is_some() { + Some(( + matched_func.unwrap_or_default(), + matched_func_has_await, + matched_func_stream, + matched_func_stream_has_await, + stream_struct_id, + )) + } else { + None + } +} + +fn check_simple_chunk_function(sig: &syn::Signature) -> bool { + // Check if the return type is Vec<u32> + let return_type_matches = match &sig.output { + syn::ReturnType::Type(_, ty) => { + let syn::Type::Path(type_path) = &**ty else { + return false; + }; + let segments = &type_path.path.segments; + + segments.len() == 1 + && segments[0].ident == "Vec" + && matches!(&segments[0].arguments, syn::PathArguments::AngleBracketed(args) + if args.args.len() == 1 && + matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Path(inner_type)) + if inner_type.path.segments.len() == 1 && + inner_type.path.segments[0].ident == "u32" + ) + ) + } + _ => false, + }; + + if !return_type_matches { + return false; + } + + // Check that there are exactly 2 parameters + if sig.inputs.len() != 2 { + return false; + } + + // Check that the first parameter type is &[u8] + let first_param_matches = match &sig.inputs[0] { + syn::FnArg::Typed(pat_type) => { + let syn::Type::Reference(type_ref) = &*pat_type.ty else { + return false; + }; + let syn::Type::Slice(slice_type) = &*type_ref.elem else { + return false; + }; + let syn::Type::Path(type_path) = &*slice_type.elem else { + return false; + }; + + type_path.path.segments.len() == 1 && type_path.path.segments[0].ident == "u8" + } + _ => false, + }; + + // Check that 
the second parameter type is &HashMap<&str, &str> + let second_param_matches = match &sig.inputs[1] { + syn::FnArg::Typed(pat_type) => { + let syn::Type::Reference(type_ref) = &*pat_type.ty else { + return false; + }; + let syn::Type::Path(type_path) = &*type_ref.elem else { + return false; + }; + + type_path.path.segments.len() == 1 + && type_path.path.segments[0].ident == "HashMap" + && matches!(&type_path.path.segments[0].arguments, syn::PathArguments::AngleBracketed(args) + if args.args.len() == 2 && + matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Reference(first_ref)) + if matches!(&*first_ref.elem, syn::Type::Path(first_path) + if first_path.path.segments.len() == 1 && + first_path.path.segments[0].ident == "str" + ) + ) && + matches!(&args.args[1], syn::GenericArgument::Type(syn::Type::Reference(second_ref)) + if matches!(&*second_ref.elem, syn::Type::Path(second_path) + if second_path.path.segments.len() == 1 && + second_path.path.segments[0].ident == "str" + ) + ) + ) + } + _ => false, + }; + + first_param_matches && second_param_matches +} + +fn check_stream_chunk_function(sig: &syn::Signature, syntax_tree: &syn::File) -> Option<String> { + // Check if the return type is Option<u32> + let return_type_matches = match &sig.output { + syn::ReturnType::Type(_, ty) => { + let syn::Type::Path(type_path) = &**ty else { + return None; + }; + let segments = &type_path.path.segments; + + segments.len() == 1 + && segments[0].ident == "Option" + && matches!(&segments[0].arguments, syn::PathArguments::AngleBracketed(args) + if args.args.len() == 1 && + matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Path(inner_type)) + if inner_type.path.segments.len() == 1 && + inner_type.path.segments[0].ident == "u32" + ) + ) + } + _ => false, + }; + + if !return_type_matches { + return None; + } + + // Check that there are exactly 4 parameters + if sig.inputs.len() != 4 { + return None; + } + + // Check that the first parameter type is &[u8] + let 
first_param_matches = match &sig.inputs[0] { + syn::FnArg::Typed(pat_type) => { + let syn::Type::Reference(type_ref) = &*pat_type.ty else { + return None; + }; + let syn::Type::Slice(slice_type) = &*type_ref.elem else { + return None; + }; + let syn::Type::Path(type_path) = &*slice_type.elem else { + return None; + }; + + // Check it's u8 + type_path.path.segments.len() == 1 && type_path.path.segments[0].ident == "u8" + } + _ => false, + }; + + // Check that the second parameter type is u32 + let second_param_matches = match &sig.inputs[1] { + syn::FnArg::Typed(pat_type) => { + let syn::Type::Path(type_path) = &*pat_type.ty else { + return None; + }; + type_path.path.segments.len() == 1 && type_path.path.segments[0].ident == "u32" + } + _ => false, + }; + + // Check that the third parameter type is &mut T where T is a struct defined in this crate + let third_param_info = match &sig.inputs[2] { + syn::FnArg::Typed(pat_type) => { + let syn::Type::Reference(type_ref) = &*pat_type.ty else { + return None; + }; + + // Check it's mutable reference + type_ref.mutability?; + + // Get the inner type + let syn::Type::Path(type_path) = &*type_ref.elem else { + return None; + }; + + // Get the struct identifier + if type_path.path.segments.len() != 1 { + return None; + } + + let struct_ident = type_path.path.segments[0].ident.to_string(); + + // Check if this struct is defined in the current crate and implements Default + if is_struct_defined_in_crate(&struct_ident, syntax_tree) { + Some(struct_ident) + } else { + None + } + } + _ => None, + }; + + let struct_ident = third_param_info?; + + // Check that the fourth parameter type is &HashMap<&str, &str> + let fourth_param_matches = match &sig.inputs[3] { + syn::FnArg::Typed(pat_type) => { + let syn::Type::Reference(type_ref) = &*pat_type.ty else { + return None; + }; + let syn::Type::Path(type_path) = &*type_ref.elem else { + return None; + }; + + type_path.path.segments.len() == 1 + && type_path.path.segments[0].ident == 
"HashMap" + && matches!(&type_path.path.segments[0].arguments, syn::PathArguments::AngleBracketed(args) + if args.args.len() == 2 && + matches!(&args.args[0], syn::GenericArgument::Type(syn::Type::Reference(first_ref)) + if matches!(&*first_ref.elem, syn::Type::Path(first_path) + if first_path.path.segments.len() == 1 && + first_path.path.segments[0].ident == "str" + ) + ) && + matches!(&args.args[1], syn::GenericArgument::Type(syn::Type::Reference(second_ref)) + if matches!(&*second_ref.elem, syn::Type::Path(second_path) + if second_path.path.segments.len() == 1 && + second_path.path.segments[0].ident == "str" + ) + ) + ) + } + _ => false, + }; + + if first_param_matches && second_param_matches && fourth_param_matches { + Some(struct_ident) + } else { + None + } +} + +fn is_struct_defined_in_crate(struct_ident: &str, syntax_tree: &syn::File) -> bool { + for item in &syntax_tree.items { + match item { + syn::Item::Struct(item_struct) => { + if item_struct.ident == struct_ident { + // Check if it implements Default via derive attribute + return has_default_derive(&item_struct.attrs) + || has_default_trait_bound(&item_struct.generics); + } + } + _ => continue, + } + } + false +} + +fn has_default_derive(attrs: &[syn::Attribute]) -> bool { + for attr in attrs { + if attr.path().is_ident("derive") { + // Parse the attribute meta to check for Default + if let syn::Meta::List(list) = &attr.meta { + // Convert tokens to string and check for Default + let tokens = list.tokens.to_string(); + if tokens.contains("Default") { + return true; + } + } + } + } + false +} + +fn has_default_trait_bound(generics: &syn::Generics) -> bool { + for param in &generics.params { + if let syn::GenericParam::Type(type_param) = param { + for bound in &type_param.bounds { + if let syn::TypeParamBound::Trait(trait_bound) = bound { + let path = &trait_bound.path; + if path.segments.len() == 1 && path.segments[0].ident == "Default" { + return true; + } + } + } + } + } + false +} + +async fn 
find_cargo_toml_dirs(root: &Path) -> Vec<PathBuf> { + let mut result = Vec::new(); + let mut dirs_to_visit = vec![root.to_path_buf()]; + + while let Some(current_dir) = dirs_to_visit.pop() { + let cargo_toml_path = current_dir.join("Cargo.toml"); + if fs::metadata(&cargo_toml_path).await.is_ok() { + result.push(current_dir); + continue; + } + + let mut read_dir = match fs::read_dir(¤t_dir).await { + Ok(rd) => rd, + Err(_) => continue, + }; + + while let Ok(Some(entry)) = read_dir.next_entry().await { + if let Ok(file_type) = entry.file_type().await + && file_type.is_dir() + { + let path = entry.path(); + if let Some(file_name) = path.file_name() + && let Some(name_str) = file_name.to_str() + && name_str.starts_with('_') + { + continue; + } + dirs_to_visit.push(path); + } + } + } + + result +} + +async fn precheck(current_dir: &Path) { + let cargo_toml_path = current_dir.join("Cargo.toml"); + let cargo_toml_content = fs::read_to_string(&cargo_toml_path) + .await + .unwrap_or_else(|_| { + eprintln!( + "{}", + "Error: Cargo.toml not found in current directory".red() + ); + std::process::exit(1); + }); + let cargo_toml: toml::Value = toml::from_str(&cargo_toml_content).unwrap_or_else(|_| { + eprintln!("{}", "Error: Failed to parse Cargo.toml".red()); + std::process::exit(1); + }); + let package_name = cargo_toml + .get("package") + .unwrap_or_else(|| { + eprintln!("{}", "Error: No package section in Cargo.toml".red()); + std::process::exit(1); + }) + .get("name") + .unwrap_or_else(|| { + eprintln!("{}", "Error: No package.name in Cargo.toml".red()); + std::process::exit(1); + }) + .as_str() + .unwrap_or_else(|| { + eprintln!("{}", "Error: package.name is not a string".red()); + std::process::exit(1); + }); + if package_name != "butchunker" { + eprintln!( + "{}", + format!( + "Error: package.name must be 'butchunker', found '{}'", + package_name + ) + .red() + ); + std::process::exit(1); + } +} diff --git a/src/chunker.rs b/src/chunker.rs new file mode 100644 index 
0000000..3143f68 --- /dev/null +++ b/src/chunker.rs @@ -0,0 +1,4 @@ +pub mod constants; +pub mod context; +pub mod entry; +pub mod rw; diff --git a/src/chunker/constants.rs b/src/chunker/constants.rs new file mode 100644 index 0000000..5e4870e --- /dev/null +++ b/src/chunker/constants.rs @@ -0,0 +1,3 @@ +pub const BUTCK_STORAGE_DIR_NAME: &str = ".butck"; +pub const BUTCK_INDEX_FILE_SUFFIX: &str = "bidx"; +pub const BUTCK_INDEX_MAGIC: [u8; 4] = [0xfe, 0xe1, 0xf0, 0x0d]; diff --git a/src/chunker/context.rs b/src/chunker/context.rs new file mode 100644 index 0000000..79254f5 --- /dev/null +++ b/src/chunker/context.rs @@ -0,0 +1,226 @@ +use std::{collections::HashMap, env::current_dir, path::PathBuf, process::exit, str::FromStr}; + +use log::{error, warn}; + +use crate::{ + chunker::constants::BUTCK_STORAGE_DIR_NAME, core::hash::ChunkWriteHash, special_argument, + special_flag, utils::file_input_solve::parse_path_input, +}; + +#[derive(Debug, Default)] +pub struct ButckContext { + /// All input files + pub file_paths: Vec<PathBuf>, + + /// Path of Butck Storage + pub storage_path: Option<PathBuf>, + + // Display chunk boundaries + pub display_boundaries: bool, + + /// Whether to read in stream mode + pub stream_read: Option<u32>, + + /// Whether to read files using memory mapping + pub memmap_read: bool, + + /// Register name + pub register_name: Option<String>, + + /// Chunking policy name + pub policy_name: Option<String>, + + /// Hash algorithm used for chunking + pub chunk_hash: ChunkWriteHash, + + /// Output directory + pub output_dir: PathBuf, + + /// Output file (not available for some commands) + pub output_file: Option<PathBuf>, + + /// Override parameters + pub params: HashMap<String, String>, +} + +impl ButckContext { + /// Apply the args of ChunkerContext to itself + pub fn from_args(mut args: Vec<String>) -> Self { + let mut ctx = ButckContext::default(); + let recursive = ctx.read_recursive(&mut args); + ctx.apply_stream_read(&mut args); + 
ctx.apply_memmap_read(&mut args); + ctx.apply_register_name(&mut args); + ctx.apply_policy_name(&mut args); + ctx.apply_chunk_hash(&mut args); + ctx.apply_storage_dir(&mut args); + ctx.apply_output_paths(&mut args); + ctx.apply_params(&mut args); + ctx.apply_display_boundaries(&mut args); + + // Finally, parse path input + ctx.file_paths = parse_path_input(args, recursive, vec![BUTCK_STORAGE_DIR_NAME]); + ctx + } + + fn read_recursive(&mut self, args: &mut Vec<String>) -> bool { + special_flag!(args, "-r", "--recursive") + } + + fn apply_stream_read(&mut self, args: &mut Vec<String>) { + if let Some(size_str) = special_argument!(args, "-S", "--stream-read") + && let Ok(size) = size_str.parse::<u32>() { + self.stream_read = Some(size); + } + } + + fn apply_memmap_read(&mut self, args: &mut Vec<String>) -> bool { + special_flag!(args, "-m", "--memmap-read") + } + + fn apply_register_name(&mut self, args: &mut Vec<String>) { + self.register_name = special_argument!(args, "-R", "--register"); + } + + fn apply_policy_name(&mut self, args: &mut Vec<String>) { + self.policy_name = special_argument!(args, "-p", "--policy"); + } + + fn apply_chunk_hash(&mut self, args: &mut Vec<String>) { + let chunk_hash_str = special_argument!(args, "-H", "--chunk-hash"); + self.chunk_hash = match chunk_hash_str { + Some(ref s) => match s.as_str() { + "blake3" => ChunkWriteHash::Blake3, + "sha256" => ChunkWriteHash::Sha256, + _ => ChunkWriteHash::default(), + }, + None => ChunkWriteHash::default(), + }; + } + + fn apply_output_paths(&mut self, args: &mut Vec<String>) { + let output_dir_str = special_argument!(args, "-o", "--output-dir"); + let output_file_str = special_argument!(args, "-O", "--output-file"); + + let current_dir = current_dir().unwrap(); + + let output_dir = if let Some(output_dir_str) = output_dir_str { + let path = PathBuf::from(output_dir_str); + if path.exists() { Some(path) } else { None } + } else { + None + }; + + self.output_dir = if let Some(output_dir) = 
output_dir { + output_dir + } else if let Some(storage_path) = &self.storage_path { + storage_path.clone() + } else { + current_dir + }; + + self.output_file = output_file_str.map(PathBuf::from) + } + + fn apply_params(&mut self, args: &mut Vec<String>) { + while let Some(arg) = special_argument!(args, "+p", "+param") { + let split = arg.split('=').collect::<Vec<&str>>(); + if split.len() == 2 { + self.params + .insert(split[0].to_string(), split[1].to_string()); + } + } + } + + fn apply_storage_dir(&mut self, args: &mut Vec<String>) { + self.storage_path = { + let storage_override = match special_argument!(args, "-s", "--storage") { + Some(o) => { + let path = PathBuf::from_str(o.as_str()); + if let Ok(p) = &path { + Self::init_butck_storage(p.clone()); + } + path.ok() + } + None => None, + }; + Self::find_butck_storage_dir(storage_override) + }; + } + + fn apply_display_boundaries(&mut self, args: &mut Vec<String>) { + self.display_boundaries = special_flag!(args, "-D", "--display-boundaries"); + } + + fn init_butck_storage(path: PathBuf) -> Option<PathBuf> { + if !path.exists() { + // If the path does not exist, create it and initialize Butck Storage here + if let Err(e) = std::fs::create_dir_all(&path) { + error!("Failed to create directory '{}': {}", path.display(), e); + exit(1); + } + let butck_dir = path.join(BUTCK_STORAGE_DIR_NAME); + if let Err(e) = std::fs::create_dir_all(&butck_dir) { + error!( + "Failed to create '{}' directory: {}", + BUTCK_STORAGE_DIR_NAME, e + ); + exit(1); + } + Some(path) + } else { + let butck_dir = path.join(BUTCK_STORAGE_DIR_NAME); + + // Check if Butck Storage already exists + if butck_dir.exists() { + // Butck Storage already exists, return the path + Some(path) + } else { + // Butck Storage doesn't exist, create it with a warning if directory is not empty + let is_empty = path + .read_dir() + .map(|mut entries| entries.next().is_none()) + .unwrap_or(false); + + if !is_empty { + // Warn about creating storage in non-empty 
directory + warn!( + "Creating '{}' storage in non-empty directory: {}", + BUTCK_STORAGE_DIR_NAME, + path.display() + ); + } + + // Create Butck Storage directory + if let Err(e) = std::fs::create_dir_all(&butck_dir) { + error!( + "Failed to create '{}' directory: {}", + BUTCK_STORAGE_DIR_NAME, e + ); + exit(1); + } + Some(path) + } + } + } + + // Get the ButckStorage directory based on context + fn find_butck_storage_dir(from: Option<PathBuf>) -> Option<PathBuf> { + let mut current_dir = match from { + Some(path) => path, + None => std::env::current_dir().ok()?, + }; + + loop { + let butck_dir = current_dir.join(BUTCK_STORAGE_DIR_NAME); + if butck_dir.is_dir() { + return Some(current_dir); + } + + if !current_dir.pop() { + break; + } + } + None + } +} diff --git a/src/chunker/entry.rs b/src/chunker/entry.rs new file mode 100644 index 0000000..4fdb1f8 --- /dev/null +++ b/src/chunker/entry.rs @@ -0,0 +1,39 @@ +use std::process::exit; + +use log::info; + +use crate::chunker::{ + context::ButckContext, + rw::{self, error::ButckRWError}, +}; + +pub async fn entry(ctx: ButckContext, args: Vec<String>) -> Result<(), ButckRWError> { + if let Some(subcommand) = args.first() { + return match subcommand.as_str() { + "write" => rw::storage::write(ctx).await, + "build" => rw::storage::build(ctx).await, + "policies" => { + butck_policies::policies() + .iter() + .for_each(|p| info!("{}", p)); + return Ok(()); + } + _ => { + print_help(); + exit(1) + } + }; + } + Ok(()) +} + +pub fn print_help() { + println!("{}", include_str!("../../resources/helps/butck.txt").trim()); +} + +pub fn print_version() { + println!( + "{}", + include_str!("../../resources/version_info.txt").trim() + ); +} diff --git a/src/chunker/rw.rs b/src/chunker/rw.rs new file mode 100644 index 0000000..85e734e --- /dev/null +++ b/src/chunker/rw.rs @@ -0,0 +1,2 @@ +pub mod error; +pub mod storage; diff --git a/src/chunker/rw/error.rs b/src/chunker/rw/error.rs new file mode 100644 index 0000000..7f263a5 --- 
/dev/null +++ b/src/chunker/rw/error.rs @@ -0,0 +1,61 @@ +use butck_policies::error::ChunkFailed; + +use crate::chunker::context::ButckContext; + +#[derive(Debug)] +pub struct ButckRWError { + kind: ButckRWErrorKind, + ctx: ButckContext, +} + +#[derive(thiserror::Error, Debug)] +pub enum ButckRWErrorKind { + #[error("No butck storage found")] + NoButckStorageFound, + + #[error("Chunking policy not specified")] + ChunkingPolicyNotSpecified, + + #[error("Cannot enable both MemmapRead and StreamRead")] + ReadingMethodAmbiguous, + + #[error("Multiple input files specified but only one output file allowed")] + OutputCountMismatch, + + #[error("Invalid bidx file format")] + InvalidBidxFormat, + + #[error("Chunk not found in storage: {0}")] + ChunkNotFound(String), + + #[error("Failed to rebuild file: {0}")] + RebuildFailed(String), + + #[error("Chunking failed: {0}")] + ChunkFailed(#[from] ChunkFailed), + + #[error("IO error: {0}")] + IOError(#[from] std::io::Error), +} + +impl std::fmt::Display for ButckRWError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.kind) + } +} + +impl ButckRWError { + pub fn ctx(&self) -> &ButckContext { + &self.ctx + } + + pub fn kind(&self) -> &ButckRWErrorKind { + &self.kind + } +} + +impl ButckRWErrorKind { + pub fn pack(self, ctx: ButckContext) -> ButckRWError { + ButckRWError { kind: self, ctx } + } +} diff --git a/src/chunker/rw/storage.rs b/src/chunker/rw/storage.rs new file mode 100644 index 0000000..13452d0 --- /dev/null +++ b/src/chunker/rw/storage.rs @@ -0,0 +1,88 @@ +pub mod build; +pub mod write; + +pub use build::build; +pub use write::write; + +use std::path::{Path, PathBuf}; + +/// Information about a chunk for index file +#[derive(Debug, Clone)] +pub struct ChunkInfo { + /// Index of the chunk in the file + pub index: usize, + /// Hash of the chunk (hex string) + pub hash: String, + /// Size of the chunk in bytes + pub size: usize, + /// Start position in the original file + 
pub start: usize, + /// End position in the original file (exclusive) + pub end: usize, +} + +/// 根据hash值计算chunk文件的存储路径 +/// +/// # 参数 +/// - `storage_dir`: 存储目录 +/// - `hash_hex`: chunk的hash值(16进制字符串) +/// +/// # 返回 +/// 返回chunk文件的完整路径 +pub fn get_chunk_path(storage_dir: &Path, hash_hex: &str) -> PathBuf { + let first_slice = &hash_hex[0..2]; + let second_slice = &hash_hex[2..4]; + storage_dir + .join(first_slice) + .join(second_slice) + .join(hash_hex) +} + +/// 根据hash字节数组计算chunk文件的存储路径 +/// +/// # 参数 +/// - `storage_dir`: 存储目录 +/// - `hash_bytes`: chunk的hash值(字节数组) +/// +/// # 返回 +/// 返回chunk文件的完整路径 +pub fn get_chunk_path_from_bytes(storage_dir: &Path, hash_bytes: &[u8; 32]) -> PathBuf { + let hash_hex = hex::encode(hash_bytes); + get_chunk_path(storage_dir, &hash_hex) +} + +/// 生成唯一的文件路径,如果文件已存在则添加数字后缀 +/// +/// # 参数 +/// - `output_dir`: 输出目录 +/// - `desired_filename`: 期望的文件名 +/// +/// # 返回 +/// 返回唯一的文件路径 +pub fn generate_unique_path(output_dir: &Path, desired_filename: &str) -> PathBuf { + let desired_path = output_dir.join(desired_filename); + let mut candidate = desired_path.clone(); + + let mut counter = 1; + while candidate.exists() { + let path_buf = PathBuf::from(desired_filename); + if let Some(stem) = path_buf.file_stem() { + if let Some(ext) = path_buf.extension() { + let ext_str = ext.to_string_lossy(); + let new_name = if ext_str.is_empty() { + format!("{}_{}", stem.to_string_lossy(), counter) + } else { + format!("{}.{}_{}", stem.to_string_lossy(), ext_str, counter) + }; + candidate = output_dir.join(new_name); + } else { + candidate = output_dir.join(format!("{}_{}", stem.to_string_lossy(), counter)); + } + } else { + candidate = output_dir.join(format!("{}_{}", desired_filename, counter)); + } + counter += 1; + } + + candidate +} diff --git a/src/chunker/rw/storage/build.rs b/src/chunker/rw/storage/build.rs new file mode 100644 index 0000000..7608b5c --- /dev/null +++ b/src/chunker/rw/storage/build.rs @@ -0,0 +1,250 @@ +use 
futures::future::join_all; +use just_progress::progress; +use log::{error, info, trace}; +use memmap2::Mmap; +use std::path::PathBuf; +use tokio::{fs::File, io::AsyncWriteExt}; + +use crate::{ + chunker::{ + constants::{BUTCK_INDEX_FILE_SUFFIX, BUTCK_INDEX_MAGIC}, + context::ButckContext, + rw::error::{ButckRWError, ButckRWErrorKind}, + rw::storage, + }, + utils::size_display::size_display, +}; + +pub async fn build(ctx: ButckContext) -> Result<(), ButckRWError> { + if ctx.storage_path.is_none() { + return Err(ButckRWErrorKind::NoButckStorageFound.pack(ctx)); + } + if ctx.file_paths.is_empty() { + return Err( + ButckRWErrorKind::RebuildFailed("No bidx files specified".to_string()).pack(ctx), + ); + } + + let tasks: Vec<_> = ctx + .file_paths + .iter() + .map(|bidx_path| async { + trace!( + "Preparing to rebuild from bidx file `{}`", + bidx_path.display() + ); + rebuild_from_bidx(bidx_path, &ctx).await + }) + .collect(); + + let results = join_all(tasks).await; + + for result in results { + if let Err(e) = result { + return Err(e.pack(ctx)); + } + } + + Ok(()) +} + +async fn rebuild_from_bidx( + bidx_path: &PathBuf, + ctx: &ButckContext, +) -> Result<(), ButckRWErrorKind> { + // Validate file extension + if let Some(ext) = bidx_path.extension() + && ext != BUTCK_INDEX_FILE_SUFFIX + { + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + + info!("Rebuilding from bidx file: {}", bidx_path.display()); + + // Read bidx file content + let bidx_content = if ctx.memmap_read { + let file = File::open(bidx_path).await?; + let mmap = unsafe { Mmap::map(&file)? }; + mmap.to_vec() + } else { + tokio::fs::read(bidx_path).await? 
+ }; + + // Verify file size includes at least the header + if bidx_content.len() < 6 { + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + + // Validate MAGIC bytes + if bidx_content[0..4] != BUTCK_INDEX_MAGIC { + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + + // Read filename + let filename_len = u16::from_le_bytes([bidx_content[4], bidx_content[5]]) as usize; + if bidx_content.len() < 6 + filename_len { + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + let filename_bytes = &bidx_content[6..6 + filename_len]; + let original_filename = String::from_utf8(filename_bytes.to_vec()) + .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?; + + trace!("Original filename from bidx: {}", original_filename); + + let hash_data_start = 6 + filename_len; + let hash_data = &bidx_content[hash_data_start..]; + + // Verify that hash data size is a multiple of 32 bytes + if hash_data.len() % 32 != 0 { + return Err(ButckRWErrorKind::InvalidBidxFormat); + } + + let chunk_count = hash_data.len() / 32; + info!("Found {} chunks in bidx file", chunk_count); + + let mut chunk_hashes = Vec::with_capacity(chunk_count); + for i in 0..chunk_count { + let start = i * 32; + let end = start + 32; + let hash_bytes: [u8; 32] = hash_data[start..end] + .try_into() + .map_err(|_| ButckRWErrorKind::InvalidBidxFormat)?; + chunk_hashes.push(hash_bytes); + } + + trace!("Parsed {} chunk hashes", chunk_hashes.len()); + + // Determine output file path + let output_path = if let Some(output_file) = &ctx.output_file { + output_file.clone() + } else { + // Use the original filename read from the bidx file + storage::generate_unique_path(&ctx.output_dir, &original_filename) + }; + + info!("Rebuilding file to: {}", output_path.display()); + + let progress_name = format!("Rebuild `{}`", output_path.display()); + progress::update_progress(progress_name.as_str(), 0.0); + let step = 1.0 / chunk_count as f64; + + let mut tasks = Vec::with_capacity(chunk_count); + + for (index, hash_bytes) in 
chunk_hashes.iter().enumerate() { + let hash_hex = hex::encode(hash_bytes); + tasks.push(read_chunk( + progress_name.as_str(), + step, + hash_hex, + &ctx.output_dir, + index, + )); + } + + trace!("Starting parallel read of {} chunks", tasks.len()); + let results = join_all(tasks).await; + trace!("All read tasks completed"); + + // Collect chunk data and verify order + let mut chunk_data_list = Vec::with_capacity(chunk_count); + let mut success_count = 0; + + for (index, result) in results.into_iter().enumerate() { + match result { + Ok(chunk_data) => { + let chunk_size = chunk_data.len(); + success_count += 1; + chunk_data_list.push((index, chunk_data)); + trace!( + "Chunk {} read successfully, size: {} bytes", + index, chunk_size + ); + } + Err(e) => { + error!("Failed to read chunk {}: {:?}", index, e); + return Err(e); + } + } + } + + if success_count != chunk_count { + return Err(ButckRWErrorKind::ChunkNotFound(format!( + "Only {}/{} chunks found in storage", + success_count, chunk_count + ))); + } + + info!("All {} chunks read successfully", success_count); + + // Sort by index and concatenate files + chunk_data_list.sort_by_key(|(index, _)| *index); + + // Calculate total size + let total_size: usize = chunk_data_list.iter().map(|(_, data)| data.len()).sum(); + let (total_value, total_unit) = size_display(total_size); + info!( + "Rebuilding file: {} chunks, total size: {:.2} {} ({} bytes)", + chunk_count, total_value, total_unit, total_size + ); + + // Write to output file + trace!("Writing to output file: {}", output_path.display()); + let mut output_file = File::create(&output_path).await?; + + for (index, chunk_data) in chunk_data_list { + trace!("Writing chunk {} ({} bytes)", index, chunk_data.len()); + output_file.write_all(&chunk_data).await?; + progress::increase(progress_name.as_str(), step as f32); + } + + output_file.flush().await?; + + info!("File successfully rebuilt: {}", output_path.display()); + progress::complete(progress_name.as_str()); + + 
Ok(()) +} + +/// Read a single chunk from storage +async fn read_chunk( + progress_name: &str, + step: f64, + hash_hex: String, + storage_dir: &PathBuf, + chunk_index: usize, +) -> Result<Vec<u8>, ButckRWErrorKind> { + trace!("read_chunk[{}]: Starting, hash: {}", chunk_index, hash_hex); + + // Build chunk file path + let file_path = storage::get_chunk_path(storage_dir, &hash_hex); + + trace!( + "read_chunk[{}]: Looking for file at: {}", + chunk_index, + file_path.display() + ); + + // Read chunk file + match tokio::fs::read(&file_path).await { + Ok(data) => { + trace!( + "read_chunk[{}]: Read {} bytes successfully", + chunk_index, + data.len() + ); + progress::increase(progress_name, step as f32); + Ok(data) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + trace!("read_chunk[{}]: File not found", chunk_index); + Err(ButckRWErrorKind::ChunkNotFound(format!( + "Chunk {} (hash: {}) not found in storage", + chunk_index, hash_hex + ))) + } + Err(e) => { + trace!("read_chunk[{}]: Read failed: {:?}", chunk_index, e); + Err(ButckRWErrorKind::IOError(e)) + } + } +} diff --git a/src/chunker/rw/storage/write.rs b/src/chunker/rw/storage/write.rs new file mode 100644 index 0000000..8b3acc7 --- /dev/null +++ b/src/chunker/rw/storage/write.rs @@ -0,0 +1,118 @@ +use std::{collections::HashMap, path::PathBuf}; + +use log::trace; + +use crate::{ + chunker::{ + constants::BUTCK_INDEX_FILE_SUFFIX, + context::ButckContext, + rw::{ + error::{ButckRWError, ButckRWErrorKind}, + storage::generate_unique_path, + }, + }, + storage::{simple::write_file_simple, stream::write_file_stream}, +}; + +pub mod simple; +pub mod stream; + +pub async fn write(ctx: ButckContext) -> Result<(), ButckRWError> { + if ctx.storage_path.is_none() { + return Err(ButckRWErrorKind::NoButckStorageFound.pack(ctx)); + } + if ctx.policy_name.is_none() { + return Err(ButckRWErrorKind::ChunkingPolicyNotSpecified.pack(ctx)); + } + if ctx.file_paths.len() > 1 && ctx.output_file.is_some() { + return 
Err(ButckRWErrorKind::OutputCountMismatch.pack(ctx)); + } + + // Cannot enable both memory-mapped and stream reading simultaneously. + // Stream reading uses butck_policies::chunk_stream_with, + // while memory-mapped or default reading uses butck_policies::chunk_with. + if ctx.memmap_read && ctx.stream_read.is_some() { + return Err(ButckRWErrorKind::ReadingMethodAmbiguous.pack(ctx)); + } + + let param_refs: HashMap<&str, &str> = ctx + .params + .iter() + .map(|(k, v)| (k.as_str(), v.as_str())) + .collect(); + + let tasks: Vec<_> = ctx + .file_paths + .iter() + .map(|path| async { + trace!("Preparing to write file `{}`", path.display()); + write_file(path, &ctx, ¶m_refs).await + }) + .collect(); + + let results = futures::future::join_all(tasks).await; + + for result in results { + if let Err(e) = result { + return Err(e.pack(ctx)); + } + } + + Ok(()) +} + +async fn write_file( + path: &PathBuf, + ctx: &ButckContext, + params: &HashMap<&str, &str>, +) -> Result<(), ButckRWErrorKind> { + if let Some(stream_read_size) = ctx.stream_read { + write_file_stream(path, stream_read_size, ctx, params).await + } else { + write_file_simple(path, ctx, params).await + } +} + +pub fn get_index_file_name(path: &PathBuf, ctx: &ButckContext) -> PathBuf { + let output_file = if let Some(output_file) = &ctx.output_file { + return output_file.clone(); + } else { + ctx.output_dir.join(path.file_name().unwrap_or_default()) + }; + + // Append .bidx suffix directly to the original file name + let desired_filename = if let Some(ext) = output_file.extension() { + let ext_str = ext.to_string_lossy(); + if ext_str.is_empty() { + format!( + "{}.{}", + output_file + .file_stem() + .unwrap_or_default() + .to_string_lossy(), + BUTCK_INDEX_FILE_SUFFIX + ) + } else { + format!( + "{}.{}.{}", + output_file + .file_stem() + .unwrap_or_default() + .to_string_lossy(), + ext_str, + BUTCK_INDEX_FILE_SUFFIX + ) + } + } else { + format!( + "{}.{}", + output_file + .file_name() + .unwrap_or_default() + 
.to_string_lossy(), + BUTCK_INDEX_FILE_SUFFIX + ) + }; + + generate_unique_path(&ctx.output_dir, &desired_filename) +} diff --git a/src/chunker/rw/storage/write/simple.rs b/src/chunker/rw/storage/write/simple.rs new file mode 100644 index 0000000..75b9bd7 --- /dev/null +++ b/src/chunker/rw/storage/write/simple.rs @@ -0,0 +1,368 @@ +use futures::future::join_all; +use just_progress::progress; +use log::{error, info, trace}; +use std::{collections::HashMap, path::PathBuf}; +use tokio::{fs::File, io::AsyncReadExt}; + +use crate::{ + chunker::{ + context::ButckContext, + rw::{error::ButckRWErrorKind, storage}, + }, + core::hash::ChunkWriteHash, + storage::get_index_file_name, + utils::size_display::size_display, +}; + +pub async fn write_file_simple( + path: &PathBuf, + ctx: &ButckContext, + params: &HashMap<&str, &str>, +) -> Result<(), ButckRWErrorKind> { + read_file(path, ctx, params).await?; + Ok(()) +} + +async fn read_file( + path: &PathBuf, + ctx: &ButckContext, + params: &HashMap<&str, &str>, +) -> Result<(), ButckRWErrorKind> { + let mut file = File::open(path).await?; + + // Use butck_policies::chunk_with to locate chunk boundaries in the file + if ctx.memmap_read { + let mmap = unsafe { memmap2::Mmap::map(&file)? 
}; + let raw_data = &mmap[..]; + let (chunk_boundaries, total_bytes) = + (get_boundaries(raw_data, ctx, params).await?, raw_data.len()); + + // If output boundaries, do not execute actual write logic + if ctx.display_boundaries { + display_boundaries(&chunk_boundaries, total_bytes).await; + return Ok(()); + } else { + write_file_to_storage(path, ctx, chunk_boundaries, raw_data).await?; + } + } else { + let mut contents = Vec::new(); + file.read_to_end(&mut contents).await?; + let raw_data = &contents[..]; + let (chunk_boundaries, total_bytes) = + (get_boundaries(raw_data, ctx, params).await?, raw_data.len()); + + // If output boundaries, do not execute actual write logic + if ctx.display_boundaries { + display_boundaries(&chunk_boundaries, total_bytes).await; + return Ok(()); + } else { + write_file_to_storage(path, ctx, chunk_boundaries, raw_data).await?; + } + }; + progress::clear_all(); + Ok(()) +} + +async fn write_file_to_storage( + path: &PathBuf, + ctx: &ButckContext, + chunk_boundaries: Vec<u32>, + raw_data: &[u8], +) -> Result<(), ButckRWErrorKind> { + let output_index_file = get_index_file_name(path, ctx); + + let chunk_count = chunk_boundaries.len() + 1; + let progress_name = format!("Write `{}`", path.display()); + + progress::update_progress(progress_name.as_str(), 0.0); + let step = 1.0 / chunk_count as f64; + + trace!("chunks_count={}", chunk_count); + trace!("chunk_hash={:?}", ctx.chunk_hash); + trace!("file_size={}", raw_data.len()); + trace!("output_index_file={}", output_index_file.display()); + trace!("policy_name={:?}", ctx.policy_name); + trace!("storage_dir={}", ctx.output_dir.display()); + + info!( + "{} chunks will be written to {}", + chunk_count, + ctx.output_dir.display() + ); + + tokio::fs::create_dir_all(&ctx.output_dir).await?; + trace!("Output directory created or already exists"); + + let mut tasks = Vec::new(); + let mut start = 0; + let mut chunk_index = 0; + + trace!("Processing chunk boundaries:"); + + for &boundary in 
&chunk_boundaries { + let end = boundary as usize; + if start < end && end <= raw_data.len() { + let chunk_data = &raw_data[start..end]; + trace!( + "Chunk {}: bytes {}..{} (size: {} bytes)", + chunk_index, + start, + end - 1, + end - start + ); + tasks.push(write_chunk( + progress_name.as_str(), + step, + chunk_data, + &ctx.output_dir, + &ctx.chunk_hash, + chunk_index, + start, + end, + )); + chunk_index += 1; + } else { + trace!( + "Skipping invalid chunk boundary: start={}, end={}, data_len={}", + start, + end, + raw_data.len() + ); + } + start = end; + } + + if start < raw_data.len() { + let chunk_data = &raw_data[start..]; + trace!( + "Chunk {}: bytes {}..{} (size: {} bytes) - final chunk", + chunk_index, + start, + raw_data.len() - 1, + raw_data.len() - start + ); + tasks.push(write_chunk( + progress_name.as_str(), + step, + chunk_data, + &ctx.output_dir, + &ctx.chunk_hash, + chunk_index, + start, + raw_data.len(), + )); + } + + trace!("Total chunks prepared for writing: {}", tasks.len()); + + trace!("Starting parallel write of {} chunks", tasks.len()); + let results = join_all(tasks).await; + trace!("All write tasks completed"); + + let mut success_count = 0; + let mut chunk_infos = Vec::new(); + + for result in results { + match result { + Ok(chunk_info) => { + success_count += 1; + chunk_infos.push(chunk_info); + } + Err(e) => { + trace!("Chunk write failed: {:?}", e); + return Err(e); + } + } + } + + info!("All {} chunks written successfully", success_count); + + // Write index file + trace!("Writing index file to: {}", output_index_file.display()); + if let Err(e) = write_index_file(&output_index_file, &chunk_infos, path).await { + error!("Failed to write index file: {}", e); + return Err(ButckRWErrorKind::IOError(e)); + } + info!("Index file written to: {}", output_index_file.display()); + + trace!("write_file_to_storage completed successfully"); + + progress::complete(progress_name.as_str()); + + Ok(()) +} + +async fn write_chunk( + progress_name: 
&str, + step: f64, + chunk_data: &[u8], + output_dir: &PathBuf, + chunk_hash: &ChunkWriteHash, + chunk_index: usize, + start: usize, + end: usize, +) -> Result<crate::chunker::rw::storage::ChunkInfo, ButckRWErrorKind> { + trace!( + "write_chunk[{}]: Starting, data size: {} bytes", + chunk_index, + chunk_data.len() + ); + + trace!( + "write_chunk[{}]: Computing hash with algorithm: {:?}", + chunk_index, chunk_hash + ); + let hash_bytes = chunk_hash.hash(chunk_data); + trace!( + "write_chunk[{}]: Hash computed: {:?}", + chunk_index, hash_bytes + ); + + let hash_hex = hex::encode(hash_bytes); + trace!("write_chunk[{}]: Hash hex: {}", chunk_index, hash_hex); + + let file_path = storage::get_chunk_path(output_dir, &hash_hex); + + if let Some(parent_dir) = file_path.parent() { + trace!( + "write_chunk[{}]: Creating directory structure: {}", + chunk_index, + parent_dir.display() + ); + tokio::fs::create_dir_all(parent_dir).await?; + trace!("write_chunk[{}]: Directory created", chunk_index); + } + + trace!( + "write_chunk[{}]: File path: {}", + chunk_index, + file_path.display() + ); + + trace!( + "write_chunk[{}]: Writing {} bytes to file", + chunk_index, + chunk_data.len() + ); + if !file_path.exists() { + tokio::fs::write(&file_path, chunk_data).await?; + } else { + trace!( + "write_chunk[{}]: File already exists, skipping", + chunk_index + ); + } + trace!("write_chunk[{}]: File written successfully", chunk_index); + progress::increase(progress_name, step as f32); + Ok(crate::chunker::rw::storage::ChunkInfo { + index: chunk_index, + hash: hash_hex, + size: chunk_data.len(), + start, + end, + }) +} + +async fn get_boundaries<'a>( + raw_data: &[u8], + ctx: &ButckContext, + params: &HashMap<&str, &str>, +) -> Result<Vec<u32>, ButckRWErrorKind> { + let policy_name = ctx.policy_name.as_ref().unwrap().as_str(); + match butck_policies::chunk_with(policy_name, raw_data, params).await { + Ok(s) => Ok(s), + Err(e) => Err(ButckRWErrorKind::ChunkFailed(e)), + } +} + +async fn 
write_index_file( + index_path: &PathBuf, + chunk_infos: &[crate::chunker::rw::storage::ChunkInfo], + original_file_path: &PathBuf, +) -> Result<(), std::io::Error> { + use std::io::Write; + + let file = std::fs::File::create(index_path)?; + let mut writer = std::io::BufWriter::new(file); + + // Write header: [u8; 4] magic + [u16] filename length + [u8] filename bytes + use crate::chunker::constants::BUTCK_INDEX_MAGIC; + + // Write magic bytes + writer.write_all(&BUTCK_INDEX_MAGIC)?; + + // Get original filename as bytes + let filename = original_file_path + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown"); + let filename_bytes = filename.as_bytes(); + + // Write filename length as u16 (little-endian) + if filename_bytes.len() > u16::MAX as usize { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Filename too long: {} bytes", filename_bytes.len()), + )); + } + let filename_len = filename_bytes.len() as u16; + writer.write_all(&filename_len.to_le_bytes())?; + + // Write filename bytes + writer.write_all(filename_bytes)?; + + // Write chunk hashes: [u8; 32][u8; 32][u8; 32]... 
+ for chunk_info in chunk_infos { + // Convert hex hash to bytes + match hex::decode(&chunk_info.hash) { + Ok(hash_bytes) => { + if hash_bytes.len() == 32 { + writer.write_all(&hash_bytes)?; + } else { + // Pad or truncate to 32 bytes if needed + let mut fixed_hash = [0u8; 32]; + let len = hash_bytes.len().min(32); + fixed_hash[..len].copy_from_slice(&hash_bytes[..len]); + writer.write_all(&fixed_hash)?; + } + } + Err(e) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Failed to decode hash hex: {}", e), + )); + } + } + } + + Ok(()) +} + +async fn display_boundaries(chunk_boundaries: &Vec<u32>, total_bytes: usize) { + let total_chunks = chunk_boundaries.len() + 1; + let (total_value, total_unit) = size_display(total_bytes); + info!( + "{} chunks, ({:.2} {}, {})", + total_chunks, total_value, total_unit, total_bytes + ); + let mut start = 0; + chunk_boundaries.iter().for_each(|p| { + let next = *p as usize; + let (size_value, size_unit) = size_display(next - start); + info!( + "{} - {} (size: {:.2} {})", + start, + next - 1, + size_value, + size_unit + ); + start = next; + }); + let last = start; + let r#final = total_bytes; + let (size_value, size_unit) = size_display(total_bytes - start); + info!( + "{} - {} (size: {:.2} {})", + last, r#final, size_value, size_unit + ); +} diff --git a/src/chunker/rw/storage/write/stream.rs b/src/chunker/rw/storage/write/stream.rs new file mode 100644 index 0000000..020cfcd --- /dev/null +++ b/src/chunker/rw/storage/write/stream.rs @@ -0,0 +1,12 @@ +use std::{collections::HashMap, path::PathBuf}; + +use crate::chunker::{context::ButckContext, rw::error::ButckRWErrorKind}; + +pub async fn write_file_stream( + path: &PathBuf, + stream_read_size: u32, + ctx: &ButckContext, + params: &HashMap<&str, &str>, +) -> Result<(), ButckRWErrorKind> { + todo!() +} diff --git a/src/core.rs b/src/core.rs new file mode 100644 index 0000000..ec5d33c --- /dev/null +++ b/src/core.rs @@ -0,0 +1 @@ +pub mod hash; 
diff --git a/src/core/hash.rs b/src/core/hash.rs new file mode 100644 index 0000000..36a62b3 --- /dev/null +++ b/src/core/hash.rs @@ -0,0 +1,38 @@ +use blake3::Hasher as Blake3Hasher; +use sha2::{Digest as Sha2Digest, Sha256}; + +const SALT: &[u8] = b"Dude@"; + +#[derive(Debug, Default)] +pub enum ChunkWriteHash { + #[default] + Blake3, + Sha256, +} + +impl ChunkWriteHash { + pub fn hash(&self, d: &[u8]) -> [u8; 32] { + match self { + ChunkWriteHash::Blake3 => hash_blake3(d), + ChunkWriteHash::Sha256 => hash_sha256(d), + } + } +} + +/// Compute the Blake3 hash of the data with a salt +/// Returns a 32-byte hash value +pub fn hash_blake3(d: &[u8]) -> [u8; 32] { + let mut hasher = Blake3Hasher::new(); + hasher.update(SALT); + hasher.update(d); + *hasher.finalize().as_bytes() +} + +/// Compute the SHA-256 hash of the data with a salt +/// Returns a 32-byte hash value +pub fn hash_sha256(d: &[u8]) -> [u8; 32] { + let mut hasher = Sha256::new(); + hasher.update(SALT); + hasher.update(d); + hasher.finalize().into() +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e4a55c2 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,11 @@ +pub mod chunker; +pub mod core; +pub mod log; +pub mod macros; +pub mod utils; + +pub mod storage { + pub use crate::chunker::rw::error::*; + pub use crate::chunker::rw::storage::build::*; + pub use crate::chunker::rw::storage::write::*; +} diff --git a/src/log.rs b/src/log.rs new file mode 100644 index 0000000..5fc6160 --- /dev/null +++ b/src/log.rs @@ -0,0 +1,33 @@ +use env_logger::Builder; +use log::Level; +use std::io::Write; + +pub fn init_logger(level_filter: Option<log::LevelFilter>) { + let mut builder = match level_filter { + Some(f) => { + let mut b = Builder::new(); + b.filter_level(f); + b + } + None => return, + }; + + builder + .format(|buf, record| { + let level = record.level(); + let args = record.args(); + + let (prefix, color_code) = match level { + Level::Error => ("error: ", "\x1b[1;31m"), + Level::Warn => 
/// Report whether any of the given flags occurs in `$args`, removing every
/// occurrence of each listed flag from `$args` along the way.
///
/// Evaluates to `true` if at least one flag was present.
#[macro_export]
macro_rules! special_flag {
    ($args:expr, $($flag:expr),+) => {{
        let mut found = false;
        $(
            let flag = $flag;
            // A shrinking argument list means the flag was present.
            let before = $args.len();
            $args.retain(|arg| arg != flag);
            if $args.len() != before {
                found = true;
            }
        )+
        found
    }};
}

/// Extract the value that follows the first matching flag in `$args`.
///
/// Flags are tried in the order given; only the first flag that occurs is
/// handled, and only its first occurrence. The flag and its value are removed
/// from `$args`. Evaluates to `Some(value)`, or to `None` when no flag
/// occurs or the matching flag is the last argument (in which case just the
/// flag is removed).
#[macro_export]
macro_rules! special_argument {
    ($args:expr, $($flag:expr),+) => {{
        let mut value: Option<String> = None;
        let mut found = false;
        $(
            let flag = $flag;
            if !found {
                if let Some(at) = $args.iter().position(|arg| *arg == flag) {
                    // Drop the flag; its value (if any) then sits at `at`.
                    $args.remove(at);
                    value = if at < $args.len() {
                        Some($args.remove(at))
                    } else {
                        None
                    };
                    #[allow(unused_assignments)]
                    {
                        found = true;
                    }
                }
            }
        )+
        value
    }};
}
/// Scale a byte count to the largest binary unit it reaches.
///
/// Returns the scaled value together with its unit label: `"TB"`, `"GB"`,
/// `"MB"`, `"KB"` (each a power of 1024), or `"B"` for anything below 1024.
pub fn size_display<'a>(total_bytes: usize) -> (f64, &'a str) {
    const KB: f64 = 1024.0;
    // Largest unit first so the first threshold reached wins.
    const SCALES: [(&str, f64); 4] = [
        ("TB", KB * KB * KB * KB),
        ("GB", KB * KB * KB),
        ("MB", KB * KB),
        ("KB", KB),
    ];

    let bytes = total_bytes as f64;
    for &(unit, factor) in SCALES.iter() {
        if bytes >= factor {
            return (bytes / factor, unit);
        }
    }
    (bytes, "B")
}
