1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
//! Bidx (Butchunker Index) file format utilities
//!
//! The bidx file format:
//! - Magic number: [u8; 4] = b"G00d"
//! - Original filename length: u16 (little-endian)
//! - Original filename: [u8] (UTF-8, no null terminator)
//! - Chunk hashes: [u8; 32][u8; 32][u8; 32]... (binary hashes, not hex strings),
//!   repeated until end of file; there is no chunk-count field
use std::io::{self, Write};
use std::path::Path;
use crate::chunker::constants::BUTCK_INDEX_MAGIC;
use crate::chunker::rw::storage::ChunkInfo;
/// Write a bidx index file.
///
/// The following is written to `index_path`, in order:
/// 1. the `BUTCK_INDEX_MAGIC` bytes,
/// 2. the stored filename's length as a little-endian `u16`,
/// 3. the stored filename's bytes (UTF-8, no terminator),
/// 4. one 32-byte binary hash per entry of `chunk_infos`, in slice order.
///
/// Only the final path component of `original_file_path` is stored. When
/// that component is missing or is not valid UTF-8, the placeholder
/// `"unknown"` is stored instead.
///
/// Every input is validated *before* `index_path` is created, so a rejected
/// filename or hash never truncates an existing index file and never leaves
/// a partially written one behind.
///
/// # Errors
///
/// - `io::ErrorKind::InvalidInput` if the filename is longer than
///   `u16::MAX` bytes.
/// - `io::ErrorKind::InvalidData` if a chunk hash is not valid hex or does
///   not decode to exactly 32 bytes.
/// - Any I/O error raised while creating, writing or flushing the file.
pub fn write_bidx_file(
    index_path: &Path,
    chunk_infos: &[ChunkInfo],
    original_file_path: &Path,
) -> io::Result<()> {
    // Get original filename (best effort: fall back to a placeholder).
    let filename = original_file_path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("unknown");
    let filename_bytes = filename.as_bytes();

    // Validate filename length: it has to fit the u16 length field.
    if filename_bytes.len() > u16::MAX as usize {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("Filename too long: {} bytes", filename_bytes.len()),
        ));
    }
    // Lossless: the bound was checked just above.
    let filename_len = filename_bytes.len() as u16;

    // Decode and validate every hash up front into one contiguous buffer.
    // Doing this before touching the filesystem means bad input cannot
    // clobber an index that already exists at `index_path`.
    let mut hash_payload: Vec<u8> = Vec::with_capacity(chunk_infos.len() * 32);
    for chunk_info in chunk_infos {
        // Convert hex hash to its binary representation.
        let hash_bytes = hex::decode(&chunk_info.hash).map_err(|e| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Failed to decode hash hex '{}': {}", chunk_info.hash, e),
            )
        })?;

        // Ensure hash is exactly 32 bytes.
        if hash_bytes.len() != 32 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Hash must be 32 bytes, got {} bytes", hash_bytes.len()),
            ));
        }

        hash_payload.extend_from_slice(&hash_bytes);
    }

    // All inputs are known-good: now create the file and emit the sections.
    let file = std::fs::File::create(index_path)?;
    let mut writer = io::BufWriter::new(file);

    // Magic bytes
    writer.write_all(&BUTCK_INDEX_MAGIC)?;
    // Filename length as little-endian u16, then the filename itself
    writer.write_all(&filename_len.to_le_bytes())?;
    writer.write_all(filename_bytes)?;
    // Chunk hashes, back to back
    writer.write_all(&hash_payload)?;

    // Flush explicitly: BufWriter's Drop would swallow a write error.
    writer.flush()?;
    Ok(())
}
/// Read a bidx index file.
///
/// Loads the whole file at `index_path` into memory and parses, in order:
/// the magic bytes, a little-endian `u16` filename length, the UTF-8
/// filename, and then 32-byte binary hashes up to the end of the file.
///
/// Returns the stored filename together with one `ChunkInfo` per hash. Each
/// `ChunkInfo::index` is the hash's zero-based position in the file and
/// `ChunkInfo::hash` is the hex encoding of its 32 bytes.
///
/// # Errors
///
/// - Any I/O error from opening or reading the file.
/// - `io::ErrorKind::InvalidData` if the file is too short for any header
///   field, the magic bytes do not match `BUTCK_INDEX_MAGIC`, the filename is
///   not valid UTF-8, or the bytes after the filename are not a whole
///   multiple of 32.
pub fn read_bidx_file(index_path: &Path) -> io::Result<(String, Vec<ChunkInfo>)> {
    use std::io::Read;

    const MAGIC_LEN: usize = 4;
    const NAME_LEN_FIELD: usize = 2;
    const HASH_LEN: usize = 32;

    // Every format violation below is reported with the same error kind.
    fn malformed(msg: impl Into<String>) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg.into())
    }

    let mut raw = Vec::new();
    std::fs::File::open(index_path)?.read_to_end(&mut raw)?;

    // --- magic number ---
    if raw.len() < MAGIC_LEN {
        return Err(malformed("File too short to contain magic number"));
    }
    if &raw[0..4] != BUTCK_INDEX_MAGIC {
        return Err(malformed("Invalid magic number"));
    }
    let after_magic = &raw[MAGIC_LEN..];

    // --- filename length (u16, little-endian) ---
    if after_magic.len() < NAME_LEN_FIELD {
        return Err(malformed("File too short to contain filename length"));
    }
    let name_len = usize::from(u16::from_le_bytes([after_magic[0], after_magic[1]]));
    let after_len = &after_magic[NAME_LEN_FIELD..];

    // --- filename ---
    if after_len.len() < name_len {
        return Err(malformed("File too short to contain filename"));
    }
    let (name_bytes, hash_region) = after_len.split_at(name_len);
    let filename = match String::from_utf8(name_bytes.to_vec()) {
        Ok(name) => name,
        Err(e) => {
            return Err(malformed(format!("Filename is not valid UTF-8: {}", e)));
        }
    };

    // --- chunk hashes ---
    // Whatever follows the filename must split into whole 32-byte hashes;
    // any leftover tail means the file is truncated or corrupt.
    let digests = hash_region.chunks_exact(HASH_LEN);
    let leftover = digests.remainder().len();
    if leftover != 0 {
        return Err(malformed(format!(
            "File contains {} extra bytes after chunk hashes",
            leftover
        )));
    }

    let chunk_infos: Vec<ChunkInfo> = digests
        .enumerate()
        .map(|(index, digest)| ChunkInfo {
            index,
            hash: hex::encode(digest),
        })
        .collect();

    Ok((filename, chunk_infos))
}
|