1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
use sha1::{Digest, Sha1};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tokio::fs::File;
use tokio::io::{AsyncReadExt, BufReader};
use tokio::task;
#[derive(Debug, Clone)]
pub struct Sha1Result {
pub file_path: PathBuf,
pub hash: String,
}
/// Calc SHA1 hash of a single file
pub async fn calc_sha1<P: AsRef<Path>>(
path: P,
buffer_size: usize,
) -> Result<Sha1Result, Box<dyn std::error::Error + Send + Sync>> {
let file_path = path.as_ref().to_string_lossy().to_string();
// Open file asynchronously
let file = File::open(&path).await?;
let mut reader = BufReader::with_capacity(buffer_size, file);
let mut hasher = Sha1::new();
let mut buffer = vec![0u8; buffer_size];
// Read file in chunks and update hash asynchronously
loop {
let n = reader.read(&mut buffer).await?;
if n == 0 {
break;
}
hasher.update(&buffer[..n]);
}
let hash_result = hasher.finalize();
// Convert to hex string
let hash_hex = hash_result
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>();
Ok(Sha1Result {
file_path: file_path.into(),
hash: hash_hex,
})
}
/// Calc SHA1 hashes for multiple files using multi-threading
pub async fn calc_sha1_multi<P, I>(
paths: I,
buffer_size: usize,
) -> Result<Vec<Sha1Result>, Box<dyn std::error::Error + Send + Sync>>
where
P: AsRef<Path> + Send + Sync + 'static,
I: IntoIterator<Item = P>,
{
let buffer_size = Arc::new(buffer_size);
// Collect all file paths
let file_paths: Vec<P> = paths.into_iter().collect();
if file_paths.is_empty() {
return Ok(Vec::new());
}
// Create tasks for each file
let tasks: Vec<_> = file_paths
.into_iter()
.map(|path| {
let buffer_size = Arc::clone(&buffer_size);
task::spawn(async move { calc_sha1(path, *buffer_size).await })
})
.collect();
// Execute tasks with concurrency limit using join_all
let results: Vec<Result<Sha1Result, Box<dyn std::error::Error + Send + Sync>>> =
futures::future::join_all(tasks)
.await
.into_iter()
.map(|task_result| match task_result {
Ok(Ok(calc_result)) => Ok(calc_result),
Ok(Err(e)) => Err(e),
Err(e) => Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>),
})
.collect();
// Check for any errors and collect successful results
let mut successful_results = Vec::new();
for result in results {
match result {
Ok(success) => successful_results.push(success),
Err(e) => return Err(e),
}
}
Ok(successful_results)
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
#[tokio::test]
async fn test_sha1_accuracy() {
// Test file path relative to the crate root
let test_file_path = "res/story.txt";
let expected_hash_path = "res/story.sha1";
// Calculate SHA1 hash
let result = calc_sha1(test_file_path, 8192)
.await
.expect("Failed to calculate SHA1");
// Read expected hash from file
let expected_hash = fs::read_to_string(expected_hash_path)
.expect("Failed to read expected hash file")
.trim()
.to_string();
// Verify the calculated hash matches expected hash
assert_eq!(
result.hash, expected_hash,
"SHA1 hash mismatch for test file"
);
println!("Test file: {}", result.file_path.display());
println!("Calculated hash: {}", result.hash);
println!("Expected hash: {}", expected_hash);
}
#[tokio::test]
async fn test_sha1_empty_file() {
// Create a temporary empty file for testing
let temp_file = "test_empty.txt";
fs::write(temp_file, "").expect("Failed to create empty test file");
let result = calc_sha1(temp_file, 4096)
.await
.expect("Failed to calculate SHA1 for empty file");
// SHA1 of empty string is "da39a3ee5e6b4b0d3255bfef95601890afd80709"
let expected_empty_hash = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
assert_eq!(
result.hash, expected_empty_hash,
"SHA1 hash mismatch for empty file"
);
// Clean up
fs::remove_file(temp_file).expect("Failed to remove temporary test file");
}
#[tokio::test]
async fn test_sha1_simple_text() {
// Create a temporary file with simple text
let temp_file = "test_simple.txt";
let test_content = "Hello, SHA1!";
fs::write(temp_file, test_content).expect("Failed to create simple test file");
let result = calc_sha1(temp_file, 4096)
.await
.expect("Failed to calculate SHA1 for simple text");
// Note: This test just verifies that the function works without errors
// The actual hash value is not critical for this test
println!("Simple text test - Calculated hash: {}", result.hash);
// Clean up
fs::remove_file(temp_file).expect("Failed to remove temporary test file");
}
#[tokio::test]
async fn test_sha1_multi_files() {
// Test multiple files calculation
let test_files = vec!["res/story.txt"];
let results = calc_sha1_multi(test_files, 8192)
.await
.expect("Failed to calculate SHA1 for multiple files");
assert_eq!(results.len(), 1, "Should have calculated hash for 1 file");
// Read expected hash from file
let expected_hash = fs::read_to_string("res/story.sha1")
.expect("Failed to read expected hash file")
.trim()
.to_string();
assert_eq!(
results[0].hash, expected_hash,
"SHA1 hash mismatch in multi-file test"
);
}
}
|