headless_lms_utils/
folder_checksum.rs

1/*!
2Provides functionality for checksumming directory trees.
3
This is meant for checking whether an exercise in an exercise template repository
has changed after a refresh.
6*/
7
8#[cfg(unix)]
9use std::os::unix::fs::PermissionsExt;
10use std::{fs::Permissions, path::Path};
11
12use crate::prelude::*;
13use blake3::Hash;
14use futures::StreamExt;
15use tokio::{fs::File, io::BufReader};
16use tokio_util::io::ReaderStream;
17use walkdir::WalkDir;
18
19/**
20Recursively hashes a folder returning a checksum.
21
22The hashing function takes into account all files, folders, and symlinks. All
23contents, the file type, and the unix file mode are included as well.
24
25Please note that if you have very large files in the directory, the running time
26might take a while since reading a lot of data from disk is not fast.
27*/
28pub async fn hash_folder(root_path: &Path) -> UtilResult<Hash> {
29    // Blake3 hasher lets us build the hash incrementally, avoiding the need to load everything to be hashed to memory at once.
30    let mut hasher = blake3::Hasher::new();
31
32    let walker = WalkDir::new(root_path)
33        // Instead of following a symbolic link, we just hash the link itself.
34        // Following the link would lead to recursion problems.
35        .follow_links(false)
36        .max_open(10)
37        .contents_first(false)
38        // Paths are sorted to make sure we process them in the same order on all systems.
39        .sort_by_file_name();
40    // Note that the filesystem operations used for walking the tree are executed synchronously.
41    // This is because the walkdir library has no async support at the moment.
42    for entry in walker {
43        let entry = entry?;
44        let metadata = entry.metadata()?;
45        let file_type = metadata.file_type();
46        let permissions = metadata.permissions();
47        let full_path = entry.path();
48
49        // Metadata to be included in the hash
50        let directory = file_type.is_dir();
51        let file = file_type.is_file();
52        let symlink = file_type.is_symlink();
53        let permissions_mode = determine_permissions_mode_for_hashing(&permissions);
54        let relative_path = full_path.strip_prefix(root_path)?;
55
56        // The extra separators at the end and at the beginning are to prevent
57        // accidental collisions in the hashed string.
58        let serialized_metadata = format!(
59            "-{}{}{}{}{:?}-",
60            directory as u8, file as u8, symlink as u8, &permissions_mode, &relative_path
61        );
62
63        hasher.update(serialized_metadata.as_bytes());
64
65        if file {
66            let file = File::open(full_path).await?;
67            // We read the file contents with a bufreader so that handling really big files wouldn't cause us problems.
68            let reader = BufReader::new(file);
69            let mut stream = ReaderStream::new(reader);
70            while let Some(chunk) = stream.next().await {
71                hasher.update(&chunk?);
72            }
73        }
74        if symlink {
75            let res = tokio::fs::read_link(full_path).await?;
76            // Only relative links would work here since the files should origitate from a remote repository
77            // so relativizing this would be pointless.
78            hasher.update(res.display().to_string().as_bytes());
79        }
80    }
81    let hash = hasher.finalize();
82    Ok(hash)
83}
84
/// Returns the permission mode that is mixed into the folder checksum.
///
/// On unix targets this is the raw mode reported by the file system. On other
/// targets (mostly Windows), which have no unix-like modes, a fixed stand-in is
/// used: `0o444` for read-only entries and `0o644` for everything else.
/// Another approach for those targets could be to pull the file mode from git.
///
/// The branches are selected with `#[cfg]` attributes instead of the `cfg!`
/// macro. `cfg!` only expands to a boolean, so both branches would still have
/// to compile, and `Permissions::mode` does not exist outside unix targets.
fn determine_permissions_mode_for_hashing(permissions: &Permissions) -> u32 {
    #[cfg(unix)]
    {
        permissions.mode()
    }
    #[cfg(not(unix))]
    {
        if permissions.readonly() { 0o444 } else { 0o644 }
    }
}
93
// The whole module is unix-only: the test relies on unix permission modes
// (`Permissions::from_mode`) and on `tokio::fs::symlink`, which are not
// available on other targets. Gating only the test entry point would still
// leave the helper below failing to compile there.
#[cfg(all(test, unix))]
mod tests {
    use tempdir::TempDir;
    use tokio::{
        fs::{self, create_dir, remove_dir, symlink},
        io::AsyncWriteExt,
    };

    use super::*;

    /// Builds up a temporary directory step by step and checks the checksum
    /// after each change against a pinned value.
    ///
    /// The steps depend on each other, so their order must not be changed.
    async fn do_the_test() {
        // Step 1: an empty directory with a known mode.
        let dir = TempDir::new("test-folder-checksum").expect("Failed to create a temp dir");
        File::open(dir.path())
            .await
            .unwrap()
            .set_permissions(Permissions::from_mode(0o755))
            .await
            .unwrap();
        let first_hash = hash_folder(dir.path()).await.unwrap();
        assert_eq!(
            first_hash.to_hex().to_string(),
            "01444ae9678097d0214e449568b68eb351c4743b2697bfc3d517b5c601535823"
        );

        // Step 2: adding a file with contents changes the checksum.
        let mut file = File::create(dir.path().join("test-file")).await.unwrap();
        file.set_permissions(Permissions::from_mode(0o644))
            .await
            .unwrap();
        file.write_all(b"Test file").await.unwrap();

        let second_hash = hash_folder(dir.path()).await.unwrap();

        assert_eq!(
            second_hash.to_hex().to_string(),
            "c2f4caaaafeb41dfd5e5381ea9c1583ccaa7d09378745def8c979b1e1f0e5c2a"
        );

        // Step 3: changing only the mode of the file changes the checksum.
        fs::set_permissions(dir.path().join("test-file"), Permissions::from_mode(0o444))
            .await
            .unwrap();

        let third_hash = hash_folder(dir.path()).await.unwrap();

        assert_eq!(
            third_hash.to_hex().to_string(),
            "1b1820abcb400974e0eb751c103303864f7b0ae7ad387c5135521d9968dbb4de"
        );

        // Step 4: adding an empty subdirectory changes the checksum.
        let inner_dir_path = dir.path().join("directory");
        create_dir(&inner_dir_path).await.unwrap();
        File::open(inner_dir_path)
            .await
            .unwrap()
            .set_permissions(Permissions::from_mode(0o755))
            .await
            .unwrap();

        let fourth_hash = hash_folder(dir.path()).await.unwrap();

        assert_eq!(
            fourth_hash.to_hex().to_string(),
            "f1113337a98c5fe5d7ed0f2a9fc17490993b1149ea44784a027e53d1a1884c9e"
        );

        // Step 5: removing the subdirectory brings back the checksum of step 3.
        remove_dir(dir.path().join("directory")).await.unwrap();

        let fifth_hash = hash_folder(dir.path()).await.unwrap();

        assert_eq!(
            fifth_hash.to_hex().to_string(),
            "1b1820abcb400974e0eb751c103303864f7b0ae7ad387c5135521d9968dbb4de"
        );

        // Step 6: a regular file named like the directory created before, with
        // the same mode, should not have the same checksum as the directory had.
        let file = File::create(dir.path().join("directory")).await.unwrap();
        file.set_permissions(Permissions::from_mode(0o755))
            .await
            .unwrap();
        let sixth_hash = hash_folder(dir.path()).await.unwrap();

        // Tells if we can tell folders apart from files
        assert_ne!(
            fifth_hash.to_hex().to_string(),
            sixth_hash.to_hex().to_string()
        );
        assert_eq!(
            sixth_hash.to_hex().to_string(),
            "4b9255096a4b233be4a24b0fb74fa5e955a0261a422c8e9cfbe7ac11f1256030"
        );

        // Step 7: adding a relative symbolic link changes the checksum.
        let symlink_path = dir.path().join("symlink");
        symlink(Path::new("directory"), &symlink_path)
            .await
            .unwrap();
        // NOTE(review): opening the link path presumably follows the link, so
        // this would set the mode of the link target rather than of the link
        // itself. Confirm before changing; the pinned hash below depends on it.
        File::open(&symlink_path)
            .await
            .unwrap()
            .set_permissions(Permissions::from_mode(0o644))
            .await
            .unwrap();

        let seventh_hash = hash_folder(dir.path()).await.unwrap();
        assert_eq!(
            seventh_hash.to_hex().to_string(),
            "5144015ff90807ec6448a0b6bfcc470de495182441e0af019c6483da8edaa05c"
        );
    }

    /// Runs the scenario once and retries it a single time if the first attempt
    /// panics. A failure of the second attempt fails the test.
    #[tokio::test]
    async fn it_works() {
        // The first attempt runs inside `catch_unwind` so that a failed
        // assertion there does not fail the test right away.
        let res = std::panic::catch_unwind(|| {
            futures::executor::block_on(do_the_test());
        });
        if res.is_ok() {
            return;
        }

        warn!(
            "First attempt at the folder checksum test failed. Retrying in case there was a file corruption issue on this machine."
        );
        do_the_test().await;
    }
}