tmc_langs_plugins/
compression.rs

1//! Contains functions for compressing and uncompressing projects.
2
3use blake3::Hash;
4use std::{
5    io::{Cursor, Read, Seek},
6    path::{Path, PathBuf},
7};
8use tmc_langs_framework::{ArchiveBuilder, Compression, StudentFilePolicy, TmcError};
9use tmc_langs_util::file_util;
10use walkdir::{DirEntry, WalkDir};
11use zip::ZipArchive;
12pub use zip::result::ZipError;
13
14/// Compresses the given directory, only including student files according to the given policy.
15pub fn compress_student_files(
16    policy: &dyn StudentFilePolicy,
17    root_directory: &Path,
18    compression: Compression,
19    deterministic: bool,
20    hash: bool,
21    size_limit_mb: u32,
22) -> Result<(Vec<u8>, Option<Hash>), TmcError> {
23    let mut writer = ArchiveBuilder::new(
24        Cursor::new(vec![]),
25        compression,
26        Some(size_limit_mb),
27        deterministic,
28        hash,
29    );
30    let size_limit_b = usize::try_from(size_limit_mb)
31        .unwrap_or(usize::MAX) // saturating from...
32        .saturating_mul(1000 * 1000);
33    let mut total_size_b = 0;
34
35    for entry in WalkDir::new(root_directory)
36        .sort_by(|a, b| a.path().cmp(b.path()))
37        .into_iter()
38        .filter_entry(|e| !contains_tmcnosubmit(e))
39        .filter_map(|e| e.ok())
40    {
41        let relative = entry
42            .path()
43            .strip_prefix(root_directory)
44            .expect("all entries are inside root");
45        log::trace!(
46            "processing {} ({})",
47            entry.path().display(),
48            relative.display()
49        );
50        if policy.is_student_file(relative) {
51            let path = root_directory
52                .parent()
53                .map(|p| {
54                    entry
55                        .path()
56                        .strip_prefix(p)
57                        .expect("entries are inside root_directory")
58                })
59                .unwrap_or_else(|| entry.path());
60            if entry.path().is_dir() {
61                let path_in_archive = path_to_zip_compatible_string(path);
62                writer.add_directory(entry.path(), &path_in_archive)?;
63            } else {
64                let contents = file_util::read_file(entry.path())?;
65                total_size_b += contents.len();
66                if total_size_b > size_limit_b {
67                    return Err(TmcError::ArchiveSizeLimitExceeded {
68                        limit: size_limit_mb,
69                    });
70                }
71                let path_in_archive = path_to_zip_compatible_string(path);
72                writer.add_file(entry.path(), &path_in_archive)?;
73            }
74        }
75    }
76    let (cursor, hash) = writer.finish()?;
77    let size_limit_b = usize::try_from(size_limit_mb)
78        .unwrap_or(usize::MAX)
79        .saturating_mul(1000 * 1000);
80    if cursor.get_ref().len() > size_limit_b {
81        return Err(TmcError::ArchiveSizeLimitExceeded {
82            limit: size_limit_mb,
83        });
84    }
85    let data = cursor.into_inner();
86    Ok((data, hash))
87}
88
/// Renders `path` as a string whose components are always separated by `/`,
/// whatever separator the platform uses. Components that are not valid Unicode
/// are converted lossily.
fn path_to_zip_compatible_string(path: &Path) -> String {
    let parts: Vec<_> = path
        .components()
        .map(|part| part.as_os_str().to_string_lossy())
        .collect();
    parts.join("/")
}
100
101/// Finds a project directory in the given zip and unzips it according to the given student policy. Also cleans unnecessary non-student files.
102pub fn unzip(zip: impl std::io::Read + std::io::Seek, target: &Path) -> Result<(), TmcError> {
103    log::debug!("Unzipping to {}", target.display());
104
105    let mut zip_archive = ZipArchive::new(zip)?;
106
107    let project_dir = find_project_dir(&mut zip_archive)?;
108    log::debug!("Project dir in zip: {}", project_dir.display());
109
110    for i in 0..zip_archive.len() {
111        let mut file = zip_archive.by_index(i)?;
112        let file_path = PathBuf::from(file.name());
113        let relative = match file_path.strip_prefix(&project_dir) {
114            Ok(relative) => relative,
115            _ => {
116                log::trace!("skip {}, not in project dir", file.name());
117                continue;
118            }
119        };
120        let path_in_target = target.join(relative);
121        log::trace!("processing {file_path:?} -> {path_in_target:?}");
122
123        if file.is_dir() {
124            log::trace!("creating {path_in_target:?}");
125            file_util::create_dir_all(&path_in_target)?;
126        } else {
127            log::trace!("writing to {}", path_in_target.display());
128            if let Some(parent) = path_in_target.parent() {
129                file_util::create_dir_all(parent)?;
130            }
131            file_util::read_to_file(&mut file, path_in_target)?;
132        }
133    }
134
135    Ok(())
136}
137
138// TODO: make more robust, use language plugins?
139fn find_project_dir<R: Read + Seek>(zip_archive: &mut ZipArchive<R>) -> Result<PathBuf, TmcError> {
140    let mut lowest_ipynb_dir = None::<PathBuf>;
141
142    for i in 0..zip_archive.len() {
143        let file = zip_archive.by_index(i)?;
144        let file_path = Path::new(file.name());
145
146        // directories may not have entries in the zip, e.g. it may only have
147        // exercise/src/main... without an entry for src, so we need to check
148        // the path components to find src
149        let mut components = file_path.components().peekable();
150        let mut collected = vec![];
151        while let Some(component) = components.next() {
152            if components.peek().is_none() {
153                // do not inspect the last component,
154                // they will have an entry that is
155                // processed in the next step
156                break;
157            }
158
159            let comp = component.as_os_str();
160            if comp == "nbproject" || comp == "src" || comp == "test" {
161                let path: PathBuf = collected.into_iter().collect();
162                return Ok(path);
163            }
164            collected.push(comp);
165        }
166
167        let file_name = file_path.file_name().unwrap_or_default();
168        if file.is_dir() && (file_name == "nbproject" || file_name == "src" || file_name == "test")
169            || file.is_file()
170                && (file_name == "pom.xml" || file_name == ".idea" || file_name == "Makefile")
171        {
172            let parent = file_path.parent().unwrap_or_else(|| Path::new(""));
173            log::debug!("found project dir {}", parent.display());
174            return Ok(parent.to_path_buf());
175        }
176
177        if file_path
178            .extension()
179            .map(|ext| ext == "ipynb")
180            .unwrap_or_default()
181        {
182            let parent = file_path.parent().unwrap_or_else(|| Path::new(""));
183            if let Some(lowest_ipynb_dir) = lowest_ipynb_dir.as_mut() {
184                if lowest_ipynb_dir.components().count() > parent.components().count() {
185                    *lowest_ipynb_dir = parent.to_path_buf();
186                }
187            } else {
188                lowest_ipynb_dir = Some(parent.to_path_buf());
189            }
190        }
191    }
192    if let Some(lowest_ipynb_dir) = lowest_ipynb_dir {
193        Ok(lowest_ipynb_dir)
194    } else {
195        Err(TmcError::NoProjectDirInArchive)
196    }
197}
198
199fn contains_tmcnosubmit(entry: &DirEntry) -> bool {
200    for entry in WalkDir::new(entry.path())
201        .max_depth(1)
202        .into_iter()
203        .filter_map(|e| e.ok())
204    {
205        if entry.file_name() == ".tmcnosubmit" {
206            log::debug!("contains .tmcnosubmit: {}", entry.path().display());
207            return true;
208        }
209    }
210    false
211}
212
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod test {
    use super::*;
    use std::{
        collections::HashSet,
        fs::{self, *},
    };
    use tempfile::tempdir;
    use tmc_langs_framework::{EverythingIsStudentFilePolicy, TmcProjectYml};

    /// Sets up logging for a test. The result is ignored because the logger can
    /// only be initialised once per process.
    fn init() {
        use log::*;
        use simple_logger::*;
        let _ = SimpleLogger::new().with_level(LevelFilter::Debug).init();
    }

    /// Collects the paths of all files under `dir`, relative to `dir`.
    fn get_relative_file_paths(dir: &Path) -> HashSet<PathBuf> {
        WalkDir::new(dir)
            .into_iter()
            .map(|e| e.unwrap())
            .map(|e| e.into_path())
            .filter(|e| e.is_file())
            .map(|e| e.strip_prefix(dir).unwrap().to_path_buf())
            .collect()
    }

    /// Compressing an exercise directory keeps the directory name as the first
    /// path component of every archive entry.
    #[test]
    fn zips() {
        init();

        let temp = tempdir().unwrap();
        let student_file_path = temp
            .path()
            .join("exercise-name/src/main/java/AdaLovelace.java");
        let missing_file_path = temp.path().join("exercise-name/pom.xml");
        fs::create_dir_all(student_file_path.parent().unwrap()).unwrap();
        File::create(student_file_path).unwrap();
        fs::create_dir_all(missing_file_path.parent().unwrap()).unwrap();
        File::create(missing_file_path).unwrap();

        let path = temp.path().join("exercise-name");
        let tmcprojectyml = TmcProjectYml::load_or_default(&path).unwrap();
        let (zipped, _hash) = compress_student_files(
            &EverythingIsStudentFilePolicy::new(&path).unwrap(),
            &path,
            Compression::Zip,
            true,
            false,
            tmcprojectyml.get_submission_size_limit_mb(),
        )
        .unwrap();
        let mut archive = ZipArchive::new(Cursor::new(zipped)).unwrap();
        assert!(!archive.is_empty());
        for i in 0..archive.len() {
            log::debug!("{:?}", archive.by_index(i).unwrap().name());
        }
        assert!(
            archive
                .by_name("exercise-name/src/main/java/AdaLovelace.java")
                .is_ok()
        );
        assert!(archive.by_name("exercise-name/pom.xml").is_ok());
    }

    /// An archive of a single project unzips to the same files as the reference
    /// directory.
    #[test]
    fn unzips_simple() {
        init();

        let temp = tempdir().unwrap();
        let zip = file_util::open_file("tests/data/zip/module-trivial.zip").unwrap();
        unzip(zip, temp.path()).unwrap();

        let expected = get_relative_file_paths(Path::new("tests/data/zip/module-trivial"));
        let actual = get_relative_file_paths(temp.path());
        assert_eq!(expected, actual)
    }

    /// A different archive of the project unzips to the same files as the
    /// reference directory.
    #[test]
    fn unzips_complex() {
        init();

        let temp = tempdir().unwrap();
        let zip = file_util::open_file("tests/data/zip/course-module-trivial.zip").unwrap();
        unzip(zip, temp.path()).unwrap();

        let expected = get_relative_file_paths(Path::new("tests/data/zip/module-trivial"));
        let actual = get_relative_file_paths(temp.path());
        assert_eq!(expected, actual)
    }

    /// Unzipping the `no-src-entry` fixture still creates the `src` directory.
    #[test]
    fn no_src_entry() {
        init();

        let temp = tempdir().unwrap();
        let zip = file_util::open_file("tests/data/zip/no-src-entry.zip").unwrap();
        unzip(zip, temp.path()).unwrap();
        assert!(temp.path().join("src").exists());
    }

    /// No entry name in the fixture archive contains a backslash.
    #[cfg(windows)]
    #[test]
    fn windows_paths_get_converted() {
        let zipped = file_util::read_file("tests/data/zip/compressed.zip").unwrap();
        let mut ziparch = ZipArchive::new(Cursor::new(zipped)).unwrap();
        assert!(!ziparch.is_empty());
        for i in 0..ziparch.len() {
            let file = ziparch.by_index(i).unwrap();
            assert!(!file.name().contains('\\'))
        }
    }
}