tmc_langs_plugins/
compression.rs1use blake3::Hash;
4use std::{
5 io::{Cursor, Read, Seek},
6 path::{Path, PathBuf},
7};
8use tmc_langs_framework::{ArchiveBuilder, Compression, StudentFilePolicy, TmcError};
9use tmc_langs_util::file_util;
10use walkdir::{DirEntry, WalkDir};
11use zip::ZipArchive;
12pub use zip::result::ZipError;
13
14pub fn compress_student_files(
16 policy: &dyn StudentFilePolicy,
17 root_directory: &Path,
18 compression: Compression,
19 deterministic: bool,
20 hash: bool,
21 size_limit_mb: u32,
22) -> Result<(Vec<u8>, Option<Hash>), TmcError> {
23 let mut writer = ArchiveBuilder::new(
24 Cursor::new(vec![]),
25 compression,
26 Some(size_limit_mb),
27 deterministic,
28 hash,
29 );
30 let size_limit_b = usize::try_from(size_limit_mb)
31 .unwrap_or(usize::MAX) .saturating_mul(1000 * 1000);
33 let mut total_size_b = 0;
34
35 for entry in WalkDir::new(root_directory)
36 .sort_by(|a, b| a.path().cmp(b.path()))
37 .into_iter()
38 .filter_entry(|e| !contains_tmcnosubmit(e))
39 .filter_map(|e| e.ok())
40 {
41 let relative = entry
42 .path()
43 .strip_prefix(root_directory)
44 .expect("all entries are inside root");
45 log::trace!(
46 "processing {} ({})",
47 entry.path().display(),
48 relative.display()
49 );
50 if policy.is_student_file(relative) {
51 let path = root_directory
52 .parent()
53 .map(|p| {
54 entry
55 .path()
56 .strip_prefix(p)
57 .expect("entries are inside root_directory")
58 })
59 .unwrap_or_else(|| entry.path());
60 if entry.path().is_dir() {
61 let path_in_archive = path_to_zip_compatible_string(path);
62 writer.add_directory(entry.path(), &path_in_archive)?;
63 } else {
64 let contents = file_util::read_file(entry.path())?;
65 total_size_b += contents.len();
66 if total_size_b > size_limit_b {
67 return Err(TmcError::ArchiveSizeLimitExceeded {
68 limit: size_limit_mb,
69 });
70 }
71 let path_in_archive = path_to_zip_compatible_string(path);
72 writer.add_file(entry.path(), &path_in_archive)?;
73 }
74 }
75 }
76 let (cursor, hash) = writer.finish()?;
77 let size_limit_b = usize::try_from(size_limit_mb)
78 .unwrap_or(usize::MAX)
79 .saturating_mul(1000 * 1000);
80 if cursor.get_ref().len() > size_limit_b {
81 return Err(TmcError::ArchiveSizeLimitExceeded {
82 limit: size_limit_mb,
83 });
84 }
85 let data = cursor.into_inner();
86 Ok((data, hash))
87}
88
/// Converts `path` into an archive entry name: its components joined with `/`.
///
/// Joining the components by hand rather than printing the path makes the separator `/`
/// regardless of the platform's own separator. Root and prefix components (a leading `/`,
/// a Windows drive or UNC prefix) are dropped, because an archive entry name must be relative;
/// without this an absolute path would produce a name starting with `//`. Components that are
/// not valid UTF-8 are converted lossily.
fn path_to_zip_compatible_string(path: &Path) -> String {
    let mut zip_path = String::new();
    for component in path.components() {
        // Entry names must not carry a leading slash or a drive/device prefix.
        if matches!(
            component,
            std::path::Component::Prefix(_) | std::path::Component::RootDir
        ) {
            continue;
        }
        if !zip_path.is_empty() {
            zip_path.push('/');
        }
        zip_path.push_str(&component.as_os_str().to_string_lossy());
    }
    zip_path
}
100
101pub fn unzip(zip: impl std::io::Read + std::io::Seek, target: &Path) -> Result<(), TmcError> {
103 log::debug!("Unzipping to {}", target.display());
104
105 let mut zip_archive = ZipArchive::new(zip)?;
106
107 let project_dir = find_project_dir(&mut zip_archive)?;
108 log::debug!("Project dir in zip: {}", project_dir.display());
109
110 for i in 0..zip_archive.len() {
111 let mut file = zip_archive.by_index(i)?;
112 let file_path = PathBuf::from(file.name());
113 let relative = match file_path.strip_prefix(&project_dir) {
114 Ok(relative) => relative,
115 _ => {
116 log::trace!("skip {}, not in project dir", file.name());
117 continue;
118 }
119 };
120 let path_in_target = target.join(relative);
121 log::trace!("processing {file_path:?} -> {path_in_target:?}");
122
123 if file.is_dir() {
124 log::trace!("creating {path_in_target:?}");
125 file_util::create_dir_all(&path_in_target)?;
126 } else {
127 log::trace!("writing to {}", path_in_target.display());
128 if let Some(parent) = path_in_target.parent() {
129 file_util::create_dir_all(parent)?;
130 }
131 file_util::read_to_file(&mut file, path_in_target)?;
132 }
133 }
134
135 Ok(())
136}
137
138fn find_project_dir<R: Read + Seek>(zip_archive: &mut ZipArchive<R>) -> Result<PathBuf, TmcError> {
140 let mut lowest_ipynb_dir = None::<PathBuf>;
141
142 for i in 0..zip_archive.len() {
143 let file = zip_archive.by_index(i)?;
144 let file_path = Path::new(file.name());
145
146 let mut components = file_path.components().peekable();
150 let mut collected = vec![];
151 while let Some(component) = components.next() {
152 if components.peek().is_none() {
153 break;
157 }
158
159 let comp = component.as_os_str();
160 if comp == "nbproject" || comp == "src" || comp == "test" {
161 let path: PathBuf = collected.into_iter().collect();
162 return Ok(path);
163 }
164 collected.push(comp);
165 }
166
167 let file_name = file_path.file_name().unwrap_or_default();
168 if file.is_dir() && (file_name == "nbproject" || file_name == "src" || file_name == "test")
169 || file.is_file()
170 && (file_name == "pom.xml" || file_name == ".idea" || file_name == "Makefile")
171 {
172 let parent = file_path.parent().unwrap_or_else(|| Path::new(""));
173 log::debug!("found project dir {}", parent.display());
174 return Ok(parent.to_path_buf());
175 }
176
177 if file_path
178 .extension()
179 .map(|ext| ext == "ipynb")
180 .unwrap_or_default()
181 {
182 let parent = file_path.parent().unwrap_or_else(|| Path::new(""));
183 if let Some(lowest_ipynb_dir) = lowest_ipynb_dir.as_mut() {
184 if lowest_ipynb_dir.components().count() > parent.components().count() {
185 *lowest_ipynb_dir = parent.to_path_buf();
186 }
187 } else {
188 lowest_ipynb_dir = Some(parent.to_path_buf());
189 }
190 }
191 }
192 if let Some(lowest_ipynb_dir) = lowest_ipynb_dir {
193 Ok(lowest_ipynb_dir)
194 } else {
195 Err(TmcError::NoProjectDirInArchive)
196 }
197}
198
199fn contains_tmcnosubmit(entry: &DirEntry) -> bool {
200 for entry in WalkDir::new(entry.path())
201 .max_depth(1)
202 .into_iter()
203 .filter_map(|e| e.ok())
204 {
205 if entry.file_name() == ".tmcnosubmit" {
206 log::debug!("contains .tmcnosubmit: {}", entry.path().display());
207 return true;
208 }
209 }
210 false
211}
212
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod test {
    use super::*;
    use std::{
        collections::HashSet,
        fs::{self, *},
    };
    use tempfile::tempdir;
    use tmc_langs_framework::{EverythingIsStudentFilePolicy, TmcProjectYml};

    /// Installs the logger. The result is ignored because every test calls this and
    /// initialisation fails once a logger is already set.
    fn init() {
        use log::*;
        use simple_logger::*;
        let _ = SimpleLogger::new().with_level(LevelFilter::Debug).init();
    }

    /// Collects the paths of all regular files under `dir`, relative to `dir`.
    fn get_relative_file_paths(dir: &Path) -> HashSet<PathBuf> {
        WalkDir::new(dir)
            .into_iter()
            .map(|e| e.unwrap())
            .map(|e| e.into_path())
            .filter(|e| e.is_file())
            .map(|e| e.strip_prefix(dir).unwrap().to_path_buf())
            .collect()
    }

    /// Both files end up in the archive, below a top-level folder named after the exercise.
    #[test]
    fn zips() {
        init();

        let temp = tempdir().unwrap();
        let student_file_path = temp
            .path()
            .join("exercise-name/src/main/java/AdaLovelace.java");
        let missing_file_path = temp.path().join("exercise-name/pom.xml");
        fs::create_dir_all(student_file_path.parent().unwrap()).unwrap();
        File::create(student_file_path).unwrap();
        fs::create_dir_all(missing_file_path.parent().unwrap()).unwrap();
        File::create(missing_file_path).unwrap();

        let path = temp.path().join("exercise-name");
        let tmcprojectyml = TmcProjectYml::load_or_default(&path).unwrap();
        let (zipped, _hash) = compress_student_files(
            &EverythingIsStudentFilePolicy::new(&path).unwrap(),
            &path,
            Compression::Zip,
            true,
            false,
            tmcprojectyml.get_submission_size_limit_mb(),
        )
        .unwrap();
        let mut archive = ZipArchive::new(Cursor::new(zipped)).unwrap();
        assert!(!archive.is_empty());
        for i in 0..archive.len() {
            log::debug!("{:?}", archive.by_index(i).unwrap().name());
        }
        assert!(
            archive
                .by_name("exercise-name/src/main/java/AdaLovelace.java")
                .is_ok()
        );
        assert!(archive.by_name("exercise-name/pom.xml").is_ok());
    }

    /// Extracting the archive yields the same files as the reference directory.
    #[test]
    fn unzips_simple() {
        init();

        let temp = tempdir().unwrap();
        let zip = file_util::open_file("tests/data/zip/module-trivial.zip").unwrap();
        unzip(zip, temp.path()).unwrap();

        let expected = get_relative_file_paths(Path::new("tests/data/zip/module-trivial"));
        let actual = get_relative_file_paths(temp.path());
        assert_eq!(expected, actual)
    }

    /// A different archive must extract to the same reference directory contents.
    #[test]
    fn unzips_complex() {
        init();

        let temp = tempdir().unwrap();
        let zip = file_util::open_file("tests/data/zip/course-module-trivial.zip").unwrap();
        unzip(zip, temp.path()).unwrap();

        let expected = get_relative_file_paths(Path::new("tests/data/zip/module-trivial"));
        let actual = get_relative_file_paths(temp.path());
        assert_eq!(expected, actual)
    }

    /// Extraction must still produce a `src` directory for this archive.
    #[test]
    fn no_src_entry() {
        init();

        let temp = tempdir().unwrap();
        let zip = file_util::open_file("tests/data/zip/no-src-entry.zip").unwrap();
        unzip(zip, temp.path()).unwrap();
        assert!(temp.path().join("src").exists());
    }

    /// No entry name in the fixture archive may contain a backslash.
    #[cfg(windows)]
    #[test]
    fn windows_paths_get_converted() {
        let zipped = file_util::read_file("tests/data/zip/compressed.zip").unwrap();
        let mut ziparch = ZipArchive::new(Cursor::new(zipped)).unwrap();
        assert!(!ziparch.is_empty());
        for i in 0..ziparch.len() {
            let file = ziparch.by_index(i).unwrap();
            assert!(!file.name().contains('\\'));
        }
    }
}