tmc_langs/
submission_processing.rs

1//! Functions for processing submissions.
2
3use crate::error::LangsError;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use serde_json::Value;
7use std::{
8    fs::File,
9    io::{BufRead, BufReader, BufWriter, Write},
10    path::Path,
11};
12use tmc_langs_framework::{MetaString, MetaSyntaxParser};
13use tmc_langs_util::{FileError, deserialize, file_util};
14use walkdir::{DirEntry, WalkDir};
15
/// Pattern matched against entry file names by `on_skip_list`.
///
/// The first alternative is unanchored, so it matches any name that contains `.tmcrc`;
/// the second alternative only matches a name that is exactly `metadata.yml`.
// The unwrap is applied to the result of compiling a fixed pattern literal.
#[allow(clippy::unwrap_used)]
static FILES_TO_SKIP_ALWAYS: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\.tmcrc|^metadata\.yml$").unwrap());
19
20/// Note: used by tmc-server.
21/// Walks through each given path, processing files and copying them into the destination.
22///
23/// Skips hidden directories, directories that contain a `.tmcignore` file in their root, as well as
24/// files matching patterns defined in ```FILES_TO_SKIP_ALWAYS``` and directories and files named ```private```.
25///
26/// Binary files are copied without extra processing, while text files are parsed to remove solution tags and stubs.
27pub fn prepare_solution(exercise_path: &Path, dest_root: &Path) -> Result<(), LangsError> {
28    log::debug!(
29        "preparing solution from {} to {}",
30        exercise_path.display(),
31        dest_root.display()
32    );
33
34    let line_filter = |meta: &MetaString| {
35        !matches!(meta, MetaString::Stub(_)) && !matches!(meta, MetaString::Hidden(_))
36        // hide stub and hidden lines
37    };
38    let file_filter = |metas: &[MetaString]| {
39        !metas
40            .iter()
41            .any(|ms| matches!(ms, MetaString::HiddenFileMarker)) // exclude hidden files
42    };
43    process_files(exercise_path, dest_root, line_filter, file_filter)?;
44    Ok(())
45}
46
47/// Walks through each given path, processing files and copying them into the destination.
48///
49/// Skips hidden directories, directories that contain a ```.tmcignore``` file in their root, as well as
50/// files matching patterns defined in ```FILES_TO_SKIP_ALWAYS``` and directories and files named ```private```.
51///
52/// Binary files are copied without extra processing, while text files are parsed to remove stub tags and solutions.
53///
54/// Additionally, copies any shared files with the corresponding language plugins.
55pub fn prepare_stub(exercise_path: &Path, dest_root: &Path) -> Result<(), LangsError> {
56    log::debug!(
57        "preparing stub from {} to {}",
58        exercise_path.display(),
59        dest_root.display()
60    );
61
62    let line_filter = |meta: &MetaString| {
63        !matches!(meta, MetaString::Solution(_)) && !matches!(meta, MetaString::Hidden(_))
64        // exclude solution and hidden lines
65    };
66    let file_filter = |metas: &[MetaString]| {
67        !metas.iter().any(|ms| {
68            matches!(ms, MetaString::SolutionFileMarker) // exclude solution files
69                || matches!(ms, MetaString::HiddenFileMarker) // exclude hidden files
70        })
71    };
72    process_files(exercise_path, dest_root, line_filter, file_filter)?;
73    Ok(())
74}
75
76// Processes all files in path, copying files in directories that are not skipped.
77fn process_files(
78    source: &Path,
79    dest_root: &Path,
80    mut line_filter: impl Fn(&MetaString) -> bool,
81    mut file_filter: impl Fn(&[MetaString]) -> bool,
82) -> Result<(), LangsError> {
83    log::info!("Project: {source:?}");
84
85    let walker = WalkDir::new(source).min_depth(1).into_iter();
86    // silently skips over errors, for example when there's a directory we don't have permissions for
87    for entry in walker
88        .filter_entry(|e| !is_hidden_dir(e) && !on_skip_list(e) && !contains_tmcignore(e))
89        .filter_map(|e| e.ok())
90    {
91        process_file(entry, source, dest_root, &mut line_filter, &mut file_filter)?;
92    }
93    Ok(())
94}
95
96fn process_file(
97    entry: DirEntry,
98    source: &Path,
99    dest_root: &Path,
100    line_filter: &mut impl Fn(&MetaString) -> bool,
101    file_filter: &mut impl Fn(&[MetaString]) -> bool,
102) -> Result<(), LangsError> {
103    if entry.path().is_dir() {
104        return Ok(());
105    }
106
107    let relative_path = entry
108        .path()
109        .strip_prefix(source)
110        .unwrap_or_else(|_| Path::new(""));
111    let dest_path = dest_root.join(relative_path);
112    if let Some(extension) = entry.path().extension().and_then(|o| o.to_str()) {
113        // todo: stop checking extension twice here and in meta_syntax
114        // NOTE: if you change these extensions make sure to change them in meta_syntax.rs as well
115        match extension {
116            "java" | "c" | "cpp" | "h" | "hpp" | "js" | "css" | "rs" | "qml" | "cs" | "xml"
117            | "http" | "html" | "qrc" | "properties" | "py" | "R" | "pro" => {
118                // process line by line
119                let source_file = file_util::open_file(entry.path())?;
120                let iter = LossyFileIterator {
121                    file: BufReader::new(source_file),
122                };
123                if let Some(lines) = process_lines(iter, line_filter, file_filter, extension)
124                    .map_err(|e| FileError::FileRead(entry.path().to_path_buf(), e))?
125                {
126                    // write all lines to target file
127                    if let Some(parent) = dest_path.parent() {
128                        file_util::create_dir_all(parent)?;
129                    }
130                    let mut file = BufWriter::new(file_util::create_file(&dest_path)?);
131                    for line in lines {
132                        file.write_all(line.as_bytes())
133                            .map_err(|e| FileError::FileWrite(dest_path.to_path_buf(), e))?;
134                    }
135                }
136            }
137            "ipynb" => {
138                // process each cell in the notebook
139                let file = file_util::open_file(entry.path())?;
140                let mut json: Value = deserialize::json_from_reader(file)
141                    .map_err(|e| LangsError::DeserializeJson(entry.path().to_path_buf(), e))?;
142                let cells = json
143                    .get_mut("cells")
144                    .and_then(|cs| cs.as_array_mut())
145                    .ok_or(LangsError::InvalidNotebook(
146                        "Invalid or missing value for 'cells'",
147                    ))?;
148
149                for cell in cells {
150                    let is_cell_type_code = cell
151                        .get("cell_type")
152                        .and_then(|c| c.as_str())
153                        .map(|c| c == "code")
154                        .unwrap_or_default();
155
156                    if is_cell_type_code {
157                        // read the source for each code cell
158                        let cell_source = cell
159                            .get_mut("source")
160                            .and_then(|s| s.as_array_mut())
161                            .ok_or(LangsError::InvalidNotebook(
162                                "Invalid or missing value for 'source'",
163                            ))?;
164                        let source = cell_source.iter().map(|v| {
165                            v.as_str()
166                                .map(String::from)
167                                .ok_or(LangsError::InvalidNotebook("Invalid value in 'source'"))
168                        });
169
170                        let lines: Option<Vec<Value>> =
171                            process_lines(source, line_filter, file_filter, extension)?
172                                .map(|i| i.map(Value::String).collect());
173                        if let Some(lines) = lines {
174                            // replace cell source with filtered output
175                            *cell_source = lines;
176                        } else {
177                            // file should be skipped
178                            return Ok(());
179                        }
180                    }
181                }
182                // writes the JSON with filtered sources to the target path
183                file_util::write_to_file(serde_json::to_vec_pretty(&json)?, &dest_path)?;
184                log::trace!(
185                    "filtered file {} to {}",
186                    entry.path().display(),
187                    dest_path.display()
188                );
189            }
190            _ => {
191                // copy other files as is
192                file_util::copy(entry.path(), dest_path)?;
193            }
194        }
195    }
196    Ok(())
197}
198
199// Filter for hidden directories (directories with names starting with '.')
200pub fn is_hidden_dir(entry: &DirEntry) -> bool {
201    let skip = entry.metadata().map(|e| e.is_dir()).unwrap_or_default()
202        && entry
203            .file_name()
204            .to_str()
205            .map(|s| s.starts_with('.'))
206            .unwrap_or_default();
207    if skip {
208        log::debug!("is hidden dir: {:?}", entry.path());
209    }
210    skip
211}
212
213// Filter for skipping directories on `FILES_TO_SKIP_ALWAYS` or named 'private', and files in a 'test' directory that contain 'Hidden' in their name.
214fn on_skip_list(entry: &DirEntry) -> bool {
215    // check if entry's filename matchees the skip list or is 'private'
216    let entry_file_name = entry.file_name().to_str();
217    let on_skip_list = entry_file_name
218        .map(|s| FILES_TO_SKIP_ALWAYS.is_match(s) || s == "private")
219        .unwrap_or_default();
220
221    // check if the current entry is a file that contains "Hidden" in its name in a directory that contains "test" in its name
222    let hidden_in_test = if entry.path().is_file() {
223        let in_test = entry
224            .path()
225            .parent()
226            .and_then(|p| p.file_name())
227            .and_then(|f| f.to_str())
228            .map(|f| f.contains("test"))
229            .unwrap_or_default();
230        let contains_hidden = entry_file_name
231            .map(|n| n.contains("Hidden"))
232            .unwrap_or_default();
233        in_test && contains_hidden
234    } else {
235        false
236    };
237
238    let skip = on_skip_list || hidden_in_test;
239    if skip {
240        log::debug!("on skip list: {:?}", entry.path());
241    }
242    skip
243}
244
245// Filter for skipping directories that contain a '.tmcignore' file
246pub fn contains_tmcignore(entry: &DirEntry) -> bool {
247    for entry in WalkDir::new(entry.path())
248        .max_depth(1)
249        .into_iter()
250        .filter_map(|e| e.ok())
251    {
252        let is_file = entry.metadata().map(|e| e.is_file()).unwrap_or_default();
253        if is_file && entry.file_name() == ".tmcignore" {
254            log::debug!("contains .tmcignore: {:?}", entry.path());
255            return true;
256        }
257    }
258    false
259}
260
/// Line iterator over a buffered file that, unlike `BufRead::lines`, converts each line with
/// lossy UTF-8 decoding and keeps the line terminator in the yielded string.
struct LossyFileIterator {
    file: BufReader<File>,
}

impl Iterator for LossyFileIterator {
    type Item = Result<String, std::io::Error>;

    /// Reads up to and including the next `\n`. Yields `None` once nothing more can be read,
    /// and the I/O error if the read fails.
    fn next(&mut self) -> Option<Self::Item> {
        let mut raw_line = Vec::new();
        let bytes_read = match self.file.read_until(b'\n', &mut raw_line) {
            Ok(count) => count,
            Err(err) => return Some(Err(err)),
        };
        if bytes_read == 0 {
            // end of file
            return None;
        }
        // invalid UTF-8 sequences are replaced with U+FFFD instead of failing
        Some(Ok(String::from_utf8_lossy(&raw_line).into_owned()))
    }
}
278
279/// Processes the lines from the given iterator according to the filters and extension given.
280/// Returns None if the file should be skipped.
281fn process_lines<'a, 'b, I, E>(
282    line_iterator: I,
283    line_filter: &'b mut impl Fn(&MetaString) -> bool,
284    file_filter: &'b mut impl Fn(&[MetaString]) -> bool,
285    extension: &str,
286) -> Result<Option<impl Iterator<Item = String> + 'a>, E>
287where
288    I: Iterator<Item = Result<String, E>>,
289    'b: 'a,
290{
291    let parser = MetaSyntaxParser::new(line_iterator, extension);
292    let parse_result: Result<Vec<_>, _> = parser.collect();
293    let parsed = parse_result?;
294
295    // files that don't pass the filter are skipped
296    if !file_filter(&parsed) {
297        return Ok(None);
298    }
299
300    // filter into iterator of strings
301    let iter = parsed.into_iter().filter(line_filter).filter_map(|ms| {
302        match ms {
303            MetaString::Solution(s) | MetaString::String(s) | MetaString::Stub(s) => Some(s),
304            MetaString::SolutionFileMarker | MetaString::HiddenFileMarker => None, // write nothing for file markers
305            MetaString::Hidden(_) => None, // write nothing for hidden text
306        }
307    });
308    Ok(Some(iter))
309}
310
311#[cfg(test)]
312#[allow(clippy::unwrap_used)]
313mod test {
314    use super::*;
315    use std::{fs::File, io::Write, path::PathBuf};
316    use tmc_langs_framework::TmcProjectYml;
317
318    fn init() {
319        use log::*;
320        use simple_logger::*;
321        let _ = SimpleLogger::new().with_level(LevelFilter::Trace).init();
322    }
323
324    fn file_to(
325        target_dir: impl AsRef<std::path::Path>,
326        target_relative: impl AsRef<std::path::Path>,
327        contents: impl AsRef<[u8]>,
328    ) -> PathBuf {
329        let target = target_dir.as_ref().join(target_relative);
330        if let Some(parent) = target.parent() {
331            std::fs::create_dir_all(parent).unwrap();
332        }
333        std::fs::write(&target, contents.as_ref()).unwrap();
334        target
335    }
336
337    #[test]
338    fn prepare_solutions_preserves_structure() {
339        init();
340
341        let temp_source = tempfile::tempdir().unwrap();
342        file_to(&temp_source, "inner/binary.bin", "");
343        file_to(&temp_source, "File.java", "");
344
345        let temp_target = tempfile::tempdir().unwrap();
346
347        prepare_solution(temp_source.path(), temp_target.path()).unwrap();
348
349        assert!(temp_target.path().join("inner/binary.bin").exists());
350        assert!(temp_target.path().join("File.java").exists());
351    }
352
353    #[test]
354    fn prepare_solutions_filters_text_files() {
355        init();
356
357        let temp_source = tempfile::tempdir().unwrap();
358        file_to(
359            &temp_source,
360            "Test.java",
361            r#"public class JavaTestCase {
362    // BEGIN SOLUTION
363    public int foo() {
364        return 3;
365    }
366    // END SOLUTION
367
368    public void bar() {
369        // BEGIN SOLUTION
370        System.out.println("hello");
371        // END SOLUTION
372    }
373
374    public int xoo() {
375        // BEGIN SOLUTION
376        return 3;
377        // END SOLUTION
378        // STUB: return 0;
379    }
380}
381"#,
382        );
383
384        let temp_target = tempfile::tempdir().unwrap();
385
386        prepare_solution(temp_source.path(), temp_target.path()).unwrap();
387
388        let s = file_util::read_file_to_string(temp_target.path().join("Test.java")).unwrap();
389        let expected = r#"public class JavaTestCase {
390    public int foo() {
391        return 3;
392    }
393
394    public void bar() {
395        System.out.println("hello");
396    }
397
398    public int xoo() {
399        return 3;
400    }
401}
402"#;
403
404        assert_eq!(s, expected, "expected:\n{expected:#}\nfound:\n{s:#}");
405    }
406
407    #[test]
408    fn prepare_solutions_does_not_filter_binary_files() {
409        init();
410
411        let temp_source = tempfile::tempdir().unwrap();
412
413        let contents = r#"public class JavaTestCase {
414    // BEGIN SOLUTION
415    public int foo() {
416        return 3;
417    }
418    // END SOLUTION
419
420    public void bar() {
421        // BEGIN SOLUTION
422        System.out.println("hello");
423        // END SOLUTION
424    }
425
426    public int xoo() {
427        // BEGIN SOLUTION
428        return 3;
429        // END SOLUTION
430        // STUB: return 0;
431    }
432}
433"#;
434
435        file_to(&temp_source, "Test.bin", contents);
436
437        let temp_target = tempfile::tempdir().unwrap();
438
439        prepare_stub(temp_source.path(), temp_target.path()).unwrap();
440
441        let s = file_util::read_file_to_string(temp_target.path().join("Test.bin")).unwrap();
442
443        assert_eq!(s, contents, "expected:\n{contents:#}\nfound:\n{s:#}");
444    }
445
446    #[test]
447    fn prepare_solutions_does_not_filter_solution_files() {
448        init();
449
450        let temp_source = tempfile::tempdir().unwrap();
451        file_to(
452            &temp_source,
453            "Solution.java",
454            r#"// SOLUTION FILE
455class SomeClass {}
456"#,
457        );
458        file_to(
459            &temp_source,
460            "NonSolution.java",
461            r#"
462class SomeClass {}
463"#,
464        );
465
466        let temp_target = tempfile::tempdir().unwrap();
467
468        prepare_solution(temp_source.path(), temp_target.path()).unwrap();
469
470        assert!(dbg!(temp_source.path().join("Solution.java")).exists());
471        assert!(dbg!(temp_source.path().join("NonSolution.java")).exists());
472    }
473
474    #[test]
475    fn prepares_stubs() {
476        init();
477
478        let temp_source = tempfile::tempdir().unwrap();
479        file_to(
480            &temp_source,
481            "Test.java",
482            r#"public class JavaTestCase {
483    // BEGIN SOLUTION
484    public int foo() {
485        return 3;
486    }
487    // END SOLUTION
488
489    public void bar() {
490        // BEGIN SOLUTION
491        System.out.println("hello");
492        // END SOLUTION
493    }
494
495    public int xoo() {
496        // BEGIN SOLUTION
497        return 3;
498        // END SOLUTION
499        // STUB: return 0;
500    }
501}
502"#,
503        );
504
505        let temp_target = tempfile::tempdir().unwrap();
506
507        prepare_stub(temp_source.path(), temp_target.path()).unwrap();
508
509        let s = file_util::read_file_to_string(temp_target.path().join("Test.java")).unwrap();
510        let expected = r#"public class JavaTestCase {
511
512    public void bar() {
513    }
514
515    public int xoo() {
516        return 0;
517    }
518}
519"#
520        .to_string();
521
522        assert_eq!(s, expected, "expected:\n{expected:#}\nfound:\n{s:#}");
523    }
524
525    #[test]
526    fn prepare_stubs_filters_solution_files() {
527        init();
528
529        let temp_source = tempfile::tempdir().unwrap();
530        file_to(&temp_source, "NonSolution.java", "something something");
531        file_to(&temp_source, "SolutionFile.java", "// SOLUTION FILE");
532
533        let temp_target = tempfile::tempdir().unwrap();
534
535        prepare_stub(temp_source.path(), temp_target.path()).unwrap();
536
537        assert!(temp_target.path().join("NonSolution.java").exists());
538        assert!(!temp_target.path().join("SolutionFile.java").exists());
539    }
540
541    #[test]
542    fn tmc_project_yml_parses() {
543        let temp = tempfile::tempdir().unwrap();
544        let mut path = temp.path().to_owned();
545        path.push(".tmcproject.yml");
546        let mut file = File::create(&path).unwrap();
547        file.write_all(
548            r#"
549extra_student_files:
550  - test/StudentTest.java
551  - test/OtherTest.java
552"#
553            .as_bytes(),
554        )
555        .unwrap();
556        let conf = TmcProjectYml::load_or_default(temp.path()).unwrap();
557        assert!(conf.extra_student_files[0] == PathBuf::from("test/StudentTest.java"));
558        assert!(conf.extra_student_files[1] == PathBuf::from("test/OtherTest.java"));
559    }
560
561    #[test]
562    fn hides_test_hidden_files_in_test() {
563        init();
564
565        let temp_source = tempfile::tempdir().unwrap();
566        file_to(&temp_source, "NotHidden", "");
567        file_to(&temp_source, "test/ActuallyHidden", "");
568
569        let temp_target = tempfile::tempdir().unwrap();
570
571        prepare_solution(temp_source.path(), temp_target.path()).unwrap();
572
573        assert!(dbg!(temp_source.path().join("NotHidden")).exists());
574        assert!(!dbg!(temp_source.path().join("ActuallyHidden")).exists());
575    }
576
577    #[test]
578    fn solution_filters_hidden_files() {
579        init();
580
581        let temp_source = tempfile::tempdir().unwrap();
582        file_to(
583            &temp_source,
584            "H.java",
585            r"// HIDDEN FILE
586etc etc",
587        );
588        file_to(&temp_source, "NonH.java", "etc etc");
589
590        let temp_target = tempfile::tempdir().unwrap();
591
592        prepare_solution(temp_source.path(), temp_target.path()).unwrap();
593
594        assert!(!temp_target.path().join("H.java").exists());
595        assert!(temp_target.path().join("NonH.java").exists());
596    }
597
598    #[test]
599    fn stub_filters_hidden_files() {
600        init();
601
602        let temp_source = tempfile::tempdir().unwrap();
603        file_to(
604            &temp_source,
605            "H.java",
606            r"// HIDDEN FILE
607etc etc",
608        );
609        file_to(&temp_source, "NonH.java", "etc etc");
610
611        let temp_target = tempfile::tempdir().unwrap();
612
613        prepare_stub(temp_source.path(), temp_target.path()).unwrap();
614
615        assert!(!temp_target.path().join("H.java").exists());
616        assert!(temp_target.path().join("NonH.java").exists());
617    }
618
619    #[test]
620    fn solution_filters_hidden_lines() {
621        init();
622
623        let temp_source = tempfile::tempdir().unwrap();
624        file_to(
625            &temp_source,
626            "ContainsHidden.java",
627            r"etc etc
628// BEGIN HIDDEN
629hidden!
630// END HIDDEN
631etc etc",
632        );
633
634        let temp_target = tempfile::tempdir().unwrap();
635
636        prepare_solution(temp_source.path(), temp_target.path()).unwrap();
637
638        let s =
639            file_util::read_file_to_string(temp_target.path().join("ContainsHidden.java")).unwrap();
640
641        assert_eq!(
642            s,
643            r"etc etc
644etc etc"
645        );
646    }
647
648    #[test]
649    fn stub_filters_hidden_lines() {
650        init();
651
652        let temp_source = tempfile::tempdir().unwrap();
653        file_to(
654            &temp_source,
655            "ContainsHidden.java",
656            r"etc etc
657// BEGIN HIDDEN
658hidden!
659// END HIDDEN
660etc etc",
661        );
662
663        let temp_target = tempfile::tempdir().unwrap();
664
665        prepare_stub(temp_source.path(), temp_target.path()).unwrap();
666
667        let s =
668            file_util::read_file_to_string(temp_target.path().join("ContainsHidden.java")).unwrap();
669
670        assert_eq!(
671            s,
672            r"etc etc
673etc etc"
674        );
675    }
676
677    #[test]
678    fn filters_notebooks() {
679        init();
680
681        let temp_source = tempfile::tempdir().unwrap();
682        file_to(
683            &temp_source,
684            "hidden.ipynb",
685            serde_json::json!({
686                "cells": [
687                    {
688                        "cell_type": "code",
689                        "source": [
690                            "code"
691                        ]
692                    },
693                    {
694                        "cell_type": "code",
695                        "source": [
696                            "# HIDDEN FILE"
697                        ]
698                    },
699                ]
700            })
701            .to_string(),
702        );
703        file_to(
704            &temp_source,
705            "notebook.ipynb",
706            serde_json::json!({
707                "cells": [
708                    {
709                        "cell_type": "other",
710                        "source": [
711                            "# BEGIN SOLUTION",
712                            "solution code",
713                            "more code",
714                            "# END SOLUTION",
715                        ]
716                    },
717                    {
718                        "cell_type": "code",
719                        "source": [
720                            "code"
721                        ]
722                    },
723                    {
724                        "cell_type": "code",
725                        "source": [
726                            "code",
727                            "# BEGIN SOLUTION",
728                            "solution code",
729                            "more code",
730                            "# END SOLUTION",
731                            "non-solution code",
732                        ]
733                    },
734                ],
735                "some other key": "some other value",
736            })
737            .to_string(),
738        );
739
740        let temp_target = tempfile::tempdir().unwrap();
741
742        prepare_stub(temp_source.path(), temp_target.path()).unwrap();
743
744        assert!(!temp_target.path().join("hidden.ipynb").exists());
745
746        let val: serde_json::Value = deserialize::json_from_reader(
747            file_util::open_file(temp_target.path().join("notebook.ipynb")).unwrap(),
748        )
749        .unwrap();
750        assert_eq!(
751            val,
752            serde_json::json!({
753                "cells": [
754                    {
755                        "cell_type": "other",
756                        "source": [
757                            "# BEGIN SOLUTION",
758                            "solution code",
759                            "more code",
760                            "# END SOLUTION",
761                        ]
762                    },
763                    {
764                        "cell_type": "code",
765                        "source": [
766                            "code"
767                        ]
768                    },
769                    {
770                        "cell_type": "code",
771                        "source": [
772                            "code",
773                            "non-solution code",
774                        ]
775                    },
776                ],
777                "some other key": "some other value",
778            })
779        );
780    }
781}