// headless_lms_server/domain/exercise_repositories.rs

1use anyhow::Context;
2use blake3::Hash;
3use git2::{
4    CertificateCheckStatus, Cred, FetchOptions, RemoteCallbacks, Repository, build::RepoBuilder,
5};
6use headless_lms_models::{exercise_repositories, repository_exercises};
7use headless_lms_utils::{
8    ApplicationConfiguration,
9    file_store::{self, FileStore},
10    folder_checksum,
11};
12use sqlx::{Acquire, PgConnection};
13use std::{
14    collections::HashMap,
15    io::Cursor,
16    path::{Path, PathBuf},
17};
18use uuid::Uuid;
19use walkdir::{DirEntry, WalkDir};
20
/// A repository exercise whose compressed archive has been uploaded to file storage.
pub struct StoredRepositoryExercise {
    // Download URL for the stored exercise archive.
    pub url: String,
}
24
25/// Processes an exercise repository, creating a repository exercise for each exercise in it.
26/// Each exercise is compressed and uploaded to file storage.
27pub async fn process(
28    conn: &mut PgConnection,
29    repository_id: Uuid,
30    url: &str,
31    public_key: Option<&str>,
32    deploy_key: Option<&str>,
33    file_store: &dyn FileStore,
34    app_conf: &ApplicationConfiguration,
35) -> anyhow::Result<Vec<StoredRepositoryExercise>> {
36    let mut stored_files = vec![];
37    match process_inner(InnerArgs {
38        conn,
39        repository_id,
40        url,
41        public_key,
42        deploy_key,
43        file_store,
44        stored_files: &mut stored_files,
45        app_conf,
46    })
47    .await
48    {
49        Ok(res) => {
50            exercise_repositories::mark_success(conn, repository_id).await?;
51            Ok(res)
52        }
53        Err(err) => {
54            if !stored_files.is_empty() {
55                warn!(
56                    "Failed while creating new exercise repository, cleaning files that were uploaded"
57                );
58                for file in stored_files {
59                    if let Err(err) = file_store.delete(&file).await {
60                        error!("Failed to clean file {}: {err}", file.display());
61                    }
62                }
63            }
64            exercise_repositories::mark_failure(conn, repository_id, &err.to_string()).await?;
65            Err(err)
66        }
67    }
68}
69
/// Bundles the arguments of [`process_inner`] so the call site in `process` stays manageable.
struct InnerArgs<'a> {
    conn: &'a mut PgConnection,
    // The exercise repository being processed.
    repository_id: Uuid,
    // Git URL to clone the repository from.
    url: &'a str,
    // Optional SSH public key, passed alongside the deploy key when cloning.
    public_key: Option<&'a str>,
    // Optional SSH private (deploy) key; when set, the clone authenticates over SSH.
    deploy_key: Option<&'a str>,
    file_store: &'a dyn FileStore,
    // Output parameter: paths of files uploaded so far, so the caller can clean them up on failure.
    stored_files: &'a mut Vec<PathBuf>,
    app_conf: &'a ApplicationConfiguration,
}
80
// implements the logic for process so that we can conveniently handle all errors in process
async fn process_inner(
    InnerArgs {
        conn,
        repository_id,
        url,
        public_key,
        deploy_key,
        file_store,
        stored_files,
        app_conf,
    }: InnerArgs<'_>,
) -> anyhow::Result<Vec<StoredRepositoryExercise>> {
    // All database writes happen inside one transaction so a failure rolls them back;
    // file uploads cannot be rolled back, so their paths are pushed to `stored_files`
    // for the caller to clean up on error.
    let mut tx = conn.begin().await?;

    // clone repo to temp dir
    let temp = tempfile::tempdir()?;
    let mut fetch_opts = FetchOptions::new();
    let mut remote_cbs = RemoteCallbacks::new();
    if let Some(deploy_key) = deploy_key {
        // Authenticate using the in-memory deploy key.
        // NOTE(review): the certificate check accepts any certificate, which disables host
        // verification for this clone — confirm this is intentional.
        remote_cbs
            .certificate_check(|_, _| Ok(CertificateCheckStatus::CertificateOk))
            .credentials(|_, username, credential_type| {
                if credential_type.is_ssh_memory() {
                    Cred::ssh_key_from_memory(
                        // fall back to the conventional "git" user when none is supplied
                        username.unwrap_or("git"),
                        public_key,
                        deploy_key,
                        None,
                    )
                } else {
                    Err(git2::Error::from_str(
                        "The git server does not support the SSH_MEMORY credential type",
                    ))
                }
            });
    }
    fetch_opts.remote_callbacks(remote_cbs);
    info!("Cloning {url} to {:?}", temp.path());
    RepoBuilder::new()
        .fetch_options(fetch_opts)
        .clone(url, temp.path())?;
    info!("Finished cloning {url} to {:?}", temp.path());

    // create exercises in db and store them in file store
    let found_exercises = find_exercise_directories(temp.path()).await?;
    let mut repository_exercises = vec![];
    let existing_exercises =
        repository_exercises::get_for_repository(&mut tx, repository_id).await?;

    // we try both the path and the checksum to find existing exercises
    // these are the only attributes found in the exercise repositories, so if both
    // the path and checksum change there's no way to detect that it's supposed to be an updated old exercise
    // rather than a new one
    // this way we can accommodate both renaming/relocating exercises and changing them, though not at the same time...
    // (part, name) => exercise
    let existing_exercises_path_map = existing_exercises
        .iter()
        .map(|ex| ((&ex.part, &ex.name), ex))
        .collect::<HashMap<_, _>>();
    // checksum bytes => exercise
    let existing_exercises_checksum_map = existing_exercises
        .iter()
        .map(|ex| (ex.checksum.as_slice(), ex))
        .collect::<HashMap<_, _>>();
    for fe in &found_exercises {
        // check if the exercise is new
        match (
            existing_exercises_path_map.get(&(&fe.part, &fe.name)),
            existing_exercises_checksum_map.get(fe.checksum.as_bytes().as_slice()),
        ) {
            (Some(_), Some(_)) => {
                // both the path and checksum are unchanged, no-op
            }
            (Some(existing_exercise_by_path), None) => {
                // found exercise by path but the checksum has changed, exercise has been updated
                let path = update_exercise(
                    &mut tx,
                    repository_id,
                    existing_exercise_by_path.id,
                    fe,
                    file_store,
                    app_conf,
                )
                .await?;
                stored_files.push(path.clone());
                let url = file_store.get_direct_download_url(&path).await?;
                repository_exercises.push(StoredRepositoryExercise { url });
                repository_exercises::update_checksum(
                    &mut tx,
                    existing_exercise_by_path.id,
                    fe.checksum.as_bytes(),
                )
                .await?;
                // todo: uploaded files get cleaned up on an error, which means that if the refreshing fails
                // the exercise data will be missing entirely...
            }
            (None, Some(existing_exercise_by_checksum)) => {
                // found exercise by checksum but the path has changed, update path
                repository_exercises::update_part_and_name(
                    &mut tx,
                    existing_exercise_by_checksum.id,
                    &fe.part,
                    &fe.name,
                )
                .await?;
            }
            (None, None) => {
                // new exercise
                let new_exercise_id = uuid::Uuid::new_v4();
                let path = create_and_upload_exercise(
                    &mut tx,
                    repository_id,
                    new_exercise_id,
                    fe,
                    file_store,
                    app_conf,
                )
                .await?;
                stored_files.push(path.clone());
                let url = file_store.get_direct_download_url(&path).await?;
                repository_exercises.push(StoredRepositoryExercise { url });
            }
        }
    }

    tx.commit().await?;
    Ok(repository_exercises)
}
209
210/// Updates the given repository using the given url.
211/// Exercises with a known checksum but changed part or name are updated to reflect the new part or name.
212/// Exercises with a known part and name but changed checksum are updated in the file store and the checksum updated.
213/// Errors may leave some exercises updated and others not, since there's no mechanism for rolling back any file store updates.
214/// However, these inconsistencies will be fixed after a successful retry.
215pub async fn update(
216    conn: &mut PgConnection,
217    repository: Uuid,
218    url: &str,
219    file_store: &dyn FileStore,
220    app_conf: &ApplicationConfiguration,
221) -> anyhow::Result<()> {
222    let mut new_stored_files = vec![];
223    match update_inner(
224        conn,
225        repository,
226        url,
227        file_store,
228        &mut new_stored_files,
229        app_conf,
230    )
231    .await
232    {
233        Ok(res) => Ok(res),
234        Err(err) => {
235            if !new_stored_files.is_empty() {
236                debug!(
237                    "Failed while updating exercise repository, cleaning new exercises that were uploaded"
238                );
239                for file in new_stored_files {
240                    if let Err(err) = file_store.delete(&file).await {
241                        error!("Failed to clean file {}: {err}", file.display());
242                    }
243                }
244            }
245            Err(err)
246        }
247    }
248}
249
// implements the logic for `update`; archives uploaded for brand-new exercises are recorded
// in `new_stored_files` so the caller can clean them up if the update fails partway through
async fn update_inner(
    conn: &mut PgConnection,
    repository: Uuid,
    url: &str,
    file_store: &dyn FileStore,
    new_stored_files: &mut Vec<PathBuf>,
    app_conf: &ApplicationConfiguration,
) -> anyhow::Result<()> {
    let mut tx = conn.begin().await?;

    // NOTE(review): unlike `process`, this clone sets up no deploy-key credentials,
    // so repositories that require SSH authentication presumably cannot be updated — confirm.
    let temp = tempfile::tempdir()?;
    Repository::clone(url, &temp)?;

    let repository_exercises = find_exercise_directories(temp.path()).await?;
    let current_exercises = repository_exercises::get_for_repository(&mut tx, repository).await?;

    // index current exercises both by (part, name) and by checksum, mirroring `process_inner`
    let mut by_name = HashMap::new();
    let mut by_checksum = HashMap::new();
    for ex in &current_exercises {
        by_name.insert((&ex.part, &ex.name), ex);
        by_checksum.insert(ex.checksum.as_slice(), ex);
    }
    for ex in repository_exercises {
        if let Some(&current) = by_name.get(&(&ex.part, &ex.name)) {
            // found known exercise by part and name
            if current.checksum != ex.checksum.as_bytes() {
                // checksum changed, update files and checksum
                // (the upload overwrites the existing archive at the same storage path; the path
                // is not added to `new_stored_files` — cleanup on error would otherwise delete
                // the exercise's only stored archive)
                create_and_upload_exercise(
                    &mut tx, repository, current.id, &ex, file_store, app_conf,
                )
                .await?;
                repository_exercises::update_checksum(&mut tx, current.id, ex.checksum.as_bytes())
                    .await?;
            }
        } else if let Some(&current) = by_checksum.get(ex.checksum.as_bytes().as_slice()) {
            // found known exercise by checksum
            if current.part != ex.part || current.name != ex.name {
                // part and/or name changed
                repository_exercises::update_part_and_name(&mut tx, current.id, &ex.part, &ex.name)
                    .await?;
            }
        } else {
            // unknown part/name and checksum, assume new exercise
            let path = create_and_upload_exercise(
                &mut tx,
                repository,
                Uuid::new_v4(),
                &ex,
                file_store,
                app_conf,
            )
            .await?;
            new_stored_files.push(path);
        }
    }

    tx.commit().await?;
    Ok(())
}
309
310/// Marks the exercises and repository as deleted and removes the associated files from the file store.
311/// Only returns the last error if there are multiple errors when trying to remove the files.
312pub async fn delete(
313    conn: &mut PgConnection,
314    repository_id: Uuid,
315    file_store: &dyn FileStore,
316) -> anyhow::Result<()> {
317    let mut tx = conn.begin().await?;
318
319    let mut latest_error = None;
320    let exercises = repository_exercises::delete_for_repository(&mut tx, repository_id).await?;
321    exercise_repositories::delete(&mut tx, repository_id).await?;
322    for exercise in exercises {
323        let path = file_store::repository_exercise_path(repository_id, exercise);
324        if let Err(err) = file_store.delete(&path).await {
325            error!(
326                "Failed to delete file while deleting repository {}: {err}",
327                path.display()
328            );
329            latest_error = Some(err);
330        }
331    }
332
333    match latest_error {
334        Some(latest_error) => Err(latest_error.into()),
335        _ => {
336            tx.commit().await?;
337            Ok(())
338        }
339    }
340}
341
342async fn create_and_upload_exercise(
343    conn: &mut PgConnection,
344    repository: Uuid,
345    exercise_id: Uuid,
346    exercise: &FoundExercise,
347    file_store: &dyn FileStore,
348    app_conf: &ApplicationConfiguration,
349) -> anyhow::Result<PathBuf> {
350    // archive and compress
351    let cursor = Cursor::new(vec![]);
352    let mut tar = tar::Builder::new(cursor);
353    tar.append_dir_all(".", &exercise.path)?;
354    let mut tar = tar.into_inner()?;
355    // rewind cursor back to the beginning
356    tar.set_position(0);
357    let tar_zstd = zstd::encode_all(tar, 0)?;
358
359    // upload
360    let path = file_store::repository_exercise_path(repository, exercise_id);
361    file_store
362        .upload(&path, tar_zstd, "application/zstd")
363        .await?;
364    let url = file_store.get_download_url(&path, app_conf);
365
366    // create
367    repository_exercises::new(
368        conn,
369        exercise_id,
370        repository,
371        &exercise.part,
372        &exercise.name,
373        exercise.checksum.as_bytes(),
374        &url,
375    )
376    .await?;
377    Ok(path)
378}
379
380async fn update_exercise(
381    conn: &mut PgConnection,
382    repository: Uuid,
383    exercise_id: Uuid,
384    exercise: &FoundExercise,
385    file_store: &dyn FileStore,
386    app_conf: &ApplicationConfiguration,
387) -> anyhow::Result<PathBuf> {
388    // archive and compress
389    let cursor = Cursor::new(vec![]);
390    let mut tar = tar::Builder::new(cursor);
391    tar.append_dir_all(".", &exercise.path)?;
392    let mut tar = tar.into_inner()?;
393    // rewind cursor back to the beginning
394    tar.set_position(0);
395    let tar_zstd = zstd::encode_all(tar, 0)?;
396
397    // upload
398    let path = file_store::repository_exercise_path(repository, exercise_id);
399    file_store
400        .upload(&path, tar_zstd, "application/zstd")
401        .await?;
402    let url = file_store.get_download_url(&path, app_conf);
403
404    // create
405    repository_exercises::new(
406        conn,
407        exercise_id,
408        repository,
409        &exercise.part,
410        &exercise.name,
411        exercise.checksum.as_bytes(),
412        &url,
413    )
414    .await?;
415    Ok(path)
416}
417
/// An exercise directory discovered in a cloned exercise repository.
#[derive(Debug)]
struct FoundExercise {
    // Name of the part directory the exercise is in, e.g. "part01".
    part: String,
    // Name of the exercise directory itself, e.g. "01_exercise".
    name: String,
    // blake3 hash of the exercise directory contents.
    checksum: Hash,
    // Canonicalized path to the exercise directory in the temporary clone.
    path: PathBuf,
}
425
426async fn find_exercise_directories(clone_path: &Path) -> anyhow::Result<Vec<FoundExercise>> {
427    info!("finding exercise directories in {}", clone_path.display());
428
429    let mut exercises = vec![];
430    // exercises in repositories are in subdirs like
431    // part01/01_exercise
432    // part01/02_exercise
433    // part02/01_exercise
434    for entry in WalkDir::new(clone_path)
435        .min_depth(2)
436        .max_depth(2)
437        .into_iter()
438        .filter_entry(|e| {
439            e.file_type().is_dir()
440                && e.file_name() != "private"
441                && !is_hidden_dir(e)
442                && !contains_tmcignore(e)
443                && !is_in_git_dir(e.path())
444        })
445    {
446        let entry = entry?;
447        let checksum = folder_checksum::hash_folder(entry.path()).await?;
448
449        let path = entry.into_path().canonicalize()?;
450        let part = path
451            .parent()
452            .expect("Path should be in a subdirectory")
453            .file_name()
454            .expect("The parent file name cannot be missing")
455            .to_str()
456            .context("Invalid directory name in repository")?
457            .to_string();
458        let name = path
459            .file_name()
460            .expect("Path should be a file")
461            .to_str()
462            .context("Invalid directory name in repository")?
463            .to_string();
464        exercises.push(FoundExercise {
465            part,
466            name,
467            checksum,
468            path,
469        });
470    }
471    Ok(exercises)
472}
473
474// Filter for hidden directories (directories with names starting with '.')
475fn is_hidden_dir(entry: &DirEntry) -> bool {
476    let skip = entry.metadata().map(|e| e.is_dir()).unwrap_or_default()
477        && entry
478            .file_name()
479            .to_str()
480            .map(|s| s.starts_with('.'))
481            .unwrap_or_default();
482    if skip {
483        debug!("is hidden dir: {}", entry.path().display());
484    }
485    skip
486}
487
488// Filter for .git directory
489fn is_in_git_dir(path: &Path) -> bool {
490    let skip = path.parent().map(|p| p.ends_with(".git")).unwrap_or(false);
491    if skip {
492        debug!("is in git dir: {}", path.display());
493    }
494    skip
495}
496
497fn contains_tmcignore(entry: &DirEntry) -> bool {
498    for entry in WalkDir::new(entry.path())
499        .max_depth(1)
500        .into_iter()
501        .filter_map(|e| e.ok())
502    {
503        let is_file = entry.metadata().map(|e| e.is_file()).unwrap_or_default();
504        if is_file && entry.file_name() == ".tmcignore" {
505            debug!("contains .tmcignore: {}", entry.path().display());
506            return true;
507        }
508    }
509    false
510}
511
512#[cfg(test)]
513mod test {
514    use super::*;
515    use std::{fs::Permissions, os::unix::prelude::PermissionsExt, str::FromStr};
516
    // Verifies that `find_exercise_directories` finds every part/exercise directory
    // and produces a stable checksum for known contents and permissions.
    #[tokio::test]
    async fn finds_exercise_dirs() {
        let repo = tempfile::tempdir().unwrap();

        std::fs::create_dir_all(repo.path().join("part01/01_exercise")).unwrap();
        std::fs::write(repo.path().join("part01/01_exercise/file"), "1234").unwrap();

        std::fs::create_dir_all(repo.path().join("part01/02_exercise")).unwrap();
        std::fs::write(repo.path().join("part01/02_exercise/file"), "1234").unwrap();

        std::fs::create_dir_all(repo.path().join("part02/01_exercise")).unwrap();
        std::fs::write(repo.path().join("part02/01_exercise/file"), "1234").unwrap();

        // Make sure permissions are the same on all systems. Some systems have different default permissions in the temp folder.
        let file_paths = vec![
            repo.path().join("part01/01_exercise/file"),
            repo.path().join("part01/02_exercise/file"),
            repo.path().join("part02/01_exercise/file"),
        ];
        let folder_paths = vec![
            repo.path().join("part01/01_exercise"),
            repo.path().join("part01/02_exercise"),
            repo.path().join("part02/01_exercise"),
            repo.path().to_path_buf(),
        ];
        for path in file_paths {
            std::fs::set_permissions(path, Permissions::from_mode(0o644)).unwrap();
        }
        for path in folder_paths {
            std::fs::set_permissions(path, Permissions::from_mode(0o755)).unwrap();
        }

        let mut paths = find_exercise_directories(repo.path()).await.unwrap();
        // sort for a deterministic order before asserting on indices
        paths.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(paths.len(), 3);

        assert_eq!(&paths[0].path, &repo.path().join("part01/01_exercise"));
        assert_eq!(&paths[0].part, "part01");
        assert_eq!(&paths[0].name, "01_exercise");
        // expected checksum for a directory containing a single file "file" with contents "1234"
        assert_eq!(
            paths[0].checksum,
            Hash::from_str("3a01c5d9a407deec294c4ac561cdeea1a7507464193e06387083853e3ca71c3a")
                .unwrap()
        );

        assert_eq!(&paths[1].name, "02_exercise");
        assert_eq!(&paths[2].name, "01_exercise");
    }
565
    // Paths directly inside a `.git` directory are filtered; deeper descendants are not,
    // because their immediate parent is no longer `.git` itself.
    #[test]
    fn filters_git() {
        assert!(is_in_git_dir(Path::new("something/.git/something")));
        assert!(!is_in_git_dir(Path::new(
            "something/.git/something/something"
        )));
    }
573}