Skip to main content

headless_lms_server/domain/
exercise_repositories.rs

1use anyhow::Context;
2use blake3::Hash;
3use git2::{
4    CertificateCheckStatus, Cred, FetchOptions, RemoteCallbacks, Repository, build::RepoBuilder,
5};
6use headless_lms_base::config::ApplicationConfiguration;
7use headless_lms_models::{exercise_repositories, repository_exercises};
8use headless_lms_utils::{
9    file_store::{self, FileStore},
10    folder_checksum,
11};
12use secrecy::{ExposeSecret, SecretString};
13use sqlx::{Acquire, PgConnection};
14use std::{
15    collections::HashMap,
16    io::Cursor,
17    path::{Path, PathBuf},
18};
19use uuid::Uuid;
20use walkdir::{DirEntry, WalkDir};
21
/// An exercise archive that was stored in the file store while processing a repository.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StoredRepositoryExercise {
    /// URL for the stored archive, as returned by the file store's
    /// `get_direct_download_url` for the uploaded path.
    pub url: String,
}
25
26/// Processes an exercise repository, creating a repository exercise for each exercise in it.
27/// Each exercise is compressed and uploaded to file storage.
28pub async fn process(
29    conn: &mut PgConnection,
30    repository_id: Uuid,
31    url: &str,
32    public_key: Option<&str>,
33    deploy_key: Option<&SecretString>,
34    file_store: &dyn FileStore,
35    app_conf: &ApplicationConfiguration,
36) -> anyhow::Result<Vec<StoredRepositoryExercise>> {
37    let mut stored_files = vec![];
38    match process_inner(InnerArgs {
39        conn,
40        repository_id,
41        url,
42        public_key,
43        deploy_key,
44        file_store,
45        stored_files: &mut stored_files,
46        app_conf,
47    })
48    .await
49    {
50        Ok(res) => {
51            exercise_repositories::mark_success(conn, repository_id).await?;
52            Ok(res)
53        }
54        Err(err) => {
55            if !stored_files.is_empty() {
56                warn!(
57                    "Failed while creating new exercise repository, cleaning files that were uploaded"
58                );
59                for file in stored_files {
60                    if let Err(err) = file_store.delete(&file).await {
61                        error!("Failed to clean file {}: {err}", file.display());
62                    }
63                }
64            }
65            exercise_repositories::mark_failure(conn, repository_id, &err.to_string()).await?;
66            Err(err)
67        }
68    }
69}
70
/// Arguments for `process_inner`, bundled into a struct so they can be passed as one value.
struct InnerArgs<'a> {
    /// Connection on which `process_inner` opens its transaction.
    conn: &'a mut PgConnection,
    /// Id of the exercise repository being processed.
    repository_id: Uuid,
    /// URL the repository is cloned from.
    url: &'a str,
    /// Optional public key handed to `Cred::ssh_key_from_memory` together with `deploy_key`.
    public_key: Option<&'a str>,
    /// Optional secret key; credential callbacks are only installed when this is set.
    deploy_key: Option<&'a SecretString>,
    /// File store the exercise archives are uploaded to.
    file_store: &'a dyn FileStore,
    /// Out-parameter: `process_inner` pushes file store paths here and `process` deletes them
    /// if processing fails.
    stored_files: &'a mut Vec<PathBuf>,
    /// Application configuration, forwarded to the upload helpers.
    app_conf: &'a ApplicationConfiguration,
}
81
82// implements the logic for process so that we can conveniently handle all errors in process
83async fn process_inner(
84    InnerArgs {
85        conn,
86        repository_id,
87        url,
88        public_key,
89        deploy_key,
90        file_store,
91        stored_files,
92        app_conf,
93    }: InnerArgs<'_>,
94) -> anyhow::Result<Vec<StoredRepositoryExercise>> {
95    let mut tx = conn.begin().await?;
96
97    // clone repo to temp dir
98    let temp = tempfile::tempdir()?;
99    let mut fetch_opts = FetchOptions::new();
100    let mut remote_cbs = RemoteCallbacks::new();
101    if let Some(deploy_key) = deploy_key {
102        remote_cbs
103            .certificate_check(|_, _| Ok(CertificateCheckStatus::CertificateOk))
104            .credentials(|_, username, credential_type| {
105                if credential_type.is_ssh_memory() {
106                    Cred::ssh_key_from_memory(
107                        username.unwrap_or("git"),
108                        public_key,
109                        // Exposed only here, where the key is handed to libgit2 in memory.
110                        deploy_key.expose_secret(),
111                        None,
112                    )
113                } else {
114                    Err(git2::Error::from_str(
115                        "The git server does not support the SSH_MEMORY credential type",
116                    ))
117                }
118            });
119    }
120    fetch_opts.remote_callbacks(remote_cbs);
121    info!("Cloning {url} to {:?}", temp.path());
122    RepoBuilder::new()
123        .fetch_options(fetch_opts)
124        .clone(url, temp.path())?;
125    info!("Finished cloning {url} to {:?}", temp.path());
126
127    // create exercises in db and store them in file store
128    let found_exercises = find_exercise_directories(temp.path()).await?;
129    let mut repository_exercises = vec![];
130    // (part, name) => exercise
131    let existing_exercises =
132        repository_exercises::get_for_repository(&mut tx, repository_id).await?;
133
134    // we try both the path and the checksum to find existing exercises
135    // these are the only attributes found in the exercise repositories, so if both
136    // the path and checksum change there's no way to detect that it's supposed to be an updated old exercise
137    // rather than a new one
138    // this way we can accommodate both renaming/relocating exercises and changing them, though not at the same time...
139    let existing_exercises_path_map = existing_exercises
140        .iter()
141        .map(|ex| ((&ex.part, &ex.name), ex))
142        .collect::<HashMap<_, _>>();
143    let existing_exercises_checksum_map = existing_exercises
144        .iter()
145        .map(|ex| (ex.checksum.as_slice(), ex))
146        .collect::<HashMap<_, _>>();
147    for fe in &found_exercises {
148        // check if the exercise is new
149        match (
150            existing_exercises_path_map.get(&(&fe.part, &fe.name)),
151            existing_exercises_checksum_map.get(fe.checksum.as_bytes().as_slice()),
152        ) {
153            (Some(_), Some(_)) => {
154                // both the path and checksum are unchanged, no-op
155            }
156            (Some(existing_exercise_by_path), None) => {
157                // found exercise by path but the checksum has changed, exercise has been updated
158                let path = update_exercise(
159                    &mut tx,
160                    repository_id,
161                    existing_exercise_by_path.id,
162                    fe,
163                    file_store,
164                    app_conf,
165                )
166                .await?;
167                stored_files.push(path.clone());
168                let url = file_store.get_direct_download_url(&path).await?;
169                repository_exercises.push(StoredRepositoryExercise { url });
170                repository_exercises::update_checksum(
171                    &mut tx,
172                    existing_exercise_by_path.id,
173                    fe.checksum.as_bytes(),
174                )
175                .await?;
176                // todo: uploaded files get cleaned up on an error, which means that if the refreshing fails
177                // the exercise data will be missing entirely...
178            }
179            (None, Some(existing_exercise_by_checksum)) => {
180                // found exercise by checksum but the path has changed, update path
181                repository_exercises::update_part_and_name(
182                    &mut tx,
183                    existing_exercise_by_checksum.id,
184                    &fe.part,
185                    &fe.name,
186                )
187                .await?;
188            }
189            (None, None) => {
190                // new exercise
191                let new_exercise_id = uuid::Uuid::new_v4();
192                let path = create_and_upload_exercise(
193                    &mut tx,
194                    repository_id,
195                    new_exercise_id,
196                    fe,
197                    file_store,
198                    app_conf,
199                )
200                .await?;
201                stored_files.push(path.clone());
202                let url = file_store.get_direct_download_url(&path).await?;
203                repository_exercises.push(StoredRepositoryExercise { url });
204            }
205        }
206    }
207
208    tx.commit().await?;
209    Ok(repository_exercises)
210}
211
212/// Updates the given repository using the given url.
213/// Exercises with a known checksum but changed part or name are updated to reflect the new part or name.
214/// Exercises with a known part and name but changed checksum are updated in the file store and the checksum updated.
215/// Errors may leave some exercises updated and others not, since there's no mechanism for rolling back any file store updates.
216/// However, these inconsistencies will be fixed after a successful retry.
217pub async fn update(
218    conn: &mut PgConnection,
219    repository: Uuid,
220    url: &str,
221    file_store: &dyn FileStore,
222    app_conf: &ApplicationConfiguration,
223) -> anyhow::Result<()> {
224    let mut new_stored_files = vec![];
225    match update_inner(
226        conn,
227        repository,
228        url,
229        file_store,
230        &mut new_stored_files,
231        app_conf,
232    )
233    .await
234    {
235        Ok(res) => Ok(res),
236        Err(err) => {
237            if !new_stored_files.is_empty() {
238                debug!(
239                    "Failed while updating exercise repository, cleaning new exercises that were uploaded"
240                );
241                for file in new_stored_files {
242                    if let Err(err) = file_store.delete(&file).await {
243                        error!("Failed to clean file {}: {err}", file.display());
244                    }
245                }
246            }
247            Err(err)
248        }
249    }
250}
251
252async fn update_inner(
253    conn: &mut PgConnection,
254    repository: Uuid,
255    url: &str,
256    file_store: &dyn FileStore,
257    new_stored_files: &mut Vec<PathBuf>,
258    app_conf: &ApplicationConfiguration,
259) -> anyhow::Result<()> {
260    let mut tx = conn.begin().await?;
261
262    let temp = tempfile::tempdir()?;
263    Repository::clone(url, &temp)?;
264
265    let repository_exercises = find_exercise_directories(temp.path()).await?;
266    let current_exercises = repository_exercises::get_for_repository(&mut tx, repository).await?;
267
268    let mut by_name = HashMap::new();
269    let mut by_checksum = HashMap::new();
270    for ex in &current_exercises {
271        by_name.insert((&ex.part, &ex.name), ex);
272        by_checksum.insert(ex.checksum.as_slice(), ex);
273    }
274    for ex in repository_exercises {
275        if let Some(&current) = by_name.get(&(&ex.part, &ex.name)) {
276            // found known exercise by part and name
277            if current.checksum != ex.checksum.as_bytes() {
278                // checksum changed, update files and checksum
279                create_and_upload_exercise(
280                    &mut tx, repository, current.id, &ex, file_store, app_conf,
281                )
282                .await?;
283                repository_exercises::update_checksum(&mut tx, current.id, ex.checksum.as_bytes())
284                    .await?;
285            }
286        } else if let Some(&current) = by_checksum.get(ex.checksum.as_bytes().as_slice()) {
287            // found known exercise by checksum
288            if current.part != ex.part || current.name != ex.name {
289                // part and/or name changed
290                repository_exercises::update_part_and_name(&mut tx, current.id, &ex.part, &ex.name)
291                    .await?;
292            }
293        } else {
294            // unknown part/name and checksum, assume new exercise
295            let path = create_and_upload_exercise(
296                &mut tx,
297                repository,
298                Uuid::new_v4(),
299                &ex,
300                file_store,
301                app_conf,
302            )
303            .await?;
304            new_stored_files.push(path);
305        }
306    }
307
308    tx.commit().await?;
309    Ok(())
310}
311
312/// Marks the exercises and repository as deleted and removes the associated files from the file store.
313/// Only returns the last error if there are multiple errors when trying to remove the files.
314pub async fn delete(
315    conn: &mut PgConnection,
316    repository_id: Uuid,
317    file_store: &dyn FileStore,
318) -> anyhow::Result<()> {
319    let mut tx = conn.begin().await?;
320
321    let mut latest_error = None;
322    let exercises = repository_exercises::delete_for_repository(&mut tx, repository_id).await?;
323    exercise_repositories::delete(&mut tx, repository_id).await?;
324    for exercise in exercises {
325        let path = file_store::repository_exercise_path(repository_id, exercise);
326        if let Err(err) = file_store.delete(&path).await {
327            error!(
328                "Failed to delete file while deleting repository {}: {err}",
329                path.display()
330            );
331            latest_error = Some(err);
332        }
333    }
334
335    match latest_error {
336        Some(latest_error) => Err(latest_error.into()),
337        _ => {
338            tx.commit().await?;
339            Ok(())
340        }
341    }
342}
343
344async fn create_and_upload_exercise(
345    conn: &mut PgConnection,
346    repository: Uuid,
347    exercise_id: Uuid,
348    exercise: &FoundExercise,
349    file_store: &dyn FileStore,
350    app_conf: &ApplicationConfiguration,
351) -> anyhow::Result<PathBuf> {
352    // archive and compress
353    let cursor = Cursor::new(vec![]);
354    let mut tar = tar::Builder::new(cursor);
355    tar.append_dir_all(".", &exercise.path)?;
356    let mut tar = tar.into_inner()?;
357    // rewind cursor back to the beginning
358    tar.set_position(0);
359    let tar_zstd = zstd::encode_all(tar, 0)?;
360
361    // upload
362    let path = file_store::repository_exercise_path(repository, exercise_id);
363    file_store
364        .upload(&path, tar_zstd, "application/zstd")
365        .await?;
366    let url = file_store.get_download_url(&path, app_conf);
367
368    // create
369    repository_exercises::new(
370        conn,
371        exercise_id,
372        repository,
373        &exercise.part,
374        &exercise.name,
375        exercise.checksum.as_bytes(),
376        &url,
377    )
378    .await?;
379    Ok(path)
380}
381
382async fn update_exercise(
383    conn: &mut PgConnection,
384    repository: Uuid,
385    exercise_id: Uuid,
386    exercise: &FoundExercise,
387    file_store: &dyn FileStore,
388    app_conf: &ApplicationConfiguration,
389) -> anyhow::Result<PathBuf> {
390    // archive and compress
391    let cursor = Cursor::new(vec![]);
392    let mut tar = tar::Builder::new(cursor);
393    tar.append_dir_all(".", &exercise.path)?;
394    let mut tar = tar.into_inner()?;
395    // rewind cursor back to the beginning
396    tar.set_position(0);
397    let tar_zstd = zstd::encode_all(tar, 0)?;
398
399    // upload
400    let path = file_store::repository_exercise_path(repository, exercise_id);
401    file_store
402        .upload(&path, tar_zstd, "application/zstd")
403        .await?;
404    let url = file_store.get_download_url(&path, app_conf);
405
406    // create
407    repository_exercises::new(
408        conn,
409        exercise_id,
410        repository,
411        &exercise.part,
412        &exercise.name,
413        exercise.checksum.as_bytes(),
414        &url,
415    )
416    .await?;
417    Ok(path)
418}
419
/// An exercise directory found in a cloned repository by `find_exercise_directories`.
#[derive(Debug)]
struct FoundExercise {
    /// Name of the directory that contains the exercise directory, e.g. `part01`.
    part: String,
    /// Name of the exercise directory itself, e.g. `01_exercise`.
    name: String,
    /// Checksum of the exercise directory as computed by `folder_checksum::hash_folder`.
    checksum: Hash,
    /// Canonicalized path of the exercise directory.
    path: PathBuf,
}
427
428async fn find_exercise_directories(clone_path: &Path) -> anyhow::Result<Vec<FoundExercise>> {
429    info!("finding exercise directories in {}", clone_path.display());
430
431    let mut exercises = vec![];
432    // exercises in repositories are in subdirs like
433    // part01/01_exercise
434    // part01/02_exercise
435    // part02/01_exercise
436    for entry in WalkDir::new(clone_path)
437        .min_depth(2)
438        .max_depth(2)
439        .into_iter()
440        .filter_entry(|e| {
441            e.file_type().is_dir()
442                && e.file_name() != "private"
443                && !is_hidden_dir(e)
444                && !contains_tmcignore(e)
445                && !is_in_git_dir(e.path())
446        })
447    {
448        let entry = entry?;
449        let checksum = folder_checksum::hash_folder(entry.path()).await?;
450
451        let path = entry.into_path().canonicalize()?;
452        let part = path
453            .parent()
454            .ok_or_else(|| anyhow::anyhow!("Path should be in a subdirectory: {}", path.display()))?
455            .file_name()
456            .ok_or_else(|| {
457                anyhow::anyhow!("The parent file name cannot be missing: {}", path.display())
458            })?
459            .to_str()
460            .context("Invalid directory name in repository")?
461            .to_string();
462        let name = path
463            .file_name()
464            .ok_or_else(|| anyhow::anyhow!("Path should be a directory: {}", path.display()))?
465            .to_str()
466            .context("Invalid directory name in repository")?
467            .to_string();
468        exercises.push(FoundExercise {
469            part,
470            name,
471            checksum,
472            path,
473        });
474    }
475    Ok(exercises)
476}
477
478// Filter for hidden directories (directories with names starting with '.')
479fn is_hidden_dir(entry: &DirEntry) -> bool {
480    let skip = entry.metadata().map(|e| e.is_dir()).unwrap_or_default()
481        && entry
482            .file_name()
483            .to_str()
484            .map(|s| s.starts_with('.'))
485            .unwrap_or_default();
486    if skip {
487        debug!("is hidden dir: {}", entry.path().display());
488    }
489    skip
490}
491
492// Filter for .git directory
493fn is_in_git_dir(path: &Path) -> bool {
494    let skip = path.parent().map(|p| p.ends_with(".git")).unwrap_or(false);
495    if skip {
496        debug!("is in git dir: {}", path.display());
497    }
498    skip
499}
500
501fn contains_tmcignore(entry: &DirEntry) -> bool {
502    for entry in WalkDir::new(entry.path())
503        .max_depth(1)
504        .into_iter()
505        .filter_map(|e| e.ok())
506    {
507        let is_file = entry.metadata().map(|e| e.is_file()).unwrap_or_default();
508        if is_file && entry.file_name() == ".tmcignore" {
509            debug!("contains .tmcignore: {}", entry.path().display());
510            return true;
511        }
512    }
513    false
514}
515
#[cfg(test)]
mod test {
    use super::*;
    use std::{fs::Permissions, os::unix::prelude::PermissionsExt, str::FromStr};

    /// Three exercise directories spread over two parts are all found, with the expected
    /// part, name and checksum.
    #[tokio::test]
    async fn finds_exercise_dirs() {
        let repo = tempfile::tempdir().unwrap();
        let root = repo.path();
        let exercise_dirs = [
            "part01/01_exercise",
            "part01/02_exercise",
            "part02/01_exercise",
        ];

        for dir in exercise_dirs {
            std::fs::create_dir_all(root.join(dir)).unwrap();
            std::fs::write(root.join(dir).join("file"), "1234").unwrap();
        }

        // Make sure permissions are the same on all systems. Some systems have different default permissions in the temp folder.
        for dir in exercise_dirs {
            let file = root.join(dir).join("file");
            std::fs::set_permissions(file, Permissions::from_mode(0o644)).unwrap();
        }
        for dir in exercise_dirs {
            std::fs::set_permissions(root.join(dir), Permissions::from_mode(0o755)).unwrap();
        }
        std::fs::set_permissions(root, Permissions::from_mode(0o755)).unwrap();

        let mut found = find_exercise_directories(root).await.unwrap();
        found.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(found.len(), 3);

        let first = &found[0];
        assert_eq!(first.path, root.join("part01/01_exercise"));
        assert_eq!(first.part, "part01");
        assert_eq!(first.name, "01_exercise");
        let expected_checksum =
            Hash::from_str("3a01c5d9a407deec294c4ac561cdeea1a7507464193e06387083853e3ca71c3a")
                .unwrap();
        assert_eq!(first.checksum, expected_checksum);

        assert_eq!(found[1].name, "02_exercise");
        assert_eq!(found[2].name, "01_exercise");
    }

    /// Only paths directly inside a `.git` directory count as being in a git dir.
    #[test]
    fn filters_git() {
        let directly_inside = Path::new("something/.git/something");
        let nested_deeper = Path::new("something/.git/something/something");

        assert!(is_in_git_dir(directly_inside));
        assert!(!is_in_git_dir(nested_deeper));
    }
}