1use anyhow::Context;
2use blake3::Hash;
3use git2::{
4 CertificateCheckStatus, Cred, FetchOptions, RemoteCallbacks, Repository, build::RepoBuilder,
5};
6use headless_lms_models::{exercise_repositories, repository_exercises};
7use headless_lms_utils::{
8 ApplicationConfiguration,
9 file_store::{self, FileStore},
10 folder_checksum,
11};
12use sqlx::{Acquire, PgConnection};
13use std::{
14 collections::HashMap,
15 io::Cursor,
16 path::{Path, PathBuf},
17};
18use uuid::Uuid;
19use walkdir::{DirEntry, WalkDir};
20
/// A repository exercise whose packaged archive has been uploaded to file storage.
pub struct StoredRepositoryExercise {
    // Download URL for the stored exercise archive.
    pub url: String,
}
24
/// Clones the given exercise repository, uploads the exercises found in it,
/// and marks the repository row as succeeded or failed accordingly.
///
/// On failure, any files that were uploaded before the error are deleted
/// (best effort) so no orphaned archives are left in the file store, and the
/// error message is stored on the repository row before the error is returned.
pub async fn process(
    conn: &mut PgConnection,
    repository_id: Uuid,
    url: &str,
    public_key: Option<&str>,
    deploy_key: Option<&str>,
    file_store: &dyn FileStore,
    app_conf: &ApplicationConfiguration,
) -> anyhow::Result<Vec<StoredRepositoryExercise>> {
    // Collects the paths of every file uploaded by the inner call so they can
    // be cleaned up here if processing fails partway through.
    let mut stored_files = vec![];
    match process_inner(InnerArgs {
        conn,
        repository_id,
        url,
        public_key,
        deploy_key,
        file_store,
        stored_files: &mut stored_files,
        app_conf,
    })
    .await
    {
        Ok(res) => {
            exercise_repositories::mark_success(conn, repository_id).await?;
            Ok(res)
        }
        Err(err) => {
            // Best-effort cleanup: deletion failures are only logged so the
            // original error is still the one reported to the caller.
            if !stored_files.is_empty() {
                warn!(
                    "Failed while creating new exercise repository, cleaning files that were uploaded"
                );
                for file in stored_files {
                    if let Err(err) = file_store.delete(&file).await {
                        error!("Failed to clean file {}: {err}", file.display());
                    }
                }
            }
            // NOTE(review): only the top-level message is persisted here; the
            // anyhow context chain is dropped — confirm that is intentional.
            exercise_repositories::mark_failure(conn, repository_id, &err.to_string()).await?;
            Err(err)
        }
    }
}
69
/// Bundles the arguments of `process_inner` to keep its signature manageable.
struct InnerArgs<'a> {
    conn: &'a mut PgConnection,
    // Id of the exercise repository row being processed.
    repository_id: Uuid,
    // Git URL the repository is cloned from.
    url: &'a str,
    // Optional SSH public key paired with the deploy key below.
    public_key: Option<&'a str>,
    // Optional SSH private (deploy) key used to authenticate the clone.
    deploy_key: Option<&'a str>,
    file_store: &'a dyn FileStore,
    // Accumulates the storage paths of uploaded files so the caller can
    // delete them if processing fails.
    stored_files: &'a mut Vec<PathBuf>,
    app_conf: &'a ApplicationConfiguration,
}
80
/// Clones the repository, diffs the exercise directories found in it against
/// the exercises already in the database, and uploads/updates archives as
/// needed. All database changes happen in a single transaction.
async fn process_inner(
    InnerArgs {
        conn,
        repository_id,
        url,
        public_key,
        deploy_key,
        file_store,
        stored_files,
        app_conf,
    }: InnerArgs<'_>,
) -> anyhow::Result<Vec<StoredRepositoryExercise>> {
    let mut tx = conn.begin().await?;

    // The clone goes into a temporary directory that is removed on drop.
    let temp = tempfile::tempdir()?;
    let mut fetch_opts = FetchOptions::new();
    let mut remote_cbs = RemoteCallbacks::new();
    if let Some(deploy_key) = deploy_key {
        remote_cbs
            // NOTE(review): this accepts any host certificate when a deploy
            // key is configured — presumably intentional for private git
            // hosts, but confirm this is acceptable.
            .certificate_check(|_, _| Ok(CertificateCheckStatus::CertificateOk))
            .credentials(|_, username, credential_type| {
                if credential_type.is_ssh_memory() {
                    Cred::ssh_key_from_memory(
                        // "git" is the conventional SSH user for git servers.
                        username.unwrap_or("git"),
                        public_key,
                        deploy_key,
                        None,
                    )
                } else {
                    Err(git2::Error::from_str(
                        "The git server does not support the SSH_MEMORY credential type",
                    ))
                }
            });
    }
    fetch_opts.remote_callbacks(remote_cbs);
    info!("Cloning {url} to {:?}", temp.path());
    RepoBuilder::new()
        .fetch_options(fetch_opts)
        .clone(url, temp.path())?;
    info!("Finished cloning {url} to {:?}", temp.path());

    let found_exercises = find_exercise_directories(temp.path()).await?;
    let mut repository_exercises = vec![];
    let existing_exercises =
        repository_exercises::get_for_repository(&mut tx, repository_id).await?;

    // Index the existing exercises both by (part, name) and by checksum so
    // each found exercise can be classified as unchanged, modified, moved,
    // or brand new.
    let existing_exercises_path_map = existing_exercises
        .iter()
        .map(|ex| ((&ex.part, &ex.name), ex))
        .collect::<HashMap<_, _>>();
    let existing_exercises_checksum_map = existing_exercises
        .iter()
        .map(|ex| (ex.checksum.as_slice(), ex))
        .collect::<HashMap<_, _>>();
    for fe in &found_exercises {
        match (
            existing_exercises_path_map.get(&(&fe.part, &fe.name)),
            existing_exercises_checksum_map.get(fe.checksum.as_bytes().as_slice()),
        ) {
            (Some(_), Some(_)) => {
                // Same path and same contents: nothing to do.
            }
            (Some(existing_exercise_by_path), None) => {
                // Same path but changed contents: re-upload the archive and
                // store the new checksum.
                let path = update_exercise(
                    &mut tx,
                    repository_id,
                    existing_exercise_by_path.id,
                    fe,
                    file_store,
                    app_conf,
                )
                .await?;
                stored_files.push(path.clone());
                let url = file_store.get_direct_download_url(&path).await?;
                repository_exercises.push(StoredRepositoryExercise { url });
                repository_exercises::update_checksum(
                    &mut tx,
                    existing_exercise_by_path.id,
                    fe.checksum.as_bytes(),
                )
                .await?;
            }
            (None, Some(existing_exercise_by_checksum)) => {
                // Same contents at a different path: the exercise was moved
                // or renamed, so only update its part and name.
                repository_exercises::update_part_and_name(
                    &mut tx,
                    existing_exercise_by_checksum.id,
                    &fe.part,
                    &fe.name,
                )
                .await?;
            }
            (None, None) => {
                // Brand new exercise: upload the archive and create a row.
                let new_exercise_id = uuid::Uuid::new_v4();
                let path = create_and_upload_exercise(
                    &mut tx,
                    repository_id,
                    new_exercise_id,
                    fe,
                    file_store,
                    app_conf,
                )
                .await?;
                stored_files.push(path.clone());
                let url = file_store.get_direct_download_url(&path).await?;
                repository_exercises.push(StoredRepositoryExercise { url });
            }
        }
    }

    tx.commit().await?;
    Ok(repository_exercises)
}
209
210pub async fn update(
216 conn: &mut PgConnection,
217 repository: Uuid,
218 url: &str,
219 file_store: &dyn FileStore,
220 app_conf: &ApplicationConfiguration,
221) -> anyhow::Result<()> {
222 let mut new_stored_files = vec![];
223 match update_inner(
224 conn,
225 repository,
226 url,
227 file_store,
228 &mut new_stored_files,
229 app_conf,
230 )
231 .await
232 {
233 Ok(res) => Ok(res),
234 Err(err) => {
235 if !new_stored_files.is_empty() {
236 debug!(
237 "Failed while updating exercise repository, cleaning new exercises that were uploaded"
238 );
239 for file in new_stored_files {
240 if let Err(err) = file_store.delete(&file).await {
241 error!("Failed to clean file {}: {err}", file.display());
242 }
243 }
244 }
245 Err(err)
246 }
247 }
248}
249
250async fn update_inner(
251 conn: &mut PgConnection,
252 repository: Uuid,
253 url: &str,
254 file_store: &dyn FileStore,
255 new_stored_files: &mut Vec<PathBuf>,
256 app_conf: &ApplicationConfiguration,
257) -> anyhow::Result<()> {
258 let mut tx = conn.begin().await?;
259
260 let temp = tempfile::tempdir()?;
261 Repository::clone(url, &temp)?;
262
263 let repository_exercises = find_exercise_directories(temp.path()).await?;
264 let current_exercises = repository_exercises::get_for_repository(&mut tx, repository).await?;
265
266 let mut by_name = HashMap::new();
267 let mut by_checksum = HashMap::new();
268 for ex in ¤t_exercises {
269 by_name.insert((&ex.part, &ex.name), ex);
270 by_checksum.insert(ex.checksum.as_slice(), ex);
271 }
272 for ex in repository_exercises {
273 if let Some(¤t) = by_name.get(&(&ex.part, &ex.name)) {
274 if current.checksum != ex.checksum.as_bytes() {
276 create_and_upload_exercise(
278 &mut tx, repository, current.id, &ex, file_store, app_conf,
279 )
280 .await?;
281 repository_exercises::update_checksum(&mut tx, current.id, ex.checksum.as_bytes())
282 .await?;
283 }
284 } else if let Some(¤t) = by_checksum.get(ex.checksum.as_bytes().as_slice()) {
285 if current.part != ex.part || current.name != ex.name {
287 repository_exercises::update_part_and_name(&mut tx, current.id, &ex.part, &ex.name)
289 .await?;
290 }
291 } else {
292 let path = create_and_upload_exercise(
294 &mut tx,
295 repository,
296 Uuid::new_v4(),
297 &ex,
298 file_store,
299 app_conf,
300 )
301 .await?;
302 new_stored_files.push(path);
303 }
304 }
305
306 tx.commit().await?;
307 Ok(())
308}
309
310pub async fn delete(
313 conn: &mut PgConnection,
314 repository_id: Uuid,
315 file_store: &dyn FileStore,
316) -> anyhow::Result<()> {
317 let mut tx = conn.begin().await?;
318
319 let mut latest_error = None;
320 let exercises = repository_exercises::delete_for_repository(&mut tx, repository_id).await?;
321 exercise_repositories::delete(&mut tx, repository_id).await?;
322 for exercise in exercises {
323 let path = file_store::repository_exercise_path(repository_id, exercise);
324 if let Err(err) = file_store.delete(&path).await {
325 error!(
326 "Failed to delete file while deleting repository {}: {err}",
327 path.display()
328 );
329 latest_error = Some(err);
330 }
331 }
332
333 match latest_error {
334 Some(latest_error) => Err(latest_error.into()),
335 _ => {
336 tx.commit().await?;
337 Ok(())
338 }
339 }
340}
341
342async fn create_and_upload_exercise(
343 conn: &mut PgConnection,
344 repository: Uuid,
345 exercise_id: Uuid,
346 exercise: &FoundExercise,
347 file_store: &dyn FileStore,
348 app_conf: &ApplicationConfiguration,
349) -> anyhow::Result<PathBuf> {
350 let cursor = Cursor::new(vec![]);
352 let mut tar = tar::Builder::new(cursor);
353 tar.append_dir_all(".", &exercise.path)?;
354 let mut tar = tar.into_inner()?;
355 tar.set_position(0);
357 let tar_zstd = zstd::encode_all(tar, 0)?;
358
359 let path = file_store::repository_exercise_path(repository, exercise_id);
361 file_store
362 .upload(&path, tar_zstd, "application/zstd")
363 .await?;
364 let url = file_store.get_download_url(&path, app_conf);
365
366 repository_exercises::new(
368 conn,
369 exercise_id,
370 repository,
371 &exercise.part,
372 &exercise.name,
373 exercise.checksum.as_bytes(),
374 &url,
375 )
376 .await?;
377 Ok(path)
378}
379
380async fn update_exercise(
381 conn: &mut PgConnection,
382 repository: Uuid,
383 exercise_id: Uuid,
384 exercise: &FoundExercise,
385 file_store: &dyn FileStore,
386 app_conf: &ApplicationConfiguration,
387) -> anyhow::Result<PathBuf> {
388 let cursor = Cursor::new(vec![]);
390 let mut tar = tar::Builder::new(cursor);
391 tar.append_dir_all(".", &exercise.path)?;
392 let mut tar = tar.into_inner()?;
393 tar.set_position(0);
395 let tar_zstd = zstd::encode_all(tar, 0)?;
396
397 let path = file_store::repository_exercise_path(repository, exercise_id);
399 file_store
400 .upload(&path, tar_zstd, "application/zstd")
401 .await?;
402 let url = file_store.get_download_url(&path, app_conf);
403
404 repository_exercises::new(
406 conn,
407 exercise_id,
408 repository,
409 &exercise.part,
410 &exercise.name,
411 exercise.checksum.as_bytes(),
412 &url,
413 )
414 .await?;
415 Ok(path)
416}
417
/// An exercise directory discovered inside a cloned repository.
#[derive(Debug)]
struct FoundExercise {
    // Name of the parent directory (the "part"), e.g. "part01".
    part: String,
    // Name of the exercise directory itself, e.g. "01_exercise".
    name: String,
    // blake3 checksum of the directory contents, used to detect changes/moves.
    checksum: Hash,
    // Canonicalized path to the exercise directory in the temporary clone.
    path: PathBuf,
}
425
426async fn find_exercise_directories(clone_path: &Path) -> anyhow::Result<Vec<FoundExercise>> {
427 info!("finding exercise directories in {}", clone_path.display());
428
429 let mut exercises = vec![];
430 for entry in WalkDir::new(clone_path)
435 .min_depth(2)
436 .max_depth(2)
437 .into_iter()
438 .filter_entry(|e| {
439 e.file_type().is_dir()
440 && e.file_name() != "private"
441 && !is_hidden_dir(e)
442 && !contains_tmcignore(e)
443 && !is_in_git_dir(e.path())
444 })
445 {
446 let entry = entry?;
447 let checksum = folder_checksum::hash_folder(entry.path()).await?;
448
449 let path = entry.into_path().canonicalize()?;
450 let part = path
451 .parent()
452 .expect("Path should be in a subdirectory")
453 .file_name()
454 .expect("The parent file name cannot be missing")
455 .to_str()
456 .context("Invalid directory name in repository")?
457 .to_string();
458 let name = path
459 .file_name()
460 .expect("Path should be a file")
461 .to_str()
462 .context("Invalid directory name in repository")?
463 .to_string();
464 exercises.push(FoundExercise {
465 part,
466 name,
467 checksum,
468 path,
469 });
470 }
471 Ok(exercises)
472}
473
474fn is_hidden_dir(entry: &DirEntry) -> bool {
476 let skip = entry.metadata().map(|e| e.is_dir()).unwrap_or_default()
477 && entry
478 .file_name()
479 .to_str()
480 .map(|s| s.starts_with('.'))
481 .unwrap_or_default();
482 if skip {
483 debug!("is hidden dir: {}", entry.path().display());
484 }
485 skip
486}
487
488fn is_in_git_dir(path: &Path) -> bool {
490 let skip = path.parent().map(|p| p.ends_with(".git")).unwrap_or(false);
491 if skip {
492 debug!("is in git dir: {}", path.display());
493 }
494 skip
495}
496
497fn contains_tmcignore(entry: &DirEntry) -> bool {
498 for entry in WalkDir::new(entry.path())
499 .max_depth(1)
500 .into_iter()
501 .filter_map(|e| e.ok())
502 {
503 let is_file = entry.metadata().map(|e| e.is_file()).unwrap_or_default();
504 if is_file && entry.file_name() == ".tmcignore" {
505 debug!("contains .tmcignore: {}", entry.path().display());
506 return true;
507 }
508 }
509 false
510}
511
#[cfg(test)]
mod test {
    use super::*;
    use std::{fs::Permissions, os::unix::prelude::PermissionsExt, str::FromStr};

    // Builds a fake repository layout (two parts, three exercises) and checks
    // that find_exercise_directories discovers all of them with the expected
    // part/name split and a deterministic checksum.
    #[tokio::test]
    async fn finds_exercise_dirs() {
        let repo = tempfile::tempdir().unwrap();

        std::fs::create_dir_all(repo.path().join("part01/01_exercise")).unwrap();
        std::fs::write(repo.path().join("part01/01_exercise/file"), "1234").unwrap();

        std::fs::create_dir_all(repo.path().join("part01/02_exercise")).unwrap();
        std::fs::write(repo.path().join("part01/02_exercise/file"), "1234").unwrap();

        std::fs::create_dir_all(repo.path().join("part02/01_exercise")).unwrap();
        std::fs::write(repo.path().join("part02/01_exercise/file"), "1234").unwrap();

        let file_paths = vec![
            repo.path().join("part01/01_exercise/file"),
            repo.path().join("part01/02_exercise/file"),
            repo.path().join("part02/01_exercise/file"),
        ];
        let folder_paths = vec![
            repo.path().join("part01/01_exercise"),
            repo.path().join("part01/02_exercise"),
            repo.path().join("part02/01_exercise"),
            repo.path().to_path_buf(),
        ];
        // Normalize permissions so the hardcoded checksum below is stable
        // across machines/umasks — presumably the folder hash includes
        // permission bits (the test fails otherwise); confirm against
        // folder_checksum's implementation.
        for path in file_paths {
            std::fs::set_permissions(path, Permissions::from_mode(0o644)).unwrap();
        }
        for path in folder_paths {
            std::fs::set_permissions(path, Permissions::from_mode(0o755)).unwrap();
        }

        let mut paths = find_exercise_directories(repo.path()).await.unwrap();
        // Sort for a deterministic order before asserting by index.
        paths.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(paths.len(), 3);

        assert_eq!(&paths[0].path, &repo.path().join("part01/01_exercise"));
        assert_eq!(&paths[0].part, "part01");
        assert_eq!(&paths[0].name, "01_exercise");
        assert_eq!(
            paths[0].checksum,
            Hash::from_str("3a01c5d9a407deec294c4ac561cdeea1a7507464193e06387083853e3ca71c3a")
                .unwrap()
        );

        assert_eq!(&paths[1].name, "02_exercise");
        assert_eq!(&paths[2].name, "01_exercise");
    }

    // is_in_git_dir only inspects the immediate parent, so a path nested
    // deeper inside .git is (intentionally?) not flagged.
    #[test]
    fn filters_git() {
        assert!(is_in_git_dir(Path::new("something/.git/something")));
        assert!(!is_in_git_dir(Path::new(
            "something/.git/something/something"
        )));
    }
}
573}