headless_lms_chatbot/
citations.rs1use std::path::PathBuf;
2
3use secrecy::SecretString;
4
5use crate::{llm_utils::build_llm_headers, prelude::*};
6
7use headless_lms_models::chatbot_conversation_messages_citations::{
8 self, ChatbotConversationMessageCitation,
9};
10use headless_lms_utils::strings::truncate_utf8_at_boundary;
11use headless_lms_utils::url_encoding::url_decode;
12use reqwest::Response;
13use serde::{Deserialize, Serialize};
14use tracing::{error, instrument, trace};
15use url::Url;
16
17#[derive(Serialize, Deserialize, Debug, Clone)]
18pub struct CourseMaterialDocument {
19 pub chunk_id: String,
20 pub chunk: String,
21 pub title: String,
22 pub url: String,
23 pub filepath: String,
24}
25
26impl CourseMaterialDocument {
27 pub fn to_chatbot_conversation_message_citation(
30 &self,
31 conversation_message_id: Uuid,
32 conversation_id: Uuid,
33 citation_number: i32,
34 ) -> ChatbotResult<(ChatbotConversationMessageCitation, Option<Uuid>)> {
35 let content = if self.chunk.len() < 255 {
37 self.chunk.clone()
38 } else {
39 truncate_utf8_at_boundary(&self.chunk, 255).to_string()
40 };
41
42 let decoded_title = url_decode(&self.title)?;
46 let decoded_url = url_decode(&self.url)?;
47
48 let mut page_path = PathBuf::from(&self.filepath);
50 page_path.set_extension("");
51 let page_id_str = page_path.file_name();
52 let page_id =
53 page_id_str.and_then(|id_str| Uuid::parse_str(id_str.to_string_lossy().as_ref()).ok());
54
55 Ok((
56 ChatbotConversationMessageCitation {
57 conversation_message_id,
58 conversation_id,
59 title: decoded_title,
60 content,
61 document_url: decoded_url,
62 citation_number,
63 ..Default::default()
64 },
65 page_id,
66 ))
67 }
68}
69
70pub async fn chatbot_cited_documents_to_citations(
73 conn: &mut PgConnection,
74 test_chatbot: bool,
75 mut document_urls: Vec<Url>,
76 api_key: &SecretString,
77 conversation_message_id: Uuid,
78 conversation_id: Uuid,
79) -> anyhow::Result<Vec<ChatbotConversationMessageCitation>> {
80 let mut documents: Vec<(CourseMaterialDocument, i32)> = vec![];
81 for (idx, url) in document_urls.iter_mut().enumerate() {
82 let document = get_course_material_document(url, api_key).await?;
83 let citation_number = (idx + 1) as i32;
84 documents.push((document, citation_number));
85 }
86 let res = save_documents(
87 conn,
88 test_chatbot,
89 documents,
90 conversation_message_id,
91 conversation_id,
92 )
93 .await?;
94
95 Ok(res)
96}
97
98async fn get_course_material_document(
100 endpoint: &mut Url,
101 api_key: &SecretString,
102) -> anyhow::Result<CourseMaterialDocument> {
103 endpoint.set_query(Some(
104 "api-version=2024-07-01&$select=chunk_id,parent_id,chunk,title,url,filepath,course_id",
105 ));
106 let headers = build_llm_headers(api_key)?;
107
108 let response = REQWEST_CLIENT
109 .get(endpoint.clone())
110 .headers(headers)
111 .send()
112 .await?;
113
114 process_course_material_document_response(response).await
115}
116
117#[instrument(skip(response), fields(status = %response.status()))]
118async fn process_course_material_document_response(
119 response: Response,
120) -> anyhow::Result<CourseMaterialDocument> {
121 if !response.status().is_success() {
122 let status = response.status();
123 let error_text = response.text().await?;
124 error!(
125 status = %status,
126 error = %error_text,
127 "Error fetching document from search index."
128 );
129 return Err(anyhow::anyhow!(
130 "Error fetching document from search index: Status: {}. Error: {}",
131 status,
132 error_text
133 ));
134 }
135
136 trace!("Processing successful LLM response");
137 let document: CourseMaterialDocument = response.json().await?;
139
140 Ok(document)
141}
142
143async fn save_documents(
145 conn: &mut PgConnection,
146 test_chatbot: bool,
147 documents_with_citation_numbers: Vec<(CourseMaterialDocument, i32)>,
148 conversation_message_id: Uuid,
149 conversation_id: Uuid,
150) -> anyhow::Result<Vec<ChatbotConversationMessageCitation>> {
151 let (citations, page_ids): (Vec<ChatbotConversationMessageCitation>, Vec<Option<Uuid>>) =
152 documents_with_citation_numbers
153 .iter()
154 .map(|(d, citation_number)| {
155 d.to_chatbot_conversation_message_citation(
156 conversation_message_id,
157 conversation_id,
158 citation_number.to_owned(),
159 )
160 })
161 .collect::<ChatbotResult<Vec<(ChatbotConversationMessageCitation, Option<Uuid>)>>>()?
162 .into_iter()
163 .unzip();
164 if test_chatbot {
165 return save_documents_mock(conn, citations).await;
166 };
167 let res =
168 chatbot_conversation_messages_citations::insert_batch(conn, citations, page_ids).await?;
169
170 Ok(res)
171}
172
173async fn save_documents_mock(
174 conn: &mut PgConnection,
175 citations: Vec<ChatbotConversationMessageCitation>,
176) -> anyhow::Result<Vec<ChatbotConversationMessageCitation>> {
177 let mut res = vec![];
178 for input in citations {
179 let a = chatbot_conversation_messages_citations::insert(conn, input).await?;
180 res.push(a)
181 }
182 Ok(res)
183}