Skip to main content

// headless_lms_chatbot/chatbot_tools/provider_tools/azure_ai_search.rs

1use crate::{
2    azure_chatbot::CONTENT_FIELD_SEPARATOR, prelude::ChatbotResult, search_filter::SearchFilter,
3};
4use headless_lms_base::config::ApplicationConfiguration;
5use serde::{Deserialize, Serialize};
6use url::Url;
7use uuid::Uuid;
8
9#[derive(Serialize, Deserialize, Debug, Clone)]
10pub struct AzureAISearchToolDefinition {
11    #[serde(rename = "type")]
12    pub data_type: String,
13    pub azure_ai_search: AzureAISearch,
14}
15
16#[derive(Serialize, Deserialize, Debug, Clone)]
17pub struct AzureAISearch {
18    pub indexes: Vec<SearchIndex>,
19}
20
21#[derive(Serialize, Deserialize, Debug, Clone)]
22pub struct SearchIndex {
23    pub project_connection_id: String,
24    pub index_name: String,
25    pub query_type: String,
26    pub top_k: i32,
27    pub embedding_dependency: EmbeddingDependency,
28    pub in_scope: bool,
29    pub strictness: i32,
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub filter: Option<String>,
32    pub fields_mapping: FieldsMapping,
33    pub semantic_configuration: String,
34}
35
36#[derive(Serialize, Deserialize, Debug, Clone)]
37pub struct FieldsMapping {
38    pub content_fields_separator: String,
39    pub content_fields: Vec<String>,
40    pub filepath_field: String,
41    pub title_field: String,
42    pub url_field: String,
43    pub vector_fields: Vec<String>,
44}
45
46#[derive(Serialize, Deserialize, Debug, Clone)]
47pub struct EmbeddingDependency {
48    #[serde(rename = "type")]
49    pub dep_type: String,
50    pub deployment_name: String,
51}
52
53pub fn get_azure_ai_search_tool_definition(
54    app_config: &ApplicationConfiguration,
55    course_id: Uuid,
56    use_semantic_reranking: bool,
57) -> ChatbotResult<AzureAISearchToolDefinition> {
58    let index_name = Url::parse(&app_config.base_url)?
59        .host_str()
60        .ok_or_else(|| anyhow::anyhow!("Invalid application base url, no host"))?
61        .replace(".", "-");
62    let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
63        anyhow::anyhow!("Azure configuration is missing from the application configuration")
64    })?;
65
66    let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
67        anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
68    })?;
69
70    let query_type = if use_semantic_reranking {
71        "vector_semantic_hybrid"
72    } else {
73        "vector_simple_hybrid"
74    };
75
76    let semantic_configuration = format!("{}-semantic-configuration", &index_name);
77
78    Ok(AzureAISearchToolDefinition {
79        data_type: "azure_ai_search".to_string(),
80        azure_ai_search: AzureAISearch {
81            indexes: vec![SearchIndex {
82                index_name,
83                project_connection_id: search_config.search_connection_id.to_owned(),
84                query_type: query_type.to_string(),
85                semantic_configuration,
86                embedding_dependency: EmbeddingDependency {
87                    dep_type: "deployment_name".to_string(),
88                    deployment_name: search_config.vectorizer_deployment_id.clone(),
89                },
90                in_scope: false,
91                top_k: 15,
92                strictness: 3,
93                filter: Some(SearchFilter::eq("course_id", course_id.to_string()).to_odata()?),
94                fields_mapping: FieldsMapping {
95                    content_fields_separator: CONTENT_FIELD_SEPARATOR.to_string(),
96                    content_fields: vec!["chunk_context".to_string(), "chunk".to_string()],
97                    filepath_field: "filepath".to_string(),
98                    title_field: "title".to_string(),
99                    url_field: "url".to_string(),
100                    vector_fields: vec!["text_vector".to_string()],
101                },
102            }],
103        },
104    })
105}