headless_lms_chatbot/
azure_skillset.rs

1use serde_json::json;
2
3use crate::prelude::*;
4
/// Value sent as the `api-version` query parameter on every skillset request in this file.
const API_VERSION: &str = "2024-07-01";
6
7pub async fn does_skillset_exist(
8    skillset_name: &str,
9    app_config: &ApplicationConfiguration,
10) -> anyhow::Result<bool> {
11    // Retrieve Azure configurations from the application configuration
12    let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
13        anyhow::anyhow!("Azure configuration is missing from the application configuration")
14    })?;
15
16    let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
17        anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
18    })?;
19
20    let mut url = search_config.search_endpoint.clone();
21    url.set_path(&format!("skillsets('{}')", skillset_name));
22    url.set_query(Some(&format!("api-version={}", API_VERSION)));
23
24    let response = REQWEST_CLIENT
25        .get(url)
26        .header("Content-Type", "application/json")
27        .header("api-key", search_config.search_api_key.clone())
28        .send()
29        .await?;
30
31    if response.status().is_success() {
32        Ok(true)
33    } else if response.status() == 404 {
34        Ok(false)
35    } else {
36        let status = response.status();
37        let error_text = response.text().await?;
38        Err(anyhow::anyhow!(
39            "Error checking if skillset exists. Status: {}. Error: {}",
40            status,
41            error_text
42        ))
43    }
44}
45
46pub async fn create_skillset(
47    skillset_name: &str,
48    target_index_name: &str,
49    app_config: &ApplicationConfiguration,
50) -> anyhow::Result<()> {
51    let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
52        anyhow::anyhow!("Azure configuration is missing from the application configuration")
53    })?;
54
55    let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
56        anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
57    })?;
58
59    let azure_openai_api_key = search_config.vectorizer_api_key.clone();
60
61    let mut url = search_config.search_endpoint.clone();
62    url.set_path(&format!("skillsets/{}", skillset_name));
63    url.set_query(Some(&format!("api-version={}", API_VERSION)));
64
65    let skillset_definition = json!({
66        "name": skillset_name,
67        "description": "Skillset to chunk documents and generate embeddings",
68        "skills": [
69            {
70                "@odata.type": "#Microsoft.Skills.Text.SplitSkill",
71                "name": "#1",
72                "description": "Split skill to chunk documents",
73                "context": "/document",
74                "defaultLanguageCode": "en",
75                "textSplitMode": "pages",
76                "maximumPageLength": 2000,
77                "pageOverlapLength": 500,
78                "maximumPagesToTake": 1,
79                "inputs": [
80                    {
81                        "name": "text",
82                        "source": "/document/content"
83                    },
84                    {
85                        "name": "languageCode",
86                        "source": "/document/language"
87                    }
88                ],
89                "outputs": [
90                    {
91                        "name": "textItems",
92                        "targetName": "pages"
93                    }
94                ]
95            },
96            {
97                "@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
98                "name": "#2",
99                "description": null,
100                "context": "/document/pages/*",
101                "resourceUri": search_config.vectorizer_resource_uri.clone(),
102                "apiKey": azure_openai_api_key,
103                "deploymentId": search_config.vectorizer_deployment_id.clone(),
104                "dimensions": 1536,
105                "modelName": search_config.vectorizer_model_name.clone(),
106                "inputs": [
107                    {
108                        "name": "text",
109                        "source": "/document/pages/*",
110                        "sourceContext": null,
111                        "inputs": []
112                    }
113                ],
114                "outputs": [
115                    {
116                        "name": "embedding",
117                        "targetName": "text_vector"
118                    }
119                ],
120                "authIdentity": null
121            }
122        ],
123        "cognitiveServices": null,
124        "knowledgeStore": null,
125        "indexProjections": {
126            "selectors": [
127                {
128                    "targetIndexName": target_index_name,
129                    "parentKeyFieldName": "parent_id",
130                    "sourceContext": "/document/pages/*",
131                    "mappings": [
132                        {
133                            "name": "text_vector",
134                            "source": "/document/pages/*/text_vector",
135                            "sourceContext": null,
136                            "inputs": []
137                        },
138                        {
139                            "name": "chunk",
140                            "source": "/document/pages/*",
141                            "sourceContext": null,
142                            "inputs": []
143                        },
144                        {
145                            "name": "title",
146                            "source": "/document/title",
147                            "sourceContext": null,
148                            "inputs": []
149                        },
150                        {
151                          "name": "url",
152                          "source": "/document/url",
153                          "sourceContext": null,
154                          "inputs": []
155                        },
156                        {
157                          "name": "course_id",
158                          "source": "/document/course_id",
159                          "sourceContext": null,
160                          "inputs": []
161                        },
162                        {
163                          "name": "language",
164                          "source": "/document/language",
165                          "sourceContext": null,
166                          "inputs": []
167                        }
168                    ]
169                }
170            ],
171            "parameters": {
172                "projectionMode": "skipIndexingParentDocuments"
173            }
174        },
175        "encryptionKey": null
176    });
177
178    let response = REQWEST_CLIENT
179        .put(url)
180        .header("Content-Type", "application/json")
181        .header("api-key", search_config.search_api_key.clone())
182        .json(&skillset_definition)
183        .send()
184        .await?;
185
186    if response.status().is_success() {
187        Ok(())
188    } else {
189        let status = response.status();
190        let error_text = response.text().await?;
191        Err(anyhow::anyhow!(
192            "Error creating skillset. Status: {}. Error: {}",
193            status,
194            error_text
195        ))
196    }
197}