Skip to main content

headless_lms_chatbot/
azure_skillset.rs

1use secrecy::ExposeSecret;
2use serde_json::json;
3
4use crate::prelude::*;
5
/// Value of the `api-version` query parameter attached to every Azure search request built in this file.
6const API_VERSION: &str = "2024-07-01";
7
8pub async fn does_skillset_exist(
9    skillset_name: &str,
10    app_config: &ApplicationConfiguration,
11) -> anyhow::Result<bool> {
12    // Retrieve Azure configurations from the application configuration
13    let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
14        anyhow::anyhow!("Azure configuration is missing from the application configuration")
15    })?;
16
17    let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
18        anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
19    })?;
20
21    let mut url = search_config.search_endpoint.clone();
22    url.set_path(&format!("skillsets('{}')", skillset_name));
23    url.set_query(Some(&format!("api-version={}", API_VERSION)));
24
25    let response = REQWEST_CLIENT
26        .get(url)
27        .header("Content-Type", "application/json")
28        .header("api-key", search_config.search_api_key.expose_secret())
29        .send()
30        .await?;
31
32    if response.status().is_success() {
33        Ok(true)
34    } else if response.status() == 404 {
35        Ok(false)
36    } else {
37        let status = response.status();
38        let error_text = response.text().await?;
39        Err(anyhow::anyhow!(
40            "Error checking if skillset exists. Status: {}. Error: {}",
41            status,
42            error_text
43        ))
44    }
45}
46
47pub async fn create_skillset(
48    skillset_name: &str,
49    target_index_name: &str,
50    app_config: &ApplicationConfiguration,
51) -> anyhow::Result<()> {
52    let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
53        anyhow::anyhow!("Azure configuration is missing from the application configuration")
54    })?;
55
56    let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
57        anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
58    })?;
59
60    let mut url = search_config.search_endpoint.clone();
61    url.set_path(&format!("skillsets/{}", skillset_name));
62    url.set_query(Some(&format!("api-version={}", API_VERSION)));
63
64    let skillset_definition = json!({
65        "name": skillset_name,
66        "description": "Skillset to chunk documents and generate embeddings",
67        "skills": [
68            {
69                "@odata.type": "#Microsoft.Skills.Text.SplitSkill",
70                "name": "#1",
71                "description": "Split skill to chunk documents",
72                "context": "/document",
73                "defaultLanguageCode": "en",
74                "textSplitMode": "pages",
75                "maximumPageLength": 2000,
76                "pageOverlapLength": 500,
77                "maximumPagesToTake": 0,
78                "inputs": [
79                    {
80                        "name": "text",
81                        "source": "/document/content"
82                    },
83                    {
84                        "name": "languageCode",
85                        "source": "/document/language"
86                    }
87                ],
88                "outputs": [
89                    {
90                        "name": "textItems",
91                        "targetName": "pages"
92                    }
93                ]
94            },
95            {
96                "@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
97                "name": "#2",
98                "description": null,
99                "context": "/document/pages/*",
100                "resourceUri": search_config.vectorizer_resource_uri.clone(),
101                "apiKey": search_config.vectorizer_api_key.expose_secret(),
102                "deploymentId": search_config.vectorizer_deployment_id.clone(),
103                "dimensions": 1536,
104                "modelName": search_config.vectorizer_model_name.clone(),
105                "inputs": [
106                    {
107                        "name": "text",
108                        "source": "/document/pages/*",
109                        "sourceContext": null,
110                        "inputs": []
111                    }
112                ],
113                "outputs": [
114                    {
115                        "name": "embedding",
116                        "targetName": "text_vector"
117                    }
118                ],
119                "authIdentity": null
120            }
121        ],
122        "cognitiveServices": null,
123        "knowledgeStore": null,
124        "indexProjections": {
125            "selectors": [
126                {
127                    "targetIndexName": target_index_name,
128                    "parentKeyFieldName": "parent_id",
129                    "sourceContext": "/document/pages/*",
130                    "mappings": [
131                        {
132                            "name": "text_vector",
133                            "source": "/document/pages/*/text_vector",
134                            "sourceContext": null,
135                            "inputs": []
136                        },
137                        {
138                            "name": "chunk",
139                            "source": "/document/pages/*",
140                            "sourceContext": null,
141                            "inputs": []
142                        },
143                        {
144                            "name": "title",
145                            "source": "/document/title",
146                            "sourceContext": null,
147                            "inputs": []
148                        },
149                        {
150                          "name": "url",
151                          "source": "/document/url",
152                          "sourceContext": null,
153                          "inputs": []
154                        },
155                        {
156                          "name": "course_id",
157                          "source": "/document/course_id",
158                          "sourceContext": null,
159                          "inputs": []
160                        },
161                        {
162                          "name": "language",
163                          "source": "/document/language",
164                          "sourceContext": null,
165                          "inputs": []
166                        },
167                        {
168                          "name": "filepath",
169                          "source": "/document/filepath",
170                          "sourceContext": null,
171                          "inputs": []
172                        },
173                        {
174                            "name": "chunk_context",
175                            "source": "/document/chunk_context",
176                            "sourceContext": null,
177                            "inputs": []
178                        },
179                    ]
180                }
181            ],
182            "parameters": {
183                "projectionMode": "skipIndexingParentDocuments"
184            }
185        },
186        "encryptionKey": null
187    });
188
189    let response = REQWEST_CLIENT
190        .put(url)
191        .header("Content-Type", "application/json")
192        .header("api-key", search_config.search_api_key.expose_secret())
193        .json(&skillset_definition)
194        .send()
195        .await?;
196
197    if response.status().is_success() {
198        Ok(())
199    } else {
200        let status = response.status();
201        let error_text = response.text().await?;
202        Err(anyhow::anyhow!(
203            "Error creating skillset. Status: {}. Error: {}",
204            status,
205            error_text
206        ))
207    }
208}