1use secrecy::ExposeSecret;
2use serde_json::json;
3
4use crate::prelude::*;
5
/// Value of the `api-version` query parameter appended to every request URL built in this file.
const API_VERSION: &str = "2024-07-01";
7
8pub async fn does_skillset_exist(
9 skillset_name: &str,
10 app_config: &ApplicationConfiguration,
11) -> anyhow::Result<bool> {
12 let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
14 anyhow::anyhow!("Azure configuration is missing from the application configuration")
15 })?;
16
17 let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
18 anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
19 })?;
20
21 let mut url = search_config.search_endpoint.clone();
22 url.set_path(&format!("skillsets('{}')", skillset_name));
23 url.set_query(Some(&format!("api-version={}", API_VERSION)));
24
25 let response = REQWEST_CLIENT
26 .get(url)
27 .header("Content-Type", "application/json")
28 .header("api-key", search_config.search_api_key.expose_secret())
29 .send()
30 .await?;
31
32 if response.status().is_success() {
33 Ok(true)
34 } else if response.status() == 404 {
35 Ok(false)
36 } else {
37 let status = response.status();
38 let error_text = response.text().await?;
39 Err(anyhow::anyhow!(
40 "Error checking if skillset exists. Status: {}. Error: {}",
41 status,
42 error_text
43 ))
44 }
45}
46
47pub async fn create_skillset(
48 skillset_name: &str,
49 target_index_name: &str,
50 app_config: &ApplicationConfiguration,
51) -> anyhow::Result<()> {
52 let azure_config = app_config.azure_configuration.as_ref().ok_or_else(|| {
53 anyhow::anyhow!("Azure configuration is missing from the application configuration")
54 })?;
55
56 let search_config = azure_config.search_config.as_ref().ok_or_else(|| {
57 anyhow::anyhow!("Azure search configuration is missing from the Azure configuration")
58 })?;
59
60 let mut url = search_config.search_endpoint.clone();
61 url.set_path(&format!("skillsets/{}", skillset_name));
62 url.set_query(Some(&format!("api-version={}", API_VERSION)));
63
64 let skillset_definition = json!({
65 "name": skillset_name,
66 "description": "Skillset to chunk documents and generate embeddings",
67 "skills": [
68 {
69 "@odata.type": "#Microsoft.Skills.Text.SplitSkill",
70 "name": "#1",
71 "description": "Split skill to chunk documents",
72 "context": "/document",
73 "defaultLanguageCode": "en",
74 "textSplitMode": "pages",
75 "maximumPageLength": 2000,
76 "pageOverlapLength": 500,
77 "maximumPagesToTake": 0,
78 "inputs": [
79 {
80 "name": "text",
81 "source": "/document/content"
82 },
83 {
84 "name": "languageCode",
85 "source": "/document/language"
86 }
87 ],
88 "outputs": [
89 {
90 "name": "textItems",
91 "targetName": "pages"
92 }
93 ]
94 },
95 {
96 "@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
97 "name": "#2",
98 "description": null,
99 "context": "/document/pages/*",
100 "resourceUri": search_config.vectorizer_resource_uri.clone(),
101 "apiKey": search_config.vectorizer_api_key.expose_secret(),
102 "deploymentId": search_config.vectorizer_deployment_id.clone(),
103 "dimensions": 1536,
104 "modelName": search_config.vectorizer_model_name.clone(),
105 "inputs": [
106 {
107 "name": "text",
108 "source": "/document/pages/*",
109 "sourceContext": null,
110 "inputs": []
111 }
112 ],
113 "outputs": [
114 {
115 "name": "embedding",
116 "targetName": "text_vector"
117 }
118 ],
119 "authIdentity": null
120 }
121 ],
122 "cognitiveServices": null,
123 "knowledgeStore": null,
124 "indexProjections": {
125 "selectors": [
126 {
127 "targetIndexName": target_index_name,
128 "parentKeyFieldName": "parent_id",
129 "sourceContext": "/document/pages/*",
130 "mappings": [
131 {
132 "name": "text_vector",
133 "source": "/document/pages/*/text_vector",
134 "sourceContext": null,
135 "inputs": []
136 },
137 {
138 "name": "chunk",
139 "source": "/document/pages/*",
140 "sourceContext": null,
141 "inputs": []
142 },
143 {
144 "name": "title",
145 "source": "/document/title",
146 "sourceContext": null,
147 "inputs": []
148 },
149 {
150 "name": "url",
151 "source": "/document/url",
152 "sourceContext": null,
153 "inputs": []
154 },
155 {
156 "name": "course_id",
157 "source": "/document/course_id",
158 "sourceContext": null,
159 "inputs": []
160 },
161 {
162 "name": "language",
163 "source": "/document/language",
164 "sourceContext": null,
165 "inputs": []
166 },
167 {
168 "name": "filepath",
169 "source": "/document/filepath",
170 "sourceContext": null,
171 "inputs": []
172 },
173 {
174 "name": "chunk_context",
175 "source": "/document/chunk_context",
176 "sourceContext": null,
177 "inputs": []
178 },
179 ]
180 }
181 ],
182 "parameters": {
183 "projectionMode": "skipIndexingParentDocuments"
184 }
185 },
186 "encryptionKey": null
187 });
188
189 let response = REQWEST_CLIENT
190 .put(url)
191 .header("Content-Type", "application/json")
192 .header("api-key", search_config.search_api_key.expose_secret())
193 .json(&skillset_definition)
194 .send()
195 .await?;
196
197 if response.status().is_success() {
198 Ok(())
199 } else {
200 let status = response.status();
201 let error_text = response.text().await?;
202 Err(anyhow::anyhow!(
203 "Error creating skillset. Status: {}. Error: {}",
204 status,
205 error_text
206 ))
207 }
208}