/// Maximum share of the context window to use in a single request.
/// Stored as a fraction rather than a percent value: 0.75 means 75%.
pub const MAX_CONTEXT_UTILIZATION: f32 = 0.75;