refactor: do not automatically convert temperature value (#368)

sigoden authored 2 months ago · committed by GitHub
parent 4d52479545
commit 527da63d18

@@ -79,7 +79,7 @@ Feel free to adjust the configuration according to your needs.
 ```yaml
 model: openai:gpt-3.5-turbo # LLM model
-temperature: 1.0 # GPT temperature, between 0 and 2
+temperature: 1.0 # LLM temperature
 save: true # Whether to save the message
 highlight: true # Set false to turn highlight
 light_theme: false # Whether to use a light theme

@@ -1,5 +1,5 @@
 model: openai:gpt-3.5-turbo # LLM model
-temperature: 1.0 # GPT temperature, between 0 and 2
+temperature: 1.0 # LLM temperature
 save: true # Whether to save the message
 highlight: true # Set false to turn highlight
 light_theme: false # Whether to use a light theme

@@ -204,7 +204,7 @@ fn build_body(data: SendData, model: String) -> Result<Value> {
 });
 if let Some(v) = temperature {
-body["temperature"] = (v / 2.0).into();
+body["temperature"] = v.into();
 }
 if stream {
 body["stream"] = true.into();

@@ -5,11 +5,12 @@ use crate::{render::ReplyHandler, utils::PromptKind};
 use anyhow::{anyhow, bail, Context, Result};
 use async_trait::async_trait;
 use futures_util::StreamExt;
+use lazy_static::lazy_static;
 use reqwest::{Client as ReqwestClient, RequestBuilder};
 use reqwest_eventsource::{Error as EventSourceError, Event, RequestBuilderExt};
 use serde::Deserialize;
 use serde_json::{json, Value};
-use std::env;
+use std::{env, sync::Mutex};
 const API_BASE: &str = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1";
 const ACCESS_TOKEN_URL: &str = "https://aip.baidubce.com/oauth/2.0/token";
@@ -36,7 +37,9 @@ const MODELS: [(&str, usize, &str); 6] = [
 ("ernie-lite-8k", 7168, "/wenxinworkshop/chat/ernie-lite-8k"),
 ];
-static mut ACCESS_TOKEN: String = String::new(); // safe under linear operation
+lazy_static! {
+static ref ACCESS_TOKEN: Mutex<Option<String>> = Mutex::new(None);
+}
 #[derive(Debug, Clone, Deserialize, Default)]
 pub struct ErnieConfig {
@@ -78,9 +81,7 @@ impl ErnieClient {
 let client_name = Self::name(local_config);
 MODELS
 .into_iter()
-.map(|(name, max_input_tokens, _)| {
-Model::new(client_name, name).set_max_input_tokens(Some(max_input_tokens))
-})
+.map(|(name, _, _)| Model::new(client_name, name)) // ERNIE tokenizer is different from cl100k_base
 .collect()
 }
@@ -93,9 +94,13 @@ impl ErnieClient {
 .find(|(v, _, _)| v == &model)
 .ok_or_else(|| anyhow!("Miss Model '{}'", self.model.id()))?;
-let url = format!("{API_BASE}{chat_endpoint}?access_token={}", unsafe {
-&ACCESS_TOKEN
-});
+let access_token = ACCESS_TOKEN
+.lock()
+.unwrap()
+.clone()
+.ok_or_else(|| anyhow!("Failed to load access token"))?;
+let url = format!("{API_BASE}{chat_endpoint}?access_token={access_token}");
 debug!("Ernie Request: {url} {body}");
@@ -105,8 +110,7 @@ impl ErnieClient {
 }
 async fn prepare_access_token(&self) -> Result<()> {
-if unsafe { ACCESS_TOKEN.is_empty() } {
-// Note: cannot use config_get_fn!
+if ACCESS_TOKEN.lock().unwrap().is_none() {
 let env_prefix = Self::name(&self.config).to_uppercase();
 let api_key = self.config.api_key.clone();
 let api_key = api_key
@@ -122,7 +126,7 @@ impl ErnieClient {
 let token = fetch_access_token(&client, &api_key, &secret_key)
 .await
 .with_context(|| "Failed to fetch access token")?;
-unsafe { ACCESS_TOKEN = token };
+*ACCESS_TOKEN.lock().unwrap() = Some(token);
 }
 Ok(())
 }
@@ -189,7 +193,7 @@ fn check_error(data: &Value) -> Result<()> {
 if let Some(err_msg) = data["error_msg"].as_str() {
 if let Some(code) = data["error_code"].as_number().and_then(|v| v.as_u64()) {
 if code == 110 {
-unsafe { ACCESS_TOKEN = String::new() }
+*ACCESS_TOKEN.lock().unwrap() = None;
 }
 bail!("{err_msg}. err_code: {code}");
 } else {
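
Taken together, the ernie.rs hunks above replace a `static mut` cache guarded only by "linear operation" with a `lazy_static` `Mutex<Option<String>>`: `None` means no token has been fetched yet, `Some(..)` is the cached token, and error code 110 resets it to `None`. A minimal, self-contained sketch of the same pattern, assuming the `lazy_static` crate as the diff does (the helper names are illustrative, not aichat's API):

```rust
use lazy_static::lazy_static;
use std::sync::Mutex;

lazy_static! {
    // Lazily-initialized, thread-safe token cache; no `unsafe` access needed.
    static ref ACCESS_TOKEN: Mutex<Option<String>> = Mutex::new(None);
}

fn cache_token(token: String) {
    *ACCESS_TOKEN.lock().unwrap() = Some(token);
}

fn cached_token() -> Option<String> {
    ACCESS_TOKEN.lock().unwrap().clone()
}

fn invalidate_token() {
    // Mirrors the error-code-110 path: force a refresh on the next request.
    *ACCESS_TOKEN.lock().unwrap() = None;
}

fn main() {
    assert!(cached_token().is_none());
    cache_token("demo-token".into());
    assert_eq!(cached_token().as_deref(), Some("demo-token"));
    invalidate_token();
    assert!(cached_token().is_none());
}
```
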
@@ -213,7 +217,7 @@ fn build_body(data: SendData, _model: String) -> Value {
 });
 if let Some(temperature) = temperature {
-body["temperature"] = (temperature / 2.0).into();
+body["temperature"] = temperature.into();
 }
 if stream {
 body["stream"] = true.into();

@@ -115,7 +115,7 @@ pub async fn openai_send_message_streaming(
 bail!("{err_msg}");
 } else {
 bail!("Request failed, {data}");
-}
+}
 }
 EventSourceError::StreamEnded => {}
 _ => {

@@ -1,4 +1,6 @@
-use super::{message::*, Client, ExtraConfig, Model, PromptType, QianwenClient, SendData};
+use super::{
+message::*, Client, ExtraConfig, Model, PromptType, QianwenClient, SendData, TokensCountFactors,
+};
 use crate::{
 render::ReplyHandler,
@@ -35,6 +37,8 @@ const MODELS: [(&str, usize, &str); 6] = [
 ("qwen-vl-max", 0, "text,vision"),
 ];
+const TOKENS_COUNT_FACTORS: TokensCountFactors = (4, 14);
 #[derive(Debug, Clone, Deserialize, Default)]
 pub struct QianwenConfig {
 pub name: Option<String>,
@@ -84,6 +88,7 @@ impl QianwenClient {
 Model::new(client_name, name)
 .set_capabilities(capabilities.into())
 .set_max_input_tokens(Some(max_input_tokens))
+.set_tokens_count_factors(TOKENS_COUNT_FACTORS)
 })
 .collect()
 }
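
The Qianwen models now carry the same `(4, 14)` token-count factors used by other clients. How aichat actually applies these two numbers is defined elsewhere in the codebase; the sketch below is a hypothetical illustration assuming they act as a per-message overhead plus a fixed bias when estimating prompt size:

```rust
// Hypothetical illustration only; aichat's real token estimation lives in its
// model/tokenizer code and may apply these factors differently.
type TokensCountFactors = (usize, usize); // assumed: (per-message overhead, fixed bias)

const TOKENS_COUNT_FACTORS: TokensCountFactors = (4, 14);

fn estimate_total_tokens(per_message_tokens: &[usize], factors: TokensCountFactors) -> usize {
    let (per_message, bias) = factors;
    let content: usize = per_message_tokens.iter().sum();
    content + per_message * per_message_tokens.len() + bias
}

fn main() {
    // Two messages of 10 and 20 content tokens: 30 + 4 * 2 + 14 = 52 under this assumption.
    assert_eq!(estimate_total_tokens(&[10, 20], TOKENS_COUNT_FACTORS), 52);
}
```
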
@@ -221,7 +226,7 @@ fn build_body(data: SendData, model: String, is_vl: bool) -> Result<(Value, bool
 let mut parameters = json!({});
 if let Some(v) = temperature {
-parameters["top_k"] = ((v * 50.0).round() as usize).into();
+parameters["temperature"] = v.into();
 }
 (input, parameters)
 } else {
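
This is the most visible behavioral change of the refactor: the Qianwen client no longer translates the configured temperature into a `top_k` value but sends a `temperature` parameter as-is. A small illustrative sketch (function names are not aichat's) of the old mapping versus the new passthrough:

```rust
use serde_json::{json, Value};

// Old mapping: the configured temperature was converted into Qianwen's top_k.
fn old_parameters(v: f64) -> Value {
    json!({ "top_k": (v * 50.0).round() as usize }) // 1.0 -> top_k = 50
}

// New behavior: the temperature is forwarded unchanged.
fn new_parameters(v: f64) -> Value {
    json!({ "temperature": v }) // 1.0 -> temperature = 1.0
}

fn main() {
    assert_eq!(old_parameters(1.0)["top_k"], 50);
    assert_eq!(new_parameters(1.0)["temperature"], 1.0);
}
```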

@@ -46,7 +46,7 @@ pub struct Config {
 /// LLM model
 #[serde(rename(serialize = "model", deserialize = "model"))]
 pub model_id: Option<String>,
-/// GPT temperature, between 0 and 2
+/// LLM temperature
 #[serde(rename(serialize = "temperature", deserialize = "temperature"))]
 pub default_temperature: Option<f64>,
 /// Dry-run flag

@@ -16,7 +16,7 @@ pub struct Role {
 pub name: String,
 /// Prompt text
 pub prompt: String,
-/// What sampling temperature to use, between 0 and 2
+/// Temperature value
 pub temperature: Option<f64>,
 }
