refactor: extract prelude models to models.yaml (#451)

sigoden authored 2 months ago, committed by GitHub
parent eac01fb129
commit d6df1e84a7

@@ -0,0 +1,324 @@
- type: openai
# docs:
# - https://platform.openai.com/docs/models
# - https://openai.com/pricing
# - https://platform.openai.com/docs/api-reference/chat
models:
- name: gpt-3.5-turbo
max_input_tokens: 16385
max_output_tokens?: 4096
input_price: 0.5
output_price: 1.5
- name: gpt-3.5-turbo-1106
max_input_tokens: 16385
max_output_tokens?: 4096
input_price: 1
output_price: 2
- name: gpt-4-turbo
max_input_tokens: 128000
max_output_tokens?: 4096
input_price: 10
output_price: 30
supports_vision: true
- name: gpt-4-turbo-preview
max_input_tokens: 128000
max_output_tokens?: 4096
input_price: 10
output_price: 30
- name: gpt-4-1106-preview
max_input_tokens: 128000
max_output_tokens?: 4096
input_price: 10
output_price: 30
- name: gpt-4-vision-preview
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 10
output_price: 30
supports_vision: true
- name: gpt-4
max_input_tokens: 8192
max_output_tokens?: 4096
input_price: 30
output_price: 60
- name: gpt-4-32k
max_input_tokens: 32768
max_output_tokens?: 4096
input_price: 60
output_price: 120
- type: gemini
# docs:
# - https://ai.google.dev/models/gemini
# - https://ai.google.dev/pricing
# - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
models:
- name: gemini-1.0-pro-latest
max_input_tokens: 30720
max_output_tokens?: 2048
input_price: 0.5
output_price: 1.5
- name: gemini-1.0-pro-vision-latest
max_input_tokens: 12288
max_output_tokens?: 4096
input_price: 0.5
output_price: 1.5
supports_vision: true
- name: gemini-1.5-pro-latest
max_input_tokens: 1048576
max_output_tokens?: 8192
input_price: 7
output_price: 21
supports_vision: true
- type: claude
# docs:
# - https://docs.anthropic.com/claude/docs/models-overview
# - https://docs.anthropic.com/claude/reference/messages-streaming
models:
- name: claude-3-opus-20240229
max_input_tokens: 200000
max_output_tokens: 4096
input_price: 15
output_price: 75
supports_vision: true
- name: claude-3-sonnet-20240229
max_input_tokens: 200000
max_output_tokens: 4096
input_price: 3
output_price: 15
supports_vision: true
- name: claude-3-haiku-20240307
max_input_tokens: 200000
max_output_tokens: 4096
input_price: 0.25
output_price: 1.25
supports_vision: true
- type: mistral
# docs:
# - https://docs.mistral.ai/platform/endpoints/
# - https://mistral.ai/technology/#pricing
# - https://docs.mistral.ai/api/
models:
- name: open-mistral-7b
max_input_tokens: 32000
max_output_tokens?: 8191
input_price: 0.25
output_price: 0.25
- name: open-mixtral-8x7b
max_input_tokens: 32000
max_output_tokens?: 8191
input_price: 0.7
output_price: 0.7
- name: open-mixtral-8x22b
max_input_tokens: 64000
max_output_tokens?: 8191
input_price: 2
output_price: 6
- name: mistral-small-latest
max_input_tokens: 32000
max_output_tokens?: 8191
input_price: 2
output_price: 6
- name: mistral-medium-latest
max_input_tokens: 32000
max_output_tokens?: 8191
input_price: 2.7
output_price: 8.1
- name: mistral-large-latest
max_input_tokens: 32000
max_output_tokens?: 8191
input_price: 8
output_price: 24
- type: cohere
# docs:
# - https://docs.cohere.com/docs/command-r
# - https://docs.cohere.com/docs/command-r-plus
# - https://cohere.com/pricing
# - https://docs.cohere.com/reference/chat
models:
- name: command-r
max_input_tokens: 128000
max_output_tokens?: 4096
input_price: 0.5
output_price: 1.5
- name: command-r-plus
max_input_tokens: 128000
max_output_tokens?: 4096
input_price: 3
output_price: 15
- type: perplexity
# docs:
# - https://docs.perplexity.ai/docs/model-cards
# - https://docs.perplexity.ai/docs/pricing
# - https://docs.perplexity.ai/reference/post_chat_completions
models:
- name: sonar-small-chat
max_input_tokens: 16384
- name: sonar-small-online
max_input_tokens: 12000
- name: sonar-medium-chat
max_input_tokens: 16384
- name: sonar-medium-online
max_input_tokens: 12000
- name: llama-3-8b-instruct
max_input_tokens: 8192
max_output_tokens?: 8192
input_price: 0.2
output_price: 0.2
- name: llama-3-70b-instruct
max_input_tokens: 8192
max_output_tokens?: 8192
input_price: 1
output_price: 1
- name: codellama-70b-instruct
max_input_tokens: 16384
max_output_tokens?: 4096
input_price: 1
output_price: 1
- name: mistral-7b-instruct
max_input_tokens: 16384
max_output_tokens?: 8191
input_price: 0.2
output_price: 0.2
- name: mixtral-8x7b-instruct
max_input_tokens: 16384
max_output_tokens?: 8191
input_price: 0.6
output_price: 0.6
- name: mixtral-8x22b-instruct
max_input_tokens: 16384
max_output_tokens?: 8191
input_price: 1
output_price: 1
- type: groq
# docs:
# - https://console.groq.com/docs/models
# - https://console.groq.com/docs/text-chat
models:
- name: llama3-8b-8192
max_input_tokens: 8192
max_output_tokens: 8192
- name: llama3-70b-8192
max_input_tokens: 8192
max_output_tokens: 8192
- name: llama2-70b-4096
max_input_tokens: 4096
max_output_tokens: 4096
- name: mixtral-8x7b-32768
max_input_tokens: 32768
max_output_tokens: 32768
- name: gemma-7b-it
max_input_tokens: 8192
max_output_tokens: 8192
- type: vertexai
# docs:
# - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
# - https://cloud.google.com/vertex-ai/generative-ai/pricing
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
models:
- name: gemini-1.0-pro
max_input_tokens: 24568
max_output_tokens: 24568
input_price: 0.125
output_price: 0.375
- name: gemini-1.0-pro-vision
max_input_tokens: 14336
max_output_tokens: 14336
input_price: 0.125
output_price: 0.375
supports_vision: true
- name: gemini-1.5-pro-preview-0409
max_input_tokens: 1000000
max_output_tokens: 1000000
input_price: 2.5
output_price: 7.5
supports_vision: true
- type: ernie
# docs:
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
models:
- name: ernie-4.0-8k-preview
max_input_tokens: 5120
max_output_tokens: 2048
input_price: 16.8
output_price: 16.8
- name: ernie-3.5-8k-preview
max_input_tokens: 5120
max_output_tokens: 2048
input_price: 1.68
output_price: 1.68
- name: ernie-speed-128k
max_input_tokens: 124000
max_output_tokens: 4096
input_price: 0.56
output_price: 1.12
- name: ernie-lite-8k
max_input_tokens: 7168
max_output_tokens: 2048
input_price: 0.42
output_price: 0.84
- name: ernie-tiny-8k
max_input_tokens: 7168
max_output_tokens: 2048
input_price: 0.14
output_price: 0.14
- type: qianwen
# docs:
# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyiqianwen-large-language-models/
# - https://help.aliyun.com/zh/dashscope/developer-reference/qwen-vl-plus/
models:
- name: qwen-turbo
max_input_tokens: 6000
max_output_tokens?: 1500
input_price: 1.12
output_price: 1.12
- name: qwen-plus
max_input_tokens: 30000
max_output_tokens?: 2000
input_price: 2.8
output_price: 2.8
- name: qwen-max
max_input_tokens: 6000
max_output_tokens?: 2000
input_price: 16.8
output_price: 16.8
- name: qwen-max-longcontext
max_input_tokens: 28000
max_output_tokens?: 2000
- name: qwen-vl-plus
input_price: 1.12
output_price: 1.12
supports_vision: true
- name: qwen-vl-max
input_price: 2.8
output_price: 2.8
supports_vision: true
- type: moonshot
# docs:
# - https://platform.moonshot.cn/docs/intro
# - https://platform.moonshot.cn/docs/pricing
# - https://platform.moonshot.cn/docs/api-reference
models:
- name: moonshot-v1-8k
max_input_tokens: 8000
input_price: 1.68
output_price: 1.68
- name: moonshot-v1-32k
max_input_tokens: 32000
input_price: 3.36
output_price: 3.36
- name: moonshot-v1-128k
max_input_tokens: 128000
input_price: 8.4
output_price: 8.4
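
A note on the `?`-suffixed keys above: entries such as `max_output_tokens?: 4096` are valid YAML keys (the `?` is part of the key name), but they match no field on ModelConfig, so serde's default unknown-field handling skips them; they effectively annotate values the author has not verified. Below is a minimal standalone sketch of that behavior, assuming a trimmed-down ModelConfig with only the fields used in this file (the full struct in this diff also carries extra_fields):

use serde::Deserialize;

// Trimmed copy of ModelConfig, for illustration only.
#[derive(Debug, Deserialize)]
struct ModelConfig {
    name: String,
    max_input_tokens: Option<usize>,
    max_output_tokens: Option<isize>,
    input_price: Option<f64>,
    output_price: Option<f64>,
    #[serde(default)]
    supports_vision: bool,
}

fn main() {
    let yaml = "
- name: gpt-4-turbo
  max_input_tokens: 128000
  max_output_tokens?: 4096
  input_price: 10
  output_price: 30
  supports_vision: true
";
    let models: Vec<ModelConfig> = serde_yaml::from_str(yaml).unwrap();
    // The `?` key matched no field and was ignored, so max_output_tokens stays None.
    assert_eq!(models[0].max_output_tokens, None);
}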

@@ -17,7 +17,6 @@ pub struct AzureOpenAIConfig {
}
impl AzureOpenAIClient {
list_models_fn!(AzureOpenAIConfig);
config_get_fn!(api_base, get_api_base);
config_get_fn!(api_key, get_api_key);

@@ -24,16 +24,6 @@ pub struct ClaudeConfig {
}
impl ClaudeClient {
list_models_fn!(
ClaudeConfig,
[
// https://docs.anthropic.com/claude/docs/models-overview
("claude-3-opus-20240229", "text,vision", 200000, 4096),
("claude-3-sonnet-20240229", "text,vision", 200000, 4096),
("claude-3-haiku-20240307", "text,vision", 200000, 4096),
]
);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 1] =

@@ -22,14 +22,6 @@ pub struct CohereConfig {
}
impl CohereClient {
list_models_fn!(
CohereConfig,
[
// https://docs.cohere.com/docs/command-r
("command-r", "text", 128000),
("command-r-plus", "text", 128000),
]
);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 1] =

@@ -1,4 +1,4 @@
use super::{openai::OpenAIConfig, ClientConfig, Message, Model, ReplyHandler};
use super::{openai::OpenAIConfig, ClientConfig, ClientModel, Message, Model, ReplyHandler};
use crate::{
config::{GlobalConfig, Input},
@@ -9,12 +9,19 @@ use crate::{
use anyhow::{bail, Context, Result};
use async_trait::async_trait;
use futures_util::{Stream, StreamExt};
use lazy_static::lazy_static;
use reqwest::{Client as ReqwestClient, ClientBuilder, Proxy, RequestBuilder};
use serde::Deserialize;
use serde_json::{json, Value};
use std::{env, future::Future, time::Duration};
use tokio::{sync::mpsc::unbounded_channel, time::sleep};
const MODELS_YAML: &str = include_str!("../../models.yaml");
lazy_static! {
pub static ref CLIENT_MODELS: Vec<ClientModel> = serde_yaml::from_str(MODELS_YAML).unwrap();
}
#[macro_export]
macro_rules! register_client {
(
@@ -38,6 +45,17 @@ macro_rules! register_client {
Unknown,
}
#[derive(Debug, Clone, serde::Deserialize)]
#[serde(tag = "type")]
pub enum ClientModel {
$(
#[serde(rename = $name)]
$config { models: Vec<ModelConfig> },
)+
#[serde(other)]
Unknown,
}
$(
#[derive(Debug)]
@@ -68,6 +86,23 @@ macro_rules! register_client {
}))
}
pub fn list_models(local_config: &$config) -> Vec<Model> {
let client_name = Self::name(local_config);
if local_config.models.is_empty() {
for model in $crate::client::CLIENT_MODELS.iter() {
match model {
$crate::client::ClientModel::$config { models } => {
return Model::from_config(client_name, models);
}
_ => {}
}
}
vec![]
} else {
Model::from_config(client_name, &local_config.models)
}
}
pub fn name(config: &$config) -> &str {
config.name.as_deref().unwrap_or(Self::NAME)
}
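
Taken together: models.yaml is embedded with include_str!, parsed once into the CLIENT_MODELS vec, and each generated list_models falls back to the matching ClientModel variant whenever the user's own config lists no models. Since macro expansions can be hard to picture, here is a hand-written sketch of what the generated enum plausibly looks like for just two of the registered clients (trimmed for illustration; the real expansion covers every client passed to register_client!):

// serde's internally tagged representation reads the `type` field of each
// models.yaml entry first, then fills the matching variant's `models` list.
#[derive(Debug, Clone, serde::Deserialize)]
#[serde(tag = "type")]
pub enum ClientModel {
    #[serde(rename = "openai")]
    OpenAIConfig { models: Vec<ModelConfig> },
    #[serde(rename = "claude")]
    ClaudeConfig { models: Vec<ModelConfig> },
    // Entries whose `type` matches no registered client fall through here
    // instead of failing the whole parse.
    #[serde(other)]
    Unknown,
}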
@@ -131,10 +166,9 @@ macro_rules! openai_compatible_client {
$config:ident,
$client:ident,
$api_base:literal,
[$(($name:literal, $capabilities:literal, $max_input_tokens:literal $(, $max_output_tokens:literal)? )),+$(,)?]
) => {
use $crate::client::openai::openai_build_body;
use $crate::client::{ExtraConfig, $client, Model, ModelConfig, PromptType, SendData};
use $crate::client::{$client, ExtraConfig, Model, ModelConfig, PromptType, SendData};
use $crate::utils::PromptKind;
@@ -159,22 +193,17 @@ macro_rules! openai_compatible_client {
$crate::client::openai::openai_send_message_streaming
);
impl $client {
list_models_fn!(
$config,
[
$(
($name, $capabilities, $max_input_tokens $(, $max_output_tokens)?),
)+
]
);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 1] =
[("api_key", "API Key:", false, PromptKind::String)];
fn request_builder(&self, client: &ReqwestClient, data: SendData) -> Result<RequestBuilder> {
fn request_builder(
&self,
client: &ReqwestClient,
data: SendData,
) -> Result<RequestBuilder> {
let api_key = self.get_api_key().ok();
let body = openai_build_body(data, &self.model);
@@ -191,8 +220,7 @@ macro_rules! openai_compatible_client {
Ok(builder)
}
}
}
};
}
#[macro_export]
@@ -269,33 +297,6 @@ macro_rules! config_get_fn {
};
}
#[macro_export]
macro_rules! list_models_fn {
($config:ident) => {
pub fn list_models(local_config: &$config) -> Vec<Model> {
let client_name = Self::name(local_config);
Model::from_config(client_name, &local_config.models)
}
};
($config:ident, [$(($name:literal, $capabilities:literal, $max_input_tokens:literal $(, $max_output_tokens:literal)? )),+$(,)?]) => {
pub fn list_models(local_config: &$config) -> Vec<Model> {
let client_name = Self::name(local_config);
if local_config.models.is_empty() {
vec![
$(
Model::new(client_name, $name)
.set_capabilities($capabilities.into())
.set_max_input_tokens(Some($max_input_tokens))
$(.set_max_output_tokens(Some($max_output_tokens)))?
),+
]
} else {
Model::from_config(client_name, &local_config.models)
}
}
};
}
#[macro_export]
macro_rules! unsupported_model {
($name:expr) => {

@@ -31,19 +31,6 @@ pub struct ErnieConfig {
}
impl ErnieClient {
list_models_fn!(
ErnieConfig,
[
// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/clntwmv7t
("ernie-4.0-8k", "text", 5120, 2048),
("ernie-3.5-8k", "text", 5120, 2048),
("ernie-3.5-4k", "text", 2048, 2048),
("ernie-speed-8k", "text", 7168, 2048),
("ernie-speed-128k", "text", 124000, 4096),
("ernie-lite-8k", "text", 7168, 2048),
("ernie-tiny-8k", "text", 7168, 2048),
]
);
pub const PROMPTS: [PromptType<'static>; 2] = [
("api_key", "API Key:", true, PromptKind::String),
@@ -53,16 +40,9 @@ impl ErnieClient {
fn request_builder(&self, client: &ReqwestClient, data: SendData) -> Result<RequestBuilder> {
let body = build_body(data, &self.model);
let endpoint = match self.model.name.as_str() {
"ernie-4.0-8k" => "completions_pro",
"ernie-3.5-8k" => "ernie-3.5-8k-0205",
"ernie-3.5-4k" => "ernie-3.5-4k-0205",
"ernie-speed-8k" => "ernie_speed",
_ => &self.model.name,
};
let url = format!(
"{API_BASE}/wenxinworkshop/chat/{endpoint}?access_token={}",
"{API_BASE}/wenxinworkshop/chat/{}?access_token={}",
&self.model.name,
unsafe { &ACCESS_TOKEN.0 }
);
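
With the alias table gone, a model's name in models.yaml must match Baidu's chat endpoint path directly, which is presumably why the YAML above lists ernie-4.0-8k-preview rather than the old ernie-4.0-8k alias (the preview names are themselves endpoint paths). A sketch of the resulting URL with hypothetical values (API_BASE and the access-token plumbing live in ernie.rs and are not shown in this hunk):

// Hypothetical values for illustration; only the URL shape is the point here.
let api_base = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1"; // assumed value of API_BASE
let model_name = "ernie-speed-128k";
let access_token = "<access-token>";
let url = format!("{api_base}/wenxinworkshop/chat/{model_name}?access_token={access_token}");
assert!(url.ends_with("/chat/ernie-speed-128k?access_token=<access-token>"));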

@@ -20,15 +20,6 @@ pub struct GeminiConfig {
}
impl GeminiClient {
list_models_fn!(
GeminiConfig,
[
// https://ai.google.dev/models/gemini
("gemini-1.0-pro-latest", "text", 30720),
("gemini-1.0-pro-vision-latest", "text,vision", 12288),
("gemini-1.5-pro-latest", "text,vision", 1048576),
]
);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 1] =

@@ -2,12 +2,4 @@ openai_compatible_client!(
GroqConfig,
GroqClient,
"https://api.groq.com/openai/v1",
[
// https://console.groq.com/docs/models
("llama3-8b-8192", "text", 8192),
("llama3-70b-8192", "text", 8192),
("llama2-70b-4096", "text", 4096),
("mixtral-8x7b-32768", "text", 32768),
("gemma-7b-it", "text", 8192),
]
);

@@ -2,13 +2,4 @@ openai_compatible_client!(
MistralConfig,
MistralClient,
"https://api.mistral.ai/v1",
[
// https://docs.mistral.ai/platform/endpoints/
("open-mistral-7b", "text", 32000),
("open-mixtral-8x7b", "text", 32000),
("open-mixtral-8x22b", "text", 64000),
("mistral-small-latest", "text", 32000),
("mistral-medium-latest", "text", 32000),
("mistral-large-latest", "text", 32000),
]
);

@@ -11,26 +11,26 @@ pub use reply_handler::*;
register_client!(
(openai, "openai", OpenAIConfig, OpenAIClient),
(
azure_openai,
"azure-openai",
AzureOpenAIConfig,
AzureOpenAIClient
),
(
openai_compatible,
"openai-compatible",
OpenAICompatibleConfig,
OpenAICompatibleClient
),
(gemini, "gemini", GeminiConfig, GeminiClient),
(vertexai, "vertexai", VertexAIConfig, VertexAIClient),
(claude, "claude", ClaudeConfig, ClaudeClient),
(mistral, "mistral", MistralConfig, MistralClient),
(cohere, "cohere", CohereConfig, CohereClient),
(perplexity, "perplexity", PerplexityConfig, PerplexityClient),
(groq, "groq", GroqConfig, GroqClient),
(
openai_compatible,
"openai-compatible",
OpenAICompatibleConfig,
OpenAICompatibleClient
),
(ollama, "ollama", OllamaConfig, OllamaClient),
(
azure_openai,
"azure-openai",
AzureOpenAIConfig,
AzureOpenAIClient
),
(vertexai, "vertexai", VertexAIConfig, VertexAIClient),
(ernie, "ernie", ErnieConfig, ErnieClient),
(qianwen, "qianwen", QianwenConfig, QianwenClient),
(moonshot, "moonshot", MoonshotConfig, MoonshotClient),

@@ -3,7 +3,7 @@ use super::message::{Message, MessageContent};
use crate::utils::count_tokens;
use anyhow::{bail, Result};
use serde::{Deserialize, Deserializer};
use serde::Deserialize;
const PER_MESSAGES_TOKENS: usize = 5;
const BASIS_TOKENS: usize = 2;
@@ -41,10 +41,10 @@ impl Model {
.iter()
.map(|v| {
Model::new(client_name, &v.name)
.set_capabilities(v.capabilities)
.set_max_input_tokens(v.max_input_tokens)
.set_max_output_tokens(v.max_output_tokens)
.set_extra_fields(v.extra_fields.clone())
.set_supports_vision(v.supports_vision)
.set_extra_fields(&v.extra_fields)
})
.collect()
}
@@ -84,19 +84,6 @@ impl Model {
format!("{}:{}", self.client_name, self.name)
}
pub fn set_capabilities(mut self, capabilities: ModelCapabilities) -> Self {
self.capabilities = capabilities;
self
}
pub fn set_extra_fields(
mut self,
extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
) -> Self {
self.extra_fields = extra_fields;
self
}
pub fn set_max_input_tokens(mut self, max_input_tokens: Option<usize>) -> Self {
match max_input_tokens {
None | Some(0) => self.max_input_tokens = None,
@@ -113,6 +100,23 @@ impl Model {
self
}
pub fn set_supports_vision(mut self, supports_vision: bool) -> Self {
if supports_vision {
self.capabilities |= ModelCapabilities::Vision;
} else {
self.capabilities &= !ModelCapabilities::Vision;
}
self
}
pub fn set_extra_fields(
mut self,
extra_fields: &Option<serde_json::Map<String, serde_json::Value>>,
) -> Self {
self.extra_fields = extra_fields.clone();
self
}
pub fn messages_tokens(&self, messages: &[Message]) -> usize {
messages
.iter()
@@ -174,10 +178,11 @@ pub struct ModelConfig {
pub name: String,
pub max_input_tokens: Option<usize>,
pub max_output_tokens: Option<isize>,
pub input_price: Option<f64>,
pub output_price: Option<f64>,
#[serde(default)]
pub supports_vision: bool,
pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
#[serde(deserialize_with = "deserialize_capabilities")]
#[serde(default = "default_capabilities")]
pub capabilities: ModelCapabilities,
}
bitflags::bitflags! {
@@ -187,29 +192,3 @@ bitflags::bitflags! {
const Vision = 0b00000010;
}
}
impl From<&str> for ModelCapabilities {
fn from(value: &str) -> Self {
let value = if value.is_empty() { "text" } else { value };
let mut output = ModelCapabilities::empty();
if value.contains("text") {
output |= ModelCapabilities::Text;
}
if value.contains("vision") {
output |= ModelCapabilities::Vision;
}
output
}
}
fn deserialize_capabilities<'de, D>(deserializer: D) -> Result<ModelCapabilities, D::Error>
where
D: Deserializer<'de>,
{
let value: String = Deserialize::deserialize(deserializer)?;
Ok(value.as_str().into())
}
fn default_capabilities() -> ModelCapabilities {
ModelCapabilities::Text
}
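
The string-based capabilities parser removed above is replaced by the supports_vision boolean plus direct bit manipulation in set_supports_vision. A standalone sketch of that bit logic, using the same flag values as this diff (the struct is recreated here so the snippet compiles on its own; the u32 backing type is an assumption, since this hunk doesn't show it):

// Mirrors the ModelCapabilities bitflags from model.rs.
bitflags::bitflags! {
    pub struct ModelCapabilities: u32 {
        const Text = 0b00000001;
        const Vision = 0b00000010;
    }
}

fn main() {
    let mut caps = ModelCapabilities::Text; // what default_capabilities() returned
    caps |= ModelCapabilities::Vision;      // set_supports_vision(true)
    assert!(caps.contains(ModelCapabilities::Vision));
    caps &= !ModelCapabilities::Vision;     // set_supports_vision(false)
    assert!(!caps.contains(ModelCapabilities::Vision));
}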

@@ -2,10 +2,4 @@ openai_compatible_client!(
MoonshotConfig,
MoonshotClient,
"https://api.moonshot.cn/v1",
[
// https://platform.moonshot.cn/docs/intro#%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8
("moonshot-v1-8k", "text", 8000),
("moonshot-v1-32k", "text", 32000),
("moonshot-v1-128k", "text", 128000),
]
);

@@ -22,7 +22,6 @@ pub struct OllamaConfig {
}
impl OllamaClient {
list_models_fn!(OllamaConfig);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 4] = [

@@ -25,20 +25,6 @@ pub struct OpenAIConfig {
}
impl OpenAIClient {
list_models_fn!(
OpenAIConfig,
[
// https://platform.openai.com/docs/models
("gpt-3.5-turbo", "text", 16385),
("gpt-3.5-turbo-1106", "text", 16385),
("gpt-4-turbo", "text,vision", 128000),
("gpt-4-turbo-preview", "text", 128000),
("gpt-4-1106-preview", "text", 128000),
("gpt-4-vision-preview", "text,vision", 128000, 4096),
("gpt-4", "text", 8192),
("gpt-4-32k", "text", 32768),
]
);
config_get_fn!(api_key, get_api_key);
config_get_fn!(api_base, get_api_base);

@@ -18,7 +18,6 @@ pub struct OpenAICompatibleConfig {
}
impl OpenAICompatibleClient {
list_models_fn!(OpenAICompatibleConfig);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 5] = [

@@ -2,18 +2,4 @@ openai_compatible_client!(
PerplexityConfig,
PerplexityClient,
"https://api.perplexity.ai",
[
// https://docs.perplexity.ai/docs/model-cards
("sonar-small-chat", "text", 16384),
("sonar-small-online", "text", 12000),
("sonar-medium-chat", "text", 16384),
("sonar-medium-online", "text", 12000),
("llama-3-8b-instruct", "text", 8192),
("llama-3-70b-instruct", "text", 8192),
("codellama-70b-instruct", "text", 16384),
("mistral-7b-instruct", "text", 16384),
("mixtral-8x7b-instruct", "text", 16384),
("mixtral-8x22b-instruct", "text", 16384),
]
);

@@ -34,19 +34,6 @@ pub struct QianwenConfig {
}
impl QianwenClient {
list_models_fn!(
QianwenConfig,
[
// https://help.aliyun.com/zh/dashscope/developer-reference/api-details
("qwen-turbo", "text", 6000),
("qwen-plus", "text", 30000),
("qwen-max", "text", 6000),
("qwen-max-longcontext", "text", 28000),
// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-qianwen-vl-plus-api
("qwen-vl-plus", "text,vision", 0),
("qwen-vl-max", "text,vision", 0),
]
);
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 1] =

@@ -27,15 +27,6 @@ pub struct VertexAIConfig {
}
impl VertexAIClient {
list_models_fn!(
VertexAIConfig,
[
// https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
("gemini-1.0-pro", "text", 24568),
("gemini-1.0-pro-vision", "text,vision", 14336),
("gemini-1.5-pro-preview-0409", "text,vision", 1000000),
]
);
config_get_fn!(api_base, get_api_base);
pub const PROMPTS: [PromptType<'static>; 1] =
