Compare commits

...

2 Commits

@@ -482,8 +482,8 @@
   const QUERY = parseQueryString(location.search);
   const NUM = parseInt(QUERY.num) || 2
   const API_BASE = QUERY.api_base || ".";
-  const DATAJSON_API = API_BASE + "/data.json";
   const CHAT_COMPLETIONS_URL = API_BASE + "/v1/chat/completions";
+  const MODELS_API = API_BASE + "/v1/models";
   const MODEL_IDS_STORAGE_KEY = "__model_ids__";
   document.addEventListener("alpine:init", () => {
@@ -512,7 +512,7 @@
       async init() {
         try {
-          const { models } = await fetchDataJSON(DATAJSON_API);
+          const models = await fetchJSON(MODELS_API);
           this.models = models;
         } catch (err) {
           console.error(err);
@@ -746,8 +746,8 @@
           messages: messages,
           stream: true,
         };
-        const { max_output_token, need_max_tokens } = retrieveModel(this.models, chat.model_id);
-        if (!body["max_tokens"] && need_max_tokens) {
+        const { max_output_token, pass_max_tokens } = retrieveModel(this.models, chat.model_id);
+        if (!body["max_tokens"] && pass_max_tokens) {
          body["max_tokens"] = max_output_token;
        };
        return body;
@@ -755,10 +755,10 @@
     }));
   }

-  async function fetchDataJSON(url) {
+  async function fetchJSON(url) {
     const res = await fetch(url);
     const data = await res.json()
-    return data;
+    return data.data;
   }

   async function* fetchChatCompletions(url, body, signal) {
@@ -819,14 +819,14 @@
   function retrieveModel(models, id) {
     const model = models.find(model => model.id === id);
     if (!model) return {};
-    const max_output_token = model.max_output_tokens || model["max_output_tokens?"] || null;
-    const need_max_tokens = !!model.max_output_tokens;
+    const max_output_token = model.max_output_tokens;
     const supports_vision = !!model.supports_vision;
+    const pass_max_tokens = !!model.pass_max_tokens;
     return {
       id,
       max_output_token,
-      need_max_tokens,
       supports_vision,
+      pass_max_tokens,
     }
   }

@@ -641,8 +641,9 @@
 <script>
   const QUERY = parseQueryString(location.search);
   const API_BASE = QUERY.api_base || ".";
-  const DATAJSON_API = API_BASE + "/data.json";
   const CHAT_COMPLETIONS_URL = API_BASE + "/v1/chat/completions";
+  const MODELS_API = API_BASE + "/v1/models";
+  const ROLES_API = API_BASE + "/v1/roles";
   const SETTINGS_STORAGE_KEY = "__settings__";
   document.addEventListener("alpine:init", () => {
@@ -691,7 +692,7 @@
       async init() {
         try {
-          const { models, roles } = await fetchDataJSON(DATAJSON_API);
+          const [models, roles] = await Promise.all([MODELS_API, ROLES_API].map(url => fetchJSON(url)));
           this.models = models;
           this.roles.push(...roles);
         } catch (err) {
@@ -939,8 +940,8 @@
             body[body_key || setting_key] = this.settings[setting_key];
           }
         });
-        const { max_output_token, need_max_tokens } = this.currentModel;
-        if (!body["max_tokens"] && need_max_tokens) {
+        const { max_output_token, pass_max_tokens } = this.currentModel;
+        if (!body["max_tokens"] && pass_max_tokens) {
          body["max_tokens"] = max_output_token;
        };
        return body;
@@ -949,10 +950,10 @@
   }

-  async function fetchDataJSON(url) {
+  async function fetchJSON(url) {
     const res = await fetch(url);
     const data = await res.json()
-    return data;
+    return data.data;
   }

   async function* fetchChatCompletions(url, body, signal) {
@@ -1013,14 +1014,14 @@
   function retrieveModel(models, id) {
     const model = models.find(model => model.id === id);
     if (!model) return {};
-    const max_output_token = model.max_output_tokens || model["max_output_tokens?"] || null;
-    const need_max_tokens = !!model.max_output_tokens;
+    const max_output_token = model.max_output_tokens;
     const supports_vision = !!model.supports_vision;
+    const pass_max_tokens = !!model.pass_max_tokens;
     return {
       id,
       max_output_token,
-      need_max_tokens,
       supports_vision,
+      pass_max_tokens,
     }
   }
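
Both pages previously loaded everything from a combined /data.json; they now call the OpenAI-style /v1/models endpoint (plus /v1/roles in the playground), and both endpoints wrap their payload in a { "data": [...] } envelope that fetchJSON unwraps. A minimal sketch of consuming that envelope, assuming serde/serde_json; the ModelEntry fields mirror what Server::list_models emits later in this diff, and the example id value is illustrative only.

// Sketch (not part of the diff): parse the `{ "data": [...] }` envelope
// returned by `/v1/models` and `/v1/roles`.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Envelope<T> {
    data: Vec<T>, // both endpoints use this wrapper
}

#[derive(Debug, Deserialize)]
struct ModelEntry {
    id: String,
    max_output_tokens: Option<i64>,
    #[serde(default)]
    pass_max_tokens: bool,
    #[serde(default)]
    supports_vision: bool,
}

fn main() -> serde_json::Result<()> {
    // Field set taken from the `json!` body in `Server::list_models` below;
    // the id is a made-up example.
    let body = r#"{"data":[{"id":"openai:gpt-4-turbo","max_output_tokens":4096,"pass_max_tokens":false,"supports_vision":true}]}"#;
    let models: Envelope<ModelEntry> = serde_json::from_str(body)?;
    println!("{:?}", models.data);
    Ok(())
}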

@@ -33,7 +33,6 @@ clients:
 #   models:
 #     - name: xxxx # The model name
 #       max_input_tokens: 100000
-#       max_output_tokens: 4096
 #       supports_vision: true
 #       extra_fields: # Set custom parameters, will merge with the body json
 #         key: value

@@ -8,44 +8,45 @@
   models:
     - name: gpt-3.5-turbo
       max_input_tokens: 16385
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 0.5
       output_price: 1.5
     - name: gpt-3.5-turbo-1106
       max_input_tokens: 16385
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 1
       output_price: 2
     - name: gpt-4-turbo
       max_input_tokens: 128000
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 10
       output_price: 30
       supports_vision: true
     - name: gpt-4-turbo-preview
       max_input_tokens: 128000
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 10
       output_price: 30
     - name: gpt-4-1106-preview
       max_input_tokens: 128000
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 10
       output_price: 30
     - name: gpt-4-vision-preview
       max_input_tokens: 128000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 10
       output_price: 30
       supports_vision: true
     - name: gpt-4
       max_input_tokens: 8192
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 30
       output_price: 60
     - name: gpt-4-32k
       max_input_tokens: 32768
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 60
       output_price: 120
@@ -59,18 +60,18 @@
   models:
     - name: gemini-1.0-pro-latest
       max_input_tokens: 30720
-      max_output_tokens?: 2048
+      max_output_tokens: 2048
       input_price: 0.5
       output_price: 1.5
     - name: gemini-1.0-pro-vision-latest
       max_input_tokens: 12288
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 0.5
       output_price: 1.5
       supports_vision: true
     - name: gemini-1.5-pro-latest
       max_input_tokens: 1048576
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 7
       output_price: 21
       supports_vision: true
@@ -85,18 +86,21 @@
     - name: claude-3-opus-20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: claude-3-sonnet-20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: claude-3-haiku-20240307
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
@@ -140,12 +144,12 @@
   models:
     - name: command-r
       max_input_tokens: 128000
-      max_output_tokens?: 4000
+      max_output_tokens: 4000
       input_price: 0.5
       output_price: 1.5
     - name: command-r-plus
       max_input_tokens: 128000
-      max_output_tokens?: 4000
+      max_output_tokens: 4000
       input_price: 3
       output_price: 15
@@ -159,28 +163,28 @@
   models:
     - name: llama-3-sonar-small-32k-chat
       max_input_tokens: 32768
-      max_output_tokens?: 32768
+      max_output_tokens: 32768
       input_price: 0.2
       output_price: 0.2
     - name: llama-3-sonar-large-32k-chat
       max_input_tokens: 32768
-      max_output_tokens?: 32768
+      max_output_tokens: 32768
       input_price: 0.6
       output_price: 0.6
     - name: llama-3-8b-instruct
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.2
       output_price: 0.2
     - name: llama-3-70b-instruct
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 1
       output_price: 1
     - name: mixtral-8x7b-instruct
       max_input_tokens: 16384
-      max_output_tokens?: 16384
+      max_output_tokens: 16384
       input_price: 0.6
       output_price: 0.6
@@ -195,22 +199,22 @@
   models:
     - name: llama3-8b-8192
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.05
       output_price: 0.10
     - name: llama3-70b-8192
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.59
       output_price: 0.79
     - name: mixtral-8x7b-32768
       max_input_tokens: 32768
-      max_output_tokens?: 32768
+      max_output_tokens: 32768
       input_price: 0.27
       output_price: 0.27
     - name: gemma-7b-it
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.10
       output_price: 0.10
@@ -224,18 +228,18 @@
   models:
     - name: gemini-1.0-pro
       max_input_tokens: 24568
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.125
       output_price: 0.375
     - name: gemini-1.0-pro-vision
       max_input_tokens: 14336
-      max_output_tokens?: 2048
+      max_output_tokens: 2048
       input_price: 0.125
       output_price: 0.375
       supports_vision: true
     - name: gemini-1.5-pro-preview-0409
       max_input_tokens: 1000000
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 2.5
       output_price: 7.5
       supports_vision: true
@@ -250,18 +254,21 @@
     - name: claude-3-opus@20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: claude-3-sonnet@20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: claude-3-haiku@20240307
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
@@ -277,44 +284,52 @@
     - name: anthropic.claude-3-opus-20240229-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: anthropic.claude-3-sonnet-20240229-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: anthropic.claude-3-haiku-20240307-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
     - name: meta.llama3-8b-instruct-v1:0
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.4
       output_price: 0.6
     - name: meta.llama3-70b-instruct-v1:0
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 2.65
       output_price: 3.5
     - name: mistral.mistral-7b-instruct-v0:2
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.15
       output_price: 0.2
     - name: mistral.mixtral-8x7b-instruct-v0:1
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.45
       output_price: 0.7
     - name: mistral.mistral-large-2402-v1:0
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 8
       output_price: 2.4
@@ -328,21 +343,27 @@
     - name: '@cf/meta/llama-3-8b-instruct'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@cf/google/gemma-7b-it-lora'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@cf/qwen/qwen1.5-14b-chat-awq'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@hf/nexusflow/starling-lm-7b-beta'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true

 - platform: replicate
   # docs:
@@ -354,21 +375,25 @@
     - name: meta/meta-llama-3-70b-instruct
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.65
       output_price: 2.75
     - name: meta/meta-llama-3-8b-instruct
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.05
       output_price: 0.25
     - name: mistralai/mistral-7b-instruct-v0.2
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.05
       output_price: 0.25
     - name: mistralai/mixtral-8x7b-instruct-v0.1
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.3
       output_price: 1
@@ -382,26 +407,31 @@
     - name: ernie-4.0-8k-preview
       max_input_tokens: 5120
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 16.8
       output_price: 16.8
     - name: ernie-3.5-8k-preview
       max_input_tokens: 5120
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 1.68
       output_price: 1.68
     - name: ernie-speed-128k
       max_input_tokens: 124000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.56
       output_price: 1.12
     - name: ernie-lite-8k
       max_input_tokens: 7168
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 0.42
       output_price: 0.84
     - name: ernie-tiny-8k
       max_input_tokens: 7168
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 0.14
       output_price: 0.14
@@ -414,22 +444,22 @@
   models:
     - name: qwen-turbo
       max_input_tokens: 6000
-      max_output_tokens?: 1500
+      max_output_tokens: 1500
       input_price: 1.12
       output_price: 1.12
     - name: qwen-plus
       max_input_tokens: 30000
-      max_output_tokens?: 2000
+      max_output_tokens: 2000
       input_price: 2.8
       output_price: 2.8
     - name: qwen-max
       max_input_tokens: 6000
-      max_output_tokens?: 2000
+      max_output_tokens: 2000
       input_price: 16.8
       output_price: 16.8
     - name: qwen-max-longcontext
       max_input_tokens: 28000
-      max_output_tokens?: 2000
+      max_output_tokens: 2000
     - name: qwen-vl-plus
       input_price: 1.12
       output_price: 1.12
@@ -686,16 +716,22 @@
       supports_vision: true
     - name: anthropic/claude-3-opus
       max_input_tokens: 200000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: anthropic/claude-3-sonnet
       max_input_tokens: 200000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: anthropic/claude-3-haiku
       max_input_tokens: 200000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
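
Throughout models.yaml the old max_output_tokens? key (the trailing ? marked a reference-only value) becomes a plain max_output_tokens, and the models whose APIs require the limit in the request gain pass_max_tokens: true. A sketch of how an entry deserializes under the new scheme, assuming serde_yaml as the parser; the struct is a trimmed copy of the ModelConfig shown later in this diff.

// Sketch: mapping a models.yaml entry onto the new config fields.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ModelConfig {
    name: String,
    max_input_tokens: Option<usize>,
    max_output_tokens: Option<isize>,
    #[serde(default)]
    pass_max_tokens: bool, // absent in YAML => false
}

fn main() -> Result<(), serde_yaml::Error> {
    let yaml = r#"
- name: claude-3-opus-20240229
  max_input_tokens: 200000
  max_output_tokens: 4096
  pass_max_tokens: true
- name: gpt-4-turbo
  max_input_tokens: 128000
  max_output_tokens: 4096
"#;
    let models: Vec<ModelConfig> = serde_yaml::from_str(yaml)?;
    assert!(models[0].pass_max_tokens);  // claude: max_tokens is sent
    assert!(!models[1].pass_max_tokens); // gpt-4-turbo: informational only
    println!("{models:?}");
    Ok(())
}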

@@ -172,7 +172,7 @@ async fn send_message_streaming(
                 let data: Value = decode_chunk(message.payload()).ok_or_else(|| {
                     anyhow!("Invalid chunk data: {}", hex_encode(message.payload()))
                 })?;
-                debug!("bedrock chunk: {data}");
+                // debug!("bedrock chunk: {data}");
                 match model_category {
                     ModelCategory::Anthropic => {
                         if let Some(typ) = data["type"].as_str() {
@@ -235,7 +235,7 @@ fn meta_llama_build_body(data: SendData, model: &Model, pt: PromptFormat) -> Res
     let prompt = generate_prompt(&messages, pt)?;
     let mut body = json!({ "prompt": prompt });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_gen_len"] = v.into();
     }
     if let Some(v) = temperature {
@@ -258,7 +258,7 @@ fn mistral_build_body(data: SendData, model: &Model) -> Result<Value> {
     let prompt = generate_prompt(&messages, MISTRAL_PROMPT_FORMAT)?;
     let mut body = json!({ "prompt": prompt });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -142,7 +142,7 @@ pub fn claude_build_body(data: SendData, model: &Model) -> Result<Value> {
     if let Some(v) = system_message {
         body["system"] = v.into();
     }
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -88,7 +88,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         "messages": messages,
     });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -135,7 +135,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         body["chat_history"] = messages.into();
     }
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -128,7 +128,7 @@ fn build_body(data: SendData, model: &Model) -> Value {
         "messages": messages,
     });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_output_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -14,11 +14,11 @@ pub struct Model {
     pub name: String,
     pub max_input_tokens: Option<usize>,
     pub max_output_tokens: Option<isize>,
-    pub ref_max_output_tokens: Option<isize>,
+    pub pass_max_tokens: bool,
     pub input_price: Option<f64>,
     pub output_price: Option<f64>,
-    pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
     pub capabilities: ModelCapabilities,
+    pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
 }

 impl Default for Model {
@@ -32,13 +32,13 @@ impl Model {
         Self {
             client_name: client_name.into(),
             name: name.into(),
-            extra_fields: None,
             max_input_tokens: None,
             max_output_tokens: None,
-            ref_max_output_tokens: None,
+            pass_max_tokens: false,
             input_price: None,
             output_price: None,
             capabilities: ModelCapabilities::Text,
+            extra_fields: None,
         }
     }
@@ -49,8 +49,7 @@
         let mut model = Model::new(client_name, &v.name);
         model
             .set_max_input_tokens(v.max_input_tokens)
-            .set_max_output_tokens(v.max_output_tokens)
-            .set_ref_max_output_tokens(v.ref_max_output_tokens)
+            .set_max_tokens(v.max_output_tokens, v.pass_max_tokens)
             .set_input_price(v.input_price)
             .set_output_price(v.output_price)
             .set_supports_vision(v.supports_vision)
@@ -97,7 +96,7 @@
     pub fn description(&self) -> String {
         let max_input_tokens = format_option_value(&self.max_input_tokens);
-        let max_output_tokens = format_option_value(&self.show_max_output_tokens());
+        let max_output_tokens = format_option_value(&self.max_output_tokens);
         let input_price = format_option_value(&self.input_price);
         let output_price = format_option_value(&self.output_price);
         let vision = if self.capabilities.contains(ModelCapabilities::Vision) {
@@ -115,8 +114,12 @@
         self.capabilities.contains(ModelCapabilities::Vision)
     }

-    pub fn show_max_output_tokens(&self) -> Option<isize> {
-        self.max_output_tokens.or(self.ref_max_output_tokens)
+    pub fn max_tokens_param(&self) -> Option<isize> {
+        if self.pass_max_tokens {
+            self.max_output_tokens
+        } else {
+            None
+        }
     }

     pub fn set_max_input_tokens(&mut self, max_input_tokens: Option<usize>) -> &mut Self {
@@ -127,19 +130,16 @@
         self
     }

-    pub fn set_max_output_tokens(&mut self, max_output_tokens: Option<isize>) -> &mut Self {
+    pub fn set_max_tokens(
+        &mut self,
+        max_output_tokens: Option<isize>,
+        pass_max_tokens: bool,
+    ) -> &mut Self {
         match max_output_tokens {
             None | Some(0) => self.max_output_tokens = None,
             _ => self.max_output_tokens = max_output_tokens,
         }
-        self
-    }
-
-    pub fn set_ref_max_output_tokens(&mut self, ref_max_output_tokens: Option<isize>) -> &mut Self {
-        match ref_max_output_tokens {
-            None | Some(0) => self.ref_max_output_tokens = None,
-            _ => self.ref_max_output_tokens = ref_max_output_tokens,
-        }
+        self.pass_max_tokens = pass_max_tokens;
         self
     }
@@ -237,12 +237,12 @@ pub struct ModelConfig {
     pub name: String,
     pub max_input_tokens: Option<usize>,
     pub max_output_tokens: Option<isize>,
-    #[serde(rename = "max_output_tokens?")]
-    pub ref_max_output_tokens: Option<isize>,
     pub input_price: Option<f64>,
     pub output_price: Option<f64>,
     #[serde(default)]
     pub supports_vision: bool,
+    #[serde(default)]
+    pub pass_max_tokens: bool,
     pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
 }
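
With the two fields in place, every client's build_body switches from reading model.max_output_tokens directly to model.max_tokens_param(), which only yields a value when pass_max_tokens is set; the config hunk further down passes true for an explicit `.set max_output_tokens`, so a user-set limit is always forwarded. A standalone sketch of these semantics, stripped to the two relevant fields.

// Sketch of the gating introduced by `pass_max_tokens` (mirrors the `Model`
// methods in this diff, minus the unrelated fields).
#[derive(Default)]
struct Model {
    max_output_tokens: Option<isize>,
    pass_max_tokens: bool,
}

impl Model {
    fn set_max_tokens(&mut self, max_output_tokens: Option<isize>, pass_max_tokens: bool) -> &mut Self {
        // None and Some(0) both normalize to "no limit recorded".
        match max_output_tokens {
            None | Some(0) => self.max_output_tokens = None,
            _ => self.max_output_tokens = max_output_tokens,
        }
        self.pass_max_tokens = pass_max_tokens;
        self
    }

    fn max_tokens_param(&self) -> Option<isize> {
        // Only flagged models send the limit to the API.
        if self.pass_max_tokens {
            self.max_output_tokens
        } else {
            None
        }
    }
}

fn main() {
    let mut m = Model::default();
    m.set_max_tokens(Some(4096), false);
    assert_eq!(m.max_tokens_param(), None); // documented, not sent
    m.set_max_tokens(Some(4096), true);
    assert_eq!(m.max_tokens_param(), Some(4096)); // sent as max_tokens
    m.set_max_tokens(Some(0), true);
    assert_eq!(m.max_tokens_param(), None); // zero clears the limit
}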

@@ -159,7 +159,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         "options": {},
     });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["options"]["num_predict"] = v.into();
     }
     if let Some(v) = temperature {

@@ -90,7 +90,7 @@ pub fn openai_build_body(data: SendData, model: &Model) -> Value {
         "messages": messages,
     });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -173,7 +173,7 @@ fn build_body(data: SendData, model: &Model, is_vl: bool) -> Result<(Value, bool
         parameters["incremental_output"] = true.into();
     }
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         parameters["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -148,7 +148,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         "prompt_template": "{prompt}"
     });
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         input["max_tokens"] = v.into();
         input["max_new_tokens"] = v.into();
     }

@@ -201,7 +201,7 @@ pub(crate) fn gemini_build_body(
         body["safetySettings"] = safety_settings;
     }
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["generationConfig"]["maxOutputTokens"] = v.into();
     }
     if let Some(v) = temperature {

@@ -422,7 +422,7 @@ impl Config {
             (
                 "max_output_tokens",
                 self.model
-                    .max_output_tokens
+                    .max_tokens_param()
                     .map(|v| format!("{v} (current model)"))
                     .unwrap_or_else(|| "-".into()),
             ),
@@ -523,7 +523,7 @@
             (values, args[0])
         } else if args.len() == 2 {
             let values = match args[0] {
-                "max_output_tokens" => match self.model.show_max_output_tokens() {
+                "max_output_tokens" => match self.model.max_output_tokens {
                     Some(v) => vec![v.to_string()],
                     None => vec![],
                 },
@@ -564,7 +564,7 @@
         match key {
             "max_output_tokens" => {
                 let value = parse_value(value)?;
-                self.model.set_max_output_tokens(value);
+                self.model.set_max_tokens(value, true);
             }
             "temperature" => {
                 let value = parse_value(value)?;

@@ -93,7 +93,7 @@ impl Server {
                 "id": id,
                 "max_input_tokens": model.max_input_tokens,
                 "max_output_tokens": model.max_output_tokens,
-                "max_output_tokens?": model.ref_max_output_tokens,
+                "pass_max_tokens": model.pass_max_tokens,
                 "input_price": model.input_price,
                 "output_price": model.output_price,
                 "supports_vision": model.supports_vision(),
@@ -158,12 +158,14 @@
         let mut status = StatusCode::OK;
         let res = if path == "/v1/chat/completions" {
             self.chat_completion(req).await
+        } else if path == "/v1/models" {
+            self.list_models()
+        } else if path == "/v1/roles" {
+            self.list_roles()
         } else if path == "/playground" || path == "/playground.html" {
             self.playground_page()
         } else if path == "/arena" || path == "/arena.html" {
             self.arena_page()
-        } else if path == "/data.json" {
-            self.data_json()
         } else {
             status = StatusCode::NOT_FOUND;
             Err(anyhow!("The requested endpoint was not found."))
@@ -198,11 +200,16 @@
         Ok(res)
     }

-    fn data_json(&self) -> Result<AppResponse> {
-        let data = json!({
-            "models": self.models,
-            "roles": self.roles,
-        });
+    fn list_models(&self) -> Result<AppResponse> {
+        let data = json!({ "data": self.models });
         let res = Response::builder()
             .header("Content-Type", "application/json; charset=utf-8")
             .body(Full::new(Bytes::from(data.to_string())).boxed())?;
+        Ok(res)
+    }
+
+    fn list_roles(&self) -> Result<AppResponse> {
+        let data = json!({ "data": self.roles });
+        let res = Response::builder()
+            .header("Content-Type", "application/json; charset=utf-8")
+            .body(Full::new(Bytes::from(data.to_string())).boxed())?;
@@ -244,7 +251,7 @@
         let mut client = init_client(&config)?;
         if max_tokens.is_some() {
-            client.model_mut().set_max_output_tokens(max_tokens);
+            client.model_mut().set_max_tokens(max_tokens, true);
         }
         let abort = create_abort_signal();
         let http_client = client.build_client()?;
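
A quick way to exercise the two new endpoints once a server is running; this sketch assumes `aichat --serve` on its default 127.0.0.1:8000 address and uses blocking reqwest, neither of which is part of this diff.

// Sketch: fetch the new endpoints and print the unwrapped `data` arrays.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    for path in ["/v1/models", "/v1/roles"] {
        let url = format!("http://127.0.0.1:8000{path}");
        let v: serde_json::Value = reqwest::blocking::get(&url)?.json()?;
        println!("{path}: {}", v["data"]);
    }
    Ok(())
}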
