llamamodel: manage gguf context with unique_ptr

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2 weeks ago · 36d6951a4b
parent 29686bf976
commit 36d6951a4b
1 changed files with 44 additions and 54 deletions
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@ -10,6 +10,7 @@
 #include <iomanip>
 #include <iostream>
 #include <map>
+#include <memory>
 #include <numeric>
 #include <random>
 #include <sstream>
@ -116,21 +117,30 @@ const char *get_arch_name(gguf_context *ctx_gguf) {
    return gguf_get_val_str(ctx_gguf, kid);
 }

-static gguf_context *load_gguf(const char *fname) {
+namespace {
+
+struct gguf_context_deleter {
+    void operator()(gguf_context *ctx) { gguf_free(ctx); }
+};
+
+} // namespace
+
+using unique_gguf_context = std::unique_ptr<gguf_context, gguf_context_deleter>;
+
+static unique_gguf_context load_gguf(const char *fname) {
    struct gguf_init_params params = {
        /*.no_alloc = */ true,
        /*.ctx      = */ nullptr,
    };
-    gguf_context *ctx = gguf_init_from_file(fname, params);
+    unique_gguf_context ctx(gguf_init_from_file(fname, params));
    if (!ctx) {
        std::cerr << __func__ << ": gguf_init_from_file failed\n";
        return nullptr;
    }

-    int gguf_ver = gguf_get_version(ctx);
+    int gguf_ver = gguf_get_version(ctx.get());
    if (gguf_ver > GGUF_VER_MAX) {
        std::cerr << __func__ << ": unsupported gguf version: " << gguf_ver << "\n";
-        gguf_free(ctx);
        return nullptr;
    }

@ -138,32 +148,25 @@ static gguf_context *load_gguf(const char *fname) {
 }

 static int32_t get_arch_key_u32(std::string const &modelPath, std::string const &archKey) {
-    int32_t value = -1;
-    std::string arch;
-
-    auto * ctx = load_gguf(modelPath.c_str());
+    auto ctxMem = load_gguf(modelPath.c_str());
+    auto *ctx = ctxMem.get();
    if (!ctx)
-        goto cleanup;
+        return -1;

+    std::string arch;
    try {
        arch = get_arch_name(ctx);
    } catch (const std::runtime_error &) {
-        goto cleanup; // cannot read key
+        return -1; // cannot read key
    }

-    {
-        auto key = arch + "." + archKey;
-        int keyidx = gguf_find_key(ctx, key.c_str());
-        if (keyidx != -1) {
-            value = gguf_get_val_u32(ctx, keyidx);
-        } else {
-            std::cerr << __func__ << ": " << key << "not found in " << modelPath << "\n";
-        }
-    }
+    auto key = arch + "." + archKey;
+    int keyidx = gguf_find_key(ctx, key.c_str());
+    if (keyidx != -1)
+        return gguf_get_val_u32(ctx, keyidx);

-cleanup:
-    gguf_free(ctx);
-    return value;
+    std::cerr << __func__ << ": " << key << "not found in " << modelPath << "\n";
+    return -1;
 }

 struct LLamaPrivate {
@ -218,7 +221,8 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
 }

 bool LLamaModel::isModelBlacklisted(const std::string &modelPath) const {
-    auto * ctx = load_gguf(modelPath.c_str());
+    auto ctxMem = load_gguf(modelPath.c_str());
+    auto *ctx = ctxMem.get();
    if (!ctx) {
        std::cerr << __func__ << ": failed to load " << modelPath << "\n";
        return false;
@ -232,7 +236,6 @@ bool LLamaModel::isModelBlacklisted(const std::string &modelPath) const {
        return keyidx;
    };

-    bool res = false;
    try {
        std::string name(gguf_get_val_str(ctx, get_key("general.name")));
        int token_idx = get_key("tokenizer.ggml.tokens");
@ -243,37 +246,30 @@ bool LLamaModel::isModelBlacklisted(const std::string &modelPath) const {
            && n_vocab == 32002
            && gguf_get_arr_str(ctx, token_idx, 32000) == "<dummy32000>"s // should be <|im_end|>
        ) {
-            res = true;
+            return true;
        }
    } catch (const std::logic_error &e) {
        std::cerr << __func__ << ": " << e.what() << "\n";
    }

-    gguf_free(ctx);
-    return res;
+    return false;
 }

 bool LLamaModel::isEmbeddingModel(const std::string &modelPath) const {
-    bool result = false;
-    std::string arch;
-
-    auto *ctx_gguf = load_gguf(modelPath.c_str());
-    if (!ctx_gguf) {
+    auto ctxMem = load_gguf(modelPath.c_str());
+    if (!ctxMem) {
        std::cerr << __func__ << ": failed to load GGUF from " <<  modelPath << "\n";
-        goto cleanup;
+        return false;
    }

+    std::string arch;
    try {
-        arch = get_arch_name(ctx_gguf);
+        arch = get_arch_name(ctxMem.get());
    } catch (const std::runtime_error &) {
-        goto cleanup; // cannot read key
+        return false; // cannot read key
    }

-    result = is_embedding_arch(arch);
-
-cleanup:
-    gguf_free(ctx_gguf);
-    return result;
+    return is_embedding_arch(arch);
 }

 bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
@ -983,28 +979,22 @@ DLL_EXPORT const char *get_build_variant() {
 }

 DLL_EXPORT char *get_file_arch(const char *fname) {
-    char *arch = nullptr;
-    std::string archStr;
-
-    auto *ctx = load_gguf(fname);
+    auto ctxMem = load_gguf(fname);
+    auto *ctx = ctxMem.get();
    if (!ctx)
-        goto cleanup;
+        return nullptr;

+    std::string archStr;
    try {
        archStr = get_arch_name(ctx);
    } catch (const std::runtime_error &) {
-        goto cleanup; // cannot read key
+        return nullptr; // cannot read key
    }

-    if (is_embedding_arch(archStr) && gguf_find_key(ctx, (archStr + ".pooling_type").c_str()) < 0) {
-        // old bert.cpp embedding model
-    } else {
-        arch = strdup(archStr.c_str());
-    }
+    if (is_embedding_arch(archStr) && gguf_find_key(ctx, (archStr + ".pooling_type").c_str()) < 0)
+        return nullptr; // old bert.cpp embedding model

-cleanup:
-    gguf_free(ctx);
-    return arch;
+    return strdup(archStr.c_str());
 }

 DLL_EXPORT bool is_arch_supported(const char *arch) {