
Server: completion_probabilities (tok_str and prob) seem to be broken #7197

Description

@reuank

Hello,

I am using the llama.cpp server and noticed strange behavior in the server responses.

When I start a server on commit 637e9a8 with ./server -m ../models/llama-2-7b-chat.Q4_K_M.gguf -c 4096 -ngl 1000 -np 1 -cb and send this curl command:

curl --request POST \
    --url http://localhost:8080/completion \
    --header "Content-Type: application/json" \
    --data '{"prompt": "Choose between A, B and C.\n\n","n_predict": 1, "n_probs": 10, "temperature": 0}'

I get the following JSON response:

// commit hash 637e9a86
{
    "content": "A",
    "id_slot": 0,
    "stop": true,
    "model": "../models/llama-2-7b-chat.Q4_K_M.gguf",
    "tokens_predicted": 1,
    "tokens_evaluated": 12,
    "generation_settings":
    {
        ...
    },
    "prompt": "Choose between A, B and C.\n\n",
    "truncated": false,
    "stopped_eos": false,
    "stopped_word": false,
    "stopped_limit": true,
    "stopping_word": "",
    "tokens_cached": 12,
    "timings":
    {
        "prompt_n": 12,
        "prompt_ms": 280.894,
        "prompt_per_token_ms": 23.407833333333333,
        "prompt_per_second": 42.720741632074734,
        "predicted_n": 1,
        "predicted_ms": 1.734,
        "predicted_per_token_ms": 1.734,
        "predicted_per_second": 576.7012687427913
    },
    "completion_probabilities":
    [
        {
            "content": "A",
            "probs":
            [
                {
                    "tok_str": "A",
                    "prob": 0.6929230093955994
                },
                {
                    "tok_str": "Option",
                    "prob": 0.04242830350995064
                },
                {
                    "tok_str": "Wh",
                    "prob": 0.035371895879507065
                },
                {
                    "tok_str": "What",
                    "prob": 0.021582460030913353
                },
                {
                    "tok_str": "The",
                    "prob": 0.020988475531339645
                },
                {
                    "tok_str": "Your",
                    "prob": 0.009944385848939419
                },
                {
                    "tok_str": "In",
                    "prob": 0.007504411973059177
                },
                {
                    "tok_str": "You",
                    "prob": 0.0066000730730593204
                },
                {
                    "tok_str": "Question",
                    "prob": 0.006469167303293943
                },
                {
                    "tok_str": "If",
                    "prob": 0.006083796266466379
                }
            ]
        }
    ]
}

But when running the same command against the latest commit on master (f89fe27), I get:

// commit hash f89fe273
{
    "content": "A",
    "id_slot": 0,
    "stop": true,
    "model": "../models/llama-2-7b-chat.Q4_K_M.gguf",
    "tokens_predicted": 1,
    "tokens_evaluated": 12,
    "generation_settings":
    {
        ...
    },
    "prompt": "Choose between A, B and C.\n\n",
    "truncated": false,
    "stopped_eos": false,
    "stopped_word": false,
    "stopped_limit": true,
    "stopping_word": "",
    "tokens_cached": 12,
    "timings":
    {
        "prompt_n": 12,
        "prompt_ms": 298.66,
        "prompt_per_token_ms": 24.888333333333335,
        "prompt_per_second": 40.17946829170294,
        "predicted_n": 1,
        "predicted_ms": 0.021,
        "predicted_per_token_ms": 0.021,
        "predicted_per_second": 47619.04761904762
    },
    "completion_probabilities":
    [
        {
            "content": "A",
            "probs":
            [
                {
                    "tok_str": "",
                    "prob": 1.0
                },
                {
                    "tok_str": "<s>",
                    "prob": 0.0
                },
                {
                    "tok_str": "</s>",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0000",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0001",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0002",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0003",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0004",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0005",
                    "prob": 0.0
                },
                {
                    "tok_str": "\u0006",
                    "prob": 0.0
                }
            ]
        }
    ]
}

The returned probabilities are clearly wrong (1.0 for the first entry, 0.0 for all the others), and the tok_str values appear to be the first n tokens of the tokenizer vocabulary rather than the actual top candidates.
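
For reference, here is the small script I use to rerun the request and print the returned candidates, so the two outputs above are easy to compare. It is only a sketch and assumes nothing beyond the server running on localhost:8080 (started as above) and the /completion response fields shown in the dumps:

import json
import urllib.request

# Same request body as the curl command above.
payload = {
    "prompt": "Choose between A, B and C.\n\n",
    "n_predict": 1,
    "n_probs": 10,
    "temperature": 0,
}

req = urllib.request.Request(
    "http://localhost:8080/completion",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

with urllib.request.urlopen(req) as resp:
    result = json.load(resp)

# Print the predicted token and its top-n candidates with probabilities.
for entry in result["completion_probabilities"]:
    print(f'predicted: {entry["content"]!r}')
    for p in entry["probs"]:
        print(f'  {p["tok_str"]!r}: {p["prob"]:.6f}')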

What happened here?

Best
Leon
