llama_cpp/llama.py: 13 additions and 0 deletions
@@ -209,6 +209,8 @@ def __call__(
 class Llama:
     """High-level Python wrapper for a llama.cpp model."""
 
+    __backend_initialized = False
+
     def __init__(
         self,
         model_path: str,
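The class-level `__backend_initialized` flag implies the native backend should be set up only once per process. Below is a minimal standalone sketch of that guard pattern, not the library's actual code: `llama_backend_init` here is a stand-in for the native binding in the low-level `llama_cpp` module, and the constructor body is trimmed to the parts relevant to the flag.

```python
# Standalone sketch of a one-time backend initialization guard.
# llama_backend_init below is a stand-in for the native llama.cpp binding.

def llama_backend_init(numa: bool) -> None:
    """Stand-in for the native llama.cpp backend initializer."""
    print(f"backend initialized (numa={numa})")


class Llama:
    __backend_initialized = False  # class-level, so it is shared by all instances

    def __init__(self, model_path: str, numa: bool = False) -> None:
        if not Llama.__backend_initialized:
            # The backend is process-wide: only the first constructor call's
            # numa value takes effect; later instances skip this branch.
            llama_backend_init(numa)
            Llama.__backend_initialized = True
        self.model_path = model_path


first = Llama("model-a.gguf", numa=True)    # initializes the backend with NUMA on
second = Llama("model-b.gguf", numa=False)  # backend already initialized; numa ignored
```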
@@ -234,6 +236,7 @@ def __init__(
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        numa: bool = False,
         verbose: bool = True,
         **kwargs  # type: ignore
     ):
@@ -261,6 +264,7 @@ def __init__(
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
+            numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
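Given the note above, here is a hedged usage sketch of the new parameter; the model paths are placeholders and this assumes a build of llama-cpp-python that includes this change. Because llama_backend_init runs only once, the numa value passed to the first Llama constructed in a process is the one that sticks.

```python
from llama_cpp import Llama

# Placeholder model path; numa=True on this first construction is the value
# that llama_backend_init uses for the rest of the program.
llm = Llama(model_path="./models/7B/ggml-model.bin", numa=True)

# A second model loaded later reuses the already-initialized backend,
# so numa=False here has no effect.
other = Llama(model_path="./models/7B/other-model.bin", numa=False)
```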