Update README.md
Browse files
README.md
CHANGED
@@ -57,11 +57,11 @@ snapshot_download(
|
|
57 |
allow_patterns = ["*UD-IQ1_S*"], # Select quant type UD-IQ1_S for 1.58bit
|
58 |
)
|
59 |
```
|
60 |
-
5. Example with
|
61 |
```bash
|
62 |
./llama.cpp/llama-cli \
|
63 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
64 |
-
--cache-type-k
|
65 |
--threads 12 -no-cnv --prio 2 \
|
66 |
--temp 0.6 \
|
67 |
--ctx-size 8192 \
|
@@ -83,7 +83,7 @@ snapshot_download(
|
|
83 |
```bash
|
84 |
./llama.cpp/llama-cli \
|
85 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
86 |
-
--cache-type-k
|
87 |
--threads 12 -no-cnv --prio 2 \
|
88 |
--n-gpu-layers 7 \
|
89 |
--temp 0.6 \
|
|
|
57 |
allow_patterns = ["*UD-IQ1_S*"], # Select quant type UD-IQ1_S for 1.58bit
|
58 |
)
|
59 |
```
|
60 |
+
5. Example with a Q8_0-quantized K cache. **Note: `-no-cnv` disables auto conversation mode.**
|
61 |
```bash
|
62 |
./llama.cpp/llama-cli \
|
63 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
64 |
+
--cache-type-k q8_0 \
|
65 |
--threads 12 -no-cnv --prio 2 \
|
66 |
--temp 0.6 \
|
67 |
--ctx-size 8192 \
|
|
|
83 |
```bash
|
84 |
./llama.cpp/llama-cli \
|
85 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
86 |
+
--cache-type-k q8_0 \
|
87 |
--threads 12 -no-cnv --prio 2 \
|
88 |
--n-gpu-layers 7 \
|
89 |
--temp 0.6 \
|