reduce position embeddings
app.py CHANGED
@@ -30,7 +30,8 @@ model = AutoModelForCausalLM.from_pretrained(
     quantization_config=quantization_config, # Apply quantization
     # device_map="auto", # Automatically map to available devices
     torch_dtype=torch.bfloat16,
-    token=HF_TOKEN
+    token=HF_TOKEN,
+    max_position_embeddings=8192 # Reduce context window to 8k tokens (from 128k)
 )
 
 @spaces.GPU
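For context, a hedged sketch of what the surrounding from_pretrained call in app.py looks like after this commit. The model id, the BitsAndBytesConfig setup, and the HF_TOKEN lookup are assumptions, since those lines are not part of this hunk; the override works because from_pretrained keyword arguments that match config attributes (such as max_position_embeddings) replace the values loaded from the model's config.

    # Sketch of the model-loading code after this change. Assumed context:
    # the model id, quantization setup, and HF_TOKEN lookup are not shown in the hunk.
    import os
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    HF_TOKEN = os.environ.get("HF_TOKEN")  # assumed: token read from Space secrets
    quantization_config = BitsAndBytesConfig(load_in_4bit=True)  # assumed quantization setup

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-3.1-8B-Instruct",       # hypothetical model id
        quantization_config=quantization_config,  # Apply quantization
        # device_map="auto",                      # Automatically map to available devices
        torch_dtype=torch.bfloat16,
        token=HF_TOKEN,
        max_position_embeddings=8192,  # Reduce context window to 8k tokens (from 128k)
    )

Shrinking max_position_embeddings this way only changes the config-level context length; for rotary-embedding models it does not alter any weights, but it does reduce the memory reserved for position-dependent buffers and caps how long a prompt the Space will accept.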