fix: bump up rate limits
Browse files
app.py
CHANGED
@@ -99,14 +99,14 @@ def respond(
|
|
99 |
response = ""
|
100 |
for msg in client.chat_completion(
|
101 |
messages,
|
102 |
-
model="meta-llama/llama-4-scout
|
103 |
max_tokens=max_tokens,
|
104 |
stream=True,
|
105 |
temperature=temperature,
|
106 |
seed=random.randint(1, 1000),
|
107 |
top_p=top_p,
|
108 |
extra_body={
|
109 |
-
"models": ["meta-llama/llama-4-maverick
|
110 |
},
|
111 |
):
|
112 |
token = msg.choices[0].delta.content
|
|
|
99 |
response = ""
|
100 |
for msg in client.chat_completion(
|
101 |
messages,
|
102 |
+
model="meta-llama/llama-4-scout",
|
103 |
max_tokens=max_tokens,
|
104 |
stream=True,
|
105 |
temperature=temperature,
|
106 |
seed=random.randint(1, 1000),
|
107 |
top_p=top_p,
|
108 |
extra_body={
|
109 |
+
"models": ["meta-llama/llama-4-maverick", "google/gemma-3-1b-it"]
|
110 |
},
|
111 |
):
|
112 |
token = msg.choices[0].delta.content
|