zhiminy committed on
Commit
49cb056
·
1 Parent(s): c1d84c5

add grok-3 and llama-4

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. context_window.json +7 -3
app.py CHANGED
@@ -510,7 +510,7 @@ with gr.Blocks() as app:
510
  # ⚔️ Software Engineering (SE) Arena: Explore and Test the Best SE Chatbots with Long-Context Interactions
511
 
512
  ## 📜How It Works
513
- - **Blind Comparison**: Submit a SE-related query to two anonymous chatbots randomly selected from up to {len(available_models)} top models, including OpenAI-o3, Grok-2, Gemini-2.0, Claude-3.7, Deepseek-r1, Mistral-large, Llama-3.3, Qwen-2.5, and others.
514
  - **Interactive Voting**: Engage in multi-turn dialogues with both chatbots and compare their responses. You can continue the conversation until you confidently choose the better model.
515
  - **Fair Play Rules**: Votes are counted only if chatbot identities remain anonymous. Revealing a chatbot's identity disqualifies the session.
516
 
 
510
  # ⚔️ Software Engineering (SE) Arena: Explore and Test the Best SE Chatbots with Long-Context Interactions
511
 
512
  ## 📜How It Works
513
+ - **Blind Comparison**: Submit a SE-related query to two anonymous chatbots randomly selected from up to {len(available_models)} top models from OpenAI, Gemini, Grok, Claude, Deepseek, Qwen, Llama, Mistral, and others.
514
  - **Interactive Voting**: Engage in multi-turn dialogues with both chatbots and compare their responses. You can continue the conversation until you confidently choose the better model.
515
  - **Fair Play Rules**: Votes are counted only if chatbot identities remain anonymous. Revealing a chatbot's identity disqualifies the session.
516
 
context_window.json CHANGED
@@ -16,10 +16,15 @@
16
  "gemini-2.0-flash-lite-preview": 1048576,
17
  "gemini-2.0-pro-exp": 2097152,
18
  "gemma-3-27b-it": 128000,
19
- "grok-2-1212": 131072,
 
 
 
20
  "llama-3.1-8b": 128000,
21
  "llama-3.1-405b": 128000,
22
  "llama-3.3-70b": 128000,
 
 
23
  "mistral-large-latest": 131000,
24
  "mistral-small-latest": 32000,
25
  "o1": 128000,
@@ -28,6 +33,5 @@
28
  "Qwen2.5-32B-Instruct": 131072,
29
  "Qwen2.5-72B-Instruct": 131072,
30
  "Qwen2.5-72B-Instruct-128k": 131072,
31
- "Qwen2.5-Coder-32B-Instruct": 131072,
32
- "yi-large": 32000
33
  }
 
16
  "gemini-2.0-flash-lite-preview": 1048576,
17
  "gemini-2.0-pro-exp": 2097152,
18
  "gemma-3-27b-it": 128000,
19
+ "grok-3-fast-beta": 1000000,
20
+ "grok-3-beta": 1000000,
21
+ "grok-3-mini-fast-beta": 1000000,
22
+ "grok-3-mini-beta": 1000000,
23
  "llama-3.1-8b": 128000,
24
  "llama-3.1-405b": 128000,
25
  "llama-3.3-70b": 128000,
26
+ "llama4-scout-instruct-basic": 10000000,
27
+ "llama4-maverick-instruct-basic": 10000000,
28
  "mistral-large-latest": 131000,
29
  "mistral-small-latest": 32000,
30
  "o1": 128000,
 
33
  "Qwen2.5-32B-Instruct": 131072,
34
  "Qwen2.5-72B-Instruct": 131072,
35
  "Qwen2.5-72B-Instruct-128k": 131072,
36
+ "Qwen2.5-Coder-32B-Instruct": 131072
 
37
  }