import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download#, login
import numpy as np
# login(os.getenv("HF_TOKEN"))  # no longer needed: the model repo is now public
# Load the GGUF model once at import time. The Hugging Face repo and file name
# can be overridden through the REPO_ID / MODEL_FILE environment variables.
model = Llama(
    model_path=hf_hub_download(
        repo_id=os.getenv("REPO_ID", "Lyte/QuadConnect2.5-0.5B-GRPO"),
        filename=os.getenv("MODEL_FILE", "unsloth.Q8_0.gguf"),
    ),
)
# System prompt sent to the model on every AI turn. The <reasoning> and <move>
# tags delimit the two parts of the reply so that the explanation and the
# chosen cell can be extracted programmatically.
# NOTE(review): the prose is kept verbatim (including its unbalanced bracket)
# because the model was presumably fine-tuned on this exact wording; confirm
# the tag names against the training template before rewording anything.
SYSTEM_PROMPT = """You are a Connect Four player[Connect Four is played on a 6 x 7 grid (with 6 rows and 7 columns]. Given the current board state, predict the next move. Respond in the following format:
<reasoning>
Explain your reasoning for choosing the move, considering the current board state and potential future moves.
</reasoning>
<move>
The column and row of your move in the format 'a1', 'b3', 'g5', 'c6', etc. (column letter followed by row number).
</move>
"""
class ConnectFour:
    """Game state and rules for a 6-row by 7-column Connect Four board.

    Cells of ``board`` hold 0 (empty), 1 (human player) or 2 (AI). Row index 0
    is the top of the board and row index 5 the bottom, so a dropped piece
    lands on the highest free row index of its column.
    """

    ROWS = 6
    COLS = 7
    CONNECT = 4  # number of aligned pieces needed to win

    # (row step, column step) for each line orientation, in the order they are
    # scanned: horizontal, vertical, and the two diagonals.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (-1, 1))

    def __init__(self):
        self.board = np.zeros((self.ROWS, self.COLS))
        self.current_player = 1  # 1 for player, 2 for AI
        self.game_over = False

    def make_move(self, col):
        """Drop the current player's piece into column ``col``.

        Args:
            col: Zero-based column index. Integral floats (e.g. ``3.0``) are
                accepted and converted.

        Returns:
            ``(True, row)`` with the row the piece landed on, or
            ``(False, -1)`` when the game is over, the column is not a valid
            index, or the column is full. The turn is NOT switched here; the
            caller updates ``current_player``.
        """
        if self.game_over:
            return False, -1
        try:
            col = int(col)
        except (TypeError, ValueError, OverflowError):
            return False, -1
        # Reject out-of-range columns explicitly: a negative index would
        # otherwise wrap around and silently hit the wrong column.
        if not 0 <= col < self.COLS:
            return False, -1
        # Find the lowest empty row in the selected column.
        for row in range(self.ROWS - 1, -1, -1):
            if self.board[row][col] == 0:
                self.board[row][col] = self.current_player
                return True, row
        return False, -1

    def check_winner(self):
        """Return the winning player (1 or 2) as an ``int``, or 0 if none."""
        reach = self.CONNECT - 1
        for d_row, d_col in self._DIRECTIONS:
            for row in range(self.ROWS):
                for col in range(self.COLS):
                    owner = self.board[row][col]
                    if owner == 0:
                        continue
                    end_row = row + reach * d_row
                    end_col = col + reach * d_col
                    # Skip starting cells whose line would leave the board.
                    if not (0 <= end_row < self.ROWS and 0 <= end_col < self.COLS):
                        continue
                    if all(
                        self.board[row + step * d_row][col + step * d_col] == owner
                        for step in range(1, self.CONNECT)
                    ):
                        return int(owner)
        return 0

    def board_to_string(self):
        """Describe all occupied cells as ``"<col><row>=<X|O>"`` entries.

        Columns are lettered ``a``-``g`` and rows numbered 1 (bottom) to 6
        (top); ``X`` is player 1 and ``O`` is any other non-empty value.
        Entries are listed top row first, left to right, joined by ``", "``.
        """
        moves = []
        for row in range(self.ROWS):
            for col in range(self.COLS):
                cell = self.board[row][col]
                if cell == 0:
                    continue
                col_letter = chr(ord("a") + col)
                row_num = self.ROWS - row  # convert to 1-based, bottom-up
                player = "X" if cell == 1 else "O"
                moves.append(f"{col_letter}{row_num}={player}")
        return ", ".join(moves)

    def parse_ai_move(self, move_str):
        """Convert a move such as ``'a1'`` or ``'B3'`` to a column index.

        Only the leading column letter is used; surrounding whitespace is
        ignored.

        Raises:
            ValueError: If the move is empty or its first character is not a
                column letter between ``a`` and ``g``.
        """
        cleaned = str(move_str).strip().lower()
        if not cleaned:
            raise ValueError("empty move")
        col = ord(cleaned[0]) - ord("a")
        if not 0 <= col < self.COLS:
            raise ValueError(f"invalid column in move: {move_str!r}")
        return col
# Stylesheet for the Connect Four page; passed to gr.Blocks by create_interface.
_INTERFACE_CSS = """
.connect4-board {
display: grid;
grid-template-columns: repeat(7, 1fr);
gap: 8px;
max-width: 600px;
margin: 10px auto;
background: #2196F3;
padding: 15px;
border-radius: 15px;
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
}
.connect4-cell {
aspect-ratio: 1;
background: white;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-size: 2em;
}
.player1 { background: #f44336 !important; }
.player2 { background: #ffc107 !important; }
#ai-status {
font-size: 1.2em;
margin: 10px 0;
color: #2196F3;
font-weight: bold;
}
#ai-reasoning {
background: #22004d;
border-radius: 10px;
padding: 15px;
margin: 15px 0;
font-family: monospace;
min-height: 100px;
}
.reasoning-box {
border-left: 4px solid #2196F3;
padding-left: 15px;
margin: 10px 0;
background: #22004d;
border-radius: 0 10px 10px 0;
}
#column-buttons {
display: flex;
justify-content: center;
align-items: anchor-center;
max-width: 600px;
margin: 0 auto;
padding: 0 15px;
}
#column-buttons button {
margin: 0px 4px;
}
div.svelte-1nguped {
display: block;
}
"""


def create_interface():
    """Build the Gradio Blocks UI for a human-vs-AI Connect Four game.

    The page has seven column buttons, an HTML board, a status line, a
    "New Game" button and a panel showing the AI's reasoning. Each button
    click plays the human move, asks ``model`` for a reply and plays it.

    Returns:
        The assembled ``gr.Blocks`` object (not yet launched).
    """
    # Function-scope imports keep this block self-contained.
    import re
    from html import escape

    # NOTE(review): `game` is created once and captured by the handlers below,
    # so presumably every browser session shares this single board - confirm,
    # and consider per-session state (gr.State) if that is not intended.
    game = ConnectFour()

    def panel(inner_html):
        """Wrap content in the container styled by the #ai-reasoning rule."""
        return f'<div id="ai-reasoning">{inner_html}</div>'

    def outputs(status_text, panel_html):
        """Build the [board, status, reasoning] triple every handler returns."""
        return [render_board(game.board), status_text, panel(panel_html)]

    def board_is_full():
        """Return True when no empty cell is left (draw if nobody has won)."""
        return not (game.board == 0).any()

    def extract_tag(text, tag):
        """Return the stripped content of the first <tag>...</tag>, or None."""
        found = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL | re.IGNORECASE)
        return found.group(1).strip() if found else None

    def parse_ai_response(text):
        """Split the model reply into ``(reasoning, move_str)``.

        Prefers the <reasoning>/<move> tags. When the move tag is missing,
        falls back to the last cell-like token (e.g. ``d3``) in the reply;
        when the reasoning tag is missing, the whole reply is the reasoning.

        Raises:
            ValueError: If no move can be found at all.
        """
        reasoning = extract_tag(text, "reasoning")
        move_str = extract_tag(text, "move")
        if move_str is None:
            candidates = re.findall(r"\b[a-gA-G][1-6]\b", text)
            if not candidates:
                raise ValueError("no move found in the AI response")
            move_str = candidates[-1]
        if reasoning is None:
            reasoning = text.strip()
        return reasoning, move_str

    with gr.Blocks(css=_INTERFACE_CSS) as interface:
        gr.Markdown("# 🎮 Connect Four vs AI")
        gr.Markdown("### This is just a quick prototype for now, and the current model was trained just for 200 steps to test the concept, the reward functions were flawed, update coming soon!")
        with gr.Row():
            with gr.Column(scale=2):
                # Status display
                status = gr.Markdown("Your turn! Click a button to drop your piece!", elem_id="ai-status")
                # Column buttons
                with gr.Group(elem_id="column-buttons"):
                    col_buttons = []
                    for i in range(7):
                        btn = gr.Button(f"⬇️ {i+1}", scale=1)
                        col_buttons.append(btn)
                # Game board
                board_display = gr.HTML(render_board(), elem_id="board-display")
                reset_btn = gr.Button("🔄 New Game", variant="primary")
            with gr.Column(scale=1):
                # AI reasoning display
                gr.Markdown("### 🤖 AI's Thoughts")
                reasoning_display = gr.HTML(
                    value=panel("Waiting for your move..."),
                    elem_id="ai-reasoning-container",
                )

        def handle_move(col):
            """Play the human move in ``col``, then the AI's reply.

            Returns the updated ``[board_html, status_text, reasoning_html]``.
            """
            if game.game_over:
                return outputs("Game is over! Click New Game to play again.", "Game Over!")

            # Player move. The turn is reset first so that a previous handler
            # that stopped midway cannot leave the human playing as the AI.
            game.current_player = 1
            success, _ = game.make_move(int(col))
            if not success:
                return outputs("Column is full! Try another one.", "Invalid move!")

            # Check for winner, then for a full board.
            if game.check_winner() == 1:
                game.game_over = True
                return outputs("🎉 You win! 🎉", "Congratulations! You won!")
            if board_is_full():
                game.game_over = True
                return outputs("🤝 It's a draw!", "The board is full. It's a draw!")

            # AI move
            game.current_player = 2
            prompt = f"Current Board: {game.board_to_string()}. Make a move."
            try:
                response = model.create_chat_completion(
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0.7,
                    max_tokens=512,
                )
                ai_response = response['choices'][0]['message']['content']
                reasoning, move_str = parse_ai_response(ai_response)
                ai_col = game.parse_ai_move(move_str)
            except Exception as e:
                # UI boundary: any model or parsing failure forfeits the game
                # for the AI instead of surfacing a raw traceback.
                game.game_over = True
                return outputs(
                    "AI error occurred! You win by default!",
                    f"Error: {escape(str(e))}",
                )

            # Model output is untrusted text: escape it before embedding.
            reasoning_html = (
                '<div class="reasoning-box">'
                "<p><strong>🤔 Reasoning:</strong></p>"
                f"<p>{escape(reasoning)}</p>"
                f"<p><strong>📍 Move chosen:</strong> {escape(move_str)}</p>"
                "</div>"
            )

            # Range-check here as well so a negative index can never wrap
            # around to a different column.
            placed = 0 <= ai_col < 7 and game.make_move(ai_col)[0]
            if not placed:
                # End the game; otherwise the next click would be played as
                # the AI because the turn never returned to the human.
                game.game_over = True
                return outputs(
                    "AI made invalid move! You win by default!",
                    "AI made an invalid move!",
                )

            if game.check_winner() == 2:
                game.game_over = True
                return outputs("🤖 AI wins! Better luck next time!", reasoning_html)
            if board_is_full():
                game.game_over = True
                return outputs("🤝 It's a draw!", reasoning_html)

            game.current_player = 1
            return outputs("Your turn!", reasoning_html)

        def reset_game():
            """Clear the board and hand the first move back to the human."""
            game.board = np.zeros((6, 7))
            game.current_player = 1
            game.game_over = False
            return outputs(
                "Your turn! Click a button to drop your piece!",
                "New game started! Make your move...",
            )

        # Event handlers: each button passes its column index through a hidden
        # Number component.
        for i, btn in enumerate(col_buttons):
            btn.click(
                fn=handle_move,
                inputs=[gr.Number(value=i, visible=False)],
                outputs=[board_display, status, reasoning_display],
            )
        reset_btn.click(
            fn=reset_game,
            outputs=[board_display, status, reasoning_display],
        )
    return interface
def render_board(board=None):
    """Render a Connect Four board as an HTML grid.

    Args:
        board: 6x7 indexable grid (``board[row][col]``) where 1 marks the
            human player, 2 marks the AI and anything else is shown as empty.
            ``None`` renders an empty board.

    Returns:
        A ``<div class="connect4-board">`` containing 42 ``connect4-cell``
        divs in row-major order; occupied cells also carry the ``player1`` or
        ``player2`` class so the page stylesheet can colour them.
    """
    if board is None:
        board = np.zeros((6, 7))
    cells = []
    for row in range(6):
        for col in range(7):
            cell_class = "connect4-cell"
            content = "⚪"
            if board[row][col] == 1:
                cell_class += " player1"
                content = "🔴"
            elif board[row][col] == 2:
                cell_class += " player2"
                content = "🟡"
            cells.append(f'<div class="{cell_class}">{content}</div>')
    # Join once instead of growing a string inside the loop.
    return '<div class="connect4-board">' + "".join(cells) + "</div>"
# Script entry point: build the UI and start the Gradio server in debug mode.
if __name__ == "__main__":
    create_interface().launch(debug=True)