import os import gradio as gr from llama_cpp import Llama from huggingface_hub import hf_hub_download#, login import numpy as np #login(os.getenv("HF_TOKEN")) my bad now its public model = Llama( model_path=hf_hub_download( repo_id=os.environ.get("REPO_ID", "Lyte/QuadConnect2.5-0.5B-GRPO"), filename=os.environ.get("MODEL_FILE", "unsloth.Q8_0.gguf"), ) ) SYSTEM_PROMPT = """You are a Connect Four player[Connect Four is played on a 6 x 7 grid (with 6 rows and 7 columns]. Given the current board state, predict the next move. Respond in the following format: Explain your reasoning for choosing the move, considering the current board state and potential future moves. The column and row of your move in the format 'a1', 'b3', 'g5', 'c6', etc. (column letter followed by row number). """ class ConnectFour: def __init__(self): self.board = np.zeros((6, 7)) self.current_player = 1 # 1 for player, 2 for AI self.game_over = False def make_move(self, col): if self.game_over: return False, -1 # Find the lowest empty row in the selected column for row in range(5, -1, -1): if self.board[row][col] == 0: self.board[row][col] = self.current_player return True, row return False, -1 def check_winner(self): # Check horizontal for row in range(6): for col in range(4): if (self.board[row][col] != 0 and self.board[row][col] == self.board[row][col+1] == self.board[row][col+2] == self.board[row][col+3]): return self.board[row][col] # Check vertical for row in range(3): for col in range(7): if (self.board[row][col] != 0 and self.board[row][col] == self.board[row+1][col] == self.board[row+2][col] == self.board[row+3][col]): return self.board[row][col] # Check diagonal (positive slope) for row in range(3): for col in range(4): if (self.board[row][col] != 0 and self.board[row][col] == self.board[row+1][col+1] == self.board[row+2][col+2] == self.board[row+3][col+3]): return self.board[row][col] # Check diagonal (negative slope) for row in range(3, 6): for col in range(4): if (self.board[row][col] != 0 and self.board[row][col] == self.board[row-1][col+1] == self.board[row-2][col+2] == self.board[row-3][col+3]): return self.board[row][col] return 0 def board_to_string(self): moves = [] for row in range(6): for col in range(7): if self.board[row][col] != 0: col_letter = chr(ord('a') + col) row_num = str(6 - row) # Convert to 1-based indexing player = "X" if self.board[row][col] == 1 else "O" moves.append(f"{col_letter}{row_num}={player}") return ", ".join(moves) def parse_ai_move(self, move_str): # Parse move like 'a1', 'b3', etc. col = ord(move_str[0].lower()) - ord('a') return col def create_interface(): game = ConnectFour() css = """ .connect4-board { display: grid; grid-template-columns: repeat(7, 1fr); gap: 8px; max-width: 600px; margin: 10px auto; background: #2196F3; padding: 15px; border-radius: 15px; box-shadow: 0 4px 8px rgba(0,0,0,0.2); } .connect4-cell { aspect-ratio: 1; background: white; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 2em; } .player1 { background: #f44336 !important; } .player2 { background: #ffc107 !important; } #ai-status { font-size: 1.2em; margin: 10px 0; color: #2196F3; font-weight: bold; } #ai-reasoning { background: #22004d; border-radius: 10px; padding: 15px; margin: 15px 0; font-family: monospace; min-height: 100px; } .reasoning-box { border-left: 4px solid #2196F3; padding-left: 15px; margin: 10px 0; background: #22004d; border-radius: 0 10px 10px 0; } #column-buttons { display: flex; justify-content: center; align-items: anchor-center; max-width: 600px; margin: 0 auto; padding: 0 15px; } #column-buttons button { margin: 0px 4px; } div.svelte-1nguped { display: block; } """ with gr.Blocks(css=css) as interface: gr.Markdown("# 🎮 Connect Four vs AI") gr.Markdown("### This is just a quick prototype for now, and the current model was trained just for 200 steps to test the concept, the reward functions were flawed, update coming soon!") with gr.Row(): with gr.Column(scale=2): # Status display status = gr.Markdown("Your turn! Click a button to drop your piece!", elem_id="ai-status") # Column buttons with gr.Group(elem_id="column-buttons"): col_buttons = [] for i in range(7): btn = gr.Button(f"⬇️ {i+1}", scale=1) col_buttons.append(btn) # Game board board_display = gr.HTML(render_board(), elem_id="board-display") reset_btn = gr.Button("🔄 New Game", variant="primary") with gr.Column(scale=1): # AI reasoning display gr.Markdown("### 🤖 AI's Thoughts") reasoning_display = gr.HTML( value='
Waiting for your move...
', elem_id="ai-reasoning-container" ) def handle_move(col): if game.game_over: return [ render_board(game.board), "Game is over! Click New Game to play again.", '
Game Over!
' ] # Player move success, row = game.make_move(col) if not success: return [ render_board(game.board), "Column is full! Try another one.", '
Invalid move!
' ] # Check for winner winner = game.check_winner() if winner == 1: game.game_over = True return [ render_board(game.board), "🎉 You win! 🎉", '
Congratulations! You won!
' ] # AI move game.current_player = 2 board_state = game.board_to_string() prompt = f"Current Board: {board_state}. Make a move." # Get AI response response = model.create_chat_completion( messages=[ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt} ], temperature=0.7, max_tokens=512 ) ai_response = response['choices'][0]['message']['content'] # Extract reasoning and move try: reasoning = ai_response.split("")[1].split("")[0].strip() move_str = ai_response.split("")[1].split("")[0].strip() ai_col = game.parse_ai_move(move_str) # Format reasoning for display reasoning_html = f'''

🤔 Reasoning:

{reasoning}

📍 Move chosen: {move_str}

''' success, _ = game.make_move(ai_col) if success: # Check for AI winner winner = game.check_winner() if winner == 2: game.game_over = True return [ render_board(game.board), "🤖 AI wins! Better luck next time!", reasoning_html ] else: return [ render_board(game.board), "AI made invalid move! You win by default!", '
AI made an invalid move!
' ] except Exception as e: game.game_over = True return [ render_board(game.board), "AI error occurred! You win by default!", f'
Error: {str(e)}
' ] game.current_player = 1 return [render_board(game.board), "Your turn!", reasoning_html] def reset_game(): game.board = np.zeros((6, 7)) game.current_player = 1 game.game_over = False return [ render_board(), "Your turn! Click a button to drop your piece!", '
New game started! Make your move...
' ] # Event handlers for i, btn in enumerate(col_buttons): btn.click( fn=handle_move, inputs=[gr.Number(value=i, visible=False)], outputs=[board_display, status, reasoning_display] ) reset_btn.click( fn=reset_game, outputs=[board_display, status, reasoning_display] ) return interface def render_board(board=None): if board is None: board = np.zeros((6, 7)) html = '
' for row in range(6): for col in range(7): cell_class = "connect4-cell" content = "⚪" if board[row][col] == 1: cell_class += " player1" content = "🔴" elif board[row][col] == 2: cell_class += " player2" content = "🟡" html += f'
{content}
' html += "
" return html # Launch the interface if __name__ == "__main__": interface = create_interface() interface.launch(debug=True)