Upload 8 files
- classes/Qlearningagent.py +79 -0
- classes/__pycache__/Qlearningagent.cpython-39.pyc +0 -0
- classes/__pycache__/game_model.cpython-39.pyc +0 -0
- classes/environment.py +184 -0
- classes/game.py +130 -0
- classes/game_model.py +92 -0
- gradio_game.py +194 -0
- requirements.txt +3 -0
classes/Qlearningagent.py
ADDED
@@ -0,0 +1,79 @@
import random
import json
import ast

class QlearningAgent():
    def __init__(self, epsilon, alpha, discount_factor, train):
        self.q_table = {}
        self.epsilon = epsilon
        self.alpha = alpha
        self.discount_factor = discount_factor
        self.train = train

    def save_agent_dict(self, file_name):
        # JSON keys must be strings, so convert the tuple keys first
        q_table_str_keys = {str(key): value for key, value in self.q_table.items()}

        with open(file_name, 'w') as file_json:
            json.dump(q_table_str_keys, file_json)

    def load_agent_dict(self, file_name):
        try:
            with open(file_name, 'r') as file_json:
                json_data = json.load(file_json)
            # Convert the string keys back to tuples
            self.q_table = {ast.literal_eval(key): value for key, value in json_data.items()}
            print("Q-table loaded successfully.")
        except FileNotFoundError:
            print(f"File '{file_name}' not found. Q-table not loaded.")

    def get_q_value(self, state, action, piece):
        state_tuple = tuple(state.flatten())
        if (state_tuple, action, piece) not in self.q_table:
            self.q_table[(state_tuple, action, piece)] = 0.0
        return self.q_table[(state_tuple, action, piece)]

    def choose_move(self, state, available_moves, piece):
        q_values = [self.get_q_value(state, action, piece) for action in available_moves]

        # epsilon-greedy: explore only while training
        if self.train and random.uniform(0, 1) < self.epsilon:
            return random.choice(available_moves)
        max_q_value = max(q_values)
        # break ties between equally valued moves at random
        best_moves = [i for i in range(len(available_moves)) if q_values[i] == max_q_value]
        return available_moves[random.choice(best_moves)]

    def update_q_value(self, states, rewards):
        # Q(s) <- Q(s) + alpha * (r_t + gamma * r_{t+1} - Q(s))
        for i, state in enumerate(states):
            if state not in self.q_table:
                self.q_table[state] = 0.0
            # the reward of the next recorded step stands in for max_a Q(s', a)
            next_reward = 0 if i == len(states) - 1 else rewards[i + 1]
            self.q_table[state] += self.alpha * (rewards[i] + self.discount_factor * next_reward - self.q_table[state])
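
For context, a minimal sketch of driving this agent directly; the hyperparameter values and the output file name are illustrative, not taken from this commit:

import numpy as np
from classes.Qlearningagent import QlearningAgent

agent = QlearningAgent(epsilon=0.9, alpha=0.6, discount_factor=0.9, train=True)  # illustrative values
state = np.zeros((3, 3), dtype=int)                 # an empty 3x3 board
move = agent.choose_move(state, list(range(9)), 1)  # piece 1 = X
# record one (board, action, piece) step with its reward, then update
agent.update_q_value([(tuple(state.flatten()), move, 1)], [0])
agent.save_agent_dict('q_table.json')               # hypothetical output path
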
classes/__pycache__/Qlearningagent.cpython-39.pyc
ADDED
Binary file (2.97 kB)

classes/__pycache__/game_model.cpython-39.pyc
ADDED
Binary file (3.32 kB)

classes/environment.py
ADDED
@@ -0,0 +1,184 @@
import tqdm
from classes.game_model import tic_tac_toe_model
from classes.Qlearningagent import QlearningAgent

class environment():
    def __init__(self, tic_tac_toe: tic_tac_toe_model, q_agent: QlearningAgent, train: bool, show_stats: bool):
        self.board = tic_tac_toe
        self.q_agent = q_agent
        self.train = train
        self.show_stats = show_stats

    def play_one_game(self, piece):
        game_over = False
        win_piece = 0
        self.board.reset_matrix()
        if piece == 1:
            piece_enemy = 2
            states_x = []
            rewards_x = []
            while not game_over:
                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_x = self.board.reward_piece(piece)
                    state_x = (tuple(state.flatten()), -1, piece)  # -1 marks the terminal state
                    states_x.append(state_x)
                    rewards_x.append(reward_x)
                    win_piece = w
                    break

                # X moves; record the state/action/reward transition
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action_x = self.q_agent.choose_move(state, avaible_moves, piece)
                i, j = self.board.number_ij(action_x)
                self.board.move(i, j, piece)
                reward_x = self.board.reward_piece(piece)
                state_x = (tuple(state.flatten()), action_x, piece)
                states_x.append(state_x)
                rewards_x.append(reward_x)
                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_x = self.board.reward_piece(piece)
                    state_x = (tuple(state.flatten()), -1, piece)  # -1 marks the terminal state
                    states_x.append(state_x)
                    rewards_x.append(reward_x)
                    win_piece = w
                    break
                self.board.get_random_move(piece_enemy)

            self.q_agent.update_q_value(states_x, rewards_x)
            return win_piece
        else:
            piece_enemy = 1
            states_o = []
            rewards_o = []
            while not game_over:
                self.board.get_random_move(piece_enemy)

                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_o = self.board.reward_piece(piece)
                    state_o = (tuple(state.flatten()), -1, piece)  # -1 marks the terminal state
                    states_o.append(state_o)
                    rewards_o.append(reward_o)
                    win_piece = w
                    break

                # O moves; record the state/action/reward transition
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action_o = self.q_agent.choose_move(state, avaible_moves, piece)
                i, j = self.board.number_ij(action_o)
                self.board.move(i, j, piece)
                reward_o = self.board.reward_piece(piece)
                state_o = (tuple(state.flatten()), action_o, piece)
                states_o.append(state_o)
                rewards_o.append(reward_o)
                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_o = self.board.reward_piece(piece)
                    state_o = (tuple(state.flatten()), -1, piece)  # -1 marks the terminal state
                    states_o.append(state_o)
                    rewards_o.append(reward_o)
                    win_piece = w
                    break

            self.q_agent.update_q_value(states_o, rewards_o)
            return win_piece

    def play_ia_vs_ia(self):
        game_over = False
        self.board.reset_matrix()
        ia_x = 1
        ia_o = 2
        states_x = []
        rewards_x = []
        states_o = []
        rewards_o = []
        while not game_over:
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break

            # X plays
            state_x = self.board.matriz.copy()
            avaible_moves_x = self.board.get_avaible_moves()
            action_x = self.q_agent.choose_move(state_x, avaible_moves_x, ia_x)
            i, j = self.board.number_ij(action_x)
            self.board.move(i, j, ia_x)

            reward_x = self.board.reward_piece(ia_x)
            state_x = (tuple(state_x.flatten()), action_x, ia_x)
            states_x.append(state_x)
            rewards_x.append(reward_x)

            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break

            # O plays
            state_o = self.board.matriz.copy()
            avaible_moves_o = self.board.get_avaible_moves()
            action_o = self.q_agent.choose_move(state_o, avaible_moves_o, ia_o)
            i, j = self.board.number_ij(action_o)
            self.board.move(i, j, ia_o)

            reward_o = self.board.reward_piece(ia_o)
            state_o = (tuple(state_o.flatten()), action_o, ia_o)
            states_o.append(state_o)
            rewards_o.append(reward_o)

        # let the losing side also see the terminal state and its final reward
        if win_piece == 1:
            state = self.board.matriz.copy()
            reward_o = self.board.reward_piece(ia_o)
            state_o = (tuple(state.flatten()), -1, ia_o)  # -1 marks the terminal state
            states_o.append(state_o)
            rewards_o.append(reward_o)
        elif win_piece == 2:
            state = self.board.matriz.copy()
            reward_x = self.board.reward_piece(ia_x)
            state_x = (tuple(state.flatten()), -1, ia_x)  # -1 marks the terminal state
            states_x.append(state_x)
            rewards_x.append(reward_x)

        self.q_agent.update_q_value(states_x, rewards_x)
        self.q_agent.update_q_value(states_o, rewards_o)
        return win_piece

    def run(self, n):
        wins_x = []
        wins_o = []
        wins_ia = []
        if self.show_stats:
            print(f'Playing {n} games with X')
            for i in tqdm.tqdm(range(n)):
                wins_x.append(self.play_one_game(piece=1))

            print(f'Playing {n} games with O')
            for i in tqdm.tqdm(range(n)):
                wins_o.append(self.play_one_game(piece=2))

            print(f'Playing {n} games ia vs ia')
            for i in tqdm.tqdm(range(n)):
                wins_ia.append(self.play_ia_vs_ia())
        else:
            for i in range(n):
                wins_x.append(self.play_one_game(piece=1))

            for i in range(n):
                wins_o.append(self.play_one_game(piece=2))

            for i in range(n):
                wins_ia.append(self.play_ia_vs_ia())

        return wins_x, wins_o, wins_ia
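
For context, a minimal training-run sketch built on this environment; the hyperparameters and the save path are illustrative, only the class interfaces come from this commit:

from classes.game_model import tic_tac_toe_model
from classes.Qlearningagent import QlearningAgent
from classes.environment import environment

agent = QlearningAgent(epsilon=0.9, alpha=0.6, discount_factor=0.9, train=True)  # illustrative values
board = tic_tac_toe_model(3)
env = environment(board, agent, train=True, show_stats=True)

# n games as X vs a random opponent, n as O vs a random opponent, n in self-play
wins_x, wins_o, wins_ia = env.run(1000)
print('X wins vs random:', wins_x.count(1))
agent.save_agent_dict('q_table.json')  # hypothetical output path
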
classes/game.py
ADDED
@@ -0,0 +1,130 @@
import tqdm
import random
from classes.game_model import tic_tac_toe_model
from classes.Qlearningagent import QlearningAgent

class Game():
    def __init__(self, tic_tac_toe: tic_tac_toe_model, q_agent: QlearningAgent):
        self.board = tic_tac_toe
        self.q_agent = q_agent

    def ia_vs_ia(self):
        game_over = False
        self.board.reset_matrix()
        while not game_over:
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break
            state = self.board.matriz.copy()
            avaible_moves = self.board.get_avaible_moves()
            action = self.q_agent.choose_move(state, avaible_moves, 1)
            i, j = self.board.number_ij(action)
            self.board.move(i, j, 1)
            w = self.board.check_win()  # re-check after X's move
            if w != 4:
                win_piece = w
                break

            state = self.board.matriz.copy()
            avaible_moves = self.board.get_avaible_moves()
            action = self.q_agent.choose_move(state, avaible_moves, 2)
            i, j = self.board.number_ij(action)
            self.board.move(i, j, 2)

        return win_piece

    def run_ia_vs_ia(self, n):
        games = []
        for i in tqdm.tqdm(range(n)):
            w = self.ia_vs_ia()
            games.append(w)
        return games

    def ia_vs_user(self):
        print('Menu')
        print("1 - Start at random")
        print("2 - Choose a piece [X-O]")
        m1 = int(input())
        while m1 != 1 and m1 != 2:
            print('Enter a valid value (1-2)')
            print("1 - Start at random")
            print("2 - Choose a piece [X-O]")
            m1 = int(input())

        game_over = False
        self.board.reset_matrix()
        pieces = [1, 2]
        win_piece = 0
        if m1 == 1:
            user = random.choice(pieces)
        else:
            print('1 - X\n2 - O')
            user = int(input())
            while user != 1 and user != 2:
                print("Enter a valid value (1-2)")
                print('1 - X\n2 - O')
                user = int(input())

        ia = 2 if user == 1 else 1
        print("Game start")
        self.board.print_game()
        if ia == 1:
            while not game_over:
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state, avaible_moves, ia)
                i, j = self.board.number_ij(action)
                print('AI move - X')
                self.board.move(i, j, ia)
                self.board.print_game()
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break
                print("Your move - O [row number][column number]")
                ml, mv = int(input()), int(input())
                self.board.move(ml, mv, user)
                self.board.print_game()
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break

        else:
            while not game_over:
                print('Your move - X [row number][column number]')
                ml, mv = int(input()), int(input())

                self.board.move(ml, mv, user)
                self.board.print_game()
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break

                print('AI move - O')
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state, avaible_moves, ia)
                i, j = self.board.number_ij(action)
                self.board.move(i, j, ia)
                w = self.board.check_win()

                self.board.print_game()

                if w != 4:
                    win_piece = w
                    break

        if win_piece == ia:
            print("The AI beat the puny human")
        elif win_piece == user:
            print("The human won. Ready for the revolution?")
        else:
            print("A draw, but the AI keeps learning and improving. Do you?")
        return win_piece
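
For context, a hypothetical console session against a previously trained agent (the Q-table path is made up; note that ia_vs_user reads the row and the column as two separate input() calls):

from classes.game_model import tic_tac_toe_model
from classes.Qlearningagent import QlearningAgent
from classes.game import Game

agent = QlearningAgent(epsilon=0, alpha=0, discount_factor=0, train=False)
agent.load_agent_dict('q_table.json')  # hypothetical trained Q-table
game = Game(tic_tac_toe_model(3), agent)
game.ia_vs_user()
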
classes/game_model.py
ADDED
@@ -0,0 +1,92 @@
import random
import numpy as np

class tic_tac_toe_model():
    def __init__(self, n):
        self.n = n
        self.matriz = np.full((n, n), 0, dtype=int)

    def reset_matrix(self):
        self.matriz = np.full((self.n, self.n), 0, dtype=int)

    def print_game(self):
        matriz = self.matriz
        substituicoes = {1: "X", 2: "O", 0: ' '}  # piece number -> printed symbol

        cabecalho = [str(i) for i in range(self.n)]  # column header
        print(' ', ' | '.join(cabecalho))
        for i, linha in enumerate(matriz):
            linha_formatada = [str(substituicoes.get(valor, valor)) for valor in linha]
            print(i, " | ".join(linha_formatada))
        print("-" * 12)

    def number_ij(self, number):
        # map a flat cell index (0..n*n-1) to a (row, column) pair
        i, j = np.unravel_index(number, self.matriz.shape)
        return i, j

    def get_avaible_moves(self):
        # flat indices of every empty cell
        avaible_moves = np.ravel_multi_index(np.where(self.matriz == 0), self.matriz.shape)
        return list(avaible_moves)

    def get_random_move(self, piece):
        possible_move_i, possible_move_j = np.where(self.matriz == 0)
        if possible_move_j.shape[0] > 0:  # check that a move is still possible
            index = random.randint(0, possible_move_j.shape[0] - 1)
            self.move(possible_move_i[index], possible_move_j[index], piece)

    def move(self, index_i, index_j, piece):
        if self.matriz[index_i][index_j] == 0:
            self.matriz[index_i][index_j] = piece

    def reward_piece(self, piece):
        # +1 if `piece` won, -1 if the opponent won, 0 for a draw or an unfinished game
        w = self.check_win()
        if w != 4:
            if w != 3:
                if w == piece:
                    return 1
                else:
                    return -1
        return 0

    def check_win(self):
        # returns the winning piece (1 or 2), 3 for a draw, or 4 if the game is still running
        state = False
        win_piece = -1
        value_counts_diagonal = np.unique(self.matriz.diagonal())
        value_counts_diagonal2 = np.unique(np.fliplr(self.matriz).diagonal())
        if value_counts_diagonal.shape[0] == 1 and value_counts_diagonal[0] != 0:
            win_piece = value_counts_diagonal[0]
            return win_piece
        if value_counts_diagonal2.shape[0] == 1 and value_counts_diagonal2[0] != 0:
            win_piece = value_counts_diagonal2[0]
            return win_piece

        for i in range(self.n):
            value_counts_linha = np.unique(self.matriz[i, :])
            value_counts_coluna = np.unique(self.matriz[:, i])

            if value_counts_linha.shape[0] == 1 and value_counts_linha[0] != 0:
                state = True
                win_piece = value_counts_linha[0]
                break
            if value_counts_coluna.shape[0] == 1 and value_counts_coluna[0] != 0:
                state = True
                win_piece = value_counts_coluna[0]
                break

        velha = np.where(self.matriz == 0)  # "velha" = draw check: any empty cells left?

        if state:
            return win_piece
        if velha[0].shape[0] == 0:
            return 3
        else:
            return 4
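
The board model can also be exercised on its own; a quick sketch of its API (check_win's return codes: the winning piece, 3 for a draw, 4 while the game is still in progress):

from classes.game_model import tic_tac_toe_model

board = tic_tac_toe_model(3)
board.move(0, 0, 1)               # X in the top-left corner
board.get_random_move(2)          # O in a random empty cell
print(board.get_avaible_moves())  # flat indices (0-8) of the empty cells
i, j = board.number_ij(4)         # flat index 4 -> (1, 1), the center
board.print_game()
print(board.check_win())          # 4: the game is still running
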
gradio_game.py
ADDED
@@ -0,0 +1,194 @@
import gradio as gr
import numpy as np
import argparse
from classes.game_model import tic_tac_toe_model
from classes.Qlearningagent import QlearningAgent


custom_css = """
.gradio-button {
    width: 75px;            /* desired width */
    height: 75px;           /* desired height */
    background-color: blue; /* blue background */
    color: white;           /* white text */
    border: none;           /* remove the border */
    text-align: center;     /* center the text horizontally */
    text-decoration: none;  /* remove text decoration */
    display: inline-block;  /* allows setting width and height */
    font-size: 16px;        /* text size */
    line-height: 75px;      /* center the text vertically */
}
"""

usr = -2
ia = -1
game_over = False
temp = -2

def main():

    def usr_move(position, value):
        global game_over
        if value == '' and game_over == False:
            usr_piece = 'X' if usr == 1 else 'O'
            i, j = np.unravel_index(int(position) - 1, shape=(3, 3))
            board.move(i, j, usr)
            board.print_game()
            w = board.check_win()

            global temp
            temp -= 1  # changing temp_number triggers the AI's reply via its change event
            return gr.update(size='lg', scale=0, min_width=100, value=usr_piece, interactive=True), gr.Number(value=temp, visible=False), gr.Number(value=w, visible=False)
        else:
            return gr.update(size='lg', scale=0, min_width=100, interactive=True), gr.update(value=temp, visible=False), gr.update(visible=False)

    def ia_move():
        global game_over
        if game_over == False:
            state_board = board.matriz.copy()
            avaible_moves = board.get_avaible_moves()
            action = q_agent.choose_move(state_board, avaible_moves, ia)
            i, j = board.number_ij(action)
            ia_piece = 'X' if ia == 1 else 'O'
            board.move(i, j, ia)
            board.print_game()
            w = board.check_win()
            # rebuild the 9 buttons, writing the AI's piece into the chosen cell
            retornos_btn = [gr.Button(size='lg', scale=0, min_width=100, interactive=True)] * (action + 1)
            retornos_btn[-1] = gr.Button(size='lg', scale=0, min_width=100, interactive=True, value=ia_piece)
            retornos_btn.extend([gr.Button(size='lg', scale=0, min_width=100, interactive=True)] * (9 - action - 1))
            retornos_btn.append(gr.Number(value=w, visible=False))
            return retornos_btn
        else:
            retornos_btn = [gr.Button(size='lg', scale=0, min_width=100, interactive=True)] * 9
            retornos_btn.append(gr.Number(visible=False))
            return retornos_btn

    def set_col_visible(selected):
        global usr
        global game_over
        game_over = False
        usr = 1 if selected == 'X' else 2
        global ia
        ia = 2 if usr == 1 else 1
        retorn_buttons = [gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=True)] * 9
        if selected == 'X':
            retorn_buttons.append(gr.Number(value=temp, visible=False))
        else:
            retorn_buttons = ia_move()  # the AI plays X, so it opens the game

        return retorn_buttons

    def reset():
        board.reset_matrix()
        buttons = [gr.Button(size='lg', scale=0, min_width=100, elem_classes='gradio-button', interactive=False)] * 9
        return buttons

    def check(n):
        msg = ''
        global game_over
        if n == 1:
            game_over = True
            msg = 'Player X won'
            board.reset_matrix()

        elif n == 2:
            game_over = True
            msg = 'Player O won'
            board.reset_matrix()

        elif n == 3:
            game_over = True
            msg = 'DRAW'
            board.reset_matrix()

        print('check', game_over)
        return gr.update(value=msg, show_label=False)

    parser = argparse.ArgumentParser()
    parser.add_argument('--file_name', type=str, required=False, default='models/q_agent-0.9ep-0.6ap-0.9-1000r.json')
    args = parser.parse_args()
    with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
        q_agent = QlearningAgent(epsilon=0, alpha=0, discount_factor=0, train=False)
        q_agent.load_agent_dict(args.file_name)
        board = tic_tac_toe_model(3)
        with gr.Row():
            with gr.Column() as x:
                title = gr.Text(value='Choose a piece', show_label=False)
                with gr.Row():
                    p1 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                    p2 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                    p3 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                with gr.Row():
                    p4 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                    p5 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                    p6 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                with gr.Row():
                    p7 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                    p8 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)
                    p9 = gr.Button(size='lg', scale=0, min_width=100, value='', elem_classes='gradio-button', interactive=False)

                # hidden numbers: one flat board position per button, plus state carriers
                n1 = gr.Number(value="1", visible=False)
                n2 = gr.Number(value="2", visible=False)
                n3 = gr.Number(value="3", visible=False)
                n4 = gr.Number(value="4", visible=False)
                n5 = gr.Number(value="5", visible=False)
                n6 = gr.Number(value="6", visible=False)
                n7 = gr.Number(value="7", visible=False)
                n8 = gr.Number(value="8", visible=False)
                n9 = gr.Number(value="9", visible=False)
                temp_number = gr.Number(value="10", visible=False)
                win_number = gr.Number(value="4", visible=False)
                game_state = gr.Number(value="0", visible=False)

            with gr.Column():
                c1 = gr.Radio(['X', 'O'], label='Play as')
                clear_button = gr.Button(value='Reset')

        buttons = [p1, p2, p3, p4, p5, p6, p7, p8, p9, win_number]
        renders_components = [p1, p2, p3, p4, p5, p6, p7, p8, p9, win_number]
        game_components = [p1, p2, p3, p4, p5, p6, p7, p8, p9, title, temp_number, win_number, c1]
        only_buttons = [p1, p2, p3, p4, p5, p6, p7, p8, p9]
        p1.click(fn=usr_move, inputs=[n1, p1], outputs=[p1, temp_number, win_number])
        p2.click(fn=usr_move, inputs=[n2, p2], outputs=[p2, temp_number, win_number])
        p3.click(fn=usr_move, inputs=[n3, p3], outputs=[p3, temp_number, win_number])
        p4.click(fn=usr_move, inputs=[n4, p4], outputs=[p4, temp_number, win_number])
        p5.click(fn=usr_move, inputs=[n5, p5], outputs=[p5, temp_number, win_number])
        p6.click(fn=usr_move, inputs=[n6, p6], outputs=[p6, temp_number, win_number])
        p7.click(fn=usr_move, inputs=[n7, p7], outputs=[p7, temp_number, win_number])
        p8.click(fn=usr_move, inputs=[n8, p8], outputs=[p8, temp_number, win_number])
        p9.click(fn=usr_move, inputs=[n9, p9], outputs=[p9, temp_number, win_number])

        def update_buttons():
            global game_over
            game_over = True
            return {p1: '', p2: '', p3: '', p4: '', p5: '', p6: '', p7: '', p8: '', p9: '',
                    title: 'Choose a piece', temp_number: 10, win_number: 4, c1: ''}

        c1.select(fn=set_col_visible, inputs=c1, outputs=buttons)

        temp_number.change(fn=ia_move, outputs=renders_components)
        win_number.change(fn=check, inputs=win_number, outputs=title)
        clear_button.click(fn=update_buttons, outputs=game_components)
        clear_button.click(fn=reset, outputs=only_buttons)

    demo.launch()

if __name__ == '__main__':
    main()
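
To run the app locally: python gradio_game.py, optionally passing --file_name to point at a trained Q-table. Note that the default path, models/q_agent-0.9ep-0.6ap-0.9-1000r.json, is not among the eight files in this upload, so the model apparently has to come from elsewhere; without it, load_agent_dict only prints a warning and the agent plays from an empty Q-table, i.e. effectively at random.
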
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio==4.4.1
numpy==1.26.2
tqdm==4.66.1