Gustking commited on
Commit
b3accf9
·
1 Parent(s): 3f29880

Upload 8 files

Browse files
classes/Qlearningagent.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import json
3
+ import ast
4
+
5
class QlearningAgent():
    """Tabular Q-learning agent for tic-tac-toe.

    Q-values live in a dict keyed by ``(state, action, piece)`` where
    ``state`` is the flattened board as a tuple, ``action`` is a flat cell
    index (callers use -1 to mark a terminal state) and ``piece`` is the
    player id (1 or 2).

    Args:
        epsilon: exploration probability used while training.
        alpha: learning rate.
        discount_factor: discount applied to the next step's reward.
        train: when True, epsilon-greedy exploration is active.
    """

    def __init__(self, epsilon, alpha, discount_factor, train):
        self.q_table = {}
        self.epsilon = epsilon
        self.alpha = alpha
        self.discount_factor = discount_factor
        self.train = train

    def save_agent_dict(self, file_name):
        """Persist the Q-table as JSON (tuple keys serialized as strings)."""
        # JSON object keys must be strings, so tuple keys are stringified.
        q_table_str_keys = {str(key): value for key, value in self.q_table.items()}
        with open(file_name, 'w') as file_json:
            json.dump(q_table_str_keys, file_json)

    def load_agent_dict(self, file_name):
        """Load a Q-table saved by ``save_agent_dict``.

        Stringified tuple keys are parsed back with ``ast.literal_eval``
        (safe: it only evaluates Python literals, never executes code).
        A missing file leaves the current Q-table untouched.
        """
        try:
            with open(file_name, 'r') as file_json:
                json_data = json.load(file_json)
            self.q_table = {ast.literal_eval(key): value for key, value in json_data.items()}
            print("Q-table loaded successfully.")
        except FileNotFoundError:
            print(f"File '{file_name}' not found. Q-table not loaded.")

    def get_q_value(self, state, action, piece):
        """Return Q(state, action, piece), creating a 0.0 entry on first access.

        ``state`` is a numpy array; it is flattened to a hashable tuple key.
        """
        state_tuple = tuple(state.flatten())
        # setdefault replaces the original membership-test-then-insert,
        # avoiding a double dict lookup while keeping identical behavior.
        return self.q_table.setdefault((state_tuple, action, piece), 0.0)

    def choose_move(self, state, available_moves, piece):
        """Epsilon-greedy action selection over ``available_moves``.

        While training, with probability epsilon a random move is returned;
        otherwise the move with the highest Q-value is chosen, breaking ties
        uniformly at random.
        """
        q_values = [self.get_q_value(state, action, piece) for action in available_moves]

        if random.uniform(0, 1) < self.epsilon and self.train:
            # Exploration branch (training only).
            return random.choice(available_moves)
        max_q_value = max(q_values)
        if q_values.count(max_q_value) > 1:
            # Several moves share the maximum: pick one of them at random.
            best_moves = [i for i in range(len(available_moves)) if q_values[i] == max_q_value]
            i = random.choice(best_moves)
        else:
            i = q_values.index(max_q_value)
        return available_moves[i]

    def update_q_value(self, states, rewards):
        """Update Q-values for a whole episode trajectory.

        For each step applies Q <- Q + alpha * (r_t + gamma * r_{t+1} - Q),
        using the next step's reward in place of max_a Q(s', a); the last
        step uses 0 as its future reward.

        Args:
            states: episode keys in order, as used in ``q_table``.
            rewards: reward received at each corresponding step.
        """
        for i, state in enumerate(states):
            if state not in self.q_table:
                self.q_table[state] = 0.0
            # BUG FIX: the original also accumulated a discounted return `rt`
            # in an O(n^2) inner loop but never used it (and raised the
            # discount to negative powers for earlier steps); that dead code
            # is removed. The stored Q-values are unchanged.
            next_reward = 0 if i == len(states) - 1 else rewards[i + 1]
            current = self.q_table[state]
            self.q_table[state] = current + (
                self.alpha * (rewards[i] + self.discount_factor * next_reward - current)
            )
classes/__pycache__/Qlearningagent.cpython-39.pyc ADDED
Binary file (2.97 kB). View file
 
classes/__pycache__/game_model.cpython-39.pyc ADDED
Binary file (3.32 kB). View file
 
classes/environment.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tqdm
2
+ from classes.game_model import tic_tac_toe_model
3
+ from classes.Qlearningagent import QlearningAgent
4
+
5
class environment():
    """Training environment for the Q-learning agent.

    Plays full games of tic-tac-toe — agent vs. a random opponent
    (``play_one_game``) or agent vs. itself (``play_ia_vs_ia``) — collecting
    a trajectory of (state, action, piece) keys and per-step rewards, then
    feeds each finished episode into the agent's Q-value update.
    """

    def __init__(self, tic_tac_toe: tic_tac_toe_model, q_agent: QlearningAgent, train: bool, show_stats: bool):
        # Board model, learning agent, training flag and tqdm progress toggle.
        self.board = tic_tac_toe
        self.q_agent = q_agent
        self.train = train
        self.show_stats = show_stats

    def play_one_game(self, piece):
        """Play one game as ``piece`` against a random-moving opponent.

        piece == 1 (X): the agent moves first; piece == 2 (O): the random
        opponent moves first. Each agent step records the *pre-move* board
        state with the chosen action; on game end a terminal entry with
        action -1 is appended. Returns the ``check_win`` code (1, 2, or 3
        for a draw).
        """
        game_over = False
        win_piece = 0
        self.board.reset_matrix()
        if piece == 1:
            piece_enemy = 2
            states_x = []
            rewards_x = []
            while not game_over:
                # Terminal check before the agent moves (the opponent may
                # have ended the game on the previous iteration).
                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_x = self.board.reward_piece(piece)
                    state_x = (tuple(state.flatten()), -1, piece)  # -1 marks a terminal state
                    states_x.append(state_x)
                    rewards_x.append(reward_x)
                    win_piece = w
                    break

                # X's move: snapshot the state first, then act; the
                # trajectory pairs the pre-move state with the action taken.
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action_x = self.q_agent.choose_move(state, avaible_moves, piece)
                i, j = self.board.number_ij(action_x)
                self.board.move(i, j, piece)
                reward_x = self.board.reward_piece(piece)
                state_x = (tuple(state.flatten()), action_x, piece)
                states_x.append(state_x)
                rewards_x.append(reward_x)
                # Terminal check right after the agent's own move.
                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_x = self.board.reward_piece(piece)
                    state_x = (tuple(state.flatten()), -1, piece)  # -1 marks a terminal state
                    states_x.append(state_x)
                    rewards_x.append(reward_x)
                    win_piece = w
                    break
                # Opponent replies with a uniformly random legal move.
                self.board.get_random_move(piece_enemy)

            # Learn from the whole episode once the game is over.
            self.q_agent.update_q_value(states_x, rewards_x)

            return win_piece
        else:
            piece_enemy = 1
            states_o = []
            rewards_o = []
            while not game_over:
                # Opponent (X) always moves first in this branch.
                self.board.get_random_move(piece_enemy)

                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_o = self.board.reward_piece(piece)
                    state_o = (tuple(state.flatten()), -1, piece)  # -1 marks a terminal state
                    states_o.append(state_o)
                    rewards_o.append(reward_o)
                    win_piece = w
                    break
                # O's move: snapshot pre-move state, act, record.
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action_o = self.q_agent.choose_move(state, avaible_moves, piece)
                i, j = self.board.number_ij(action_o)
                self.board.move(i, j, piece)

                reward_o = self.board.reward_piece(piece)
                state_o = (tuple(state.flatten()), action_o, piece)
                states_o.append(state_o)
                rewards_o.append(reward_o)
                w = self.board.check_win()
                if w != 4:
                    state = self.board.matriz.copy()
                    reward_o = self.board.reward_piece(piece)
                    state_o = (tuple(state.flatten()), -1, piece)  # -1 marks a terminal state
                    states_o.append(state_o)
                    rewards_o.append(reward_o)
                    win_piece = w
                    break

            self.q_agent.update_q_value(states_o, rewards_o)

            return win_piece

    def play_ia_vs_ia(self):
        """Play one self-play game (agent controls both X and O).

        Keeps two separate trajectories — one per piece — and, after the
        game, appends a terminal entry to the *losing* side's trajectory so
        it also sees the final (negative-reward) position. Both trajectories
        are then used to update the shared Q-table. Returns the winner code.
        """
        game_over = False
        self.board.reset_matrix()
        ia_x = 1
        ia_o = 2
        states_x = []
        rewards_x = []
        states_o = []
        rewards_o = []
        while not game_over:
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break
            state_x = self.board.matriz.copy()
            avaible_moves_x = self.board.get_avaible_moves()
            action_x = self.q_agent.choose_move(state_x, avaible_moves_x, ia_x)
            i, j = self.board.number_ij(action_x)
            self.board.move(i, j, ia_x)  # x play

            # X's state/reward recorded against the pre-move snapshot.
            reward_x = self.board.reward_piece(ia_x)
            state_x = (tuple(state_x.flatten()), action_x, ia_x)
            states_x.append(state_x)
            rewards_x.append(reward_x)

            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break

            state_o = self.board.matriz.copy()
            avaible_moves_o = self.board.get_avaible_moves()
            action_o = self.q_agent.choose_move(state_o, avaible_moves_o, ia_o)
            i, j = self.board.number_ij(action_o)
            self.board.move(i, j, ia_o)  # o play

            reward_o = self.board.reward_piece(ia_o)
            state_o = (tuple(state_o.flatten()), action_o, ia_o)
            states_o.append(state_o)
            rewards_o.append(reward_o)

        if win_piece == 1:
            # X won: give O a terminal entry for the final losing position.
            state = self.board.matriz.copy()
            reward_o = self.board.reward_piece(ia_o)
            state_o = (tuple(state.flatten()), -1, ia_o)  # -1 marks a terminal state
            states_o.append(state_o)
            rewards_o.append(reward_o)
        elif win_piece == 2:
            # O won: give X a terminal entry for the final losing position.
            state = self.board.matriz.copy()
            reward_x = self.board.reward_piece(ia_x)
            state_x = (tuple(state.flatten()), -1, ia_x)  # -1 marks a terminal state
            states_x.append(state_x)
            rewards_x.append(reward_x)

        self.q_agent.update_q_value(states_x, rewards_x)
        self.q_agent.update_q_value(states_o, rewards_o)

        return win_piece

    def run(self, n):
        """Run three training phases of ``n`` games each.

        Phase 1: agent as X vs. random; phase 2: agent as O vs. random;
        phase 3: self-play. ``show_stats`` toggles tqdm progress bars.
        Returns the three lists of winner codes.
        """
        wins_x = []
        wins_o = []
        wins_ia = []
        if self.show_stats:
            print(f'Playing {n} games with X')
            for i in tqdm.tqdm(range(0, n)):
                wins_x.append(self.play_one_game(piece=1))

            print(f'Playing {n} games with O')
            for i in tqdm.tqdm(range(0, n)):
                wins_o.append(self.play_one_game(piece=2))

            print(f'Playing {n} games ia vs ia')
            for i in tqdm.tqdm(range(0, n)):
                wins_ia.append(self.play_ia_vs_ia())
        else:
            for i in range(0, n):
                wins_x.append(self.play_one_game(piece=1))

            for i in range(0, n):
                wins_o.append(self.play_one_game(piece=2))

            for i in range(0, n):
                wins_ia.append(self.play_ia_vs_ia())

        return wins_x, wins_o, wins_ia
classes/game.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tqdm
2
+ import random
3
+ from classes.game_model import tic_tac_toe_model
4
+ from classes.Qlearningagent import QlearningAgent
5
+
6
class Game():
    """Plays tic-tac-toe matches: trained agent vs. itself, or agent vs. a
    human at the console.

    Args:
        tic_tac_toe: board model providing moves, win checks and rendering.
        q_agent: trained Q-learning agent used to pick moves.
    """

    def __init__(self, tic_tac_toe: tic_tac_toe_model, q_agent: QlearningAgent):
        self.board = tic_tac_toe
        self.q_agent = q_agent

    def ia_vs_ia(self):
        """Play one agent-vs-agent game; returns the winner code (1=X, 2=O, 3=draw)."""
        game_over = False
        self.board.reset_matrix()
        while not game_over:
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break
            # X (piece 1) moves.
            state = self.board.matriz.copy()
            avaible_moves = self.board.get_avaible_moves()
            action = self.q_agent.choose_move(state, avaible_moves, 1)
            i, j = self.board.number_ij(action)
            self.board.move(i, j, 1)
            # BUG FIX: re-evaluate the board after X's move. The original
            # reused the stale pre-move `w` here, so this check never fired
            # and O was allowed an extra move after X had already won.
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break
            # O (piece 2) moves.
            state = self.board.matriz.copy()
            avaible_moves = self.board.get_avaible_moves()
            action = self.q_agent.choose_move(state, avaible_moves, 2)
            i, j = self.board.number_ij(action)
            self.board.move(i, j, 2)

        return win_piece

    def run_ia_vs_ia(self, n):
        """Play ``n`` agent-vs-agent games; returns the list of winner codes."""
        games = []
        for i in tqdm.tqdm(range(0, n)):
            w = self.ia_vs_ia()
            games.append(w)
        return games

    def ia_vs_user(self):
        """Interactive console game: human vs. agent.

        Prompts (in Portuguese) let the user pick a piece or get a random
        one; the agent plays the other piece. X always moves first.
        Returns the winner code (1, 2, or 3 for a draw).
        """
        print('Menu')
        print("1 - Iniciar aleatorio")
        print("2 - Escolher peça [X-O]")
        m1 = int(input())
        while m1 != 1 and m1 != 2:
            print('Insira um valor valido 1-2')
            print("1 - Iniciar aleatorio")
            print("2 - Escolher peça [X-O]")
            m1 = int(input())

        game_over = False
        self.board.reset_matrix()
        pieces = [1, 2]
        win_piece = 0
        if m1 == 1:
            user = random.choice(pieces)
        else:
            print('1 - X \n 2 - O')
            user = int(input())
            while user != 1 and user != 2:
                print("Insira um valor válido 1-2")
                print('1 - X\n2 - O')
                user = int(input())

        ia = 2 if user == 1 else 1
        print("Começo de jogo")
        self.board.print_game()
        if ia == 1:
            # Agent plays X and opens every round.
            while not game_over:
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state, avaible_moves, ia)
                i, j = self.board.number_ij(action)
                print('Jogada da IA - X')
                self.board.move(i, j, ia)
                self.board.print_game()
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break
                print("Jogue Usuario - O [Numero da linha][Numero da coluna]")
                ml, mv = int(input()), int(input())
                self.board.move(ml, mv, user)
                # BUG FIX: the original additionally replayed the IA's cell
                # for the user (`self.board.move(i, j, user)`); that stray
                # call is removed.
                self.board.print_game()
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break

        else:
            # User plays X and moves first.
            while not game_over:
                print('Jogue Usuario - X [Numero da linha][Numero da coluna]')
                ml, mv = int(input()), int(input())

                self.board.move(ml, mv, user)
                self.board.print_game()
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break

                print('Jogada da IA - O')
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state, avaible_moves, ia)
                i, j = self.board.number_ij(action)
                self.board.move(i, j, ia)
                w = self.board.check_win()

                self.board.print_game()

                if w != 4:
                    win_piece = w
                    break

        if win_piece == ia:
            print("IA venceu Humano Fraco")
        elif win_piece == user:
            print("Humano venceu, esta preparado para a revolução?")
        else:
            print('Deu velha, mas a I.A segue aprendendo e melhorando e você?')
        return win_piece
classes/game_model.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import numpy as np
3
+
4
class tic_tac_toe_model():
    """n x n tic-tac-toe board model.

    Cell encoding: 0 = empty, 1 = X, 2 = O.
    ``check_win`` result codes: the winning piece (1 or 2), 3 for a draw
    ("velha"), 4 while the game is still in progress.
    """

    def __init__(self, n):
        self.n = n
        self.matriz = np.zeros((n, n), dtype=int)

    def reset_matrix(self):
        """Clear every cell back to 0."""
        self.matriz = np.zeros((self.n, self.n), dtype=int)

    def print_game(self):
        """Render the board on stdout with row and column indices."""
        symbols = {1: "X", 2: "O", 0: ' '}
        header = ' | '.join(str(col) for col in range(0, self.n))
        print(' ', header)
        for row_idx, row in enumerate(self.matriz):
            rendered = [str(symbols.get(cell, cell)) for cell in row]
            print(row_idx, " | ".join(rendered))
        print("-" * 12)

    def number_ij(self, number):
        """Translate a flat cell index into (row, col) coordinates."""
        return np.unravel_index(number, self.matriz.shape)

    def get_avaible_moves(self):
        """Return the flat indices of all currently empty cells."""
        empties = np.where(self.matriz == 0)
        return list(np.ravel_multi_index(empties, self.matriz.shape))

    def get_random_move(self, piece):
        """Place ``piece`` on a uniformly random empty cell (no-op if full)."""
        empty_i, empty_j = np.where(self.matriz == 0)
        n_empty = empty_j.shape[0]
        if n_empty > 0:
            pick = random.randint(0, n_empty - 1)
            self.move(empty_i[pick], empty_j[pick], piece)

    def move(self, index_i, index_j, piece):
        """Place ``piece`` at (index_i, index_j); occupied cells are left untouched."""
        if self.matriz[index_i][index_j] == 0:
            self.matriz[index_i][index_j] = piece

    def reward_piece(self, piece):
        """Reward from ``piece``'s view: +1 win, -1 loss, 0 otherwise (draw/ongoing)."""
        outcome = self.check_win()
        if outcome == 4 or outcome == 3:
            return 0
        return 1 if outcome == piece else -1

    def check_win(self):
        """Return 1/2 for a winner, 3 for a draw, 4 while the game is running."""
        # Candidate winning lines, in the same priority order as before:
        # main diagonal, anti-diagonal, then row k / column k for each k.
        lines = [self.matriz.diagonal(), np.fliplr(self.matriz).diagonal()]
        for k in range(self.n):
            lines.append(self.matriz[k, :])
            lines.append(self.matriz[:, k])

        for line in lines:
            distinct = np.unique(line)
            if distinct.shape[0] == 1 and distinct[0] != 0:
                return distinct[0]

        # No winner: draw when no empty cell remains, otherwise still playing.
        if np.where(self.matriz == 0)[0].shape[0] == 0:
            return 3
        return 4
gradio_game.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import numpy as np
4
+ import time
5
+ import argparse
6
+ from classes.game_model import tic_tac_toe_model
7
+ from classes.Qlearningagent import QlearningAgent
8
+
9
+
10
# Inline CSS injected into the Gradio Blocks app: renders each board cell
# as a fixed 75x75 blue button. The comments inside the string are CSS and
# part of the runtime stylesheet, so they are left as-is.
custom_css = """
.gradio-button {
    width: 75px; /* Defina a largura desejada */
    height: 75px; /* Defina a altura desejada */
    background-color: blue; /* Cor de fundo azul */
    color: white; /* Cor do texto (branco neste caso) */
    border: none; /* Remove a borda */
    text-align: center; /* Centraliza o texto horizontalmente */
    text-decoration: none; /* Remove a decoração do texto */
    display: inline-block; /* Permite definir largura e altura */
    font-size: 16px; /* Tamanho do texto */
    line-height: 75px; /* Centraliza o texto verticalmente */
}

"""

# Module-level state shared by the Gradio callbacks defined inside main().
usr = -2  # piece id the human plays (1=X, 2=O); -2 until a piece is chosen
ia = -1  # piece id the agent plays; -1 until a piece is chosen
game_over = False  # True once a game has finished or the board was reset
temp = -2  # counter mirrored into a hidden gr.Number; each user move changes it so its .change event fires the agent's reply
30
+
31
def main():
    """Build and launch the Gradio tic-tac-toe UI (human vs. trained agent).

    Layout: a 3x3 grid of buttons for the board, a radio to pick the human's
    piece, and a reset button. Hidden gr.Number components carry each cell's
    1-based position and act as event plumbing: `temp_number` changing
    triggers the agent's reply, `win_number` changing updates the title.
    """

    def usr_move(position, value):
        """Board-button click handler: place the human's piece.

        `position` is the cell's 1-based number (from a hidden gr.Number);
        `value` is the button's current label — '' means the cell is free.
        Returns updates for the clicked button, `temp_number` and
        `win_number`; on an occupied cell or finished game, no-op updates.
        """
        global game_over
        if value == '' and game_over == False:
            usr_piece = 'X' if usr == 1 else 'O'
            # Hidden numbers are 1..9; flat board indices are 0..8 row-major.
            i, j = np.unravel_index(int(position) - 1, shape=(3, 3))
            board.move(i, j, usr)
            board.print_game()
            w = board.check_win()

            global temp
            # Decrement so temp_number always changes, firing its .change
            # event, which calls ia_move for the agent's reply.
            temp -= 1
            return gr.update(size='lg', scale = 0, min_width = 100, value=usr_piece, interactive=True), gr.Number(value=temp, visible=False), gr.Number(value=w, visible=False)
        else:
            return gr.update(size='lg', scale = 0, min_width = 100, interactive=True), gr.update(value=temp, visible=False), gr.update(visible=False)

    def ia_move():
        """Let the agent play one move; returns the refreshed 9 buttons plus win_number."""
        global game_over
        if game_over == False:
            state_board = board.matriz.copy()
            avaible_moves = board.get_avaible_moves()
            action = q_agent.choose_move(state_board, avaible_moves, ia)
            i, j = board.number_ij(action)
            ia_piece = 'X' if ia == 1 else 'O'
            board.move(i, j, ia)
            board.print_game()
            w = board.check_win()
            # Rebuild all 9 button updates; only the one at flat index
            # `action` receives the agent's piece as its label.
            retornos_btn = [gr.Button(size='lg', scale = 0, min_width = 100, interactive=True)]*(action+1)
            retornos_btn[-1] = gr.Button(size='lg', scale = 0, min_width = 100, interactive=True, value=ia_piece)
            retornos_btn.extend([gr.Button(size='lg', scale = 0, min_width = 100, interactive=True)]*(9 - action-1))
            retornos_btn.append(gr.Number(value=w, visible=False))
            return retornos_btn
        else:
            # Game already over: emit neutral updates for every component.
            retornos_btn = [gr.Button(size='lg', scale = 0, min_width = 100, interactive=True)]*9
            retornos_btn.append(gr.Number(visible=False))
            return retornos_btn

    def set_col_visible(selected):
        """Radio handler: assign pieces and start a fresh game.

        If the human picked 'O', the agent (X) makes the opening move
        immediately via ia_move().
        """
        global usr
        global game_over
        game_over = False
        usr = 1 if selected == 'X' else 2
        global ia
        ia = 2 if usr == 1 else 1
        retorn_buttons = [gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=True)]*9
        if selected == 'X':
            retorn_buttons.append(gr.Number(value=temp, visible=False))

        else:
            retorn_buttons = ia_move()

        return retorn_buttons

    def reset():
        """Reset handler: clear the board model and disable all 9 buttons."""
        board.reset_matrix()
        buttons = [gr.Button(size='lg', scale = 0, min_width = 100, elem_classes='gradio-button', interactive=False)]*9
        return buttons

    def check(n):
        """win_number change handler: map a win code to the title message.

        Codes follow the board model: 1 = X won, 2 = O won, 3 = draw;
        anything else leaves the title empty and the game running.
        """
        msg = ''
        global game_over
        if n == 1:
            game_over = True
            msg = 'O jogador X ganhou'
            board.reset_matrix()

        elif n == 2:
            game_over = True
            msg = 'O jogador O ganhou'
            board.reset_matrix()

        elif n == 3:
            game_over = True
            msg = 'VELHA'
            board.reset_matrix()

        print('check', game_over)
        return gr.update(value=msg, show_label=False)

    # CLI flag: path of the serialized Q-table to load.
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_name', type=str, required=False, default='models/q_agent-0.9ep-0.6ap-0.9-1000r.json')
    args = parser.parse_args()
    with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
        # Inference-only agent: epsilon=0 and train=False disable exploration.
        q_agent = QlearningAgent(epsilon=0, alpha=0, discount_factor=0, train=False)
        q_agent.load_agent_dict(args.file_name)
        board = tic_tac_toe_model(3)
        with gr.Row():
            with gr.Column() as x:
                title = gr.Text(value='Escolha uma peça', show_label=False)
                with gr.Row():
                    p1 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                    p2 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                    p3 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                with gr.Row():
                    p4 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                    p5 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                    p6 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                with gr.Row():
                    p7 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                    p8 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)
                    p9 = gr.Button(size='lg', scale = 0, min_width = 100, value='', elem_classes='gradio-button', interactive=False)

                # Hidden constants: each button's 1-based board position,
                # passed to usr_move alongside the button's own label.
                n1 = gr.Number(value="1", visible=False)
                n2 = gr.Number(value="2", visible=False)
                n3 = gr.Number(value="3", visible=False)
                n4 = gr.Number(value="4", visible=False)
                n5 = gr.Number(value="5", visible=False)
                n6 = gr.Number(value="6", visible=False)
                n7 = gr.Number(value="7", visible=False)
                n8 = gr.Number(value="8", visible=False)
                n9 = gr.Number(value="9", visible=False)
                temp_number = gr.Number(value="10", visible=False)
                win_number = gr.Number(value="4", visible=False)
                game_state = gr.Number(value="0", visible=False)

            with gr.Column():
                c1 = gr.Radio(['X','O'], label='Jogar como')
                clear_button = gr.Button(value='Resetar')

        # Component groups reused as callback output lists.
        buttons = [p1, p2, p3, p4, p5, p6, p7, p8, p9, win_number]
        renders_components = [p1, p2, p3, p4, p5, p6, p7, p8, p9, win_number]
        game_components = [p1, p2, p3, p4, p5, p6, p7, p8, p9, title, temp_number, win_number, c1]
        only_buttons = [p1, p2, p3, p4, p5, p6, p7, p8, p9]
        p1.click(fn=usr_move, inputs=[n1, p1], outputs=[p1, temp_number, win_number])
        p2.click(fn=usr_move, inputs=[n2, p2], outputs=[p2, temp_number, win_number])
        p3.click(fn=usr_move, inputs=[n3, p3], outputs=[p3, temp_number, win_number])
        p4.click(fn=usr_move, inputs=[n4, p4], outputs=[p4, temp_number, win_number])
        p5.click(fn=usr_move, inputs=[n5, p5], outputs=[p5, temp_number, win_number])
        p6.click(fn=usr_move, inputs=[n6, p6], outputs=[p6, temp_number, win_number])
        p7.click(fn=usr_move, inputs=[n7, p7], outputs=[p7, temp_number, win_number])
        p8.click(fn=usr_move, inputs=[n8, p8], outputs=[p8, temp_number, win_number])
        p9.click(fn=usr_move, inputs=[n9, p9], outputs=[p9, temp_number, win_number])

        def update_buttons():
            """Reset handler: restore every component's initial value/label."""
            global game_over
            game_over = True
            return {p1: '', p2: '', p3: '', p4: '', p5: '', p6: '', p7: '', p8: '', p9: '',
                    title: 'Escolha uma peça', temp_number: 10, win_number: 4, c1: ''}

        # Event wiring: radio starts a game, temp_number triggers the agent,
        # win_number updates the title, reset button clears everything.
        c1.select(fn=set_col_visible, inputs=c1, outputs=buttons)

        temp_number.change(fn=ia_move, outputs=renders_components)
        win_number.change(fn=check, inputs=win_number, outputs=title)
        clear_button.click(fn=update_buttons, outputs=game_components)
        clear_button.click(fn=reset, outputs=only_buttons)

    demo.launch()

if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==4.4.1
2
+ numpy==1.26.2
3
+ tqdm==4.66.1