Q-Learning Agent playing FrozenLake-v1
This is a trained model of a Q-Learning agent playing FrozenLake-v1.
Usage
from huggingface_sb3 import load_from_hub
import gymnasium as gym
from tqdm import tqdm
import numpy as np
import pickle
def greedy_policy(Qtable: np.ndarray, state: int):
    """
    Pick the greedy (pure exploitation) action for ``state``.

    :param Qtable: The Q-table, indexed as ``Qtable[state, action]``
    :param state: Row index of the current state in the Q-table
    :return: Index of the action with the highest Q-value for ``state``;
        ``np.argmax`` returns the first such index when several are tied
    """
    # Exploitation: take the action with the highest (state, action) value.
    return np.argmax(Qtable[state, :])
def evaluate_agent(env: gym.Env, max_steps: int, n_eval_episodes: int, Q: np.ndarray):
    """
    Evaluate the agent for ``n_eval_episodes`` episodes and return the mean and std of the episode rewards.

    Every episode follows the greedy policy for at most ``max_steps`` steps and
    ends early as soon as the environment reports termination or truncation.

    :param env: The evaluation environment
    :param max_steps: Maximum number of steps per episode
    :param n_eval_episodes: Number of episodes to evaluate the agent
    :param Q: The Q-table, indexed as ``Q[state, action]``
    :return: ``(mean_reward, std_reward)`` as Python floats
    """
    episode_rewards = []
    for _ in tqdm(range(n_eval_episodes)):
        state, _ = env.reset()
        total_rewards_ep = 0
        for _ in range(max_steps):
            action = greedy_policy(Q, state)
            new_state, reward, terminated, truncated, _ = env.step(action)
            total_rewards_ep += reward
            # The reward of the final transition is counted before leaving the episode.
            if terminated or truncated:
                break
            state = new_state
        episode_rewards.append(total_rewards_ep)
    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)
    return float(mean_reward), float(std_reward)
if __name__ == "__main__":
    # Download the pickled checkpoint from the Hugging Face Hub.
    file_path = load_from_hub(repo_id="BobChuang/q-FrozenLake-v1-4x4-noSlippery", filename="q-learning.pkl")

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load checkpoints from repositories you trust.
    with open(file_path, "rb") as f:
        model = pickle.load(f)

    env = gym.make(model["env_id"], render_mode="rgb_array", is_slippery=False)
    try:
        max_steps = model["max_steps"]
        n_eval_episodes = model["n_eval_episodes"]
        qtable = model["qtable"]
        mean_reward, std_reward = evaluate_agent(env, max_steps, n_eval_episodes, qtable)
    finally:
        # Release the environment's resources even if evaluation fails.
        env.close()

    print(f"\n{ mean_reward = }, { std_reward = }")
Evaluation results
- mean_reward on FrozenLake-v1-4x4-no_slippery (self-reported): 1.00 +/- 0.00