Q-Learning Agent playing FrozenLake-v1

This is a trained model of a Q-Learning agent playing FrozenLake-v1.

Usage

from huggingface_sb3 import load_from_hub
import gymnasium as gym
from tqdm import tqdm
import numpy as np
import pickle

def greedy_policy(Qtable, state):
  """
  Select the greedy action for ``state``.
  :param Qtable: The Q-table, indexed as ``Qtable[state, action]``
  :param state: Index of the row to look up in the Q-table
  :return: Index of the largest value in that row (first one on ties, per ``np.argmax``)
  """
  # Pure exploitation: no randomness, just the best-known action value.
  action_values = Qtable[state, :]
  return np.argmax(action_values)

def evaluate_agent(env: gym.Env, max_steps: int, n_eval_episodes: int, Q: np.ndarray):
  """
  Roll out the greedy policy for ``n_eval_episodes`` episodes and summarise the returns.
  :param env: The evaluation environment (reset once per episode)
  :param max_steps: Upper bound on the number of steps taken in a single episode
  :param n_eval_episodes: Number of episodes to run
  :param Q: The Q-table handed to ``greedy_policy``
  :return: Tuple ``(mean_reward, std_reward)`` of the per-episode reward totals, as floats
  """
  returns_per_episode = []
  for _ in tqdm(range(n_eval_episodes)):
      observation, _info = env.reset()
      episode_return = 0

      for _ in range(max_steps):
          # Always act greedily with respect to the Q-table; no exploration here.
          chosen_action = greedy_policy(Q, observation)
          observation, reward, terminated, truncated, _info = env.step(chosen_action)
          episode_return += reward

          # The episode ends as soon as the environment reports either flag.
          if terminated or truncated:
              break

      returns_per_episode.append(episode_return)

  mean_reward = float(np.mean(returns_per_episode))
  std_reward = float(np.std(returns_per_episode))
  return mean_reward, std_reward

if __name__ == "__main__":
  # Fetch the pickled model file from the Hub; load_from_hub returns a path we can open.
  file_path = load_from_hub(repo_id="BobChuang/q-FrozenLake-v1-4x4-noSlippery", filename="q-learning.pkl")
  # SECURITY: pickle.load can execute arbitrary code during deserialization.
  # Only load this file from a repository you trust.
  with open(file_path, 'rb') as f:
      model = pickle.load(f)

  # The pickled dict carries the environment id, the evaluation settings and the Q-table.
  env = gym.make(model["env_id"], render_mode="rgb_array", is_slippery=False)
  try:
      max_steps = model["max_steps"]
      n_eval_episodes = model["n_eval_episodes"]
      qtable = model["qtable"]

      mean_reward, std_reward = evaluate_agent(env, max_steps, n_eval_episodes, qtable)
  finally:
      # Release the environment's resources even if evaluation raises.
      env.close()
  print(f"\n{ mean_reward = }, { std_reward = }")
Downloads last month

-

Downloads are not tracked for this model. How to track
Video Preview
loading

Evaluation results

  • mean_reward on FrozenLake-v1-4x4-no_slippery
    self-reported
    1.00 +/- 0.00