2022-01-19 20:52:00 +01:00
|
|
|
import os
|
2022-03-31 12:50:38 -07:00
|
|
|
from typing import Optional
|
2022-01-19 20:52:00 +01:00
|
|
|
|
|
|
|
import numpy as np
|
2021-12-08 22:14:15 +01:00
|
|
|
|
2022-09-16 23:41:27 +01:00
|
|
|
import gymnasium as gym
|
2022-09-08 10:10:07 +01:00
|
|
|
from gymnasium import spaces
|
|
|
|
from gymnasium.error import DependencyNotInstalled
|
2016-05-09 22:05:56 -07:00
|
|
|
|
2020-11-09 13:24:26 -05:00
|
|
|
|
2016-05-10 17:05:47 +02:00
|
|
|
def cmp(a, b):
    """Three-way comparison of ``a`` and ``b`` returned as a float.

    Returns ``1.0`` when ``a > b``, ``-1.0`` when ``a < b`` and ``0.0``
    otherwise.
    """
    if a > b:
        return 1.0
    if a < b:
        return -1.0
    return 0.0
|
2016-05-10 17:05:47 +02:00
|
|
|
|
2020-11-09 13:24:26 -05:00
|
|
|
|
2016-05-09 22:05:56 -07:00
|
|
|
# Card values of one suit: 1 = Ace, 2-9 = number cards at face value, and
# four ten-valued ranks (10, Jack, Queen, King).
deck = list(range(1, 10)) + [10] * 4
|
|
|
|
|
|
|
|
|
2016-05-29 09:07:09 -07:00
|
|
|
def draw_card(np_random):
    """Draw one card value from ``deck`` using ``np_random.choice``.

    The picked value is converted to a plain ``int`` before being returned.
    """
    card = np_random.choice(deck)
    return int(card)
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
2016-05-29 09:07:09 -07:00
|
|
|
def draw_hand(np_random):
    """Return a new two-card hand as a list, drawing each card with ``draw_card``."""
    return [draw_card(np_random) for _ in range(2)]
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
|
|
|
def usable_ace(hand):
    """Return 1 if the hand holds an ace that can count as 11, else 0.

    An ace (value 1) is usable when adding 10 to the hand's raw sum does not
    push it above 21, i.e. when the raw sum is at most 11.
    """
    if 1 not in hand:
        return 0
    return int(sum(hand) <= 11)
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
|
|
|
def sum_hand(hand):
    """Return the current total of the hand.

    The total is the raw sum of the card values, plus 10 when
    ``usable_ace`` reports that an ace can be counted as 11.
    """
    total = sum(hand)
    return total + 10 if usable_ace(hand) else total
|
|
|
|
|
|
|
|
|
|
|
|
def is_bust(hand):
    """Return True when the hand's total (as given by ``sum_hand``) exceeds 21."""
    total = sum_hand(hand)
    return total > 21
|
|
|
|
|
|
|
|
|
|
|
|
def score(hand):
    """Return the score of the hand: 0 if it is bust, otherwise its total."""
    if is_bust(hand):
        return 0
    return sum_hand(hand)
|
|
|
|
|
|
|
|
|
|
|
|
def is_natural(hand):
    """Return True when the hand is a natural blackjack.

    A natural is exactly two cards: one ace (1) and one ten-valued card (10),
    in either order.
    """
    return len(hand) == 2 and 1 in hand and 10 in hand
|
|
|
|
|
|
|
|
|
2022-09-16 23:41:27 +01:00
|
|
|
class BlackjackEnv(gym.Env):
    """
    Blackjack is a card game where the goal is to beat the dealer by obtaining cards
    that sum to closer to 21 (without going over 21) than the dealer's cards.

    ## Description
    The game starts with the dealer having one face up and one face down card,
    while the player has two face up cards. All cards are drawn from an infinite deck
    (i.e. with replacement).

    The card values are:
    - Face cards (Jack, Queen, King) have a point value of 10.
    - Aces can either count as 11 (called a 'usable ace') or 1.
    - Numerical cards (2-10) have a value equal to their number.

    The player has the sum of cards held. The player can request
    additional cards (hit) until they decide to stop (stick) or exceed 21 (bust,
    immediate loss).

    After the player sticks, the dealer reveals their facedown card, and draws cards
    until their sum is 17 or greater. If the dealer goes bust, the player wins.

    If neither the player nor the dealer busts, the outcome (win, lose, draw) is
    decided by whose sum is closer to 21.

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto [<a href="#blackjack_ref">1</a>].

    ## Action Space
    The action space is `Discrete(2)`: the action is an integer in `{0, 1}`
    indicating whether to stick or hit.

    - 0: Stick
    - 1: Hit

    ## Observation Space
    The observation consists of a 3-tuple containing: the player's current sum,
    the value of the dealer's one showing card (1-10 where 1 is ace),
    and whether the player holds a usable ace (0 or 1).

    The observation is returned as `(int(), int(), int())`.

    ## Starting State
    The starting state is initialised in the following range.

    | Observation               | Min  | Max  |
    |---------------------------|------|------|
    | Player current sum        |  4   |  21  |
    | Dealer showing card value |  1   |  10  |
    | Usable Ace                |  0   |  1   |

    ## Rewards
    - win game: +1
    - lose game: -1
    - draw game: 0
    - win game with natural blackjack:
    +1.5 (if <a href="#nat">natural</a> is True)
    +1 (if <a href="#nat">natural</a> is False)

    ## Episode End
    The episode ends if the following happens:

    - Termination:
    1. The player hits and the sum of hand exceeds 21.
    2. The player sticks.

    An ace will always be counted as usable (11) unless it busts the player.

    ## Information

    No additional information is returned.

    ## Arguments

    ```python
    import gymnasium as gym
    gym.make('Blackjack-v1', natural=False, sab=False)
    ```

    <a id="nat"></a>`natural=False`: Whether to give an additional reward for
    starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).

    <a id="sab"></a>`sab=False`: Whether to follow the exact rules outlined in the book by
    Sutton and Barto. If `sab` is `True`, the keyword argument `natural` will be ignored.
    If the player achieves a natural blackjack and the dealer does not, the player
    will win (i.e. get a reward of +1). The reverse rule does not apply.
    If both the player and the dealer get a natural, it will be a draw (i.e. reward 0).

    ## References
    <a id="blackjack_ref"></a>[1] R. Sutton and A. Barto, “Reinforcement Learning:
    An Introduction” 2020. [Online]. Available: [http://www.incompleteideas.net/book/RLbook2020.pdf](http://www.incompleteideas.net/book/RLbook2020.pdf)

    ## Version History
    * v1: Fix the natural handling in Blackjack
    * v0: Initial version release
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 4,
    }

    def __init__(self, render_mode: Optional[str] = None, natural=False, sab=False):
        """Set up the action/observation spaces and store the rule flags.

        Args:
            render_mode: Stored on the instance; ``"human"`` makes ``reset`` and
                ``step`` call ``render`` themselves, ``None`` disables rendering.
            natural: Pay 1.5 instead of 1 for a win with a natural blackjack
                (only consulted when ``sab`` is false).
            sab: Follow the Sutton and Barto rules; takes precedence over ``natural``.
        """
        self.action_space = spaces.Discrete(2)
        # Components: player sum, dealer's showing card, usable-ace flag.
        # The player sum can exceed 21 on the bust step (at most 21 + 10 = 31,
        # assuming no further steps are taken after a bust).
        self.observation_space = spaces.Tuple(
            (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))
        )

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural

        # Flag for full agreement with the (Sutton and Barto, 2018) definition. Overrides self.natural
        self.sab = sab

        self.render_mode = render_mode

    def step(self, action):
        """Apply one player action (1 = hit, 0 = stick).

        Returns:
            A 5-tuple ``(observation, reward, terminated, truncated, info)``.
            ``truncated`` is always ``False`` and ``info`` is always an empty dict.
        """
        assert self.action_space.contains(action)
        if action:  # hit: add a card to the player's hand and return
            self.player.append(draw_card(self.np_random))
            if is_bust(self.player):
                terminated = True
                reward = -1.0
            else:
                terminated = False
                reward = 0.0
        else:  # stick: play out the dealer's hand, and score
            terminated = True
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.np_random))
            # A bust hand scores 0, so cmp() also covers the "dealer busts" case.
            reward = cmp(score(self.player), score(self.dealer))
            if self.sab and is_natural(self.player) and not is_natural(self.dealer):
                # Player automatically wins. Rules consistent with S&B
                reward = 1.0
            elif (
                not self.sab
                and self.natural
                and is_natural(self.player)
                and reward == 1.0
            ):
                # Natural gives extra points, but doesn't autowin. Legacy implementation
                reward = 1.5

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), reward, terminated, False, {}

    def _get_obs(self):
        """Return ``(player sum, dealer's first card, usable-ace flag)``."""
        return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))

    def reset(
        self,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Deal fresh two-card hands and return ``(observation, {})``.

        ``seed`` is forwarded to ``super().reset``; ``options`` is not used here.
        The dealer's hand is drawn before the player's.
        """
        super().reset(seed=seed)
        self.dealer = draw_hand(self.np_random)
        self.player = draw_hand(self.np_random)

        _, dealer_card_value, _ = self._get_obs()

        # Pick a suit and rank label for the dealer's showing card; render()
        # uses them to build the card image filename.
        suits = ["C", "D", "H", "S"]
        self.dealer_top_card_suit = self.np_random.choice(suits)

        if dealer_card_value == 1:
            self.dealer_top_card_value_str = "A"
        elif dealer_card_value == 10:
            # A value of 10 is drawn as a randomly chosen face card.
            self.dealer_top_card_value_str = self.np_random.choice(["J", "Q", "K"])
        else:
            self.dealer_top_card_value_str = str(dealer_card_value)

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), {}

    def render(self):
        """Draw the current state with pygame.

        With ``render_mode`` of ``None`` a warning is logged and nothing is drawn.
        In ``"human"`` mode the display is updated and ``None`` is returned; in any
        other mode the frame is returned as a numpy array.

        Raises:
            DependencyNotInstalled: if pygame cannot be imported.
        """
        if self.render_mode is None:
            assert self.spec is not None
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        # pygame is imported lazily so the module can be loaded without it.
        try:
            import pygame
        except ImportError as e:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gymnasium[toy-text]`"
            ) from e

        player_sum, dealer_card_value, usable_ace = self._get_obs()
        screen_width, screen_height = 600, 500
        card_img_height = screen_height // 3
        card_img_width = int(card_img_height * 142 / 197)
        spacing = screen_height // 20

        bg_color = (7, 99, 36)
        white = (255, 255, 255)

        # The drawing target is created on the first render call only: a window
        # in "human" mode, an off-screen surface otherwise.
        if not hasattr(self, "screen"):
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode((screen_width, screen_height))
            else:
                pygame.font.init()
                self.screen = pygame.Surface((screen_width, screen_height))

        if not hasattr(self, "clock"):
            self.clock = pygame.time.Clock()

        self.screen.fill(bg_color)

        # Asset paths are resolved relative to this file's directory.
        def get_image(path):
            cwd = os.path.dirname(__file__)
            image = pygame.image.load(os.path.join(cwd, path))
            return image

        def get_font(path, size):
            cwd = os.path.dirname(__file__)
            font = pygame.font.Font(os.path.join(cwd, path), size)
            return font

        small_font = get_font(
            os.path.join("font", "Minecraft.ttf"), screen_height // 15
        )
        dealer_text = small_font.render(
            "Dealer: " + str(dealer_card_value), True, white
        )
        dealer_text_rect = self.screen.blit(dealer_text, (spacing, spacing))

        def scale_card_img(card_img):
            return pygame.transform.scale(card_img, (card_img_width, card_img_height))

        # Dealer's showing card, placed left of the horizontal centre.
        dealer_card_img = scale_card_img(
            get_image(
                os.path.join(
                    "img",
                    f"{self.dealer_top_card_suit}{self.dealer_top_card_value_str}.png",
                )
            )
        )
        dealer_card_rect = self.screen.blit(
            dealer_card_img,
            (
                screen_width // 2 - card_img_width - spacing // 2,
                dealer_text_rect.bottom + spacing,
            ),
        )

        # Dealer's face-down card, placed right of the horizontal centre.
        hidden_card_img = scale_card_img(get_image(os.path.join("img", "Card.png")))
        self.screen.blit(
            hidden_card_img,
            (
                screen_width // 2 + spacing // 2,
                dealer_text_rect.bottom + spacing,
            ),
        )

        player_text = small_font.render("Player", True, white)
        player_text_rect = self.screen.blit(
            player_text, (spacing, dealer_card_rect.bottom + 1.5 * spacing)
        )

        # The player's hand is shown only as its numeric total.
        large_font = get_font(os.path.join("font", "Minecraft.ttf"), screen_height // 6)
        player_sum_text = large_font.render(str(player_sum), True, white)
        player_sum_text_rect = self.screen.blit(
            player_sum_text,
            (
                screen_width // 2 - player_sum_text.get_width() // 2,
                player_text_rect.bottom + spacing,
            ),
        )

        if usable_ace:
            usable_ace_text = small_font.render("usable ace", True, white)
            self.screen.blit(
                usable_ace_text,
                (
                    screen_width // 2 - usable_ace_text.get_width() // 2,
                    player_sum_text_rect.bottom + spacing // 2,
                ),
            )
        if self.render_mode == "human":
            pygame.event.pump()
            pygame.display.update()
            self.clock.tick(self.metadata["render_fps"])
        else:
            # Swap the first two axes of the pixel array; presumably this turns
            # pygame's (width, height, 3) layout into (height, width, 3) - TODO confirm.
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down pygame if ``render`` ever created a screen; otherwise do nothing."""
        if hasattr(self, "screen"):
            import pygame

            pygame.display.quit()
            pygame.quit()
|
|
|
|
|
2022-03-15 20:35:55 -04:00
|
|
|
|
2022-03-15 00:41:02 -04:00
|
|
|
# Pixel art from Mariia Khmelnytska (https://www.123rf.com/photo_104453049_stock-vector-pixel-art-playing-cards-standart-deck-vector-set.html)
|