2016-05-09 22:05:56 -07:00
|
|
|
import gym
|
|
|
|
from gym import spaces
|
2016-05-29 09:07:09 -07:00
|
|
|
from gym.utils import seeding
|
2016-05-09 22:05:56 -07:00
|
|
|
|
2020-11-09 13:24:26 -05:00
|
|
|
|
2016-05-10 17:05:47 +02:00
|
|
|
def cmp(a, b):
    """Three-way comparison of ``a`` and ``b`` expressed as a float.

    Returns 1.0 when ``a > b``, -1.0 when ``a < b`` and 0.0 otherwise.
    """
    greater = float(a > b)
    smaller = float(a < b)
    return greater - smaller
|
2016-05-10 17:05:47 +02:00
|
|
|
|
2020-11-09 13:24:26 -05:00
|
|
|
|
2016-05-09 22:05:56 -07:00
|
|
|
# Card values available on every draw:
#   1 = Ace, 2-10 = number cards, and Jack/Queen/King each count as 10
#   (hence the three additional 10s after the number cards).
deck = list(range(1, 11)) + [10] * 3
|
|
|
|
|
|
|
|
|
2016-05-29 09:07:09 -07:00
|
|
|
def draw_card(np_random):
    """Pick one card value from ``deck`` using ``np_random.choice``.

    The chosen value is converted to a plain ``int`` before being returned.
    """
    card = np_random.choice(deck)
    return int(card)
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
2016-05-29 09:07:09 -07:00
|
|
|
def draw_hand(np_random):
    """Return a fresh two-card hand as a list, drawing each card in turn."""
    return [draw_card(np_random) for _ in range(2)]
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
|
|
|
def usable_ace(hand):
    """Tell whether ``hand`` holds an ace that can count as 11.

    An ace (value 1) is usable when adding the extra 10 points to the hand's
    raw sum does not push the total above 21.
    """
    if 1 not in hand:
        return False
    return sum(hand) + 10 <= 21
|
|
|
|
|
|
|
|
|
|
|
|
def sum_hand(hand):
    """Return the current total of ``hand``.

    A usable ace contributes 11 instead of 1, i.e. 10 extra points on top of
    the raw sum of the card values.
    """
    bonus = 10 if usable_ace(hand) else 0
    return sum(hand) + bonus
|
|
|
|
|
|
|
|
|
|
|
|
def is_bust(hand):
    """Tell whether ``hand`` is a bust, i.e. its total exceeds 21."""
    total = sum_hand(hand)
    return total > 21
|
|
|
|
|
|
|
|
|
|
|
|
def score(hand):
    """Return the score of ``hand``: its total, or 0 when the hand is bust."""
    if is_bust(hand):
        return 0
    return sum_hand(hand)
|
|
|
|
|
|
|
|
|
|
|
|
def is_natural(hand):
    """Tell whether ``hand`` is a natural blackjack.

    A natural is exactly an ace (1) together with a ten-valued card, in
    either order.
    """
    ordered = list(hand)
    ordered.sort()
    return ordered == [1, 10]
|
|
|
|
|
|
|
|
|
|
|
|
class BlackjackEnv(gym.Env):
    """Simple blackjack environment.

    Blackjack is a card game where the goal is to obtain cards that sum to as
    near as possible to 21 without going over. The player competes against a
    fixed dealer.

    Card values:
        * Face cards (Jack, Queen, King) have point value 10.
        * Aces can count as either 11 or 1; an ace counted as 11 is called
          'usable'.

    The game is played with an infinite deck (i.e. cards are drawn with
    replacement).

    The game starts with the dealer having one face-up and one face-down
    card, while the player has two face-up cards (as in virtually all
    Blackjack games today).

    The player can request additional cards (hit=1) until they decide to stop
    (stick=0) or exceed 21 (bust).

    After the player sticks, the dealer reveals their face-down card and
    draws until their sum is 17 or greater. If the dealer goes bust the
    player wins.

    If neither player nor dealer busts, the outcome (win, lose, draw) is
    decided by whose sum is closer to 21. The reward for winning is +1,
    drawing is 0, and losing is -1.

    The observation is a 3-tuple of: the player's current sum, the dealer's
    one showing card (1-10 where 1 is ace), and whether or not the player
    holds a usable ace.

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto.
    http://incompleteideas.net/book/the-book-2nd.html

    Args:
        natural: If true (and ``sab`` is false), a winning natural blackjack
            pays 1.5 instead of 1.0.
        sab: If true, follow the Sutton and Barto rules: a player natural
            against a non-natural dealer hand is rewarded 1.0. Takes
            precedence over ``natural``.
    """

    def __init__(self, natural=False, sab=False):
        # Two actions: 0 = stick, 1 = hit.
        self.action_space = spaces.Discrete(2)

        # (player sum, dealer showing card, usable-ace flag)
        player_sum_space = spaces.Discrete(32)
        dealer_card_space = spaces.Discrete(11)
        usable_ace_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple(
            (player_sum_space, dealer_card_space, usable_ace_space)
        )

        self.seed()

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural

        # Flag for full agreement with the (Sutton and Barto, 2018)
        # definition. Overrides self.natural
        self.sab = sab

    def seed(self, seed=None):
        """(Re)create the random generator and return the seed used, in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Apply ``action`` (1 = hit, 0 = stick).

        Returns the tuple ``(observation, reward, done, info)``. The hands
        are created by ``reset``, so ``reset`` has to be called first.
        """
        assert self.action_space.contains(action)

        if action:
            # Hit: the player takes one more card; the episode only ends
            # (with a loss) if that card busts the hand.
            self.player.append(draw_card(self.np_random))
            if is_bust(self.player):
                return self._get_obs(), -1.0, True, {}
            return self._get_obs(), 0.0, False, {}

        # Stick: play out the dealer's hand, then compare the scores.
        while sum_hand(self.dealer) < 17:
            self.dealer.append(draw_card(self.np_random))
        reward = cmp(score(self.player), score(self.dealer))

        player_has_natural = is_natural(self.player)
        if self.sab:
            if player_has_natural and not is_natural(self.dealer):
                # Player automatically wins. Rules consistent with S&B
                reward = 1.0
        elif self.natural and player_has_natural and reward == 1.0:
            # Natural gives extra points, but doesn't autowin. Legacy
            # implementation
            reward = 1.5

        return self._get_obs(), reward, True, {}

    def _get_obs(self):
        """Build the observation from the current hands."""
        player_hand = self.player
        return (
            sum_hand(player_hand),
            self.dealer[0],
            usable_ace(player_hand),
        )

    def reset(self):
        """Deal new hands (dealer first, then player) and return the observation."""
        self.dealer = draw_hand(self.np_random)
        self.player = draw_hand(self.np_random)
        return self._get_obs()
|