2016-05-09 22:05:56 -07:00
|
|
|
import gym
|
|
|
|
from gym import spaces
|
2016-05-29 09:07:09 -07:00
|
|
|
from gym.utils import seeding
|
2016-05-09 22:05:56 -07:00
|
|
|
|
2016-05-10 17:05:47 +02:00
|
|
|
def cmp(a, b):
    """Three-way comparison of ``a`` and ``b``.

    Returns 1 if ``a > b``, -1 if ``a < b`` and 0 otherwise. The result is
    built from the two boolean comparisons, so it is always a plain int.
    """
    return (a > b) - (a < b)
|
|
|
|
|
2016-05-09 22:05:56 -07:00
|
|
|
# Card values for one suit: 1 = Ace, 2-10 = number cards, and three extra 10s
# for Jack/Queen/King (so a 10-valued card is four times as likely as any
# other single value when drawing uniformly from this list).
deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]
|
|
|
|
|
|
|
|
|
2016-05-29 09:07:09 -07:00
|
|
|
def draw_card(np_random):
    """Draw one card value uniformly at random from ``deck``.

    ``np_random`` must provide a ``choice(sequence)`` method (the environment
    passes the generator created by ``seeding.np_random``). The drawn value is
    converted to a built-in ``int`` so that hands, sums and observations hold
    plain Python numbers rather than NumPy scalar types.
    """
    return int(np_random.choice(deck))
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
2016-05-29 09:07:09 -07:00
|
|
|
def draw_hand(np_random):
    """Return a fresh two-card hand as a list, drawn via ``draw_card``.

    The first list element is drawn first, which keeps the sequence of
    random draws (and therefore seeded reproducibility) well defined.
    """
    first_card = draw_card(np_random)
    second_card = draw_card(np_random)
    return [first_card, second_card]
|
2016-05-09 22:05:56 -07:00
|
|
|
|
|
|
|
|
|
|
|
def usable_ace(hand):
    """Does this hand have a usable ace?

    An ace (stored as 1) is usable when counting it as 11 -- i.e. adding 10
    to the raw card sum -- does not push the hand over 21.
    """
    return 1 in hand and sum(hand) + 10 <= 21
|
|
|
|
|
|
|
|
|
|
|
|
def sum_hand(hand):
    """Return the current hand total.

    The total is the raw sum of the card values, plus 10 when the hand holds
    a usable ace (so that one ace counts as 11 instead of 1).
    """
    total = sum(hand)
    return total + 10 if usable_ace(hand) else total
|
|
|
|
|
|
|
|
|
|
|
|
def is_bust(hand):
    """Is this hand a bust, i.e. does its total (per ``sum_hand``) exceed 21?"""
    return sum_hand(hand) > 21
|
|
|
|
|
|
|
|
|
|
|
|
def score(hand):
    """Return the score of this hand: its total, or 0 if the hand is bust.

    Scoring a bust as 0 lets the player and dealer hands be compared
    directly, with any non-bust hand beating a bust one.
    """
    return 0 if is_bust(hand) else sum_hand(hand)
|
|
|
|
|
|
|
|
|
|
|
|
def is_natural(hand):
    """Is this hand a natural blackjack?

    True only for a hand of exactly two cards: an ace (1) and a 10-valued
    card, in either order.
    """
    return sorted(hand) == [1, 10]
|
|
|
|
|
|
|
|
|
|
|
|
class BlackjackEnv(gym.Env):
    """Simple blackjack environment

    Blackjack is a card game where the goal is to obtain cards that sum to as
    near as possible to 21 without going over.  The player plays against a
    fixed dealer.
    Face cards (Jack, Queen, King) have point value 10.
    Aces can either count as 11 or 1, and an ace is called 'usable' when it
    counts as 11.
    This game is played with an infinite deck (or with replacement).
    The game starts with each (player and dealer) having one face up and one
    face down card.

    The player can request additional cards (hit=1) until they decide to stop
    (stick=0) or exceed 21 (bust).

    After the player sticks, the dealer reveals their facedown card, and draws
    until their sum is 17 or greater.  If the dealer goes bust the player wins.

    If neither player nor dealer busts, the outcome (win, lose, draw) is
    decided by whose sum is closer to 21.  The reward for winning is +1,
    drawing is 0, and losing is -1.

    The observation is a 3-tuple of: the player's current sum,
    the dealer's one showing card (1-10 where 1 is ace),
    and whether or not the player holds a usable ace (0 or 1).

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto (1998).
    https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
    """

    def __init__(self, natural=False):
        """Set up the spaces, seed the RNG and deal the first game.

        natural: when true, a winning natural blackjack pays 1.5 instead of 1.
        """
        # Two actions: 0 = stick, 1 = hit.
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple((
            # Player sum: can reach 31 (hitting on 21 and drawing a 10).
            spaces.Discrete(32),
            # Dealer's showing card, 1-10 (index 0 is never produced).
            spaces.Discrete(11),
            # Usable-ace flag.
            spaces.Discrete(2)))
        self._seed()

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural
        # Start the first game
        self._reset()

    def _seed(self, seed=None):
        """(Re)create the environment's random generator.

        Returns a one-element list holding the seed reported by
        ``seeding.np_random``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Apply ``action`` (1 = hit, 0 = stick) and advance the game.

        Returns the tuple ``(observation, reward, done, info)`` where ``info``
        is always an empty dict.
        """
        assert self.action_space.contains(action)
        if action:  # hit: add a card to players hand and return
            self.player.append(draw_card(self.np_random))
            if is_bust(self.player):
                done = True
                reward = -1
            else:
                done = False
                reward = 0
        else:  # stick: play out the dealers hand, and score
            done = True
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.np_random))
            # score() maps a bust to 0, so a bust dealer loses to any
            # standing player hand.
            reward = cmp(score(self.player), score(self.dealer))
            if self.natural and is_natural(self.player) and reward == 1:
                reward = 1.5
        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        """Return (player sum, dealer's first card, player usable-ace flag)."""
        return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))

    def _reset(self):
        """Deal new two-card hands (dealer first) and return the observation."""
        self.dealer = draw_hand(self.np_random)
        self.player = draw_hand(self.np_random)
        return self._get_obs()
|