mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-09-01 02:17:19 +00:00
107 lines
3.6 KiB
Python
107 lines
3.6 KiB
Python
![]() |
import gym
|
||
|
import random
|
||
|
from gym import spaces
|
||
|
|
||
|
# Card values for one suit: the ace is 1, number cards are worth their face
# value, and Jack/Queen/King are each worth 10 (so 10 appears four times).
deck = list(range(1, 10)) + [10] * 4
|
||
|
|
||
|
|
||
|
def draw_card():
    """Return one card value picked at random from the module-level ``deck``.

    The deck itself is never modified, so every draw is made with
    replacement.
    """
    card = random.choice(deck)
    return card
|
||
|
|
||
|
|
||
|
def draw_hand():
    """Return a fresh two-card hand as a list of card values."""
    return [draw_card() for _ in range(2)]
|
||
|
|
||
|
|
||
|
def usable_ace(hand):
    """Tell whether the hand holds an ace that may count as 11.

    An ace is "usable" when the hand contains a 1 and adding the extra 10
    points (ace as 11 instead of 1) does not push the total above 21.
    """
    if 1 not in hand:
        return False
    return sum(hand) + 10 <= 21
|
||
|
|
||
|
|
||
|
def sum_hand(hand):
    """Return the hand's total, counting a usable ace as 11."""
    total = sum(hand)
    # A usable ace is worth 11 rather than 1, i.e. ten extra points.
    return total + 10 if usable_ace(hand) else total
|
||
|
|
||
|
|
||
|
def is_bust(hand):
    """Return True when the hand's total exceeds 21."""
    total = sum_hand(hand)
    return total > 21
|
||
|
|
||
|
|
||
|
def score(hand):
    """Return the hand's total, or 0 when the hand is bust."""
    if is_bust(hand):
        return 0
    return sum_hand(hand)
|
||
|
|
||
|
|
||
|
def is_natural(hand):
    """Return True when the hand is exactly an ace plus a ten-valued card."""
    ordered = list(hand)
    ordered.sort()
    return ordered == [1, 10]
|
||
|
|
||
|
|
||
|
class BlackjackEnv(gym.Env):
    """Simple blackjack environment

    Blackjack is a card game where the goal is to obtain cards that sum to as
    near as possible to 21 without going over.  The player is playing against
    a fixed dealer.
    Face cards (Jack, Queen, King) have point value 10.
    Aces can either count as 11 or 1, and it's called 'usable' at 11.
    This game is played with an infinite deck (or with replacement).
    The game starts with each (player and dealer) having one face up and one
    face down card.

    The player can request additional cards (hit=1) until they decide to stop
    (stick=0) or exceed 21 (bust).

    After the player sticks, the dealer reveals their facedown card, and draws
    until their sum is 17 or greater.  If the dealer goes bust the player wins.

    If neither player nor dealer busts, the outcome (win, lose, draw) is
    decided by whose sum is closer to 21.  The reward for winning is +1,
    drawing is 0, and losing is -1.

    The observation is a 3-tuple of: the player's current sum,
    the dealer's one showing card (1-10 where 1 is ace),
    and whether or not the player holds a usable ace (0 or 1).

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto (1998).
    https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
    """

    def __init__(self, natural=False):
        """Create the spaces and deal the first game.

        Args:
            natural: when true, a winning "natural" blackjack (an ace plus a
                ten-valued card) pays 1.5 instead of 1.
        """
        self.action_space = spaces.Discrete(2)
        # Components follow the order returned by _get_obs():
        # (player sum, dealer's showing card, usable-ace flag).
        self.observation_space = spaces.Tuple((
            spaces.Discrete(32),
            spaces.Discrete(11),
            spaces.Discrete(2)))
        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural
        # Start the first game
        self._reset()

    def _step(self, action):
        """Apply one action (1 = hit, 0 = stick).

        Returns:
            A tuple ``(observation, reward, done, info)`` where ``info`` is
            always an empty dict.
        """
        assert self.action_space.contains(action)
        if action:  # hit: add a card to players hand and return
            self.player.append(draw_card())
            if is_bust(self.player):
                done = True
                reward = -1
            else:
                done = False
                reward = 0
        else:  # stick: play out the dealers hand, and score
            done = True
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card())
            player_score = score(self.player)
            dealer_score = score(self.dealer)
            # Three-way comparison yielding +1 / 0 / -1.  The cmp() builtin
            # was removed in Python 3, so spell the comparison out.
            reward = (int(player_score > dealer_score)
                      - int(player_score < dealer_score))
            if self.natural and is_natural(self.player) and reward == 1:
                reward = 1.5
        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        """Return (player sum, dealer's first card, player usable-ace flag)."""
        return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))

    def _reset(self):
        """Deal new two-card hands to dealer and player; return the observation."""
        self.dealer = draw_hand()
        self.player = draw_hand()
        return self._get_obs()
|