import gym

import matplotlib
import matplotlib.pyplot as plt

class LivePlot(object):
    """Live-updating matplotlib graph of a statistic loaded from gym monitor results."""

    def __init__(self, outdir, data_key='episode_rewards', line_color='blue'):
        """
        Liveplot renders a graph of either episode_rewards or episode_lengths

        Configures global pyplot state (toolbar, style, axis labels, window
        title) on the current figure as a side effect.

        Args:
            outdir (str): Monitor output file location used to populate the graph
            data_key (Optional[str]): The key in the json to graph (episode_rewards or episode_lengths).
            line_color (Optional[str]): Color of the plot.
        """
        self.outdir = outdir
        # Last series that was drawn; lets plot() skip redundant redraws.
        self._last_data = None
        self.data_key = data_key
        self.line_color = line_color

        #styling options
        matplotlib.rcParams['toolbar'] = 'None'
        plt.style.use('ggplot')
        plt.xlabel("")
        plt.ylabel(data_key)

        # Blank the window title through the figure manager.  The
        # canvas-level set_window_title() shortcut was deprecated and later
        # removed from Matplotlib, while the manager method is available on
        # both old and new releases.  A canvas without a manager has no
        # window, so there is nothing to title in that case.
        manager = getattr(plt.gcf().canvas, 'manager', None)
        if manager is not None:
            manager.set_window_title('')

    def plot(self):
        """Reload the results from ``self.outdir`` and redraw the graph if the
        series stored under ``self.data_key`` differs from the last one drawn.
        """
        results = gym.monitoring.monitor.load_results(self.outdir)
        data = results[self.data_key]

        #only update plot if data is different (plot calls are expensive)
        if data != self._last_data:
            self._last_data = data
            # NOTE: every update adds a new line to the current axes; lines
            # from earlier updates are not removed.
            plt.plot(data, color=self.line_color)

            # pause so matplotlib will display
            # may want to figure out matplotlib animation or use a different library in the future
            plt.pause(0.000001)

if __name__ == '__main__':
    # Demo: run a random agent on CartPole while live-plotting the monitor's
    # recorded episode statistics.
    env = gym.make('CartPole-v0')
    outdir = '/tmp/random-agent-results'
    env.seed(0)
    env.monitor.start(outdir, force=True)

    try:
        # You may optionally include a LivePlot so that you can see
        # how your agent is performing.  Use plotter.plot() to update
        # the graph.
        plotter = LivePlot(outdir)

        episode_count = 100
        max_steps = 200

        for _episode in range(episode_count):
            env.reset()

            for _step in range(max_steps):
                # Random policy: only the termination flag is needed here.
                _ob, _reward, done, _info = env.step(env.action_space.sample())
                if done:
                    break

                plotter.plot()
                env.render()
    finally:
        # Dump result info to disk, even if the rollout was interrupted
        # (exception or Ctrl-C), so already-recorded episodes are not lost.
        env.monitor.close()