# Doom Defend the Center with Policy Gradient (REINFORCE)

The purpose of this scenario is to teach the agent that killing monsters is GOOD and being killed by monsters is BAD. In addition, wasting ammunition is not very good either. The agent is rewarded only for killing monsters, so it has to figure out the rest for itself.

### Environment:

The map is a large circle. The player is spawned in the exact center. 5 melee-only monsters are spawned along the wall. Monsters are killed after a single shot. After dying, each monster is respawned after some time. The episode ends when the player dies.

### Actions:
 - turn left
 - turn right
 - shoot (attack)

### REWARDS:
 - +1 for killing a monster
 - -1 for death penalty

## Step 1: Import the libraries

In [None]:
import numpy as np
import random                # Handling random number generation
import time                  # Handling time calculation
import cv2

import torch
from vizdoom import *        # Doom Environment
import matplotlib.pyplot as plt
from IPython.display import clear_output
from collections import namedtuple, deque
import math

%matplotlib inline

In [None]:
import sys
sys.path.append('../../')
from algos.agents import ReinforceAgent
from algos.models import ActorCnn
from algos.preprocessing.stack_frame import preprocess_frame, stack_frame

## Step 2: Create our environment

Initialize the environment in the code cell below.


In [None]:
def create_environment():
    """Create the Doom game object and the list of actions for this notebook.

    The game is configured from the "defend the center" files but is not
    initialised here; callers run ``game.init()`` themselves.

    Returns:
        tuple: ``(game, possible_actions)``, where ``possible_actions`` is a
        list of three one-hot lists of ints, one per action.
    """
    doom = DoomGame()

    # Apply the configuration file, then point the game at the scenario file
    doom.load_config("doom_files/defend_the_center.cfg")
    doom.set_doom_scenario_path("doom_files/defend_the_center.wad")

    # One-hot encodings: [1, 0, 0], [0, 1, 0], [0, 0, 1]
    one_hot_actions = np.eye(3, dtype=int).tolist()

    return doom, one_hot_actions


game, possible_actions = create_environment()

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device: ", device)

## Step 3: Viewing our Environment

In [None]:
# Report the configured frame size and how many actions the agent can choose from
print("The size of frame is: (", game.get_screen_height(), ", ", game.get_screen_width(), ")")
# The label promises a count, so print the number of actions rather than the list itself
print("No. of Actions: ", len(possible_actions))

# Start the game just long enough to grab and display a single raw frame
game.init()
try:
    plt.figure()
    # transpose(1, 2, 0) moves the first axis to the end before plotting
    # (assumes screen_buffer is channels-first -- TODO confirm against the .cfg screen format)
    plt.imshow(game.get_state().screen_buffer.transpose(1, 2, 0))
    plt.title('Original Frame')
    plt.show()
finally:
    # Always shut the game down, even if grabbing or drawing the frame fails
    game.close()

### Execute the code cell below to play Doom with a random policy.

In [None]:
def random_play():
    """Play one episode with uniformly random actions and print the total reward.

    Uses the notebook-level ``game`` and ``possible_actions``. The game is
    initialised on entry and is always closed on exit, including when the
    episode is interrupted or an error is raised.
    """
    game.init()
    try:
        game.new_episode()
        score = 0
        while True:
            # Sample over however many actions are defined instead of a hard-coded 3
            action_index = np.random.randint(len(possible_actions))
            score += game.make_action(possible_actions[action_index])
            done = game.is_episode_finished()
            time.sleep(0.01)  # brief pause between steps
            if done:
                print("Your total score is: ", score)
                break
    finally:
        game.close()


random_play()

## Step 4: Preprocessing Frame

In [None]:
# Grab one frame, run it through the preprocessing step and display the result
game.init()
raw_frame = game.get_state().screen_buffer.transpose(1, 2, 0)
# Crop tuple and output size are passed straight to preprocess_frame
# (NOTE(review): exact meaning of the four crop values is defined by that helper -- confirm there)
processed_frame = preprocess_frame(raw_frame, (100, -12, -80, 4), 84)
plt.figure()
plt.imshow(processed_frame, cmap="gray")
game.close()
plt.title('Pre Processed image')
plt.show()

## Step 5: Stacking Frame

In [None]:
def stack_frames(frames, state, is_new=False):
    """Preprocess a raw frame and add it to the frame stack.

    Params
    ======
        frames: current frame stack; callers pass None for the first frame of an episode
        state: raw screen frame to preprocess
        is_new (bool): forwarded to ``stack_frame``; callers pass True for the first frame of an episode

    Returns the stack produced by ``stack_frame``.
    """
    # Crop tuple and output size go straight to preprocess_frame
    # (NOTE(review): the meaning of the four crop values is defined by that helper -- confirm there)
    processed = preprocess_frame(state, (100, -12, -80, 4), 84)
    return stack_frame(frames, processed, is_new)
    

## Step 6: Creating our Agent

In [None]:
# Hyperparameters handed to the agent
INPUT_SHAPE = (4, 84, 84)              # input shape given to the agent
ACTION_SIZE = len(possible_actions)    # number of available actions
SEED = 0                               # seed given to the agent
GAMMA = 0.99                           # discount factor
LR = 1e-4                              # learning rate

agent = ReinforceAgent(
    INPUT_SHAPE,
    ACTION_SIZE,
    SEED,
    device,
    GAMMA,
    LR,
    ActorCnn,
)

## Step 7: Watching untrained agent play

In [None]:

# Watch an untrained agent play a single episode
game.init()
try:
    score = 0
    state = stack_frames(None, game.get_state().screen_buffer.transpose(1, 2, 0), True)
    while True:
        action, _ = agent.act(state)
        score += game.make_action(possible_actions[action])
        done = game.is_episode_finished()
        if done:
            print("Your total score is: ", score)
            break
        # The next frame is only read while the episode is still running
        state = stack_frames(state, game.get_state().screen_buffer.transpose(1, 2, 0), False)
finally:
    # Close the game even if the episode is interrupted or an error is raised
    game.close()

## Step 8: Loading Agent
Uncomment line to load a pretrained agent

In [None]:
# Rolling window holding at most the 20 most recent episode scores
scores_window = deque([], maxlen=20)
# Score of every episode played so far
scores = []
# Training episodes are numbered starting from start_epoch + 1
start_epoch = 0

## Step 9: Train the Agent with REINFORCE

In [None]:
def train(n_episodes=1000):
    """Train the agent, calling ``agent.learn()`` once at the end of every episode.

    Uses the notebook-level ``game``, ``agent``, ``possible_actions``,
    ``start_epoch``, ``scores`` and ``scores_window``. After each episode the
    score curve is redrawn and the mean of ``scores_window`` is printed.
    The game is always closed on exit, including when training is interrupted
    or an error is raised.

    Params
    ======
        n_episodes (int): maximum number of training episodes

    Returns
    =======
        list: the notebook-level ``scores`` list, with one entry appended per episode played
    """
    game.init()
    try:
        for i_episode in range(start_epoch + 1, n_episodes + 1):
            game.new_episode()
            state = stack_frames(None, game.get_state().screen_buffer.transpose(1, 2, 0), True)
            score = 0
            while True:
                action, log_prob = agent.act(state)
                reward = game.make_action(possible_actions[action])
                done = game.is_episode_finished()
                agent.step(log_prob, reward, done)
                score += reward
                if done:
                    break
                # The next frame is only read while the episode is still running
                state = stack_frames(state, game.get_state().screen_buffer.transpose(1, 2, 0), False)
            agent.learn()
            scores_window.append(score)       # recent scores for the running average
            scores.append(score)              # full score history

            # Replace the previous plot with an up-to-date score curve
            clear_output(True)
            plt.figure()
            plt.plot(np.arange(len(scores)), scores)
            plt.ylabel('Score')
            plt.xlabel('Episode #')
            plt.show()
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
    finally:
        # Without this, an interrupted or failed run would leave the game open
        game.close()
    return scores

In [None]:
# Run training for up to 5000 episodes and keep the per-episode scores
scores = train(n_episodes=5000)

In [None]:
# Plot the score of every training episode
fig = plt.figure()
ax = fig.add_subplot(111)
episode_index = np.arange(len(scores))
ax.plot(episode_index, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

## Step 10: Watch a Smart Agent!

In [None]:
# Watch the trained agent play a single episode
game.init()
try:
    score = 0
    state = stack_frames(None, game.get_state().screen_buffer.transpose(1, 2, 0), True)
    while True:
        action, _ = agent.act(state)
        score += game.make_action(possible_actions[action])
        done = game.is_episode_finished()
        if done:
            print("Your total score is: ", score)
            break
        # The next frame is only read while the episode is still running
        state = stack_frames(state, game.get_state().screen_buffer.transpose(1, 2, 0), False)
finally:
    # Close the game even if the episode is interrupted or an error is raised
    game.close()