(Tensorflow1.0)强化学习实现游戏AI(Demo_1)

http://blog.topspeedsnail.com/archives/10459

在学习完这篇文章后，打算循序渐进地实现俄罗斯方块AI和斗地主AI，并且突破DQN，使用对抗神经网络来实现更强大的AI。

下面的代码实现的是上面博客中的程序。过程中发现了 TensorFlow 1.0 之后的一个坑：tf.mul 函数改名了，改成了全拼的 tf.multiply。

# -*- coding: utf-8 -*-

import pygame
from pygame.locals import *
import sys
import tensorflow as tf
import cv2
import random
import numpy as np
from collections import deque
print (tf.__version__)
BLACK     = (0  ,0  ,0  )
WHITE     = (255,255,255)

SCREEN_SIZE = [320,400]
BAR_SIZE = [20, 5]
BALL_SIZE = [15, 15]
MOVE_STAY=[1,0,0]
MOVE_LEFT=[0,1,0]
MOVE_RIGHT=[0,0,1]
LEARN_RATE=0.99
INIT_ESPTION=1.0
FINAL_ESPTION=0.05
EXPLORE=50000
OBSERVE=5000
REPLAY_MEMORY=500000
BATCH=100
class Game(object):
    def __init__(self):
        pygame.init()
        self.clock = pygame.time.Clock()
        self.screen = pygame.display.set_mode(SCREEN_SIZE)
        pygame.display.set_caption('Simple Game')

        self.ball_pos_x = SCREEN_SIZE[0]//2 - BALL_SIZE[0]/2
        self.ball_pos_y = SCREEN_SIZE[1]//2 - BALL_SIZE[1]/2
        # ball移动方向
        self.ball_dir_x = -1 # -1 = left 1 = right  
        self.ball_dir_y = -1 # -1 = up   1 = down
        self.ball_pos = pygame.Rect(self.ball_pos_x, self.ball_pos_y, BALL_SIZE[0], BALL_SIZE[1])

        self.score = 0
        self.bar_pos_x = SCREEN_SIZE[0]//2-BAR_SIZE[0]//2
        self.bar_pos = pygame.Rect(self.bar_pos_x, SCREEN_SIZE[1]-BAR_SIZE[1], BAR_SIZE[0], BAR_SIZE[1])

    def bar_move_left(self):
        self.bar_pos_x = self.bar_pos_x - 2
    def bar_move_right(self):
        self.bar_pos_x = self.bar_pos_x + 2

    def run(self,action):
       # pygame.mouse.set_visible(0) # make cursor invisible

       # bar_move_left = False
        #bar_move_right = False
        while True:
            ''' for event in pygame.event.get():
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
                elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:  # 鼠标左键按下(左移)
                    bar_move_left = True
                elif event.type == pygame.MOUSEBUTTONUP and event.button == 1: # 鼠标左键释放
                    bar_move_left = False
                elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 3: #右键
                    bar_move_right = True
                elif event.type == pygame.MOUSEBUTTONUP and event.button == 3:
                    bar_move_right = False
'''
            if action == MOVE_LEFT:
                self.bar_move_left()
            if action==MOVE_RIGHT:
                self.bar_move_right()
            else :
                pass
            if self.bar_pos_x<0:
                self.bar_pos_x=0
            
            if self.bar_pos_x>SCREEN_SIZE[0]-BAR_SIZE[0]:
                self.bar_pos_x=SCREEN_SIZE[0]-BAR_SIZE[0]
            
            self.screen.fill(BLACK)
            
            self.bar_pos.left = self.bar_pos_x
            pygame.draw.rect(self.screen, WHITE, self.bar_pos)

            self.ball_pos.left += self.ball_dir_x * 2
            self.ball_pos.bottom += self.ball_dir_y * 3
            pygame.draw.rect(self.screen, WHITE, self.ball_pos)

            if self.ball_pos.top <= 0 or self.ball_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1]+1):
                self.ball_dir_y = self.ball_dir_y * -1
            if self.ball_pos.left <= 0 or self.ball_pos.right >= (SCREEN_SIZE[0]):
                self.ball_dir_x = self.ball_dir_x * -1

            reward=0
            if self.bar_pos.top <= self.ball_pos.bottom and (self.bar_pos.left < self.ball_pos.right and self.bar_pos.right > self.ball_pos.left):
                self.score += 1
                reward=1
                print("Score: ", self.score, end='\r')
            elif self.bar_pos.top <= self.ball_pos.bottom and (self.bar_pos.left > self.ball_pos.right or self.bar_pos.right < self.ball_pos.left):
                self.score=0                
                print("Game Over: ", self.score)
                
                reward=-1

            pygame.display.update()
            
            self.clock.tick(60)
            
            MyGame_image=pygame.surfarray.array3d(pygame.display.get_surface())
            return reward ,MyGame_image
        
# Number of network outputs, one per action encoding.
output = 3

# Batch of network inputs: 80 x 100 images with 4 channels.
input_image = tf.placeholder(tf.float32, shape=[None, 80, 100, 4])

# Module-level action placeholder. The training function in this file
# binds its own local name `action` and does not feed this tensor.
action = tf.placeholder(tf.float32, shape=[None, output])


def convolutional_neural_network(input_image):
    """Build the convolutional network that scores each action.

    Three VALID-padded convolutions (8x8 stride 4, 4x4 stride 2, 3x3
    stride 1) with ReLU are followed by a 784-unit ReLU layer and a linear
    layer with ``output`` units.

    Args:
        input_image: float tensor of shape ``[batch, 80, 100, 4]``; the
            flattened size used below is only valid for that spatial size.

    Returns:
        A ``[batch, output]`` tensor holding one value per action.
    """
    def _weight(shape):
        # Small random values break the symmetry between units. With
        # all-zero weights every ReLU layer outputs zero and no gradient
        # reaches anything except the final bias.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

    def _bias(shape):
        # A slightly positive bias keeps the ReLU units active at start.
        return tf.Variable(tf.constant(0.01, shape=shape))

    # 80x100 input -> 19x24 -> 8x11 -> 6x9 feature map with 64 channels.
    flat_size = 6 * 9 * 64  # == 3456

    # Variables are created in the same order as before (all weights, then
    # all biases) so their auto-generated names are unchanged.
    weights = {'w_conv1': _weight([8, 8, 4, 32]),
               'w_conv2': _weight([4, 4, 32, 64]),
               'w_conv3': _weight([3, 3, 64, 64]),
               'w_fc4': _weight([flat_size, 784]),
               'w_out': _weight([784, output])}

    biases = {'b_conv1': _bias([32]),
              'b_conv2': _bias([64]),
              'b_conv3': _bias([64]),
              'b_fc4': _bias([784]),
              'b_out': _bias([output])}

    conv1 = tf.nn.relu(tf.nn.conv2d(input_image, weights['w_conv1'], strides=[1, 4, 4, 1], padding="VALID") + biases['b_conv1'])
    conv2 = tf.nn.relu(tf.nn.conv2d(conv1, weights['w_conv2'], strides=[1, 2, 2, 1], padding="VALID") + biases['b_conv2'])
    conv3 = tf.nn.relu(tf.nn.conv2d(conv2, weights['w_conv3'], strides=[1, 1, 1, 1], padding="VALID") + biases['b_conv3'])
    conv3_flat = tf.reshape(conv3, [-1, flat_size])
    fc4 = tf.nn.relu(tf.matmul(conv3_flat, weights['w_fc4']) + biases['b_fc4'])

    output_layer = tf.matmul(fc4, weights['w_out']) + biases['b_out']
    return output_layer


def _preprocess_frame(frame):
    """Resize a raw game frame to 100x80, convert to grey and binarise it."""
    grey = cv2.cvtColor(cv2.resize(frame, (100, 80)), cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grey, 1, 255, cv2.THRESH_BINARY)
    return binary


def train_neural_network(imput_image, checkpoint_path='C:\\Users\\hasee\\game.cpk'):
    """Play the game forever while training the network from replayed steps.

    Each step picks an action epsilon-greedily, advances the game by one
    frame, stores the transition, and (once more than ``OBSERVE`` steps
    have been taken) runs one optimisation step on a random minibatch.
    A checkpoint is written every 10000 steps. The loop never returns.

    Args:
        imput_image: input placeholder the network is built on. The
            spelling of the parameter name is kept for compatibility.
        checkpoint_path: path prefix handed to ``tf.train.Saver.save``;
            defaults to the location that used to be hard-coded.
    """
    # Use the argument that was passed in rather than the module global.
    input_image = imput_image
    predict_action = convolutional_neural_network(input_image)

    argmax = tf.placeholder("float", [None, output])
    gt = tf.placeholder("float", [None])

    # Network output for the action that was actually taken.
    action = tf.reduce_sum(tf.multiply(predict_action, argmax), axis=1)
    cost = tf.reduce_mean(tf.square(action - gt))
    # NOTE(review): Adadelta with a 1e-6 learning rate takes very small
    # steps; confirm this optimiser/rate combination is intended.
    optimizer = tf.train.AdadeltaOptimizer(1e-6).minimize(cost)

    game = Game()
    # A bounded deque discards the oldest transition automatically.
    D = deque(maxlen=REPLAY_MEMORY)

    _, image = game.run(MOVE_STAY)
    first_frame = _preprocess_frame(image)
    # The initial state is the first frame repeated in all four channels.
    input_image_data = np.stack((first_frame, first_frame, first_frame, first_frame), axis=2)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        n = 0

        epsilon = INIT_ESPTION
        while True:
            action_t = predict_action.eval(feed_dict={input_image: [input_image_data]})[0]

            argmax_t = np.zeros([output], dtype=int)

            # Explore with probability epsilon, otherwise act greedily.
            if random.random() <= epsilon:
                maxIndex = random.randrange(output)
            else:
                maxIndex = np.argmax(action_t)

            argmax_t[maxIndex] = 1

            if epsilon > FINAL_ESPTION:
                epsilon -= (INIT_ESPTION - FINAL_ESPTION) / EXPLORE

            # tolist() yields plain ints so the action compares cleanly
            # against the MOVE_* lists inside Game.run.
            reward, image = game.run(argmax_t.tolist())
            new_frame = np.reshape(_preprocess_frame(image), (80, 100, 1))

            # The newest frame goes in channel 0, followed by the three
            # most recent frames of the previous state.
            next_image_data = np.append(new_frame, input_image_data[:, :, 0:3], axis=2)

            D.append((input_image_data, argmax_t, reward, next_image_data))

            if n > OBSERVE:
                minibatch = random.sample(D, BATCH)
                input_image_data_batch = [d[0] for d in minibatch]
                argmax_batch = [d[1] for d in minibatch]
                reward_batch = [d[2] for d in minibatch]
                next_image_data_batch = [d[3] for d in minibatch]

                out_batch = predict_action.eval(feed_dict={input_image: next_image_data_batch})

                # Target = reward + LEARN_RATE * best output for the next state.
                gt_batch = np.asarray(reward_batch, dtype=np.float32) + LEARN_RATE * np.max(out_batch, axis=1)

                optimizer.run(feed_dict={gt: gt_batch, argmax: argmax_batch, input_image: input_image_data_batch})

            input_image_data = next_image_data
            n = n + 1

            if n % 10000 == 0:
                saver.save(sess, checkpoint_path, global_step=n)

                print(n, "epsilon:", epsilon, " ", "action:", maxIndex, " ", "reward: ", reward)
                
# Build the graph on the module-level placeholder and start training.
# The training loop is a `while True` with no exit, so this call does not return.
train_neural_network(input_image)
原文链接：https://blog.csdn.net/weixin_36368407/article/details/55518652
你可能也喜欢