资源简介

Playing Flappy Bird Using Deep Reinforcement Learning (Based on Deep Q Learning DQN)

资源截图

代码片段和文件信息

# -----------------------------
# File: Deep Q-Learning Algorithm
# Author: Flood Sung
# Date: 2016.3.21
# -----------------------------

import tensorflow as tf 
import numpy as np 
import random
from collections import deque 

# Hyper Parameters:
# NOTE(review): the scrape rendered this as `frame_PER_ACTION`; every other
# constant here is UPPER_SNAKE_CASE, so the original name is restored.
FRAME_PER_ACTION = 1        # number of frames each selected action is held for
GAMMA = 0.99                # decay rate (discount factor) of past observations
OBSERVE = 100.              # timesteps to observe before training starts
EXPLORE = 200000.           # frames over which to anneal epsilon
FINAL_EPSILON = 0           # 0.001 # final value of epsilon
INITIAL_EPSILON = 0         # 0.01 # starting value of epsilon
REPLAY_MEMORY = 50000       # number of previous transitions to remember
BATCH_SIZE = 32             # size of minibatch sampled from replay memory
UPDATE_TIME = 100           # steps between copies of the online net into the target net

# Compatibility shim: `tf.mul` was removed in TensorFlow 1.0 in favour of
# `tf.multiply`.  Alias the old name so the legacy DQN code keeps working on
# newer TensorFlow versions.
try:
    tf.mul
except AttributeError:
    # Catch only the expected failure (missing attribute) instead of a bare
    # `except:`, which would also hide KeyboardInterrupt and real bugs.
    tf.mul = tf.multiply

class BrainDQN:

def __init__(self, actions):
    """Build the online and target Q-networks and restore any saved weights.

    Args:
        actions: number of discrete actions the agent can take (size of the
            Q-value output layer).

    NOTE(review): the scraped source had all commas stripped and curly
    quotes substituted; this reconstruction restores the original syntax
    without changing behavior.
    """
    # Replay memory holds past (state, action, reward, ...) transitions.
    self.replayMemory = deque()
    # Training bookkeeping.
    self.timeStep = 0
    self.epsilon = INITIAL_EPSILON
    self.actions = actions

    # Online Q-network: input placeholder, Q-value output, and all weights.
    (self.stateInput, self.QValue,
     self.W_conv1, self.b_conv1,
     self.W_conv2, self.b_conv2,
     self.W_conv3, self.b_conv3,
     self.W_fc1, self.b_fc1,
     self.W_fc2, self.b_fc2) = self.createQNetwork()

    # Target Q-network: identical architecture, separate variables.
    (self.stateInputT, self.QValueT,
     self.W_conv1T, self.b_conv1T,
     self.W_conv2T, self.b_conv2T,
     self.W_conv3T, self.b_conv3T,
     self.W_fc1T, self.b_fc1T,
     self.W_fc2T, self.b_fc2T) = self.createQNetwork()

    # Ops that copy every online-network variable into the target network.
    self.copyTargetQNetworkOperation = [
        self.W_conv1T.assign(self.W_conv1), self.b_conv1T.assign(self.b_conv1),
        self.W_conv2T.assign(self.W_conv2), self.b_conv2T.assign(self.b_conv2),
        self.W_conv3T.assign(self.W_conv3), self.b_conv3T.assign(self.b_conv3),
        self.W_fc1T.assign(self.W_fc1), self.b_fc1T.assign(self.b_fc1),
        self.W_fc2T.assign(self.W_fc2), self.b_fc2T.assign(self.b_fc2),
    ]

    self.createTrainingMethod()

    # Saving and loading networks.  tf.initialize_all_variables() is
    # deprecated in TF >= 1.0 (tf.global_variables_initializer()), but is
    # kept here to match the file's TF-0.x-era API usage.
    self.saver = tf.train.Saver()
    self.session = tf.InteractiveSession()
    self.session.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        self.saver.restore(self.session, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")


def createQNetwork(self):
# network weights
W_conv1 = self.weight_variable([88432])
b_conv1 = self.bias_variable([32])

W_conv2 = self.weight_variable([443264])
b_conv2 = self.bias_variable([64])

W_conv3 = self.weight_variable([336464])
b_conv3 = self.bias_variable([64])

W_fc1 = self.weight_variable([1600512])
b_fc1 = self.bias_variable([512])

W_fc2 = self.weight_variable([512self.actions])
b_fc2 = self.bias_variable([self.actions])

# input layer

stateInput = tf.placeholder(“float“[None80804])

# hidden layers
h_conv1 = tf.nn.relu(self.conv2d(stateInputW_conv14) + b_conv1)
h_pool1 = self.max_pool_2x2(h_conv1)


 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-06-14 11:41  DRL-FlappyBird-master\
     目录           0  2018-06-14 11:41  DRL-FlappyBird-master\assets\
     目录           0  2018-06-14 11:41  DRL-FlappyBird-master\assets\audio\
     文件       17483  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\die.ogg
     文件      194894  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\die.wav
     文件       15670  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\hit.ogg
     文件       96590  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\hit.wav
     文件       13235  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\point.ogg
     文件      177486  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\point.wav
     文件       13697  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\swoosh.ogg
     文件      354638  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\swoosh.wav
     文件        7728  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\wing.ogg
     文件       29902  2017-12-04 03:06  DRL-FlappyBird-master\assets\audio\wing.wav
     目录           0  2018-06-14 11:41  DRL-FlappyBird-master\assets\sprites\
     文件        2879  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\0.png
     文件        2868  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\1.png
     文件        2888  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\2.png
     文件        2877  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\3.png
     文件        2898  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\4.png
     文件        2888  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\5.png
     文件        2885  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\6.png
     文件        2896  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\7.png
     文件        2878  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\8.png
     文件        2892  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\9.png
     文件        4030  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\background-black.png
     文件         664  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\base.png
     文件        5042  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\pipe-green.png
     文件        2948  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\redbird-downflap.png
     文件        2949  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\redbird-midflap.png
     文件        2944  2017-12-04 03:06  DRL-FlappyBird-master\assets\sprites\redbird-upflap.png
     文件        6883  2017-12-04 03:06  DRL-FlappyBird-master\BrainDQN_Nature.py
............此处省略15个文件信息

评论

共有 条评论