• 大小: 780KB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2021-05-13
  • 语言: Python
  • 标签: q-learning  

资源简介

一个用 Python 语言实现的 Q-learning 实例，供学习参考。

资源截图

代码片段和文件信息

#!/usr/bin/env python2
import random
import sys

class Agent:
    """An agent acting in a Markov Decision Process (MDP).

    Tracks the current state and selects actions either uniformly at
    random, greedily from a Q-table, or by sampling a probabilistic
    policy.  The MDP object is expected to provide: S (state list),
    A_s (state -> available actions), P (initial-state distribution as
    (state, cumulative_prob) pairs, or None), and executeAction(a, s).
    """

    def __init__(self, MDP):
        self.MDP = MDP      # environment the agent acts in
        self.state = None   # current state; set via setInitialState()

    def executeAction(self, a):
        """Execute action a in the current state; return (next_state, reward)."""
        s2, r = self.MDP.executeAction(a, self.state)
        return s2, r

    def selectRandomAction(self):
        """Return a uniformly random action available in the current state."""
        return random.choice(self.MDP.A_s[self.state])

    def selectBestActionFromQTable(self, s, Q):
        """Return a greedy action for state s according to Q-table Q.

        Ties within a small tolerance are broken uniformly at random.
        Returns the sentinel '---' when no action reaches the maximum,
        which can only happen when every Q-value is negative (the
        running maximum starts at 0.0).
        """
        # discover what is the best possible value considering
        # all possible actions for the state
        # FIXME: usar a tabela V (use the V table instead)
        maxValue = 0.0
        for a in self.MDP.A_s[self.state]:
            maxValue = max(maxValue, Q[s][a])

        # obtain all the actions whose value equals the maximum
        # FIXME: make the tolerance a parameter
        delta = 1e-10
        A = [a for a in self.MDP.A_s[self.state]
             if abs(Q[s][a] - maxValue) <= delta]

        # pick uniformly among the tied best actions
        if A:
            return random.choice(A)
        return '---'

    def selectBestActionFromProbPolicy(self, s, Pi):
        """Sample an action for state s from the probabilistic policy Pi.

        Pi[s] maps each action to its probability; zero-probability
        actions are skipped.
        """
        # FIXME: avoid recomputing the cumulative sum on every call.
        # It could be precomputed when the policy is loaded as a list
        # of (action, cumsum) pairs -- but the policy may change, so
        # the caller would have to supply Pi[s] already in that form.
        P = []
        acum = 0.0
        for a in Pi[s]:  # plain iteration works on Python 2 and 3 (was iterkeys())
            if Pi[s][a] > 0.0:
                acum = acum + Pi[s][a]
                P.append((a, acum))

        # draw a number in [0, 1) and take the first action whose
        # cumulative probability covers it
        x = random.random()
        for action, cumsum in P:
            if x <= cumsum:
                return action

        # fall back to the last action when the probabilities do not
        # quite sum to 1 due to floating-point rounding (the original
        # code left `a` unbound here)
        return P[-1][0]

    def selectBestAction(self, s, source=None, Q=None, Pi=None):
        """Dispatch action selection for state s.

        source is either 'Q-Table' (greedy over Q) or
        'Probabilistic Policy' (sample from Pi); any other value
        reports an error and aborts the program.
        """
        if source == 'Q-Table':
            return self.selectBestActionFromQTable(s, Q)
        if source == 'Probabilistic Policy':
            return self.selectBestActionFromProbPolicy(s, Pi)
        # original code built this message but never printed it
        print('ERROR: wrong source (%s)' % source)
        sys.exit(1)

    def setInitialState(self):
        """Set the current state: uniform over S, or drawn from MDP.P if given."""
        if self.MDP.P is None:
            self.state = random.choice(self.MDP.S)
        else:
            self.state = self.setInitialStateByProb()

    def setInitialStateByProb(self):
        """Draw an initial state from MDP.P, a list of (state, cumprob) pairs."""
        x = random.random()
        for state, cumsum in self.MDP.P:
            if x <= cumsum:
                return state
        # cumulative probabilities should reach 1.0; fall back to the
        # last state on floating-point shortfall (was an unbound `s`)
        return self.MDP.P[-1][0]

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2012-09-20 23:06  Q-Learning-in-Python-master\
     文件         118  2012-09-20 23:06  Q-Learning-in-Python-master\.gitignore
     文件           0  2012-09-20 23:06  Q-Learning-in-Python-master\README
     目录           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\
     文件        2812  2012-09-20 23:06  Q-Learning-in-Python-master\src\Agent.py
     文件        4312  2012-09-20 23:06  Q-Learning-in-Python-master\src\MDP.py
     文件         929  2012-09-20 23:06  Q-Learning-in-Python-master\src\PRQL-interval.sh
     文件       11108  2012-09-20 23:06  Q-Learning-in-Python-master\src\PRQLearning.py
     文件         866  2012-09-20 23:06  Q-Learning-in-Python-master\src\QL-interval.sh
     文件        4539  2012-09-20 23:06  Q-Learning-in-Python-master\src\QLearning.py
     文件        2157  2012-09-20 23:06  Q-Learning-in-Python-master\src\QabLearning.py
     文件        6588  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-PRQL.py
     文件        1072  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-PRQL.sh
     文件        4277  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-QL.py
     文件         893  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-QL.sh
     文件        1072  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL.sh
     文件        1138  2012-09-20 23:06  Q-Learning-in-Python-master\src\meanError.py
     文件         302  2012-09-20 23:06  Q-Learning-in-Python-master\src\prepareFolders.py
     目录           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\
     目录           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\
     目录           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\OOo\
     文件        9985  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\OOo\OOoLib.py
     文件           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\OOo\__init__.py
     文件           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\__init__.py
     文件        1695  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\apagaCelulasEmBranco.py
     文件          77  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\changeExtension.sh
     文件         220  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\compactaArquivos.sh
     文件         221  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\compactaArquivosSecundarios.sh
     文件          92  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\delBackupFiles.sh
     文件         188  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\delOutputFiles.sh
     文件         414  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\descompacta.sh
............此处省略63个文件信息

评论

共有 条评论