• 大小: 19KB
    文件类型: .rar
    金币: 1
    下载: 0 次
    发布日期: 2021-06-02
  • 语言: 其他
  • 标签:

资源简介

决策树算法(ID3和C45),两个算法分开写的,包含有数据集。

资源截图

代码片段和文件信息

#-*-coding:utf-8-*-
from numpy import *
import math
import copy
import cPickle as pickle

class C45DTree(object):

    def __init__(self):
        self.tree={}
        self.dataSet=[]
        self.labels=[]
    #数据导入函数
    def loadDataSet(selfpathlabels):
        recordlist=[]
        fp=open(path“rb“)
        content=fp.read()
        fp.close()
        rowlist=content.splitlines()
        recordlist=[row.split(“\t“) for row in rowlist if row.strip()]
        self.dataSet=recordlist
        self.labels=labels
    #执行决策树函数
    def train(self):
            labels=copy.deepcopy(self.labels)
            self.tree=self.buildTree(self.dataSetlabels)

    # Build the decision tree recursively
    def buildTree(selfdataSetlabels):
        cateList=[data[-1] for data in dataSet]
        if cateList.count(cateList[0])==len(cateList):
            return cateList[0]
        if len(dataSet[0])==1:
            return self.maxCate(cateList)
        #算法核心

        bestFeatfeatValueList=self.getBestFeat(dataSet)
        bestFeatLabel=labels[bestFeat]
        tree={bestFeatLabel:{}}
        del(labels[bestFeat])
        for value in featValueList:
            sublabels=labels[:]
            #按最优特征列和值分割数据集
            splitDataset=self.splitDataSet(dataSetbestFeatvalue)
            subTree=self.buildTree(splitDatasetsublabels)
            tree[bestFeatLabel][value]=subTree
        return tree
    #计算出现次数最多的类别标签
    def maxCate(selfcatelist):
        items=dict([(catelist.count(i)i) for i in catelist])
        return items[max(items.keys())]
    # Select the best splitting feature (highest information gain ratio)
    def getBestFeat(selfdataSet):
        Num_Feats=len(dataSet[0][:-1])
        totality=len(dataSet)
        baseEntropy=self.computeEntropy(dataSet)
        ConditionEntroy=[]
        slpitInfo=[]
        allFeatVList=[]
        for f in xrange(Num_Feats):
            featList=[example[f] for example in dataSet]
            [splitIfeatureValueList]=self.computeSplitInfo(featList)
            allFeatVList.append(featureValueList)
            slpitInfo.append(splitI)
            resultGain=0.0
            for value in featureValueList:
                subSet=self.splitDataSet(dataSetfvalue)
                appearNum=float(len(subSet))
                subEntropy=self.computeEntropy(subSet)
                resultGain+=(appearNum/totality)*subEntropy
            ConditionEntroy.append(resultGain)
        infoGainArray=baseEntropy*ones(Num_Feats)-array(ConditionEntroy)
        infoGainRatio=infoGainArray/array(slpitInfo)
        bestFeatureIndex=argsort(-infoGainRatio)[0]
        return bestFeatureIndexallFeatVList[bestFeatureIndex]

    #计算划分信息
    def computeSplitInfo(selffeatureVList):
        numEntries=len(featureVList)
        featureVauleSetList=list(set(featureVList))
        valueCounts=[featureVList.count(featVec) for featVec in featureVauleSetList]
        pList=[float(item)/n

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        398  2018-04-22 20:45  决策树(ID3和C45)\C45DTree\.idea\C4.5DTree.iml

     文件        213  2018-04-22 20:45  决策树(ID3和C45)\C45DTree\.idea\misc.xml

     文件        782  2018-04-23 09:02  决策树(ID3和C45)\C45DTree\.idea\modules.xml

     文件      30492  2018-04-23 10:05  决策树(ID3和C45)\C45DTree\.idea\workspace.xml

     文件       5446  2018-04-23 09:21  决策树(ID3和C45)\C45DTree\C45DTree.py

     文件       4963  2018-04-23 09:21  决策树(ID3和C45)\C45DTree\C45DTree.pyc

     文件        807  2018-04-25 18:12  决策树(ID3和C45)\C45DTree\C45Test.py

     文件        303  2018-04-22 21:22  决策树(ID3和C45)\C45DTree\C45Train.py

     文件        195  2018-04-25 18:19  决策树(ID3和C45)\C45DTree\data.tree

     文件       5066  2018-04-25 18:05  决策树(ID3和C45)\C45DTree\dataset.dat

     文件         54  2018-04-23 09:26  决策树(ID3和C45)\C45DTree\test_data.dat

     文件        455  2018-04-25 18:05  决策树(ID3和C45)\ID3DTree\.idea\ID3DTree.iml

     文件        213  2018-04-22 19:58  决策树(ID3和C45)\ID3DTree\.idea\misc.xml

     文件        405  2018-04-25 18:05  决策树(ID3和C45)\ID3DTree\.idea\modules.xml

     文件      26132  2018-04-25 19:31  决策树(ID3和C45)\ID3DTree\.idea\workspace.xml

     文件        195  2018-04-25 18:16  决策树(ID3和C45)\ID3DTree\data.tree

     文件       5066  2018-04-25 18:04  决策树(ID3和C45)\ID3DTree\dataset.dat

     文件       3887  2018-04-22 20:43  决策树(ID3和C45)\ID3DTree\ID3DTree.py

     文件       4232  2018-04-22 20:43  决策树(ID3和C45)\ID3DTree\ID3DTree.pyc

     文件        806  2018-04-25 18:16  决策树(ID3和C45)\ID3DTree\ID3Test.py

     文件        304  2018-04-25 17:59  决策树(ID3和C45)\ID3DTree\ID3Train.py

     目录          0  2018-05-01 21:36  决策树(ID3和C45)\C45DTree\.idea

     目录          0  2018-05-01 21:36  决策树(ID3和C45)\ID3DTree\.idea

     目录          0  2018-05-01 21:36  决策树(ID3和C45)\C45DTree

     目录          0  2018-05-01 21:36  决策树(ID3和C45)\ID3DTree

     目录          0  2018-05-01 21:36  决策树(ID3和C45)

----------- ---------  ---------- -----  ----

                90414                    26


评论

共有 条评论