• Size: 5KB
    File type: .zip
    Coins: 1
    Downloads: 0
    Date posted: 2021-06-15
  • Language: Matlab
  • Tags: decision tree  matlab

Resource Description

A Matlab decision tree implementation that supports both classification and regression, handles discrete (nominal) and continuous (numeric) attributes, and handles missing values.
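No usage example is given in the description, but from the function signature in the snippet below and the files in the archive (main.m, buildDecisionTree.m, DecisionTreePrediction.m), a call might look roughly as follows. The toy data, the feat_type convention, and the DecisionTreePrediction call are assumptions, not code from the package; the feat_type convention is inferred from the "feat_type(split_feat) == 0 -> numeric" branch in the snippet.

% Hypothetical usage sketch (not shipped in the archive); main.m presumably
% does something similar. All of the archive's .m files must be on the path.
train_data    = [5.1 1; 4.9 2; 6.3 1; 5.8 2; 6.6 1; 4.7 2];  % toy feature matrix
train_label   = [0; 0; 1; 1; 1; 0];                          % class labels
feat_type     = [0 1];                   % column 1 numeric, column 2 nominal (assumed convention)
feature_index = 1:size(train_data, 2);   % original column indices of the features
prediction_type = 0;                     % 0 = classification, 1 = regression

tree = buildDecisionTree(train_data, train_label, feat_type, ...
                         feature_index, prediction_type);

% The prediction call below is an assumption about DecisionTreePrediction.m's
% interface; check that file for the actual signature.
pred = DecisionTreePrediction(tree, [5.0 1]);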

Code Snippet and File Information

function [tree_model] = buildDecisionTree(train_data, train_label, feat_type, feature_index, prediction_type)
%prediction_type selects the prediction task:
%0 means classification and 1 means regression
num_of_feat = length(feat_type);
num_of_data = size(train_data, 1);

if (num_of_feat == 1) || (length(unique(train_label)) == 1) || num_of_data < 5
    %the type field marks the node type: 0 for a leaf node, 1 for an internal node
    tree_model.type = 0;
    if prediction_type == 0
        %set the prediction value of the leaf node
        tree_model.pre = FindMostFeat(train_label);
    else
        tree_model.pre = mean(train_label);
    end
    %set the other fields that are not used
    tree_model.split_feat = -1;
    tree_model.split_value = -1;
    tree_model.child = [];
else
    [split_feat, split_value] = FindSplitFeat(train_data, train_label, feat_type, 0);
    tree_model.split_feat = feature_index(split_feat);
    tree_model.type = 1;
    tree_model.pre = FindMostFeat(train_label);
    if feat_type(split_feat) == 0
        %numeric feature
        tree_model.split_value = split_value;
        train_set1 = train_data(train_data(:, split_feat) < split_value, :);
        train_set2 = train_data(train_data(:, split_feat) >= split_value, :);
        train_set1(:, split_feat) = [];
        train_set2(:, split_feat) = [];
        if isempty(train_set1) || isempty(train_set2)
            tree_model.type = 0;
            if prediction_type == 0
                %set the prediction value of the leaf node
                tree_model.pre = FindMostFeat(train_label);
            else
                tree_model.pre = mean(train_label);
            end
            %set the other fields that are not used
            tree_model.split_feat = -1;
            tree_model.split_value = -1;
            tree_model.child = [];
            return
        end
        train_label1 = train_label(train_data(:, split_feat) < split_value, :);
        train_label2 = train_label(train_data(:, split_feat) >= split_value, :);
        feat_type(split_feat) = [];
        feature_index(split_feat) = [];
        tree_model.child = [buildDecisionTree(train_set1, train_label1, feat_type, feature_index, prediction_type), buildDecisionTree(train_set2, train_label2, feat_type, feature_index, prediction_type)];
    else
        %nominal feature
        current_feat_value = split_value;
        num_of_feat_value = length(current_feat_value);
        tree_model.split_value = split_value;
        tree_model.child = [];
        feat_type(split_feat) = [];
        feature_index(split_feat) = [];
        tree_child_empty = 0;
        for i = 1:num_of_feat_value
            sub_train_set = train_data(train_data(:, split_feat) == current_feat_value(i), :);
            sub_train_set(:, split_feat) = [];
            if isempty(sub_train_set)
                tree_child_empty = 1;
            end
        end
        if tree_child_empty
            tree_model.type = 0;
            if prediction_type == 0
                %set the prediction value of the leaf node
                tree_model.pre = FindMostFeat(train_label);

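The snippet above cuts off mid-function and relies on a FindMostFeat helper (shipped as FindMostFeat.m, see the file list below). From the way it is used, picking a classification leaf's prediction, it presumably returns the most frequent label; a minimal majority-vote sketch of such a helper, not the author's actual file, would be:

function most = FindMostFeat(labels)
% Return the most frequent value in the label vector (majority vote).
% Sketch only; the shipped FindMostFeat.m may differ in details.
vals = unique(labels);
counts = zeros(length(vals), 1);
for i = 1:length(vals)
    counts(i) = sum(labels == vals(i));
end
[~, idx] = max(counts);
most = vals(idx);
end
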
  Attribute  Size (bytes)  Date       Time   Name
-----------  ------------  ---------- -----  ----
       File          1405  2013-10-16 16:58  DecisionTreePrediction.m
       File          3802  2013-10-16 16:57  buildDecisionTree.m
       File           371  2013-10-05 13:29  CalcEntropy.m
       File           259  2013-10-06 21:46  CalcLeaves.m
       File          1004  2013-10-16 14:18  DecisionTree.m
       File           391  2013-10-06 11:33  FindMostFeat.m
       File          4866  2013-10-06 20:57  FindSplitFeat.m
       File           532  2013-10-05 14:31  choose_split_for_numeric.m
       File          1038  2013-10-16 14:30  main.m
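
FindSplitFeat.m most likely scores candidate splits with CalcEntropy.m; given that file's small size it is presumably just the Shannon entropy of a label vector. A minimal sketch under that assumption, not the author's actual code:

function H = CalcEntropy(labels)
% Shannon entropy (in bits) of a label vector.
% Sketch only, assuming CalcEntropy.m computes standard information entropy.
vals = unique(labels);
H = 0;
for i = 1:length(vals)
    p = sum(labels == vals(i)) / length(labels);
    H = H - p * log2(p);
end
end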
