Resource Description

A Python implementation of MultiBoost.

Code Snippet and File Information

__author__ = 'Thesharing'

import math
import random
import numpy as np
import pandas
from sklearn.base import clone, ClassifierMixin, RegressorMixin
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import accuracy_score


class MultiBoostClassifier:
    def __init__(self, base_estimator=DecisionTreeClassifier(), n_estimators=10):

        pandas.options.mode.chained_assignment = None  # suppress pandas' SettingWithCopyWarning

        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.beta = np.empty(0)
        self.error_rate = np.empty(0)
        self.data_size = 0
        self.y_range = np.empty(0)
        self._estimators = np.empty(0)
        self._class_num = 0

    def fit(self, X: pandas.DataFrame, y):
        # Initialization
        self._estimators = np.empty(self.n_estimators, dtype=object)  # object dtype holds the fitted base estimators
        self.beta = np.empty([self.n_estimators])
        self.error_rate = np.empty([self.n_estimators])
        self.data_size = X.shape[0]
        if isinstance(y, pandas.DataFrame):
            y = y.values.reshape((-1))
        self._get_y_dict(y)
        y = self._transform_y(y)
        self.y_range = np.unique(y)
        # S' = S with instance weights assigned to be 1.
        sample_X, sample_y, sample_weight = self._reset_sample_and_standardize(X, y, mode='uniform')
        # set k = 1
        k = 1
        # for t = 1 to T {
        for t in range(self.n_estimators):
            # if Ik = t then
            if self._get_iteration_number(k) == t:
                # reset S' to random weights drawn from the continuous Poisson distribution.
                # standardize S' to sum to n.
                sample_X, sample_y, sample_weight = self._reset_sample_and_standardize(X, y)
                # increment k
                k = k + 1
            # Ct = baseLearn(S‘)
            temp = clone(self.base_estimator).fit(sample_X, sample_y, sample_weight)
            self._estimators[t] = temp
            estimate_y = temp.predict(sample_X)
            self.error_rate[t] = 1 - accuracy_score(sample_y, estimate_y, sample_weight=sample_weight)
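            # An error rate above 1 - 1/K (K classes) is no better than random
            # guessing, so resample with fresh weights and retrain this member.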
            while self.error_rate[t] > 1 - 1. / self._class_num:
                # reset S' to random weights drawn from the continuous Poisson distribution.
                # standardize S' to sum to n.
                sample_X, sample_y, sample_weight = self._reset_sample_and_standardize(X, y)
                # increment k
                k = k + 1
                # go to step 8
                temp.fit(sample_X, sample_y, sample_weight)
                self._estimators[t] = temp
                estimate_y = temp.predict(sample_X)
                self.error_rate[t] = 1 - accuracy_score(sample_y, estimate_y, sample_weight=sample_weight)
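
The listing above is cut off before the helper methods it calls (_reset_sample_and_standardize, _get_iteration_number, and the remainder of fit) appear. A rough standalone sketch of the two steps the comments describe is given below: drawing instance weights from the continuous Poisson (exponential) distribution and standardizing them to sum to n, and picking the iteration index I_k at which the k-th weight reset happens. The names continuous_poisson_weights and iteration_number are hypothetical, and the I_k schedule follows the common MultiBoosting choice of splitting T iterations into ceil(sqrt(T)) sub-committees, which may differ from what the original file does.

import math

import numpy as np


def continuous_poisson_weights(n, random_state=None):
    # Hypothetical helper: wagging-style instance weights. The continuous
    # Poisson distribution is the exponential distribution with mean 1,
    # i.e. -log(U) for U ~ Uniform(0, 1].
    rng = np.random.default_rng(random_state)
    weights = rng.exponential(scale=1.0, size=n)
    # Standardize the weights so that they sum to n.
    return weights * n / weights.sum()


def iteration_number(k, n_estimators):
    # Hypothetical helper: iteration index I_k of the k-th weight reset,
    # assuming the T iterations are split into ceil(sqrt(T)) sub-committees
    # of roughly equal size.
    n = math.ceil(math.sqrt(n_estimators))
    return n_estimators if k > n else math.ceil(k * n_estimators / n)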

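For reference, a usage sketch in the usual scikit-learn style, assuming the complete class from the full file (including a predict method that is not part of the truncated snippet above); the Iris data is purely illustrative:

import pandas
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = load_iris()
X = pandas.DataFrame(iris.data, columns=iris.feature_names)
X_train, X_test, y_train, y_test = train_test_split(
    X, iris.target, test_size=0.3, random_state=0)

clf = MultiBoostClassifier(n_estimators=10)  # class from the snippet above
clf.fit(X_train, y_train)
# predict is assumed to exist in the full implementation.
print(accuracy_score(y_test, clf.predict(X_test)))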