谁能帮忙帮下面这个Adaboost算法的matlab代码加个详细注释
发布网友
发布时间:2023-06-14 19:15
我来回答
共1个回答
热心网友
时间:2024-05-30 16:40
clear all
clc
tr_n=200; %the population of the train set
te_n=200; %the population of the test set
weak_learner_n=20; %the population of the weak_learner
tr_set=[1,5;2,3;3,2;4,6;4,7;5,9;6,5;6,7;8,5;8,8];
te_se=[1,5;2,3;3,2;4,6;4,7;5,9;6,5;6,7;8,5;8,8];
tr_labels=[2,2,1,1,2,2,1,2,1,1];
te_labels=[2,2,1,1,2,2,1,2,1,1];
figure;
subplot(2,2,1);
hold on;axis square;
indices=tr_labels==1;
plot(tr_set(indices,1),tr_set(indices,2),'b*');
indices=~indices;
plot(tr_set(indices,1),tr_set(indices,2),'r*');
title('Training set');
subplot(2,2,2);
hold on;axis square;
indices=te_labels==1;
plot(te_set(indices,1),te_set(indices,2),'b*')3
;
indices=~indices;
plot(te_set(indices,1),te_set(indices,2),'r*');
title('Training set');
% Training and testing error rates
tr_error=zeros(1,weak_learner_n);
te_error=zeros(1,weak_learner_n);
for i=1:weak_learner_n
adaboost_model=adaboost_tr(@threshold_tr,@threshold_te,tr_set,tr_labels,i);
第 1 页
无标题
[L_tr,hits_tr]=adaboost_te(adaboost_model,@threshold_te,te_set,te_labels);
tr_error(i)=(tr_n-hits_tr)/tr_n;
[L_te,hits_te]=adaboost_te(adaboost_model,@threshold_te,te_set,te_labels);
te_error(i)=(te_n-hits_te)/te_n;
end
subplot(2,2,3);
plot(1:weak_learner_n,tr_error);
axis([1,weak_learner_n,0,1]);
title('Training Error');
xlabel('weak classifier number');
ylabel('error rate');
grid on;
subplot(2,2,4);axis square;
plot(1:weak_learner_n,te_error);
axis([1,weak_learner_n,0,1]);
title('Testing Error');
xlabel('weak classifier number');
ylabel('error rate');
grid on;
这里需要另外分别撰写两个函数,其中一个为生成adaboost模型的训练函数,另外为测试测
试样本的测试函数。代码如下:
function
adaboost_model=adaboost_tr(tr_func_handle,te_func_handle,train_set,labels,no_of_
hypothesis)
% 训练函数
adaboost_model = struct('weights',zeros(1,no_of_hypothesis),...
'parameters',[]); %cell(1,no_of_hypothesis));
sample_n = size(train_set,1);
samples_weight = ones(sample_n,1)/sample_n;
for turn=1:no_of_hypothesis
adaboost_model.parameters{turn} =
tr_func_handle(train_set,samples_weight,labels);
[L,hits,error_rate] = te_func_handle(adaboost_model.parameters{turn},...
train_set,samples_weight,labels);
if(error_rate==1)
error_rate=1-eps;
elseif(error_rate==0)
error_rate=eps;
end
% The weight of the turn-th weak classifier
adaboost_model.weights(turn) = log10((1-error_rate)/error_rate);
C=likelihood2class(L);
t_labeled=(C==labels); % true labeled samples
% Importance of the true classified samples is decreased for the next weak
classifier
samples_weight(t_labeled) = samples_weight(t_labeled)*...
((error_rate)/(1-error_rate));
% Normalization
第 2 页
无标题
samples_weight = samples_weight/sum(samples_weight);
end
% Normalization
adaboost_model.weights=adaboost_model.weights/sum(adaboost_model.weights);
-------------
function [L,hits]=adaboost_te(adaboost_model,te_func_handle,test_set,...
true_labels)
%测试函数
hypothesis_n=length(adaboost_model.weights);
sample_n=size(test_set,1);
class_n=length(unique(true_labels));
temp_L=zeros(sample_n,class_n,hypothesis_n);
for i=1:hypothesis_n
[temp_L(:,:,i),hits,error_rate]=te_func_handle(adaboost_model.parameters{i},...
test_set,ones(sample_n,1),true_labels);
temp_L(:,:,i)=temp_L(:,:,i)*adaboost_model.weights(i);
end
L=sum(temp_L,3);
hits=sum(likelihood2class(L)==true_labels);
-------
其中上面函数由于体积太大,另外还需要分别撰写两个阈值函数和一个隶属分配函数。
function model=threshold_tr(train_set,sample_weights,labels)
% 训练阈值函数
model=struct('min_error',[],'min_error_thr',[],'pos_neg',[],'dim',[]);
sample_n=size(train_set,1);
min_error=sum(sample_weights);
min_error_thr=0;
pos_neg='pos';
% for each dimension
for dim=1:size(train_set,2)
sorted=sort(train_set(:,dim),1,'ascend');
% for each interval in the specified dimension
for i=1:(sample_n+1)
if(i==1)
thr=sorted(1)-0.5;
elseif(i==sample_n+1)
thr=sorted(sample_n)+0.5;
else
thr=(sorted(i-1)+sorted(i))/2;
end
ind1=train_set(:,dim)<thr;
ind2=~ind1;
tmp_err=sum(sample_weights((labels.*ind1)==2))+sum(sample_weights((labels.*ind2)
==1));
if(tmp_err<min_error)
第 3 页
无标题
min_error=tmp_err;
min_error_thr=thr;
pos_neg='pos';
model.dim=dim;
end
ind1=train_set(:,dim)<thr;
ind2=~ind1;
tmp_err=sum(sample_weights((labels.*ind1)==1))+sum(sample_weights((labels.*ind2)
==2));
if(tmp_err<min_error)
min_error=tmp_err;
min_error_thr=thr;
pos_neg='neg';
model.dim=dim;
end
end
end
model.min_error=min_error;
model.min_error_thr=min_error_thr;
model.pos_neg=pos_neg;
function
[L,hits,error_rate]=threshold_te(model,test_set,sample_weights,true_labels)
% 测试阈值函数
feat=test_set(:,model.dim);
if(strcmp(model.pos_neg,'pos'))
ind=(feat>model.min_error_thr)+1;
else
ind=(feat<model.min_error_thr)+1;
end
hits=sum(ind==true_labels);
error_rate=sum(sample_weights(ind~=true_labels));
L=zeros(length(feat),2);
L(ind==1,1)=1;
L(ind==2,2)=1;
function classes=likelihood2class(likelihoods)
% 隶属分配函数
[sample_n,class_n] = size(likelihoods);
maxs = (likelihoods==repmat(max(likelihoods,[],2),[1,class_n]));
classes=zeros(sample_n,1);
for i=1:sample_n
classes(i) = find(maxs(i,:),1);
end