前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >朴素贝叶斯和LDA分类规则产生显著较差的最小值。

朴素贝叶斯和LDA分类规则产生显著较差的最小值。

作者头像
裴来凡
发布2022-05-28 15:38:48
1640
发布2022-05-28 15:38:48
举报

Q1.m

代码语言:javascript
复制
clear all; close all; clc;

n = 2;      % # of dimensions
N = 10000;  % # of samples

% Class mean and covariance
mu(:,1) = [-0.1;0]; Sigma(:,:,1) = [1 -0.9;-0.9 1];
mu(:,2) = [0.1;0];  Sigma(:,:,2) = [1 0.9; 0.9 1];

% Class priors and true labels
p = [0.8,0.2]; 
label = rand(1,N) >= p(1);
Nc = [sum(label==0),sum(label==1)];

% Draw samples from each class pdf
x = zeros(n,N);
x(:,label==0) = mvnrnd(mu(:,1),Sigma(:,:,1),Nc(1))';
x(:,label==1) = mvnrnd(mu(:,2),Sigma(:,:,2),Nc(2))';

% Plot true class labels
figure(1);
plot(x(1,label==0),x(2,label==0),'o',x(1,label==1),x(2,label==1),'+');
title('Class 0 and Class 1 True Class Labels')
xlabel('x_1'), ylabel('x_2')
legend('Class 0','Class 1')

%% ======================== Question 1: Part 1 ======================== %%
% Calculate discriminant scores and tau
discriminantScore = log(evalGaussian(x,mu(:,2),Sigma(:,:,2))./evalGaussian(x,mu(:,1),Sigma(:,:,1)));
tau = log(sort(discriminantScore(discriminantScore >= 0)));

% Find midpoints of tau to use as threshold values
mid_tau = [tau(1)-100 tau(1:end-1) + diff(tau)./2 tau(length(tau))+100];

% Make decision for every threshold and calculate error values
for i = 1:length(mid_tau)
    decision = (discriminantScore >= mid_tau(i));
    pFA(i) = sum(decision==1 & label==0)/Nc(1); % False alarm prob.
    pCD(i) = sum(decision==1 & label==1)/Nc(2); % Correct detection prob.
    pE(i) = pFA(i)*p(1)+(1-pCD(i))*p(2);        % Total error prob.
end

% Find minimum error and corresponding threshold
[min_error,min_index] = min(pE);
min_decision = (discriminantScore >= mid_tau(min_index));
min_FA = pFA(min_index); min_CD = pCD(min_index);

% Find theoretical minimum error(threshold calculated using class priors)
ideal_decision = (discriminantScore >= log(p(1)/p(2)));
ideal_pFA = sum(ideal_decision==1 & label==0)/Nc(1); % False alarm
ideal_pCD = sum(ideal_decision==1 & label==1)/Nc(2); % Correct detection
ideal_error = ideal_pFA*p(1)+(1-ideal_pCD)*p(2);

% Plot ROC curve with minimum error point labeled
figure(2); plot(pFA,pCD,'-',min_FA,min_CD,'o',ideal_pFA,ideal_pCD,'g+');
title('Minimum Expected Risk ROC Curve'); legend('ROC Curve', 'Calculated Min Error', 'Theoretical Min Error');
xlabel('P_{False Alarm}'); ylabel('P_{Correct Detection}');

% Print all results
fprintf('<strong>Theoretical Results</strong>\n');
fprintf('Minimum probability of error: %.2f%%\nThreshold Value: %.2f\n',ideal_error*100,p(1)/p(2));

fprintf('\n<strong>Calculated Results</strong>\n');
fprintf('Minimum probability of error: %.2f%%\nThreshold Value: %.2f\n\n',min_error*100,exp(mid_tau(min_index)));

%% ======================== Question 1: Part 2 ======================== %%
Sigma_NB(:,:,1) = [1 0;0 1]; Sigma_NB(:,:,2) = [1 0;0 1];

% Calculate discriminant scores and tau
discriminantScore_NB = log(evalGaussian(x,mu(:,2),Sigma_NB(:,:,2))./evalGaussian(x,mu(:,1),Sigma_NB(:,:,1)));
tau_NB = log(sort(discriminantScore_NB(discriminantScore_NB >= 0)));

% Find midpoints of tau to use as threshold values
mid_tau_NB = [tau_NB(1)-1 tau_NB(1:end-1) + diff(tau_NB)./2 tau_NB(length(tau_NB))+1];

% Make decision for every threshold and calculate error values
for i = 1:length(mid_tau_NB)
    decision_NB = (discriminantScore_NB >= mid_tau_NB(i));
    pFA_NB(i) = sum(decision_NB==1 & label==0)/Nc(1); % False alarm prob.
    pCD_NB(i) = sum(decision_NB==1 & label==1)/Nc(2); % Correct detection prob.
    pE_NB(i) = pFA_NB(i)*p(1)+(1-pCD_NB(i))*p(2);        % Total error prob.
end

% Find minimum error and corresponding threshold
[min_error_NB,min_index_NB] = min(pE_NB);
min_decision_NB = (discriminantScore >= mid_tau_NB(min_index_NB));
min_FA_NB = pFA_NB(min_index_NB); min_CD_NB = pCD_NB(min_index_NB);

% Plot ROC curve with minimum error point labeled
figure(4); plot(pFA_NB,pCD_NB,'-',min_FA_NB,min_CD_NB,'o');
title('Naive-Bayesian ROC Curve'); legend('NB ROC Curve', 'NB Minimum Error');
xlabel('P_{False Alarm}'); ylabel('P_{Correct Detection}');

fprintf('\n<strong>Calculated Results, Naive-Bayesian</strong>\n');
fprintf('Minimum probability of error: %.2f%%\nThreshold Value: %.2f\n\n',min_error_NB*100,exp(mid_tau_NB(min_index_NB)));

%% ======================== Question 1: Part 3 ======================== %%
% Code help and example from Prof. Deniz
% LDA setup
Sb = (mu(:,1)-mu(:,2))*(mu(:,1)-mu(:,2))';
Sw = Sigma(:,:,1) + Sigma(:,:,2);

% Calculating Fisher LDA projection vector
[V,D] = eig(inv(Sw)*Sb); % alpha w  = inv(Sw) Sb w
[~,ind] = sort(diag(D),'descend');
wLDA = V(:,ind(1));     % Fisher LDA projection vector
yLDA = wLDA'*x;         % All data projected on to the line spanned by wLDA
wLDA = sign(mean(yLDA(find(label==1)))-mean(yLDA(find(label==0))))*wLDA; % ensures class 1 falls on the + side of the axis
yLDA = sign(mean(yLDA(find(label==1)))-mean(yLDA(find(label==0))))*yLDA; % flip yLDA accordingly

% Plot LDA projection
figure(5);
plot(yLDA(find(label==0)),zeros(1,Nc(1)),'o',yLDA(find(label==1)),zeros(1,Nc(2)),'+');
title('LDA projection of data points and their true labels');
xlabel('x_1'); ylabel('x_2'); legend('Class 0','Class 1');

% Sort LDA projection vector and find midpoints
sorted_yLDA = sort(yLDA);
mid_tau_LDA = [sorted_yLDA(1)-1 sorted_yLDA(1:end-1)+diff(sorted_yLDA)./2 sorted_yLDA(length(sorted_yLDA))+1];

% Make decision for every threshold value and find error probabilities
for i = 1:length(mid_tau_LDA)-1
    decisionLDA = (yLDA >= mid_tau_LDA(i));
    pFA_LDA(i) = sum(decisionLDA==1 & label==0)/Nc(1); % False alarm
    pCD_LDA(i) = sum(decisionLDA==1 & label==1)/Nc(2); % Correct detection
    pE_LDA(i) = pFA_LDA(i)*p(1)+(1-pCD_LDA(i))*p(2);
end

% Find minimum error and corresponding threshold
[min_error_LDA,min_index_LDA] = min(pE_LDA);
min_decision_LDA = (yLDA >= mid_tau_LDA(min_index_LDA));
min_FA_LDA = pFA_LDA(min_index_LDA); min_CD_LDA = pCD_LDA(min_index_LDA);

% Plot LDA ROC Curve
figure(6); plot(pFA_LDA,pCD_LDA,'-',min_FA_LDA,min_CD_LDA,'o');
title('Fisher LDA ROC Curve'); legend('LDA ROC Curve', 'LDA Minimum Error');
xlabel('P(False Alarm)'); ylabel('P(Correct Detection)');

%Print Results
fprintf('\n<strong>Calculated Results, LDA</strong>\n');
fprintf('Minimum probability of error: %.2f%%\nThreshold Value (tau): %.2f\n\n',min_error_LDA*100,mid_tau_LDA(min_index_LDA));

%% ======================= Question 1: Functions ====================== %%
% Function credit: Prof. Deniz
function g = evalGaussian(x,mu,Sigma)
% Evaluates the Gaussian pdf N(mu,Sigma) at each coumn of X
[n,N] = size(x);

C = ((2*pi)^n * det(Sigma))^(-1/2); % coefficient
E = -0.5*sum((x-repmat(mu,1,N)).*(inv(Sigma)*(x-repmat(mu,1,N))),1); % exponent
g = C*exp(E); % final gaussian evaluation

end
本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2020-05-31,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 图像处理与模式识别研究所 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档