function a4_00000000
% A4_00000000 Driver function for CISC271, Winter 2022, Assignment #4.
% Reads the diabetes-risk data from 'dmrisk.csv', builds one data
% matrix and one label vector for each of two label columns, reduces
% each data matrix to 2D with PCA, and requests an LDA axis and score
% vector for each reduced data set from A4Q1.
%
% INPUTS:
%         none
% OUTPUTS:
%         none
%
% NOTE: ZSCORE and PCA are provided by the Statistics and Machine
% Learning Toolbox.

% Read the test data from a CSV file; the offsets (1,0) start reading
% at the second row, first column, so the first row is not read
dmrisk = csvread('dmrisk.csv',1,0);

% Columns for the data and labels; DM is diabetes, OB is obesity
% NOTE(review): jDM and jOB are used below as the label column indices
% but are not assigned anywhere in the visible source. They must be set
% here (for example "jDM = <column>; jOB = <column>;") to match the
% layout of dmrisk.csv before this function can run -- confirm the
% correct indices against the data file.

% Extract the data matrices and labels; each data matrix keeps every
% column except the one used as its label
XDM = dmrisk(:, (1:size(dmrisk,2))~=jDM);
yDM = dmrisk(:,jDM);
XOB = dmrisk(:, (1:size(dmrisk,2))~=jOB);
yOB = dmrisk(:,jOB);

% Reduce the dimensionality to 2D using PCA; the second output of PCA
% is the score matrix, here limited to the first 2 components
[~,rDM] = pca(zscore(XDM), 'NumComponents', 2);
[~,rOB] = pca(zscore(XOB), 'NumComponents', 2);

% Find the LDA vectors and scores for each data set
[qDM, zDM, qOB, zOB] = a4q1(rDM, yDM, rOB, yOB);

% % STUDENT CODE GOES HERE: PLOT RELEVANT DATA

% Compute the ROC curve and its AUC where: "xroc" is the horizontal
% axis of false positive rates; "yroc" is the vertical
% axis of true positive rates; "auc" is the area under curve

% % STUDENT CODE GOES HERE: COMPUTE, PLOT, DISPLAY RELEVANT DATA

% END OF FUNCTION
function [q1, z1, q2, z2] = a4q1(Xmat1, yvec1, Xmat2, yvec2)
% [Q1 Z1 Q2 Z2]=A4Q1(X1,Y1,X2,Y2) computes an LDA axis and a
% score vector for X1 with Y1, and for X2 with Y2.
%
% INPUTS:
%         X1 - MxN data, M observations of N variables
%         Y1 - Mx1 labels, +/- computed as ==/~= 1
%         X2 - MxN data, M observations of N variables
%         Y2 - Mx1 labels, +/- computed as ==/~= 1
% OUTPUTS:
%         Q1 - Nx1 vector, LDA axis of data set #1
%         Z1 - Mx1 vector, scores of data set #1
%         Q2 - Nx1 vector, LDA axis of data set #2
%         Z2 - Mx1 vector, scores of data set #2

% Compute the LDA axis for each data set; rows labelled 1 form the
% positive class and all other rows form the negative class
q1 = lda2class(Xmat1(yvec1==1,:), Xmat1(yvec1~=1, :));
q2 = lda2class(Xmat2(yvec2==1,:), Xmat2(yvec2~=1, :));

% The score of an observation is its projection onto the LDA axis,
% so each score vector is the data matrix times the axis
z1 = Xmat1*q1;
z2 = Xmat2*q2;

% END OF FUNCTION
function qvec = lda2class(X1, X2)
% QVEC=LDA2CLASS(X1,X2) finds Fisher's linear discriminant axis QVEC
% for data in X1 and X2. The data are assumed to be sufficiently
% independent that the within-label scatter matrix is full rank.
%
% INPUTS:
%         X1   - M1xN data with M1 observations of N variables
%         X2   - M2xN data with M2 observations of N variables
% OUTPUTS:
%         qvec - Nx1 unit direction of maximum separation, signed so
%                that it points from the mean of X2 towards the mean
%                of X1

% Class means as 1xN rows; the explicit dimension argument keeps a
% class with a single observation from being averaged to a scalar
xbar1 = mean(X1, 1);
xbar2 = mean(X2, 1);

% Compute the within-class scatter matrices from the mean-centered
% observations of each class, then pool them
dev1 = X1 - ones(size(X1,1), 1)*xbar1;
dev2 = X2 - ones(size(X2,1), 1)*xbar2;
S1 = dev1'*dev1;
S2 = dev2'*dev2;
Sw = S1 + S2;

% Compute the between-class scatter matrix as the outer product of
% the difference of the class means
dbar = (xbar1 - xbar2)';
Sb = dbar*dbar';

% Fisher's linear discriminant is the largest eigenvector
% of the Rayleigh quotient, found here from the generalized
% eigenproblem Sb*v = lambda*Sw*v; REAL discards any round-off
% imaginary part
[evecs, evals] = eig(Sb, Sw);
[~, kmax] = max(real(diag(evals)));
qvec = real(evecs(:, kmax));

% EIG does not return unit 2-norm vectors for this problem
qvec = qvec/norm(qvec);

% May need to correct the sign of qvec to point towards mean of X1
if (xbar1 - xbar2)*qvec < 0
    qvec = -qvec;
end

% END OF FUNCTION
function [fpr, tpr, auc, bopt] = roccurve(yvec_in,zvec_in)
% [FPR TPR AUC BOPT]=ROCCURVE(YVEC,ZVEC) computes the
% ROC curve and related values for labels YVEC and scores ZVEC.
% Unique scores are used as thresholds for binary classification.
%
% INPUTS:
%         YVEC - Mx1 labels, +/- computed as ==/~= 1
%         ZVEC - Mx1 scores, real numbers
% OUTPUTS:
%         FPR  - Kx1 vector of False Positive Rate values
%         TPR  - Kx1 vector of True Positive Rate values
%         AUC  - scalar, Area Under Curve of ROC determined by TPR and FPR
%         BOPT - scalar, optimal threshold for accuracy; -inf when
%                there are no scores

% Sort the scores and permute the labels accordingly
[zvec, zndx] = sort(zvec_in);
yvec = yvec_in(zndx);

% Sort and find a unique subset of the scores; problem size
bvec = unique(zvec);
bm = numel(bvec);

% Compute a confusion matrix for each unique threshold value;
% extract normalized entries into TPR and FPR vectors; track
% the accuracy and optimal B threshold
tpr = zeros(bm, 1);
fpr = zeros(bm, 1);
acc = -inf;
bopt = -inf;
for jx = 1:bm
    cmat = confmat(yvec, zvec, bvec(jx));
    npos = cmat(1,1) + cmat(1,2);
    nneg = cmat(2,1) + cmat(2,2);

    % MAX(...,1) makes a rate 0, not NaN, when a class has no members
    tpr(jx) = cmat(1,1)/max(npos, 1);
    fpr(jx) = cmat(2,1)/max(nneg, 1);

    % Accuracy is the fraction on the diagonal; the strict comparison
    % keeps the lowest threshold when several tie for best accuracy
    thisacc = (cmat(1,1) + cmat(2,2))/max(npos + nneg, 1);
    if thisacc > acc
        acc = thisacc;
        bopt = bvec(jx);
    end
end

% Ensure that the rates, from these scores, will plot correctly
tpr = sort(tpr);
fpr = sort(fpr);

% Compute AUC for this ROC
auc = aucofroc(fpr, tpr);

% END OF FUNCTION

function cmat = confmat(yvec, zvec, theta)
% CMAT=CONFMAT(YVEC,ZVEC,THETA) finds the confusion matrix CMAT for labels
% YVEC from scores ZVEC and a threshold THETA. Labels are treated as
% positive when ==1 and negative otherwise. Each entry of ZVEC is
% classified as negative if it is below THETA and positive otherwise.
% NOTE(review): the original description of the threshold rule was
% truncated; ">= THETA is positive" is an assumption -- confirm.
%
% INPUTS:
%         YVEC  - Mx1 labels, +/- computed as ==/~= 1
%         ZVEC  - Mx1 scores, real numbers
%         THETA - scalar threshold
% OUTPUTS:
%         CMAT  - 2x2 matrix of counts laid out as [TP FN; FP TN]:
%                 rows are +/- labels, columns are +/- classifications
%                 NOTE(review): layout chosen here because the original
%                 header did not specify it -- confirm

% Compute the confusion matrix by entries
ispos = (yvec(:) == 1);
saidpos = (zvec(:) >= theta);
tp = sum( ispos &  saidpos);
fn = sum( ispos & ~saidpos);
fp = sum(~ispos &  saidpos);
tn = sum(~ispos & ~saidpos);
cmat = [tp fn; fp tn];

% END OF FUNCTION
function auc = aucofroc(fpr, tpr)
% AUC=AUCOFROC(FPR,TPR) integrates an ROC curve, given as False
% Positive Rates FPR (horizontal axis) and True Positive Rates TPR
% (vertical axis), to find its Area Under Curve.
%
% INPUTS:
%         FPR - Kx1 vector, rate for underlying score threshold
%         TPR - Kx1 vector, rate for underlying score threshold
% OUTPUTS:
%         AUC - integral, from Trapezoidal Rule on [0,0] to [1,1]

% Order the horizontal values as a row vector, remembering the ordering
[xrow, order] = sort(fpr(:).');

% Apply the same ordering to the vertical values, then sort them as
% well so that the vertical values are non-decreasing
yrow = sort(tpr(order));
yrow = yrow(:).';

% Close the curve with the corner points [0,0] and [1,1] and integrate
xfull = [0 xrow 1];
yfull = [0 yrow 1];
auc = abs(trapz(xfull, yfull));
% END OF FUNCTION