8.2 某医院研究心电图指标对健康人(1),硬化病患者(2)和冠心病患者(3)的鉴别能力,先训练样本如下。试用距离判别(考虑方差相同与方差不同两种情况)、Bayes判别(考虑方差同和不同两种情况,且先验概率为11/23,7/23,5/23)对数据进行分析。
原始数据 data.txt如下, 3,4,5,6列为心电图指标,2列为类别
1 1 8.11 261.01 13.23 7.362 1 9.36 185.39 9.02 5.993 1 9.85 249.58 15.61 6.114 1 2.55 137.13 9.21 4.355 1 6.01 231.34 14.27 8.796 1 9.64 231.38 13.03 8.537 1 4.11 260.25 14.72 10.028 1 8.90 259.91 14.16 9.799 1 7.71 273.84 16.01 8.7910 1 7.51 303.59 19.14 8.5311 1 8.06 231.03 14.41 6.1512 2 6.80 308.90 15.11 8.4913 2 8.68 258.69 14.02 7.1614 2 5.67 355.54 15.03 9.4317 2 3.71 316.32 17.12 8.1717 2 5.37 274.57 16.75 9.6718 2 9.89 409.42 19.47 10.4919 3 5.22 330.34 18.19 9.6120 3 4.71 331.47 21.26 13.7221 3 4.71 352.50 20.79 11.0022 3 3.36 347.31 17.90 11.1923 3 8.27 189.56 12.74 6.94
函数distinguish.distance.R
distinguish.distance<-function (TrnX, TrnG, TstX = NULL, var.equal = FALSE){ if ( is.factor(TrnG) == FALSE){ mx<-nrow(TrnX); mg<-nrow(TrnG) TrnX<-rbind(TrnX, TrnG) TrnG<-factor(rep(1:2, c(mx, mg))) } if (is.null(TstX) == TRUE) TstX<-TrnX if (is.vector(TstX) == TRUE) TstX<-t(as.matrix(TstX)) else if (is.matrix(TstX) != TRUE) TstX<-as.matrix(TstX) if (is.matrix(TrnX) != TRUE) TrnX<-as.matrix(TrnX) nx<-nrow(TstX) blong<-matrix(rep(0, nx), nrow=1, dimnames=list("blong", 1:nx)) g<-length(levels(TrnG)) mu<-matrix(0, nrow=g, ncol=ncol(TrnX)) for (i in 1:g) mu[i,]<-colMeans(TrnX[TrnG==i,]) D<-matrix(0, nrow=g, ncol=nx) if (var.equal == TRUE || var.equal == T){ for (i in 1:g) D[i,]<- mahalanobis(TstX, mu[i,], var(TrnX)) } else{ for (i in 1:g) D[i,]<- mahalanobis(TstX, mu[i,], var(TrnX[TrnG==i,])) } for (j in 1:nx){ dmin<-Inf for (i in 1:g) if (D[i,j]
函数distinguish.bayes.R
distinguish.bayes<-function (TrnX, TrnG, p=rep(1, length(levels(TrnG))), TstX = NULL, var.equal = FALSE){ if ( is.factor(TrnG) == FALSE){ mx<-nrow(TrnX); mg<-nrow(TrnG) TrnX<-rbind(TrnX, TrnG) TrnG<-factor(rep(1:2, c(mx, mg))) } if (is.null(TstX) == TRUE) TstX<-TrnX if (is.vector(TstX) == TRUE) TstX<-t(as.matrix(TstX)) else if (is.matrix(TstX) != TRUE) TstX<-as.matrix(TstX) if (is.matrix(TrnX) != TRUE) TrnX<-as.matrix(TrnX) nx<-nrow(TstX) blong<-matrix(rep(0, nx), nrow=1, dimnames=list("blong", 1:nx)) g<-length(levels(TrnG)) mu<-matrix(0, nrow=g, ncol=ncol(TrnX)) for (i in 1:g) mu[i,]<-colMeans(TrnX[TrnG==i,]) D<-matrix(0, nrow=g, ncol=nx) if (var.equal == TRUE || var.equal == T){ for (i in 1:g){ d2 <- mahalanobis(TstX, mu[i,], var(TrnX)) D[i,] <- d2 - 2*log(p[i]) } } else{ for (i in 1:g){ S<-var(TrnX[TrnG==i,]) d2 <- mahalanobis(TstX, mu[i,], S) D[i,] <- d2 - 2*log(p[i])-log(det(S)) } } for (j in 1:nx){ dmin<-Inf for (i in 1:g) if (D[i,j]
运行脚本
data <- read.table("data.txt");X <- data[,3:6];G <- factor(data[,2]);source("distinguish.distance.R");distinguish.distance(X,G);# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22#blong 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 3 3 3 3distinguish.distance(X,G,var.equal=TRUE);# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22#blong 1 1 1 1 1 1 3 1 1 3 1 2 1 2 2 3 2 2 3 3 3 1source("distinguish.bayes.R");distinguish.bayes(X,G, p=c(rep(11/23,11),rep(7/23,7),rep(5/23,5)));# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22#blong 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 3 3 3 3distinguish.bayes(X,G, p=c(rep(11/23,11),rep(7/23,7),rep(5/23,5)),var.equal=TRUE);# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22#blong 1 1 1 1 1 1 3 1 1 3 1 2 1 2 2 3 2 2 3 3 3 1
博文源代码和习题均来自于教材《统计建模与R软件》(ISBN:9787302143666,作者:薛毅)。