# Final exam (期末) - zy
## Question 1 ##
# Seed the random number generator so that the rnorm() draws used by the
# simulation below, and therefore the printed counts, are reproducible.
set.seed(17200115)
# Count how many simulated confidence intervals miss the true mean.
#
# Each replication draws n1 values from N(mean1, sd1^2) and builds the
# two-sided interval  xbar +/- qnorm(1 - alpha / 2) * sd1 / sqrt(n1).
# Despite the function name this is a z interval: it uses the known sd1 and
# normal quantiles, not the sample sd and t quantiles.
#
# Arguments:
#   n1     sample size of each replication (>= 1)
#   mean1  true mean of the sampled normal distribution
#   sd1    true (known) standard deviation (>= 0)
#   alpha  significance level in [0, 1]; the interval has confidence level
#          1 - alpha (so alpha = 0.05 gives a 95% interval)
#   nsim   number of replications; defaults to 1000
#
# Returns: the number of replications whose interval does not contain mean1.
ttest <- function(n1, mean1, sd1, alpha, nsim = 1000) {
  stopifnot(
    is.numeric(n1), n1 >= 1,
    is.numeric(sd1), sd1 >= 0,
    is.numeric(alpha), alpha >= 0, alpha <= 1,
    is.numeric(nsim), nsim >= 0
  )

  # The half-width depends only on the known sd, so it is identical for every
  # replication and is computed once outside the loop.
  half_width <- sd1 / sqrt(n1) * qnorm(1 - alpha / 2)

  count <- 0
  for (i in seq_len(nsim)) {
    xb <- mean(rnorm(n1, mean = mean1, sd = sd1))
    # A miss means the whole interval lies above or below the true mean.
    if (xb - half_width > mean1 || xb + half_width < mean1) {
      count <- count + 1
    }
  }
  count
}
# Run the simulation for every combination of true sd (1, 10), confidence
# level (0.90, 0.95, 0.99) and sample size (10, 20, 50); the loop nesting keeps
# the original call order, so the random number stream is consumed identically.
# ttest() takes the significance level alpha, so the confidence level is
# converted with 1 - level. Passing 0.9 directly as alpha would build a 10%
# interval and report roughly 900 misses out of 1000.
for (sd_true in c(1, 10)) {
  for (conf_level in c(0.90, 0.95, 0.99)) {
    for (n_obs in c(10, 20, 50)) {
      print(ttest(n_obs, 0, sd_true, 1 - conf_level))
    }
  }
}
## Question 2 ##
## 1 ##
# For each Species, describe Sepal.Length and Sepal.Width as mean ± standard
# deviation, and Petal.Length and Petal.Width as median (first quartile -
# third quartile).
iris <- datasets::iris
iris1 <- iris[iris$Species == "setosa", ]
iris2 <- iris[iris$Species == "versicolor", ]
iris3 <- iris[iris$Species == "virginica", ]

# Format a numeric vector as "mean ± sd", both rounded to two decimals.
fmt_mean_sd <- function(x) {
  paste(round(mean(x), 2), "±", round(sd(x), 2))
}

# Format a numeric vector as "median ( Q1 - Q3 )".
fmt_median_iqr <- function(x) {
  quartiles <- quantile(x, c(0.25, 0.75))
  paste(median(x), "(", quartiles[[1]], "-", quartiles[[2]], ")")
}

species_sets <- list(iris1, iris2, iris3)

# Sepal measurements: mean ± sd, one line per species
# (setosa, versicolor, virginica) for each variable.
for (var_name in c("Sepal.Length", "Sepal.Width")) {
  for (species_df in species_sets) {
    print(fmt_mean_sd(species_df[[var_name]]))
  }
}

# Petal measurements: median (Q1 - Q3), as the task requires. The previous
# version summarised the Sepal columns here by mistake.
for (var_name in c("Petal.Length", "Petal.Width")) {
  for (species_df in species_sets) {
    print(fmt_median_iqr(species_df[[var_name]]))
  }
}
## 2 ## Pearson correlation matrix of the four plant measurements and the
# matrix of p-values from the correlation tests (no grouping by Species).
# Install psych only when it is missing, so the script does not hit the
# network and reinstall the package on every run.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
data1 <- iris[, c(1, 2, 3, 4)]
head(data1)

# Correlation matrix (the previous version stored it without showing it).
cor_matrix <- cor(data1, method = "pearson")
cor_matrix

# Full test output, then the p-value matrix on its own.
# NOTE(review): per the psych documentation corr.test() adjusts the p-values
# above the diagonal for multiple testing (adjust = "holm" by default) -
# confirm which version the answer should report.
cor_test <- corr.test(data1, method = "pearson")
cor_test
cor_test$p
## 3 ## Write a variable-recoding function that turns quantitative data into
# categories using fixed cut points:
#   [0, 2] -> 1, (2, 4] -> 2, (4, 6] -> 3, (6, 8] -> 4.
# Apply it to Petal.Length and tabulate the recoded variable.
#
# zhang(a): recode a numeric vector into the interval codes 1-4.
#   a        numeric vector
#   returns  a numeric vector shaped like `a` (names and dims are kept)
#            holding 1, 2, 3 or 4; NA for missing values and for values
#            outside [0, 8], which the previous if/else chain coded as 4.
zhang <- function(a) {
  stopifnot(is.numeric(a))
  # right = TRUE gives (lo, hi] intervals; include.lowest = TRUE closes the
  # first one so that exactly 0 falls into category 1.
  codes <- cut(
    a,
    breaks = c(0, 2, 4, 6, 8),
    labels = FALSE,
    right = TRUE,
    include.lowest = TRUE
  )
  # Assign through a[] so attributes of the input survive and the result is
  # a double vector, as before.
  a[] <- as.numeric(codes)
  a
}
# Show the recoded Petal.Length values, then count how often each code occurs
# (table() produces the frequency counts).
petal_length <- iris[["Petal.Length"]]
zhang(petal_length)
freq <- table(zhang(petal_length))
freq
## 4 ## Draw the distribution of Sepal.Length within each Species as a box
# plot with the raw observations on top. Requirements:
# 1) one figure, with the points drawn directly over the boxes;
# 2) a different fill colour per species, with a legend mapping colour to
#    species;
# 3) blue points placed with random jitter rather than in a single column.
species_fill <- c(2, 3, 5)
boxplot(Sepal.Length ~ Species, data = iris,
        col = species_fill, xlab = "x", ylab = "y")
# stripchart(add = TRUE) draws on the existing box-plot axes, so every point
# sits over its own species. The previous par(new = TRUE) + plot() overlay
# used observation index as x on an unrelated scale.
# method = "jitter" spreads the points sideways at random; without jitter,
# method = "overplot" would put them in one column and "stack" would pile
# equal values next to each other.
stripchart(Sepal.Length ~ Species, data = iris,
           vertical = TRUE, method = "jitter", jitter = 0.15,
           pch = 16, col = "blue", add = TRUE)
# Filled swatches match the box fills; the top-left corner is used because
# the largest values (virginica) occupy the top right.
legend("topleft",
       legend = levels(iris$Species),
       fill = species_fill)
## 5 ## Use three different classification algorithms to split the data into
# three classes based on the four plant measurements, compare the result with
# the true labels and evaluate it with accuracy. Any algorithms may be used.
# Load the data: measurements without the label column, and the labels.
iris <- datasets::iris
iris2 <- iris[, names(iris) != "Species"]
species_labels <- iris[["Species"]]
library(colorspace) # colour palettes
# One colour per species, looked up by factor level.
species_palette <- rev(rainbow_hcl(3))
species_col <- species_palette[as.integer(species_labels)]
# Scatter-plot matrix (SPLOM), upper triangle only.
pairs(
  iris2,
  lower.panel = NULL,
  col = species_col,
  pch = 19,
  cex = 1.2,
  cex.labels = 2
)
# Add the legend; xpd = TRUE lets it be drawn outside the plot region.
par(xpd = TRUE)
legend(
  x = 0.05, y = 0.4,
  legend = levels(species_labels),
  fill = unique(species_col),
  cex = 2
)
# Parallel coordinates plot of the four measurements, coloured by species.
# Save the graphics settings first and restore them at the end, so the margin,
# text-size and clipping changes made here do not leak into later plots.
old_par <- par(no.readonly = TRUE)
# NOTE(review): xpd = NA turns clipping off entirely; if the intent was only to
# undo an earlier xpd = TRUE, the default is xpd = FALSE - confirm.
par(xpd = NA)
par(las = 1, mar = c(4.5, 3, 3, 2) + 0.1, cex = .8)
MASS::parcoord(iris2, col = species_col, var.label = TRUE, lwd = 2)
# Add the title
title("Parallel coordinates plot of the Iris data")
# Add the legend below the plot region (needs clipping relaxed to the figure)
par(xpd = TRUE)
legend(x = 1.75, y = -.25, cex = 1,
       legend = as.character(levels(species_labels)),
       fill = unique(species_col), horiz = TRUE)
par(old_par)
# k-means on the four measurements, evaluated against the true species.
# NOTE(review): the task asks for three algorithms; only k-means appears in
# this part of the script.
data1 <- iris[, c(1, 2, 3, 4)]
head(data1)
# kmeans() has no default for `centers`, so the bare kmeans(data1) call
# stopped with an error. Three clusters match the three species; nstart = 25
# keeps the best of 25 random starts so the result does not hinge on one
# unlucky initialisation.
km_fit <- kmeans(data1, centers = 3, nstart = 25)
km_fit
# Cross-tabulate clusters against the true labels.
km_confusion <- table(cluster = km_fit$cluster, species = iris$Species)
km_confusion
# Cluster numbers are arbitrary, so label each cluster with its majority
# species before computing accuracy.
km_cluster_species <- colnames(km_confusion)[apply(km_confusion, 1, which.max)]
km_pred <- km_cluster_species[km_fit$cluster]
km_accuracy <- mean(km_pred == as.character(iris$Species))
km_accuracy