关注微信公众号:小程在线
关注CSDN博客:程志伟的博客
R版本:3.6.1
nnet包:用于创建人工神经网络
nnet函数:用于人工神经网络的建立
> setwd('G:\\R语言\\大三下半年\\数据挖掘:R语言实战\\')
> library(nnet)
# Load the white-wine quality data set. Fields are separated by ";" and the
# literal string "null" is read as a missing value. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
wine <- read.csv("G:\\R语言\\大三下半年\\数据挖掘:R语言实战\\数据挖掘:R语言实战(案例数据集)\\11 随机森林\\winequality-white.csv", header = TRUE, sep = ";", na.strings = "null")
# Recode the numeric quality score (column 12) of the quality-labelled white
# wine data, also used in the random-forest example, into three classes:
#   score > 6               -> "good"
#   score > 5 (but not > 6) -> "mid"
#   anything else           -> "bad"
# The whole column is recoded at once, so the code no longer depends on the
# data set having exactly 4898 rows.
quality_score <- wine[, 12]
cha <- ifelse(quality_score > 6, "good",
              ifelse(quality_score > 5, "mid", "bad"))
# Store the labels back as a factor so that nnet treats quality as a class.
wine[, 12] <- factor(cha)
#' Min-max normalise the feature columns of a data set to [0, 1].
#'
#' The last column of `x` is treated as the response and is left out. Every
#' other column is rescaled on its own as (value - min) / (max - min).
#'
#' @param x A data frame or matrix whose first `ncol(x) - 1` columns are
#'   numeric features and whose last column is the response.
#' @return A numeric matrix with `nrow(x)` rows and `ncol(x) - 1` columns and
#'   no dimnames. A constant column (max equal to min) is returned as all
#'   zeros instead of the NaN values a 0 / 0 division would produce.
scale01 <- function(x) {
  n_feat <- dim(x)[2] - 1 # number of feature columns
  n_obs <- dim(x)[1] # number of samples
  scaled <- matrix(0, n_obs, n_feat) # result, filled column by column
  for (j in seq_len(n_feat)) {
    column <- x[, j]
    lowest <- min(column)
    span <- max(column) - lowest
    if (!is.na(span) && span == 0) {
      # Constant column: keep the zeros already in `scaled`.
      next
    }
    scaled[, j] <- (column - lowest) / span
  }
  scaled
}
#第一种格式
# Fit the network through the formula interface on a 3000-row training sample.
set.seed(71)
samp <- sample(seq_len(nrow(wine)), 3000) # row indices of the training sample
wine[samp, 1:11] <- scale01(wine[samp, ]) # min-max scale the sampled rows in place
r <- 1 / max(abs(wine[samp, 1:11])) # rang chosen so that rang * max(|x|) is 1
set.seed(101)
# Fixed: the argument was misspelled `subest`. nnet accepts unknown arguments
# through `...`, so the misspelling raised no error and no row subsetting was
# applied. With `subset = samp` the model is fitted on the sampled rows only.
# NOTE: the training log and weight summary recorded after this call were
# produced before the fix and will differ when the code is re-run.
model1 <- nnet(quality ~ ., data = wine, subset = samp, size = 4, rang = r,
               decay = 5e-4, maxit = 200)
# weights: 63
initial value 6001.624546
iter 10 value 5168.481875
iter 20 value 4934.585211
iter 30 value 4729.276435
iter 40 value 4632.644324
iter 50 value 4596.906940
iter 60 value 4570.845076
iter 70 value 4566.570580
iter 80 value 4559.148409
iter 90 value 4552.246633
iter 100 value 4539.121277
iter 110 value 4534.336144
iter 120 value 4526.449740
iter 130 value 4521.577576
iter 140 value 4519.922823
iter 150 value 4519.282343
iter 160 value 4518.852588
iter 170 value 4518.697570
iter 180 value 4518.548699
iter 190 value 4518.451290
iter 200 value 4518.440725
final value 4518.440725
stopped after 200 iterations
可以看见输入层11个节点,隐藏层4个,输出层3个
> summary(model1)
a 11-4-3 network with 63 weights
options were - softmax modelling decay=5e-04
b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1 i8->h1 i9->h1
-1.82 4.37 -2.43 1.06 5.20 -17.17 6.48 -5.65 -8.39 5.94
i10->h1 i11->h1
1.63 5.70
b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2 i8->h2 i9->h2
-2.02 -1.94 19.59 5.79 3.82 -0.97 5.82 -5.50 -10.84 5.94
i10->h2 i11->h2
0.85 10.65
b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3 i8->h3 i9->h3
1.70 1.31 21.79 -6.81 3.07 -61.68 -0.81 -1.61 -1.48 -4.02
i10->h3 i11->h3
-6.56 47.53
b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4 i8->h4 i9->h4
-1.30 -0.07 -1.15 0.39 1.49 0.81 0.37 -0.38 -2.23 0.30
i10->h4 i11->h4
0.41 0.06
b->o1 h1->o1 h2->o1 h3->o1 h4->o1
0.40 -2.53 4.29 -0.26 -15.51
b->o2 h1->o2 h2->o2 h3->o2 h4->o2
0.63 3.66 -3.92 -0.89 7.35
b->o3 h1->o3 h2->o3 h3->o3 h4->o3
-0.84 -1.22 -0.32 1.26 8.24
#第二种格式
# Predictors: every column of wine except quality.
x <- wine[, setdiff(names(wine), "quality")]
# Response: the quality column (column 12), expanded into a class-indicator
# matrix with one column per class.
y <- class.ind(wine[, 12])
set.seed(101)
# Fit the network through the x / y (matrix) interface.
model2 <- nnet(x, y, decay = 5e-4, maxit = 200, size = 4, rang = r)
# weights: 63
initial value 4858.952916
iter 10 value 3341.111657
iter 20 value 3238.187532
iter 30 value 3107.618277
iter 40 value 2918.127297
iter 50 value 2868.865579
iter 60 value 2826.783720
iter 70 value 2821.629983
iter 80 value 2800.518382
iter 90 value 2773.896438
iter 100 value 2686.748771
iter 110 value 2642.000748
iter 120 value 2625.476330
iter 130 value 2619.429244
iter 140 value 2617.954127
iter 150 value 2617.256355
iter 160 value 2617.101508
iter 170 value 2617.073484
iter 180 value 2617.065439
iter 190 value 2617.062616
iter 200 value 2617.061575
final value 2617.061575
stopped after 200 iterations
#预测判别 #第一种建模方式建立的模型
# Predict a class label for every row with model1, then display the labels of
# 8 randomly chosen rows.
x <- wine[, 1:11]
pred <- predict(model1, newdata = x, type = "class")
set.seed(110)
pred[sample(1:4898, 8)]
[1] "mid" "mid" "mid" "mid" "good" "mid" "mid" "mid"
#第二种建模方式建立的模型
# Run model2 on the 11 feature columns. The result has one row per sample and
# one column per class.
xt <- wine[, 1:11]
pred <- predict(model2, newdata = xt)
dim(pred)
[1] 4898 3
> pred[sample(1:4898,4),]
bad good mid
[1,] 0.1168874 0.4144363 0.5504798
[2,] 0.1848776 0.2757557 0.4987190
[3,] 0.2290765 0.1872659 0.5602673
[4,] 0.1504286 0.3361246 0.5225780
# Turn the per-class output matrix into predicted labels.
# The labels are taken from pred's own column names instead of a hand-typed
# vector, so they cannot get out of step with the column order.
name <- colnames(pred)
prednew <- max.col(pred) # column index of the largest value in each row
prednewn <- name[prednew] # translate column indices into class labels
set.seed(201)
prednewn[sample(1:4898, 8)] # show 8 randomly chosen predictions
[1] "bad" "mid" "mid" "mid" "mid" "mid" "mid" "mid"
# True class of each sample, as the index of the largest column in each row
# of the indicator matrix y.
true <- max.col(y)
# Confusion table: rows are true class indices (in y's column order), columns
# are predicted labels.
table(true, prednewn)
prednewn
true bad good mid
1 984 23 633
2 47 241 772
3 447 156 1595
#优化模型
#size=i控制隐藏层节点
# Tune the hidden-layer size: fit one network per size 1..17 on a 70% training
# split and record the misclassification rate on training and test rows.
set.seed(444)
nrow.wine <- nrow(wine)
samp <- sample(1:nrow.wine, nrow.wine * 0.7) # training rows (70% of the data)
# NOTE(review): the test rows are rescaled with their own min/max, not with
# the training-set min/max, so the two subsets are not on a common scale.
# NOTE(review): the earlier 3000-row sample was already rescaled in place, so
# `wine` holds a mix of scaled and raw rows when this runs. Reloading the data
# before this step looks safer -- confirm.
wine[samp, 1:11] <- scale01(wine[samp, ]) # rescale the training features
wine[-samp, 1:11] <- scale01(wine[-samp, ]) # rescale the test features
r <- 1 / max(abs(wine[samp, 1:11])) # rang chosen so that rang * max(|x|) is 1
n <- length(samp)
# Preallocate the result vectors instead of growing them inside the loop.
err1 <- numeric(17) # training-set misclassification rate per size
err2 <- numeric(17) # test-set misclassification rate per size
for (i in seq_along(err1)) {
  set.seed(111) # reset the RNG before every fit
  model <- nnet(quality ~ ., data = wine, maxit = 400, rang = r, size = i,
                subset = samp, decay = 5e-4)
  err1[i] <- sum(predict(model, wine[samp, 1:11], type = "class") !=
                   wine[samp, 12]) / n
  err2[i] <- sum(predict(model, wine[-samp, 1:11], type = "class") !=
                   wine[-samp, 12]) / (nrow.wine - n)
} # takes a long time to run
# weights: 18
initial value 3848.796097
iter 10 value 3592.218560
iter 20 value 3464.981999
iter 30 value 3232.023296
iter 40 value 3179.179326
iter 50 value 3174.538937
iter 60 value 3173.128627
iter 70 value 3172.645809
iter 80 value 3172.308503
iter 90 value 3172.247426
iter 100 value 3172.221023
iter 110 value 3172.212552
final value 3172.207548
......
# weights: 258
initial value 5436.735837
iter 10 value 3590.032318
iter 20 value 3225.945094
iter 30 value 3088.217706
iter 40 value 3038.557713
iter 50 value 2985.256714
iter 60 value 2940.666433
iter 70 value 2887.612636
iter 80 value 2856.806967
iter 90 value 2826.222811
iter 100 value 2802.668013
iter 110 value 2781.458154
iter 120 value 2761.984928
iter 130 value 2738.214931
iter 140 value 2718.955795
iter 150 value 2700.324932
iter 160 value 2688.401961
iter 170 value 2680.835201
iter 180 value 2673.519743
iter 190 value 2664.845414
iter 200 value 2651.361108
iter 210 value 2640.714784
iter 220 value 2632.382846
iter 230 value 2625.034649
iter 240 value 2619.381170
iter 250 value 2615.577511
iter 260 value 2612.080917
iter 270 value 2607.951789
iter 280 value 2603.371910
iter 290 value 2600.418487
iter 300 value 2598.280348
iter 310 value 2597.287472
iter 320 value 2596.475492
iter 330 value 2595.666123
iter 340 value 2594.955022
iter 350 value 2594.334263
iter 360 value 2593.757000
iter 370 value 2592.964251
iter 380 value 2591.852259
iter 390 value 2590.050488
iter 400 value 2588.657908
final value 2588.657908
stopped after 400 iterations
# Misclassification rate against hidden-layer size: training error as a solid
# line with "+" markers, test error as a dotted line with "o" markers. The
# y-axis spans the combined range of both error vectors.
plot(
  1:17, err1,
  type = "l", col = 1, lty = 1,
  ylab = "模型误判率", xlab = "隐藏层节点个",
  ylim = range(err1, err2)
)
lines(1:17, err2, col = 1, lty = 3)
points(1:17, err1, col = 1, pch = "+")
points(1:17, err2, col = 1, pch = "o")
# Text-only legends (no box) placed at fixed plot coordinates.
legend(1, 0.53, "测试集误判率", bty = "n", cex = 1.5)
legend(1, 0.35, "训练集误判率", bty = "n", cex = 1.5)
# maxit:控制的是 模型的最大迭代次数
# Tune the iteration cap: fit a size-8 network for every maxit from 1 to 500
# and record the misclassification rate on training and test rows.
# The result vectors are preallocated instead of being grown inside the loop.
err11 <- numeric(500) # training-set misclassification rate per maxit
err12 <- numeric(500) # test-set misclassification rate per maxit
for (i in seq_along(err11)) {
  set.seed(111) # reset the RNG before every fit
  model <- nnet(quality ~ ., data = wine, maxit = i, rang = r, size = 8,
                subset = samp)
  err11[i] <- sum(predict(model, wine[samp, 1:11], type = "class") !=
                    wine[samp, 12]) / n
  err12[i] <- sum(predict(model, wine[-samp, 1:11], type = "class") !=
                    wine[-samp, 12]) / (nrow.wine - n)
}
# Misclassification rate against the iteration cap: training error as a solid
# line, test error as a dotted line. The y-axis spans both error vectors.
plot(
  seq_along(err11), err11,
  type = "l", col = 1,
  ylab = "模型误判率", xlab = "训练周期",
  ylim = range(err11, err12)
)
lines(seq_along(err11), err12, col = 1, lty = 3)
# Text-only legends (no box) placed at fixed plot coordinates.
legend(250, 0.50, "测试集误判率", bty = "n", cex = 1.2)
legend(250, 0.45, "训练集误判率", bty = "n", cex = 1.2)
# 取迭代次数为300,隐藏节点为8
# Final model: at most 300 iterations, fitted on the training rows only.
# NOTE(review): the log recorded after this call reports 48 weights. With 11
# inputs and 3 outputs that corresponds to 3 hidden units, not 8, so the log
# was presumably captured with a different `size` -- confirm.
set.seed(111)
model <- nnet(
  quality ~ .,
  data = wine,
  subset = samp,
  size = 8,
  rang = r,
  maxit = 300
)
# weights: 48
initial value 4059.022325
iter 10 value 3538.182264
iter 20 value 3297.883481
iter 30 value 3129.523044
iter 40 value 3063.555895
iter 50 value 3031.585674
iter 60 value 2976.836958
iter 70 value 2962.092104
iter 80 value 2955.232556
iter 90 value 2945.811314
iter 100 value 2941.167645
iter 110 value 2937.195896
iter 120 value 2931.673119
iter 130 value 2929.899507
iter 140 value 2927.420533
iter 150 value 2924.502215
iter 160 value 2922.749660
iter 170 value 2918.661721
iter 180 value 2914.274935
iter 190 value 2913.275232
iter 200 value 2911.394127
iter 210 value 2909.892828
iter 220 value 2908.420454
iter 230 value 2907.853183
iter 240 value 2907.620816
iter 250 value 2905.127945
iter 260 value 2902.842029
iter 270 value 2899.456613
iter 280 value 2897.801236
iter 290 value 2897.711678
iter 300 value 2897.464632
final value 2897.464632
stopped after 300 iterations
# Evaluate the final model on the held-out rows (those not in samp).
x <- wine[-samp, 1:11]
pred <- predict(model, newdata = x, type = "class")
# Confusion table: rows are true labels, columns are predicted labels.
table(wine[-samp, 12], pred)
pred
bad good mid
bad 264 18 226
good 30 120 174
mid 164 111 363
|
请发表评论