I think this is a coding error on my part, but I can't figure out what I'm doing wrong. I'm trying to create marginal effects plots for x when both x and x^2 are in a binomial glmnet model. When I made predictions with predict.cv.glmnet() on a new dataset, it almost looked as if the signs of the model coefficients had been switched. So I've been comparing predictions from hard-coding the model formula against predictions from the predict function. I get identical predictions on the training data but different predictions on the new data, and I can't see why.
#############example############
library(tidyverse)
library(glmnet)
data(BinomialExample)
#head(x)
#head(y)
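#note: with newer glmnet releases (>= 4.1, I believe) the example data load as a list,
#so you may need: x<-BinomialExample$x; y<-BinomialExample$y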
#training data
squareit<-function(x){x^2}
x%>%
as.data.frame()%>%
dplyr::select(V11, V23, V30)%>%#select three variables
mutate_all(list(SQ = squareit))%>%#square them
as.matrix()%>%{.->>x1}#back to matrix
# x1[1:5,]
# V11 V23 V30 V11_SQ V23_SQ V30_SQ
# [1,] 0.5706039 0.01473546 0.9080937 0.3255888 0.0002171337 0.8246342
# [2,] 0.4138668 -0.81898245 -2.0492817 0.1712857 0.6707322586 4.1995554
# [3,] -0.4876853 0.03927982 -1.2089006 0.2378369 0.0015429039 1.4614407
# [4,] 0.4644753 -0.98728378 1.7796744 0.2157373 0.9747292699 3.1672408
# [5,] -0.5239595 -0.13288456 -1.1970731 0.2745336 0.0176583076 1.4329841
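#for clarity, a base-R sketch of what the pipe above is intended to produce
#(x1_check is just a throwaway name for this check):
x1_check<-cbind(x[,c(11,23,30)],x[,c(11,23,30)]^2)
colnames(x1_check)<-c("V11","V23","V30","V11_SQ","V23_SQ","V30_SQ")
all.equal(x1_check,x1,check.attributes=FALSE)#should be TRUE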
#example model + coefficients
set.seed(4484)
final.glmnet<-cv.glmnet(x1, y, type.measure="auc",alpha=1,family="binomial")
coef(final.glmnet,s="lambda.min")#print model coefficients
# (Intercept) 0.4097381
# V11 -0.4487949
# V23 0.3322128
# V30 -0.2924600
# V11_SQ -0.3334376
# V23_SQ -0.2867963
# V30_SQ 0.3864748
#get model formula - I'm sure there is a better way (a matrix-multiply sketch follows the printed formula below)
mc<-data.frame(as.matrix(coef(final.glmnet,s="lambda.min")))%>%
rownames_to_column(.,var="rowname")%>%mutate(rowname=recode(rowname,`(Intercept)`="1"))
paste("(",mc$rowname,"*",mc$X1,")",sep="",collapse = "+")
#model formula
# (1*0.409738094215867)+(V11*-0.44879489356345)+(V23*0.332212780157358)+
# (V30*-0.292459974168585)+(V11_SQ*-0.333437624504966)+
# (V23_SQ*-0.286796292157334)+(V30_SQ*0.386474813542322)
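#a sketch of what I think is an equivalent (and tidier) way to get the same link values:
#multiply [1, X] by the coefficient vector from coef(). Note this relies on the columns of
#x1 being in the same order as the coefficient rows (beta.min/link.by.matrix are just my names)
beta.min<-as.matrix(coef(final.glmnet,s="lambda.min"))#intercept first, then V11...V30_SQ
link.by.matrix<-drop(cbind(1,x1)%*%beta.min)
round(link.by.matrix[1:5],3)#matches the hand-coded values below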
#####predict on training data -- same predictions!
#1 predict using predict.cv.glmnet()
pred_cv.glmnet<-predict(final.glmnet, s=final.glmnet$lambda.min, newx=x1[1:5,], type='link')
round(pred_cv.glmnet,3)
#2 predict using model formula
pred.model.formula<-data.frame(x1[1:5,])%>%
mutate(link=(1*0.409738094215867)+(V11*-0.44879489356345)+
(V23*0.332212780157358)+(V30*-0.292459974168585)+
(V11_SQ*-0.333437624504966)+(V23_SQ*-0.286796292157334)+
(V30_SQ*0.386474813542322))%>%
dplyr::select(link)
round(pred.model.formula,3)
# 1 0.103
# 2 1.925
# 3 1.480
# 4 0.225
# 5 1.408
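#sanity check on the training rows: the two approaches agree, and plogis(link)
#matches type='response', which is what I'd expect for a binomial fit
all.equal(as.numeric(pred_cv.glmnet),pred.model.formula$link)
pred.response<-predict(final.glmnet,s=final.glmnet$lambda.min,newx=x1[1:5,],type='response')
all.equal(as.numeric(pred.response),plogis(pred.model.formula$link))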
#new data - set up for marginal effects plot
colnames(x1)
test<-data.frame(V23=seq(min(x1[,2]),max(x1[,2]),0.1))%>%#let V23 vary from min-max by .1
mutate(V23_SQ=V23^2, V11=0, V11_SQ=0, V30=0, V30_SQ=0)%>%#square V23 and set others to 0
as.matrix()
test[1:5,]
# > test[1:5,]
# V23 V23_SQ V11 V11_SQ V30 V30_SQ
# [1,] -2.071573 4.291414 0 0 0 0
# [2,] -1.971573 3.887100 0 0 0 0
# [3,] -1.871573 3.502785 0 0 0 0
# [4,] -1.771573 3.138470 0 0 0 0
# [5,] -1.671573 2.794156 0 0 0 0
#####predict on new data -- different predictions!
#1 predict using predict.cv.glmnet()
pred_cv.glmnet<-predict(final.glmnet, s=final.glmnet$lambda.min, newx=test[1:5,], type='link')
round(pred_cv.glmnet,3)
# [1,] 2.765
# [2,] 2.586
# [3,] 2.413
# [4,] 2.247
# [5,] 2.088
#2 predict using model formula
pred.model.formula<-data.frame(test[1:5,])%>%
mutate(link.longhand=(1*0.409738094215867)+(V11*-0.44879489356345)+
(V23*0.332212780157358)+(V30*-0.292459974168585)+
(V11_SQ*-0.333437624504966)+(V23_SQ*-0.286796292157334)+
(V30_SQ*0.386474813542322))%>%
dplyr::select(link.longhand)
round(pred.model.formula,3)
# 1 -1.509
# 2 -1.360
# 3 -1.217
# 4 -1.079
# 5 -0.947
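#side by side on the same five new rows (this is the mismatch I can't explain):
round(cbind(link.predict=as.numeric(pred_cv.glmnet),
            link.formula=pred.model.formula$link.longhand),3)
#      link.predict link.formula
# [1,]        2.765       -1.509
# [2,]        2.586       -1.360
# [3,]        2.413       -1.217
# [4,]        2.247       -1.079
# [5,]        2.088       -0.947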
question from:
https://stackoverflow.com/questions/65890622/how-does-predict-cv-glmnet-calculate-the-link-value-for-binomial-models