# Elastic-net regression on "210526dataATT.csv" using glmnet.
#
# Workflow:
#   1. Read the data; column 2 is the response, columns 3-202 the predictors.
#   2. Standardise the predictors.
#   3. Grid-search the mixing parameter alpha by cross-validation.
#   4. For the best alpha, pick lambda by cross-validation.
#   5. Refit at (best.alpha, best.lambda) and report in-sample RMSE and R^2.

library(readr)
library(glmnet)

# Seed the RNG so the cross-validation fold assignment is reproducible.
set.seed(1)

# ---- Data ----------------------------------------------------------------
dfc01 <- read_csv("210526dataATT.csv")
head(dfc01)
str(dfc01)

exp_vars <- as.matrix(dfc01[3:202])
scale_exp_vars <- scale(exp_vars)
head(scale_exp_vars)

target_var <- as.matrix(dfc01[2])

# ---- Cross-validated search over alpha -------------------------------------
alpha <- seq(0.001, 0.999, 0.001)

# One shared fold assignment for every cv.glmnet call. Without it each call
# draws its own random folds, so the CV errors of different alpha values are
# measured on different partitions and are not directly comparable.
n_folds <- 10
foldid <- sample(rep(seq_len(n_folds), length.out = nrow(scale_exp_vars)))

# Minimum mean cross-validated error for each alpha (vapply instead of
# growing a data frame with rbind() inside a loop).
cv_mse <- vapply(
  alpha,
  function(alpha_i) {
    m <- cv.glmnet(
      x = scale_exp_vars,
      y = target_var,
      family = "gaussian",
      alpha = alpha_i,
      foldid = foldid
    )
    min(m$cvm)
  },
  numeric(1)
)
mse.df <- data.frame(alpha = alpha, mse = cv_mse)
plot(mse.df)

# which.min() always returns exactly one index, even when several alpha
# values tie for the minimum error.
best.alpha <- mse.df$alpha[which.min(mse.df$mse)]
best.alpha

# ---- Regularisation path and lambda selection at the best alpha -----------
fitEN1 <- glmnet(
  x = scale_exp_vars,
  y = target_var,
  family = "gaussian",
  alpha = best.alpha
)
plot(fitEN1, xvar = "lambda", label = TRUE)

# Same folds as the alpha search, so lambda.min matches the selected alpha.
fitEN1CV <- cv.glmnet(
  x = scale_exp_vars,
  y = target_var,
  family = "gaussian",
  alpha = best.alpha,
  foldid = foldid
)
plot(fitEN1CV)

best.lambda <- fitEN1CV$lambda.min
best.lambda

# ---- Final model at (best.alpha, best.lambda) ------------------------------
fitEN2 <- glmnet(
  x = scale_exp_vars,
  y = target_var,
  family = "gaussian",
  lambda = best.lambda,
  alpha = best.alpha
)
fitEN2$beta
str(fitEN2$beta)
summary(fitEN2$beta)

# ---- In-sample predictions and diagnostics ---------------------------------
# Predictions are made on the same matrix the model was fitted on, so the
# metrics below are training-set (in-sample) figures.
est_target_var <- predict(fitEN2, newx = scale_exp_vars, type = "response")

# Axis limit with 10% headroom, taken over BOTH observed and predicted values
# so that no point is clipped from the plot.
a <- max(target_var, est_target_var) * (1 + 1 / 10)
plot(
  target_var, est_target_var,
  xlim = c(0, a), ylim = c(0, a),
  xlab = "true", ylab = "predict"
)

# Identity line (perfect prediction), drawn onto the existing plot.
x <- c(0, a)
y <- c(0, a)
lines(x, y)

RMSE <- sqrt(mean((target_var - est_target_var)^2))
RMSE

R_squared <- 1 -
  sum((target_var - est_target_var)^2) /
    sum((target_var - mean(target_var))^2)
R_squared