## ---- lasso-example ----
# glmnet can be installed directly from CRAN if needed:
# install.packages("glmnet", repos = "http://cran.us.r-project.org")
library(glmnet)
# The default model in glmnet is the Gaussian linear model
# ("least squares"), demonstrated here on a data set prepared
# beforehand. Users can either load their own data or use the
# data saved in the workspace.
# Print the working directory so it is clear where the .rda
# file is expected to live.
getwd()
# Loads an input matrix x and a response vector y from the
# saved R data archive.
load("Bardet.rda")
# Fit the whole lasso path with the most basic call.
# "fit" is an object of class glmnet holding all relevant
# information about the fitted model. Rather than extracting
# its components directly, use the provided methods —
# plot, print, coef and predict — which handle those tasks
# more elegantly.
fit <- glmnet(x, y)
# Coefficient paths: one curve per variable, plotted against
# the l1-norm of the whole coefficient vector as lambda
# varies. The axis above counts nonzero coefficients at the
# current lambda, which is the effective degrees of freedom
# (df) for the lasso. Pass label = TRUE to annotate the
# curves.
plot(fit)
# A per-step summary of the glmnet path: from left to right,
# the number of nonzero coefficients (Df), the fraction of
# deviance explained (%dev) and the value of lambda (Lambda).
# glmnet asks for 100 lambda values by default but stops
# early if %dev does not change sufficiently from one lambda
# to the next (typically near the end of the path).
print(fit)
# Actual coefficients at one or more lambda values within
# the range of the computed sequence.
coef0 <- coef(fit, s = 0.1)
# glmnet returns a sequence of models for the user to choose
# from; often one prefers the software to pick one.
# Cross-validation is perhaps the simplest and most widely
# used method for that task. cv.glmnet is the main function
# for cross-validation here, with supporting methods for
# plotting and prediction. We keep working with the sample
# data loaded above.
cvfit <- cv.glmnet(x, y)
# "cvfit" is a cv.glmnet object: a list with all the
# ingredients of the cross-validated fit. As with glmnet,
# prefer the provided functions over extracting components
# directly, except for viewing the selected lambdas.
# The plot shows the cross-validation curve (red dotted
# line) with upper/lower standard-deviation curves (error
# bars) along the lambda sequence; two selected lambdas are
# marked by vertical dotted lines.
plot(cvfit)
# lambda.min is the lambda giving the minimum mean
# cross-validated error. The other saved value, lambda.1se,
# gives the most regularized model whose error is within one
# standard error of that minimum; substitute it for
# lambda.min below to use it.
cvfit$lambda.min
# Coefficients at the CV-selected lambda. They come back in
# sparse-matrix format, since solutions along the
# regularization path are typically sparse and the sparse
# format is more efficient in time and space. Pipe through
# as.matrix() for a dense representation.
coef1 <- coef(cvfit, s = "lambda.min")
# Predictions from the fitted cv.glmnet object — a toy
# example. newx is the new input matrix and s, as before,
# gives the lambda value(s) at which predictions are made.
predict(cvfit, newx = x[1:5, ], s = "lambda.min")
## ---- adaptive-lasso-example ----
################################################
# penalty.factor example
################################################
# The penalty.factor argument applies a separate penalty
# multiplier to each coefficient. The default is 1 for every
# parameter; a factor of zero means that variable is not
# penalized at all. The factors are internally rescaled to
# sum to nvars. This is useful when prior knowledge or
# preference says some variables are so important they
# should stay in the model at all times — achieved by
# setting their penalty factors to 0:
p.fac <- rep(1, 200)
p.fac[c(31, 174, 151)] <- 0
pfit <- glmnet(x, y, penalty.factor = p.fac)
plot(pfit, xvar = "lambda", label = TRUE)
# The labels show that the three variables with zero penalty
# factors stay in the model for the whole path, while the
# others follow typical regularization paths and eventually
# shrink to 0.
################################################
# Adaptive lasso example
################################################
# Other useful arguments: `exclude` blocks variables from
# entering the model at all. One could simply subset them
# out of x, but exclude is often handier because it still
# returns a full-length coefficient vector, with the
# excluded entries set to zero. There is also an `intercept`
# argument (default TRUE); FALSE forces a zero intercept.
#
# The adaptive lasso needs a consistent first stage;
# Zou (2006) recommends OLS or ridge. Here the first stage
# is itself a cross-validated lasso.
first_stage <- cv.glmnet(x, y, family = "gaussian", alpha = 1)
# Second-stage weights come from the first-stage
# coefficients. coef() returns a sparse matrix, so densify,
# drop the intercept and flatten to a numeric vector.
beta_first <- as.matrix(coef(first_stage, s = "lambda.1se"))[-1, 1]
if (all(beta_first == 0)) {
  # Degenerate all-zero first stage: assign a tiny uniform
  # value instead, which amounts to penalizing the entire
  # second stage to zero.
  beta_first <- rep(.Machine$double.eps * 2, length(beta_first))
}
# Adaptive lasso weights: inverse absolute first-stage
# coefficients, floored to avoid division by zero.
adaptive_weights <- 1 / pmax(abs(beta_first), .Machine$double.eps)
# Second-stage lasso (the adaptive lasso): variables zeroed
# out in the first stage are excluded outright.
m_adlasso <- glmnet(x, y, family = "gaussian", alpha = 1,
                    exclude = which(beta_first == 0),
                    penalty.factor = adaptive_weights)
plot(m_adlasso)
## ---- elastic-net ----
################################################
# Elastic net example
################################################
# glmnet offers various options to customize the fit; some
# commonly used ones, all specified in the glmnet call:
# [family]  model family. "gaussian" (least squares) is the
#           default and is what we use here.
# [alpha]   elastic-net mixing parameter, with alpha in
#           [0, 1]: alpha = 1 is the lasso (default),
#           alpha = 0 is the ridge, values in between mix
#           the two penalties.
# [nlambda] number of lambda values in the sequence
#           (default 100). Thanks to the warm starts used in
#           the algorithm a long lambda sequence usually
#           comes at little extra cost, and for nonlinear
#           models it gives better convergence, so 100 or
#           more is recommended in practice.
# As an example we set alpha = 0.2, which behaves more like
# ridge regression, and keep every other option at its
# default.
fit <- glmnet(x, y, alpha = 0.2, family = "gaussian")
plot(fit, xvar = "lambda", label = TRUE)
## ---- group-lasso ----
################################################
# Group lasso example
################################################
# Install gglasso only when it is not already available, so
# re-running the script does not re-download the package.
if (!requireNamespace("gglasso", quietly = TRUE)) {
  install.packages("gglasso", repos = "http://cran.us.r-project.org")
}
# load gglasso library
library(gglasso)
# load the bardet data set shipped with gglasso
data(bardet)
# define 20 groups of 5 consecutive predictors each
group1 <- rep(1:20, each = 5)
# fit group-lasso penalized least squares along the path
m1 <- gglasso(x = bardet$x, y = bardet$y, group = group1, loss = "ls")
plot(m1)
# 5-fold cross-validation for the group-lasso penalized
# least-squares fit, using L2 prediction loss
cv <- cv.gglasso(x = bardet$x, y = bardet$y, group = group1, loss = "ls",
                 pred.loss = "L2", lambda.factor = 0.05, nfolds = 5)
plot(cv)
# load the colon data set and repeat with logistic loss
data(colon)
# define the group index the same way
group2 <- rep(1:20, each = 5)
# fit group-lasso penalized logistic regression
m2 <- gglasso(x = colon$x, y = colon$y, group = group2, loss = "logit")
plot(m2)
# 5-fold cross-validation using group-lasso penalized
# logistic regression with misclassification loss
cv2 <- cv.gglasso(x = colon$x, y = colon$y, group = group2, loss = "logit",
                  pred.loss = "misclass", lambda.factor = 0.05, nfolds = 5)
plot(cv2)
# coefficients at lambda = lambda.1se
# NOTE(review): this uses `cv` (the bardet least-squares
# fit); if the logistic fit was intended, it should use
# cv2 — confirm.
pre <- coef(cv$gglasso.fit, s = cv$lambda.1se)
## ---- simulation-demo-1 ----
# Compare lasso, adaptive lasso, SCAD and MCP penalties for
# logistic regression and least squares.
#
# Packages:
#   glmnet - lasso, adaptive lasso and elastic net for
#            penalized least squares and logistic regression
#            (all of which also support poisson, multinomial
#            and cox models)
#   ncvreg - SCAD- and MCP-penalized least squares and
#            logistic regression
#   MASS   - mvrnorm, used to simulate correlated predictors
library(glmnet)
library(ncvreg)
library(MASS)
##############################
# PART I Least squares
##############################
n <- 100  # number of observations
p <- 200  # number of predictors
# True coefficient vector: five nonzero entries, the rest 0.
truebeta <- c(4, 4, 4, -6 * sqrt(2), 4 / 3, rep(0, p - 5))
# Variance of the Gaussian noise term.
sigma2 <- 0.3
# Predictor covariance follows cov(X_j, X_k) = rho^|j - k|;
# the helper below builds that covariance matrix.
covmat <- function(rho, p) {
  # AR(1)-style correlation matrix whose (i, j) entry is
  # rho^|i - j|, built as a symmetric Toeplitz matrix from
  # its first row (rho^0, rho^1, ..., rho^(p-1)).
  stats::toeplitz(rho^(0:(p - 1)))
}
# rho = 0.1: weakly correlated design; build the covariance
# matrix for X.
sigma <- covmat(0.1,p)
# Simulate from the true model y = x %*% truebeta + epsilon,
# with x ~ N(0, sigma) and epsilon ~ N(0, sigma2).
# NOTE(review): there is no set.seed() call, so every run
# draws a different data set and the selected lambdas (and
# the comparison table below) will vary between runs.
x <- mvrnorm(n,rep(0,p),sigma)
epsilon <- rnorm(n,0,sd=sqrt(sigma2))
y <- x %*% truebeta + epsilon
# Fit the lasso path and select lambda by cross-validation.
# NOTE(review): nfolds is not set here, so cv.glmnet's
# default number of folds is used (not five as the original
# comment claimed) — confirm which was intended.
cvfit <- cv.glmnet(x = x, y = y, alpha = 1, family = "gaussian")
plot(cvfit)
# lambda selected by CV
cvfit$lambda.min
# Solution paths against log(lambda), variables labelled.
plot(cvfit$glmnet.fit,label=TRUE,xvar="lambda")
# solid vertical line at lambda.min
abline(v=log(cvfit$lambda.min),lty=1)
# dashed vertical line at lambda.1se
abline(v=log(cvfit$lambda.1se),lty=2)
# One row per method (plus the truth) for a side-by-side
# comparison of the estimated coefficient vectors.
model_compare <- matrix(NA, nrow=5, ncol=p,
dimnames=list(c("true model",
"lasso","adaptive lasso","mcp","scad"),
paste("V",seq(p),sep="")))
# save the true model
model_compare[1, ] <- truebeta
## coefficients estimated by lasso
# NOTE(review): this re-runs the same cross-validation as
# above; since CV folds are random, the selected lambda can
# differ from the earlier fit. Reusing the first cvfit would
# avoid the duplicated computation.
cvfit <- cv.glmnet(x = x, y = y, alpha = 1,
family = "gaussian")
tmp <- cvfit$glmnet.fit$beta
# Column of the coefficient-path matrix matching lambda.min;
# the == comparison is safe here because lambda.min is taken
# from the same lambda vector it is compared against.
lasso_beta <- as.matrix(tmp[,cvfit$lambda==cvfit$lambda.min])
model_compare[2, ] <- lasso_beta
# Adaptive-lasso weights: inverse squared lasso estimates.
weight = 1/(lasso_beta)^2
# Zero estimates give Inf weights; cap them at a large
# finite value (1e6) to prevent numerical errors.
weight[weight==Inf] = 1e6
# Refit with per-variable penalties — the adaptive lasso —
# using five-fold CV this time.
cvfit <- cv.glmnet(x = x, y = y, alpha = 1,
family = "gaussian",
nfolds = 5, penalty.factor=weight)
## coefficients estimated by adaptive lasso
tmp <- cvfit$glmnet.fit$beta
adaptive_lasso_beta <- as.matrix(tmp[,cvfit$lambda==cvfit$lambda.min])
model_compare[3, ] <- adaptive_lasso_beta
## coefficients estimated by MCP
cvfit <- cv.ncvreg(X = x, y = y, penalty = "MCP", family = "gaussian")
# cvfit$min appears to index the CV-selected lambda column
# (per ncvreg conventions — confirm against ncvreg docs);
# the first beta entry is the intercept, dropped below.
mcp_beta <- cvfit$fit$beta[, cvfit$min]
model_compare[4, ] <- mcp_beta[-1]
## coefficients estimated by SCAD
cvfit <- cv.ncvreg(X = x, y = y, penalty = "SCAD", family = "gaussian")
scad_beta <- cvfit$fit$beta[, cvfit$min]
model_compare[5, ] <- scad_beta[-1]
# Compare the estimated coefficients from the four methods:
# the lasso tends to over-select, while adaptive lasso, SCAD
# and MCP mitigate that problem.
model_compare
## ---- simulation-demo-2 ----
##############################
# PART II logistic regression
# Generate data from a sparse logistic model.
# NOTE(review): as in Part I, no set.seed() call, so results
# vary between runs.
n <- 200
p <- 8
# True coefficient vector: three nonzero entries out of 8.
truebeta <- c(6,3.5,0,5,rep(0,p-4))
truebeta
# Generate x and y from the true model:
# P(y = 1 | x) = exp(x %*% truebeta) / (1 + exp(x %*% truebeta))
x <- matrix(rnorm(n*p), n, p)
feta <- x %*% truebeta
# Numerically stable logistic transform: both branches are
# mathematically exp(feta)/(1+exp(feta)), but the split
# avoids overflow of exp(feta) for large positive feta.
fprob <- ifelse(feta < 0, exp(feta)/(1+exp(feta)), 1/(1 + exp(-feta)))
y <- rbinom(n, 1, fprob)
# One row per method (plus the truth) for a side-by-side
# comparison of the estimated coefficient vectors.
model_compare <- matrix(NA, nrow=5, ncol=p,
dimnames=list(c("true model","lasso",
"adaptive lasso","mcp","scad"),
paste("V",seq(p),sep="")))
# save the true model
model_compare[1, ] <- truebeta
# Lasso: cv.glmnet fits the lasso path and uses cross
# validation for lambda selection.
#   family = "binomial" -> logistic regression
#   family = "gaussian" -> least squares
#   alpha controls the elastic-net mix of the penalty:
#     alpha = 1 lasso, alpha = 0 ridge, in (0,1) elastic net
#   nfolds = 5 -> five-fold cross-validation (CV)
cvfit <- cv.glmnet(x = x, y = y, alpha = 1, family = "binomial", nfolds = 5)
# Plot the CV result: the left vertical line (lambda.min)
# corresponds to the lambda giving the smallest deviance;
# the right vertical line (lambda.1se) corresponds to the
# lambda from the one-standard-error rule.
plot(cvfit)
# plot the solution paths
plot(cvfit$glmnet.fit,label=TRUE,xvar="lambda")
# solid vertical line at lambda.min
abline(v=log(cvfit$lambda.min),lty=1)
# dashed vertical line at lambda.1se
abline(v=log(cvfit$lambda.1se),lty=2)
# Lasso coefficients along the solution path; each column is
# the estimate for one lambda value.
tmp <- cvfit$glmnet.fit$beta
tmp
# Column corresponding to the lambda.min selected by CV; the
# == comparison is safe because lambda.min is taken from the
# same lambda vector it is compared against.
lasso_beta <- as.matrix(tmp[,cvfit$lambda==cvfit$lambda.min])
model_compare[2, ] <- lasso_beta
# Adaptive-lasso weights: inverse squared lasso estimates.
weight = 1/(lasso_beta)^2
# Zero estimates give Inf weights; cap them at a large
# finite value (1e6) to prevent numerical errors.
weight[weight==Inf] = 1e6
# Refit with per-variable penalties — the adaptive lasso.
cvfit <- cv.glmnet(x = x, y = y, alpha = 1, family = "binomial",
nfolds = 5, penalty.factor=weight)
## coefficients estimated by adaptive lasso
tmp <- cvfit$glmnet.fit$beta
adaptive_lasso_beta <- as.matrix(tmp[,cvfit$lambda==cvfit$lambda.min])
model_compare[3, ] <- adaptive_lasso_beta
## coefficients estimated by MCP
cvfit <- cv.ncvreg(X = x, y = y, penalty = "MCP", family = "binomial")
# cvfit$min appears to index the CV-selected lambda column
# (per ncvreg conventions — confirm against ncvreg docs);
# the first beta entry is the intercept, dropped below.
mcp_beta <- cvfit$fit$beta[, cvfit$min]
model_compare[4, ] <- mcp_beta[-1]
## coefficients estimated by SCAD
cvfit <- cv.ncvreg(X = x, y = y, penalty = "SCAD", family = "binomial")
scad_beta <- cvfit$fit$beta[, cvfit$min]
model_compare[5, ] <- scad_beta[-1]
# make a comparison of the estimated coef. from four methods
model_compare