ConformalSmallest

This package implements two selection algorithms for conformal prediction regions to obtain the smallest prediction set in practice; these are called “efficiency first” and “validity first” conformal prediction algorithms, EFCP and VFCP for short. For details please refer to our paper.

Installation

You can install the released version of ConformalSmallest from CRAN with:

install.packages("ConformalSmallest")

Or directly from github

devtools::install_github("Elsa-Yang98/ConformalSmallest")

Example

This is a basic example which shows you how to solve a common problem:

library(ConformalSmallest)
## basic example code

Example 1: Tuning free ridge regression with conformal prediction

library(glmnet)
library(MASS)
library(mvtnorm)
source("ginverse.fun.R")
source("functions.R")
name=paste("linear_fm_t3",sep="")


df <- 3  #degrees of freedom
l <- 60    #number of dimensions 
l.lambda <- 100
lambda_seq <- seq(0,200,l=l.lambda)
dim <- round(seq(5,300,l=l))
alpha <- 0.1
n <- 200   #number of training samples
n0 <- 100  #number of prediction points
nrep <- 100 #number of independent trials
rho <- 0.5

cov.efcp <- len.efcp <- matrix(0,nrep,l)
cov.vfcp <- len.vfcp <- matrix(0,nrep,l)
cov.naive <- len.naive <- matrix(0,nrep,l)
cov.param <- len.param <- matrix(0,nrep,l)
cov.star <- len.star <- matrix(0,nrep,l)
cov.cv10 <- len.cv10 <- matrix(0,nrep,l)
cov.cv5 <- len.cv5 <- matrix(0,nrep,l)
cov.cvloo <- len.cvloo <- matrix(0,nrep,l)


out.efcp.up <- out.efcp.lo <- matrix(0,n0,l)
out.vfcp.up <- out.vfcp.lo <- matrix(0,n0,l)
out.naive.up <- out.naive.lo <- matrix(0,n0,l)
out.param.up <- out.param.lo <- matrix(0,n0,l)
out.star.up <- out.star.lo <- matrix(0,n0,l)
out.cv10.up <- out.cv10.lo <- matrix(0,n0,l)
out.cv5.up <- out.cv5.lo <- matrix(0,n0,l)
out.cvloo.up <- out.cvloo.lo <- matrix(0,n0,l)


for(i in 1:nrep){
  cat(i,"\n")
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    
    Sigma <- matrix(rho,d,d)
    diag(Sigma) <- rep(1,d)
    X <- rmvt(n,Sigma,df)	#multivariate t distribution
    beta <- rep(1:5,d/5)
    eps <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2))
    Y <- X%*%beta+eps
    
    
    X0 <- rmvt(n0,Sigma,df)
    eps0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2))
    Y0 <- X0%*%beta+eps0
    
    
    out.param <- ginverse.fun(X,Y,X0,alpha=alpha)
    out.param.lo[,r] <- out.param$lo
    out.param.up[,r] <- out.param$up    
    cov.param[i,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r]) 
    len.param[i,r] <- mean(out.param.up[,r]-out.param.lo[,r]) 
    
    
    out.efcp <- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha)
    out.efcp.up[,r] <- out.efcp$up
    out.efcp.lo[,r] <- out.efcp$lo    
    cov.efcp[i,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r]) 
    len.efcp[i,r] <- mean(out.efcp.up[,r]-out.efcp.lo[,r]) 
    
    
    out.vfcp <- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha)
    out.vfcp.up[,r] <- out.vfcp$up
    out.vfcp.lo[,r] <- out.vfcp$lo    
    cov.vfcp[i,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r]) 
    len.vfcp[i,r] <- mean(out.vfcp.up[,r]-out.vfcp.lo[,r]) 
    
    out.naive <- naive.fun(X,Y,X0,alpha=alpha)
    out.naive.up[,r] <- out.naive$up
    out.naive.lo[,r] <- out.naive$lo    
    cov.naive[i,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r]) 
    len.naive[i,r] <- mean(out.naive.up[,r]-out.naive.lo[,r]) 
    
    
    out.star <- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha)
    out.star.up[,r] <- out.star$up
    out.star.lo[,r] <- out.star$lo   
    cov.star[i,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r]) 
    len.star[i,r] <- mean(out.star.up[,r] - out.star.lo[,r]) 
    
    
    out.cv5 <- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5)
    out.cv5.up[,r] <- out.cv5$up
    out.cv5.lo[,r] <- out.cv5$lo    
    cov.cv5[i,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r]) 
    len.cv5[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r])
    
  }
}

df.cov <- data.frame(dim,apply(cov.param,2,mean),apply(cov.naive,2,mean),apply(cov.vfcp,2,mean),apply(cov.star,2,mean),apply(cov.cv5,2,mean), apply(cov.efcp,2,mean))

df.len <- data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean))

save(dim,cov.param, cov.naive, cov.vfcp, cov.star, cov.cv5, cov.efcp, file = "cov100_t3.RData" )
save(dim,len.param, len.naive, len.vfcp, len.star, len.cv5, len.efcp, file = "len100_t3.RData" )

Example 2: Tuning free conformal quantile regression with random forest

This output the right panal of Figure 1 in our paper.

df <- 3
d <- 3
l.lambda <- 100
lambda_seq <- seq(0,200,l=l.lambda)
nset <- c(50,100,500,1000,5000)
alpha <- 0.1 #miscoverage level
n0 <- 100  #number of prediction points
nrep <- 100 #number of independent trials
rho <- 0.5

evaluations <- expand.grid(1:nrep, nset, c("efficient", "valid"))
no_eval <- nrow(evaluations)
width_mat <- cov_mat <- data.frame(number = rep(0, no_eval), 
                                   rep = evaluations[,1], 
                                   nset = evaluations[,2],
                                   method = evaluations[,3])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "method")

Sigma <- matrix(rho,d,d)
diag(Sigma) <- rep(1,d) #covariance matrix for X


for(idx in 1:nrow(evaluations)){
  set.seed(evaluations[idx, 1])
  if(idx%%1 == 0){
    print(idx)
  }
  n <- evaluations[idx, 2]  
  
  X <- rmvt(n,Sigma,df)	#multivariate t distribution
  
  eps1 <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2))
  eps2 <- rt(n,df)*(1+sqrt(X[,1]^4+X[,2]^4))
  Y <- rpois(n,sin(X[,1])^2 + cos(X[,2])^4+0.01 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01)*eps2
  
  X0 <- rmvt(n0,Sigma,df)
  eps01 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2))
  eps02 <- rt(n0,df)*(1+sqrt(X0[,1]^4+X0[,2]^4))
  Y0 <- rpois(n0,sin(X0[,1])^2 + cos(X0[,2])^4+0.01 )+0.03*X0[,1]*eps01+25*(runif(n0,0,1)<0.01)*eps02
  
  width_mat[idx,3] <- cov_mat[idx, 3] <- n
  method <- evaluations[idx, 3]
  width_mat[idx,4] <- cov_mat[idx, 4] <- method
  width_mat[idx, 2] <- cov_mat[idx, 2] <- evaluations[idx, 1]
  
  if(method == "valid"){
    split <- c(1/2, 1/2)
  } else {
    split <- 1/2
  }  
  
  beta_grid <- seq(1e-03, 4, length = 20)*alpha
  mtry_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
  ntree_grid <- seq(50, 400, by = 50)
  
  tmp <- try(conf_CQR_reg(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
  
  while (class(tmp)=="try-error"){
    
    tmp <- try(conf_CQR_reg(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
    
  }
  width_mat[idx, 1] <- tmp$width
  cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0))
}



par(mfrow <- c(1,2))
width_efcp <- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
#sd_efcp <- sd_vfcp <- NULL
for(n in nset){
  TMP <- width_mat[evaluations[,3] == "efficient", ]
  TMP_prime <- TMP[TMP[,3] == n,]
  
  TMP <- width_mat[evaluations[,3] == "valid", ]
  TMP_prime_vfcp <- TMP[TMP[,3] == n,]
  TMP_prime_vfcp_clean =TMP_prime_vfcp[ TMP_prime_vfcp[,1]<=10^5,1]
  
  
  width_efcp <- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
  sd_width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
  #sd_efcp  = c(sd_efcp , sd(TMP_prime[,1])/sqrt(nrep) )
  
  width_vfcp <- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
  sd_width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
  #sd_vfcp  = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
  
}




#plot(dim, width_efcp, type = 'l', ylim = range(c(width_efcp+sd_efcp)), lwd = 2)
plot(nset, width_efcp, type = 'l', ylim =c(-10,25), lwd = 2)
lines(nset, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(nset, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(nset, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
lines(nset, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(nset, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
abline(h = 1)

cov_efcp <- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
for(n in nset){
  TMP <- cov_mat[evaluations[,3] == "efficient", ]
  TMP_prime <- TMP[TMP[,3] == n,]
  cov_efcp <- c( cov_efcp, mean(TMP_prime[,1] ) )
  sd_cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))
  
  TMP <- cov_mat[evaluations[,3] == "valid", ]
  TMP_prime <- TMP[TMP[,3] == n,]
  cov_vfcp <- c(cov_vfcp, mean(TMP_prime[,1]))
  sd_cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))
}
plot(nset, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2)
lines(nset, cov_vfcp, type = 'l', col = "red", lwd = 2)
abline(h = 1-alpha)

save(nset,nrep,width_mat, cov_mat, evaluations, width_efcp, sd_cov_efcp, sd_width_efcp,width_vfcp, sd_cov_vfcp,sd_width_vfcp, cov_efcp, cov_vfcp, alpha, file = "pois-100-repetitions.RData" )

Example 3: Conditional coverage and width for EFCP, VFCP and CQR

This output the right panal of Figure 1 in our paper.

df = 3
d = 1 #x is of one dimension
nset = c(400) #number of training sample
x_test = seq(0,5,by=0.2) # a grid of test points for x
alpha = 0.1 #miscoverage level
nrep = 100 #number of independent trials
nrep2 = 100 #number of test samples y for each test prediction sample x


evaluations <- expand.grid(1:nrep, nset, x_test, c("efficient", "valid","CQR"))
no_eval <- nrow(evaluations)
width_mat <- cov_mat <- data.frame(number = rep(0, no_eval), 
                                   rep = evaluations[,1], 
                                   nset = evaluations[,2],
                                   X_test = evaluations[,3],
                                   method = evaluations[,4])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method")

for(idx in 1:nrow(evaluations)){
  set.seed(evaluations[idx, 1])
  if(idx%%1 == 0){
    print(idx)
  }
  n <- evaluations[idx, 2] 
  x0 = evaluations[idx, 3]
  

  X = as.matrix(runif(n,0,5))	
  eps1 = rnorm(n)
  eps2 = rnorm(n)
  Y = rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01*eps2)

  X0 = as.matrix( rep(x0,nrep2) )
  eps01 = rnorm(nrep2)
  eps02 = rnorm(nrep2)
  Y0 = rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01*eps02)
  
  width_mat[idx,3] <- cov_mat[idx, 3] <- n
  method <- evaluations[idx, 4]
  #width_mat[idx,5] <- cov_mat[idx, 5] <- method
  #width_mat[idx, 2] <- cov_mat[idx, 2] <- evaluations[idx, 1]
  if (method =="CQR"){
    beta_fixed = 0.05
    mtry_fixed = 1
    ntree_fixed = 100
    
    tmp = try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha))
    
    while (class(tmp)=="try-error"){
      
      tmp = try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE)
      
    }
    width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[2]])
    cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
  }else{ if(method == "valid"){
    split <- c(1/2, 1/2)
  } else {
    split <- 1/2
  }  
  
  beta_grid <- seq(1e-03, 4, length = 20)*alpha
  mtry_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
  ntree_grid <- seq(50, 400, by = 50)
  
  tmp = try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
  
  while (class(tmp)=="try-error"){
    
    tmp = try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
    
  }
  width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[2]])
  cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
  }
}



par(mfrow = c(1,2))
width_cqr <- sd_width_cqr  <-  width_efcp <- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
#sd_efcp <- sd_vfcp <- NULL
for(x in x_test){
  TMP <- width_mat[evaluations[,4] == "efficient", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  
  TMP <- width_mat[evaluations[,4] == "valid", ]
  TMP_prime_vfcp <- TMP[TMP[,4] == x,]
  TMP_prime_vfcp_clean =TMP_prime_vfcp[ TMP_prime_vfcp[,1]<=10^5,1]
  
  TMP <- width_mat[evaluations[,4] == "CQR", ]
  TMP_prime_cqr <- TMP[TMP[,4] == x,]
  
  
  width_efcp <- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
  sd_width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
  #sd_efcp  = c(sd_efcp , sd(TMP_prime[,1])/sqrt(nrep) )
  
  width_vfcp <- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
  sd_width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
  #sd_vfcp  = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
  
  width_cqr <- c(width_cqr, mean(TMP_prime_cqr[,1] / TMP_prime_vfcp[,1]))
  sd_width_cqr <- c(sd_width_cqr, sd(TMP_prime_cqr[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
  #sd_vfcp  = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
  
}




#plot(dim, width_efcp, type = 'l', ylim = range(c(width_efcp+sd_efcp)), lwd = 2)
plot(x_test, width_efcp, type = 'l', ylim =c(-5,20), lwd = 2)
lines(x_test, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
lines(x_test, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_cqr, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "blue")
lines(x_test, width_cqr - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "blue")
lines(x_test, width_cqr + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "blue")
abline(h = 1)

cov_cqr <-sd_cov_cqr <-cov_efcp <- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
for(x in x_test){
  TMP <- cov_mat[evaluations[,4] == "efficient", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  cov_efcp <- c( cov_efcp, mean(TMP_prime[,1] ) )
  sd_cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))
  
  TMP <- cov_mat[evaluations[,4] == "valid", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  cov_vfcp <- c(cov_vfcp, mean(TMP_prime[,1]))
  sd_cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))
  
  TMP <- cov_mat[evaluations[,4] == "CQR", ]
  TMP_prime <- TMP[TMP[,4] == x,]
  cov_cqr <- c(cov_cqr, mean(TMP_prime[,1]))
  sd_cov_cqr <- c(sd_cov_cqr, sd(TMP_prime[,1])/sqrt(nrep))
}
plot(x_test, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2)
lines(x_test, cov_vfcp, type = 'l', col = "red", lwd = 2)
lines(x_test, cov_cqr, type = 'l', col = "blue", lwd = 2)
legend(0,0.2, legend=c("EFCP", "VFCP","CQR"),
       col=c("black","red", "blue"), lty=1, cex=0.8)
abline(h = 1-alpha)

Name		Name	Last commit message	Last commit date
Latest commit History 1 Commit
R		R
build		build
data		data
inst/doc		inst/doc
man		man
tests/testthat		tests/testthat
vignettes		vignettes
DESCRIPTION		DESCRIPTION
MD5		MD5
NAMESPACE		NAMESPACE
README.md		README.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Repository files navigation

ConformalSmallest

This package implements two selection algorithms for conformal prediction regions to obtain the smallest prediction set in practice; these are called “efficiency first” and “validity first” conformal prediction algorithms, EFCP and VFCP for short. For details please refer to our paper.

Installation

Example

Example 1: Tuning free ridge regression with conformal prediction

Example 2: Tuning free conformal quantile regression with random forest

Example 3: Conditional coverage and width for EFCP, VFCP and CQR

About

Uh oh!

Releases

Packages

Languages

cran/ConformalSmallest

Folders and files

Latest commit

History

Repository files navigation

ConformalSmallest

This package implements two selection algorithms for conformal prediction regions to obtain the smallest prediction set in practice; these are called “efficiency first” and “validity first” conformal prediction algorithms, EFCP and VFCP for short. For details please refer to our paper.

Installation

Example

Example 1: Tuning free ridge regression with conformal prediction

Example 2: Tuning free conformal quantile regression with random forest

Example 3: Conditional coverage and width for EFCP, VFCP and CQR

About

Resources

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages