The cpasimulations from quantlet

simulations

Name of Quantlet: simulations

Published in: submitted to N/A 

Description: ‘Performs the Penalized Adaptive Method (PAM), a combination of propagation-separation approach and a SCAD penalty, to fit a model to a simulated data with two pre-defined change points. Computes the percentage of correctly identifing the homogeneous intervals.’

Keywords: ‘linear model, regression, SCAD penalty, bic, change points, simulations, bootstrap’

Author: Lenka Zboňáková; Xinjue Li

Submitted:  25 Nov. 2018 

Input: 
- n.obs   : Number of observations
- n.par   : Number of parameters
- n.sim   : Number of simulated scenarios
- r       : Correlation parameter of the design matrix X
- sd.eps  : Standard deviation of the error term
- cp1.seq : Sequence of change points
- n.boot  : Number of bootstrap loops
- K       : Sequence of increments between adjacent subintervals
- mb.type : Distribution of multipliers (Bound, Exp or Pois)

R Code

# Clear all variables
rm(list = ls(all = TRUE))
graphics.off()

# Set directory
setwd("C:/Users/xinjueli/PAM/simulations")

# Install and load packages
libraries = c("MASS", "bbmle", "glmnet", "doParallel", "LaplacesDemon", "optimx",
              "lars", "scales", "tilting")
lapply(libraries, function(x) if (!(x %in% installed.packages())) {
  install.packages(x)} )
lapply(libraries, library, quietly = TRUE, character.only = TRUE)

source("PAMfunc.r")

# Simulation setup
n.obs       = 500                   # No of observations
n.par       = 10                    # No  of parameters
n.sim       = 50                    # No of simulations
seed1       = 20171110              # Seed simulation X
seed2       = 20170602              # Seed simulation epsilon
r           = 0.5                   # Correlation parameter of X
sd.eps      = 1                     # Standard deviation of epsilon
cp1.seq     = matrix(c(50,150, 100,150), nrow = 2, ncol = 2, byrow = TRUE)              # Define change points
a           = 3.7                   # Recommended value of parameter a for SCAD
n.boot      = 10                    # Number of bootstrapped multipliers
K           = 50                    # Increment between adjacent sub-intervals
M           = n.obs/K               # Number of sub-intervals
mb.type     = "Exp"               # Bound, Exp or Pois
sel         = 0                     # Position of s

# Initiate cluster for parallel computing
n.cores = detectCores()           # Number of cores to be used
cl      = makeCluster(n.cores)
registerDoParallel(cl)
getDoParWorkers()

# Define number of scenarios in every core
n.simcores = rep((n.sim %/% n.cores), n.cores)
h.simcores = n.sim %% n.cores
if (h.simcores != 0){
  n.simcores[1:h.simcores] = n.simcores[1:h.simcores] + 1
}

# True beta coefficients with change in t = cp1
tmp1.1  = rep(1, 5)
tmp1.2  = rep(0, n.par - length(tmp1.1))
b1      = c(tmp1.1, tmp1.2)

tmp2.1  = rep(1, 3)
tmp2.2  = rep(0, n.par - length(tmp2.1))
b2      = c(tmp2.1, tmp2.2)

tmp3.1  = rep(1, 5)
tmp3.2  = rep(0, n.par - length(tmp1.1))
b3      = c(tmp3.1, tmp3.2)

# Simulation of the design matrix
mu    = rep(0, n.par)
Sigma = matrix(0, nrow = n.par, ncol = n.par)

for (i in 1:n.par) {
  for (j in 1:n.par) {
    if (i == j){
      Sigma[i, j] = 1
    }else {
      Sigma[i, j] = r^abs(i - j)
    }
  }
}

X = list()
for (i in 1:n.sim){
  set.seed(seed1)
  X[[i]] = mvrnorm(n = n.obs, mu, Sigma)
}  

# Simulation of the error term for t = 1, ..., n.obs
eps  = list()
set.seed(seed2)
for (i in 1:n.sim){
  eps[[i]] = rnorm(n.obs, mean = 0, sd = sd.eps)
} 

# Function for computing Y for t = 1, ..., n.obs
Y.sim = function(cp1, cp2){
  # Computation of Y for t = 1, ..., cp1
  Y1    = list()
  for (i in 1:n.sim){
    Y.tmp = numeric(0)
    for (j in 1:cp1){
      Y.tmp = c(Y.tmp, b1 %*% X[[i]][j, ] + eps[[i]][j])
    }
    Y1[[i]] = Y.tmp
  }
  
  # Computation of Y for t = cp1, ..., cp2
  Y2    = list()
  for (i in 1:n.sim){
    Y.tmp = numeric(0)
    for (j in (cp1 + 1):cp2){
      Y.tmp = c(Y.tmp, b2 %*% X[[i]][j, ] + eps[[i]][j])
    }
    Y2[[i]] = Y.tmp
  }
  
  # Computation of Y for t = cp2, ..., n.obs
  Y3    = list()
  for (i in 1:n.sim){
    Y.tmp = numeric(0)
    for (j in (cp2 + 1):n.obs){
      Y.tmp = c(Y.tmp, b3 %*% X[[i]][j, ] + eps[[i]][j])
    }
    Y3[[i]] = Y.tmp
  }
  
  Y   = list()
  for (i in 1:n.sim){
    Y[[i]] = c(Y1[[i]], Y2[[i]], Y3[[i]])
  }
  Y
}

# Simulating mutlipliers from a bounded distribution
runif.mod = function(n){
  unif1    = runif(n, min = 0, max = 12)
  unif.a   = unif1[which(unif1 <= 1)]
  index.a  = which(unif1 <= 1)
  unif.b   = unif1[which((1 < unif1) & (unif1 <= 4))]
  index.b  = which((1 < unif1) & (unif1 <= 4))
  unif.c   = (unif1[which(unif1 > 4)] - 4)/8
  index.c  = which(unif1 > 4)
  unif2    = cbind(c(index.a, index.b, index.c), c(unif.a, unif.b, unif.c))
  unif.fin = unif2[order(unif2[, 1], decreasing = FALSE), 2]
  unif.fin
}



sim.change.points = function(interval.in, s, mb.type = c("Bound", "Exp", "Pois")){
  
    b.pts          = numeric()
    ratio.sele     = numeric()
    Beta.r         = numeric()
    interval.index = numeric()
    k           = interval.in+1
    k1          = interval.in
    k2          = interval.in+1
    beta        = list()
    beta0.tmp   = list()
    a0          = list()
    lambda      = list()
    b.pts.tmp   = 0
    
    # 1.step: Fit the model for assumed homogeneous interval I_t^(1)
    X.tmp.L        = X[[s]][((k1 * K) + 1):(k2 * K), ]
    Y.tmp.L        = Y[[s]][((k1 * K) + 1):(k2 * K)]
    n.obs.tmp.L    = length(Y.tmp.L)
    object.tmp.L   = Onestep.SCAD(X.tmp.L, Y.tmp.L, a, n.obs.tmp.L)
    beta[[k]]      = object.tmp.L$beta
    a0[[k]]        = object.tmp.L$a
    lambda[[k]]    = object.tmp.L$lambda
    beta0.tmp[[k]] = object.tmp.L$beta.0
    
    while (k < M){
      # Fit the model over the next interval I_t^(k + 1) - I_t^(k)
      X.tmp.R      = X[[s]][((k2 * K) + 1):((k2 + 1) * K), ]
      Y.tmp.R      = Y[[s]][((k2 * K) + 1):((k2 + 1) * K)]
      n.obs.tmp.R  = length(Y.tmp.R)
      object.tmp.R = Onestep.SCAD(X.tmp.R, Y.tmp.R, a, n.obs.tmp.R)
      beta.tmp.R   = object.tmp.R$beta
      a.tmp.R      = object.tmp.R$a
      lambda.tmp.R = object.tmp.R$lambda
      beta0.tmp.R  = object.tmp.R$beta.0
      
      # Fit the model over the whole interval I_t^(k + 1)
      X.tmp.T      = X[[s]][((k1 * K) + 1):((k2 + 1) * K), ]
      Y.tmp.T      = Y[[s]][((k1 * K) + 1):((k2 + 1) * K)]
      n.obs.tmp.T  = length(Y.tmp.T)
      object.tmp.T = Onestep.SCAD(X.tmp.T, Y.tmp.T, a, n.obs.tmp.T)
      beta.tmp.T   = object.tmp.T$beta
      a.tmp.T      = object.tmp.T$a
      lambda.tmp.T = object.tmp.T$lambda
      beta0.tmp.T  = object.tmp.T$beta.0
      
      lik.T        = loglik.pen(a.tmp.T, beta0.tmp.T, beta.tmp.T, lambda.tmp.T, 
                                X.tmp.T, Y.tmp.T)
      
      # Simulation of multipliers u_i (i = 1, ..., n.obs.tmp.T)
      set.seed        = 20170424 * k * s * 10
      
      if (mb.type == "Bound"){
        multipliers   = matrix(runif.mod(n.boot * n.obs.tmp.T),      
                               ncol = n.obs.tmp.T, nrow = n.boot) # Bounded distribution
      }
      if (mb.type == "Exp"){
        multipliers   = matrix(rexp(n.boot * n.obs.tmp.T, rate = 1), 
                               ncol = n.obs.tmp.T, nrow = n.boot) # Exp(1) distribution
      }
      if (mb.type == "Pois"){
        multipliers   = matrix(rpois(n.boot * n.obs.tmp.T, lambda = 1), 
                               ncol = n.obs.tmp.T, nrow = n.boot) # Pois(1) distribution
      }
      
      MB.ratios      = numeric(0)
      lik.sel        = numeric(0)
      
      
        lik.ratio    = numeric(0)
        
        # 1.step: Fit the model for assumed homogeneous interval I_t^(k,s)
        X.sel.L      = X[[s]][((k1 * K) + 1):(k2 * K ), ]
        Y.sel.L      = Y[[s]][((k1 * K) + 1):(k2 * K )]
        n.obs.sel.L  = length(Y.sel.L)
        object.sel.L = Onestep.SCAD(X.sel.L, Y.sel.L, a, n.obs.sel.L)
        beta.sel.L   = object.sel.L$beta
        a.sel.L      = object.sel.L$a
        lambda.sel.L = object.sel.L$lambda
        beta0.sel.L  = object.sel.L$beta.0
        
        # 2. a) step: Fit the model over the next interval I_t^(k + 1,s) - I_t^(k,s)
        X.sel.R      = X[[s]][((k2 * K) + 1 ):((k2 + 1) * K), ]
        Y.sel.R      = Y[[s]][((k2 * K) + 1 ):((k2 + 1) * K)]
        n.obs.sel.R  = length(Y.sel.R)
        object.sel.R = Onestep.SCAD(X.sel.R, Y.sel.R, a, n.obs.sel.R)
        beta.sel.R   = object.sel.R$beta
        a.sel.R      = object.sel.R$a
        lambda.sel.R = object.sel.R$lambda
        beta0.sel.R  = object.sel.R$beta.0
        
        # 2. b) step: Fit the model over the whole interval I_t^(k + 1,s)
        X.sel.T      = X.tmp.T
        Y.sel.T      = Y.tmp.T
        n.obs.sel.T  = n.obs.tmp.T
        
        # 3.step: Evaluate the test statistic
        # Real likelihood ratio
        lik.sel.L    = loglik.pen(a.sel.L, beta0.sel.L, beta.sel.L, lambda.sel.L, 
                                  X.sel.L, Y.sel.L)
        lik.sel.R    = loglik.pen(a.sel.R, beta0.sel.R, beta.sel.R, lambda.sel.R, 
                                  X.sel.R, Y.sel.R)
        lik.sel.T    = lik.T
        
        lik.sel      = c(lik.sel, (((n.obs.sel.L/n.obs.sel.T) * lik.sel.L)
                                   + ((n.obs.sel.R/n.obs.sel.T) * lik.sel.R) 
                                   - lik.sel.T))
        
        # 4.step: Find quantiles of LR with use of MB
        for (l in 1:(n.boot)){
          # Multiplier bootstrap for left-hand interval I_t^(k)
          multipl.L      = multipliers[l, 1:n.obs.sel.L]
          
          boot.object.L  = Onestep.SCAD.MB(X.sel.L, Y.sel.L, a, n.obs.sel.L, 
                                           as.numeric(lambda.sel.L), multipl.L)
          
          boot.beta.L    = boot.object.L$beta
          boot.a.L       = boot.object.L$a
          boot.a0.L      = boot.object.L$a.0
          boot.beta0.L   = boot.object.L$beta.0
          
          lik.boot.L     = loglik.pen.MB(boot.a.L, boot.beta0.L, boot.beta.L, 
                                         as.numeric(lambda.sel.L), X.sel.L, Y.sel.L, 
                                         multipl.L)
          
          # Multiplier bootstrap for right-hand interval I_t^(k + 1) - I_t^(k)
          multipl.R      = multipliers[l, (n.obs.sel.L + 1):n.obs.sel.T]
          
          boot.object.R  = Onestep.SCAD.MB(X.sel.R, Y.sel.R, a, n.obs.sel.R, 
                                           as.numeric(lambda.sel.R), multipl.R)
          
          boot.beta.R    = boot.object.R$beta
          boot.a.R       = boot.object.R$a
          boot.a0.R      = boot.object.R$a.0
          boot.beta0.R   = boot.object.R$beta.0
          
          lik.boot.R     = loglik.pen.MB(boot.a.R, boot.beta0.R, boot.beta.R, 
                                         as.numeric(lambda.sel.R), X.sel.R, Y.sel.R, 
                                         multipl.R)
          
          # Multiplier bootstrap for the whole interval I_t^(k + 1) (with shift)
          multipl        = multipliers[l, ]
          
          boot.object.T  = Onestep.SCAD.MB.shift(X.sel.L, Y.sel.L, X.sel.R, Y.sel.R, a, 
                                                 lambda.sel.L, lambda.sel.R, multipl.L, 
                                                 multipl.R, beta.sel.L, beta.sel.R, 
                                                 a.sel.L, a.sel.R)
          
          boot.beta.T    = boot.object.T$beta
          boot.a.T       = boot.object.T$a
          boot.a0.T      = boot.object.T$a.0
          boot.lambda.T  = boot.object.T$lambda
          boot.beta0.T   = boot.object.T$beta.0
          
          Y.boot.T       = c(Y.sel.L, (Y.sel.R - rep((a.sel.R - a.sel.L), n.obs.sel.R) 
                                       - X.sel.R %*% (beta.sel.R - beta.sel.L)))
          
          lik.boot.T     = loglik.pen.MB(boot.a.T, boot.beta0.T, boot.beta.T, 
                                         boot.lambda.T, X.sel.T, Y.boot.T, multipl)
          
          lik.ratio      = c(lik.ratio, ((n.obs.sel.L/n.obs.sel.T) * lik.boot.L 
                                         + (n.obs.sel.R/n.obs.sel.T) * lik.boot.R 
                                         - lik.boot.T))
        }
        
        MB.ratios = cbind(MB.ratios, lik.ratio)
      
      
      # Find maximum over the real and bootstrapped likelihood ratios
      t.stat        = max(lik.sel)
      MB.ratios.max = apply(MB.ratios, 1, max)
      
      q90 = quantile(MB.ratios.max, probs = 1-(0.1*k/M))
      q95 = quantile(MB.ratios.max, probs = 1-(0.05*k/M))
      
      k2  = k2 + 1
      k   = k + 1
      
      # 5.step: Test for homogeneity and evaluate current beta estimator
      if (t.stat <= q95){
        for (k3 in (k1 + 1):k){
          beta[[k3]]   = beta.tmp.T
          a0[[k3]]     = a.tmp.T
          lambda[[k3]] = lambda.tmp.T
        }
      } else {
        b.pts.tmp      = (k - 1) * K + 1
        k1             = k2 - 1
        beta[[k]]      = beta.tmp.R
        a0[[k]]        = a.tmp.R
        lambda[[k]]    = lambda.tmp.R
        index          = which(beta[[(k)]]<= 0.2*max(beta[[(k)]]))
        BETA           = beta[[(k)]]
        BETA[index]    = 0
        if(k<=2){
          ratio        = 1-(length(which(BETA[6:10]!= 0))/length(b1))}
        if(k>2 && k<=3){
          ratio        = 1-(length(which(BETA[4:10]!= 0))/length(b1))}
        else{
          ratio          = 1-(length(which(BETA[6:10]!= 0))/length(b1))}
        break
      } 
    }
    
    if(b.pts.tmp == 0){
     sim.no                    = s
     b.pts[sim.no]           = 500
     index                     = which(beta.tmp.T <= 0.4*max(beta.tmp.T))
     Beta.r[sim.no]          = beta.tmp.T
     Beta.r[sim.no][index]   = 0
     ratio.sele[sim.no]      = 1-(length(which((Beta.r[[sim.no]][6:10])!= 0))/length(b1))
     interval.index[sim.no]  = k
     value                     = list(b.pts[sim.no], ratio.sele[sim.no], Beta.r[sim.no], interval.index[sim.no])
     return(value)
    }
    else{
    Sys.time()
    sim.no                   = s
    b.pts[sim.no]          = b.pts.tmp 
    ratio.sele[sim.no]     = ratio
    Beta.r[sim.no]         = BETA
    interval.index[sim.no] = k-1
    value                    = list(b.pts[sim.no], ratio.sele[sim.no], Beta.r[sim.no], interval.index[sim.no])
     return(value)
    }
}

results = function(h){
value.interval.1 = list()
value.ratio.1    = list()
a1               = matrix(0, 50, 2)
a2               = matrix(0, 50, 2)
s1 = ifelse(h == 1, 1, sum(n.simcores[1:(h - 1)],1))
s2 = sum(n.simcores[1:h])
for(i in s1:s2){
value.interval        = list()
value.ratio           = list()
values1               = sim.change.points(0, i, mb.type)
j                     = values1[[4]]
value.interval        = j
value.ratio           = values1[[2]]
while(j < 9){
values1               = sim.change.points(j, i, mb.type)
value.interval        = c(value.interval, values1[[4]])
value.ratio           = c(value.ratio, values1[[2]])
j                     = values1[[4]]}
value.interval.1[[i]] = value.interval
if(c(2, 3)%in%value.interval == c(T,T)){a1[i, ]=c(1,1)}
else{a1[i, ]=c(0,0)}
value.ratio.1[[i]]    = value.ratio
if(c(2, 3)%in%value.interval == c(T,T)){
a2[i, ] = value.ratio[c(1,2)]}
else{a2[i, ]=c(0,0)}
}
Value.final           = list(a1[s1:s2, ], a2[s1:s2, ])
names(Value.final)    = list("interval.index", "selection.v.ratio")
return(Value.final)
}

out.sum    = list()
final.list = list() 
Sys.time()
Y   = Y.sim(100, 150)
  final.chpts       = foreach(ipar = 1:n.cores, 
                              .packages = c("MASS", "bbmle", "glmnet", "doParallel", 
                                            "LaplacesDemon", "optimx", "lars", "scales", 
                                            "tilting"), 
                              .combine = 'c') %dopar% results(ipar)   
  final.list = final.chpts
Sys.time()
stopCluster(cl)

subinterval     = final.list[[1]]
for(k in 2:n.cores){
  subinterval   = rbind(subinterval, final.list[[(2*k-1)]])
}

Ratio.select    = final.list[[2]]
for(k in 2:n.cores){
  Ratio.select  = rbind(Ratio.select, final.list[[(2*k)]])
}

corr.ratio.structural.break.1 = sum(subinterval[, 1])/n.sim
corr.ratio.structural.break.2 = sum(subinterval[, 2])/n.sim

corr.ratio.v.selection.1      = mean(Ratio.select[, 1])
corr.ratio.v.selection.2      = mean(Ratio.select[, 2])

corr.ratio.structural.break.1
corr.ratio.structural.break.2
corr.ratio.v.selection.1 
corr.ratio.v.selection.2

automatically created on 2019-11-26

quantlet / cpasimulations Goto Github PK

cpasimulations's Introduction

simulations

R Code

cpasimulations's People

Contributors

Watchers

Forkers

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent