Giter VIP home page Giter VIP logo

cpasimulations's Introduction

Visit QuantNet

Visit QuantNet simulations Visit QuantNet 2.0

Name of Quantlet: simulations

Published in: submitted to N/A 

Description: ‘Performs the Penalized Adaptive Method (PAM), a combination of propagation-separation approach and a SCAD penalty, to fit a model to a simulated data with two pre-defined change points. Computes the percentage of correctly identifing the homogeneous intervals.’

Keywords: ‘linear model, regression, SCAD penalty, bic, change points, simulations, bootstrap’

Author: Lenka Zboňáková; Xinjue Li

Submitted:  25 Nov. 2018 

Input: 
- n.obs   : Number of observations
- n.par   : Number of parameters
- n.sim   : Number of simulated scenarios
- r       : Correlation parameter of the design matrix X
- sd.eps  : Standard deviation of the error term
- cp1.seq : Sequence of change points
- n.boot  : Number of bootstrap loops
- K       : Sequence of increments between adjacent subintervals
- mb.type : Distribution of multipliers (Bound, Exp or Pois)

R Code

# Clear all variables
rm(list = ls(all = TRUE))
graphics.off()

# Set directory
setwd("C:/Users/xinjueli/PAM/simulations")

# Install and load packages
libraries = c("MASS", "bbmle", "glmnet", "doParallel", "LaplacesDemon", "optimx",
              "lars", "scales", "tilting")
lapply(libraries, function(x) if (!(x %in% installed.packages())) {
  install.packages(x)} )
lapply(libraries, library, quietly = TRUE, character.only = TRUE)

source("PAMfunc.r")

# Simulation setup
n.obs       = 500                   # No of observations
n.par       = 10                    # No  of parameters
n.sim       = 50                    # No of simulations
seed1       = 20171110              # Seed simulation X
seed2       = 20170602              # Seed simulation epsilon
r           = 0.5                   # Correlation parameter of X
sd.eps      = 1                     # Standard deviation of epsilon
cp1.seq     = matrix(c(50,150, 100,150), nrow = 2, ncol = 2, byrow = TRUE)              # Define change points
a           = 3.7                   # Recommended value of parameter a for SCAD
n.boot      = 10                    # Number of bootstrapped multipliers
K           = 50                    # Increment between adjacent sub-intervals
M           = n.obs/K               # Number of sub-intervals
mb.type     = "Exp"               # Bound, Exp or Pois
sel         = 0                     # Position of s

# Initiate cluster for parallel computing
n.cores = detectCores()           # Number of cores to be used
cl      = makeCluster(n.cores)
registerDoParallel(cl)
getDoParWorkers()

# Define number of scenarios in every core
n.simcores = rep((n.sim %/% n.cores), n.cores)
h.simcores = n.sim %% n.cores
if (h.simcores != 0){
  n.simcores[1:h.simcores] = n.simcores[1:h.simcores] + 1
}

# True beta coefficients with change in t = cp1
tmp1.1  = rep(1, 5)
tmp1.2  = rep(0, n.par - length(tmp1.1))
b1      = c(tmp1.1, tmp1.2)

tmp2.1  = rep(1, 3)
tmp2.2  = rep(0, n.par - length(tmp2.1))
b2      = c(tmp2.1, tmp2.2)

tmp3.1  = rep(1, 5)
tmp3.2  = rep(0, n.par - length(tmp1.1))
b3      = c(tmp3.1, tmp3.2)

# Simulation of the design matrix
mu    = rep(0, n.par)
Sigma = matrix(0, nrow = n.par, ncol = n.par)

for (i in 1:n.par) {
  for (j in 1:n.par) {
    if (i == j){
      Sigma[i, j] = 1
    }else {
      Sigma[i, j] = r^abs(i - j)
    }
  }
}

X = list()
for (i in 1:n.sim){
  set.seed(seed1)
  X[[i]] = mvrnorm(n = n.obs, mu, Sigma)
}  

# Simulation of the error term for t = 1, ..., n.obs
eps  = list()
set.seed(seed2)
for (i in 1:n.sim){
  eps[[i]] = rnorm(n.obs, mean = 0, sd = sd.eps)
} 

# Function for computing Y for t = 1, ..., n.obs
Y.sim = function(cp1, cp2){
  # Computation of Y for t = 1, ..., cp1
  Y1    = list()
  for (i in 1:n.sim){
    Y.tmp = numeric(0)
    for (j in 1:cp1){
      Y.tmp = c(Y.tmp, b1 %*% X[[i]][j, ] + eps[[i]][j])
    }
    Y1[[i]] = Y.tmp
  }
  
  # Computation of Y for t = cp1, ..., cp2
  Y2    = list()
  for (i in 1:n.sim){
    Y.tmp = numeric(0)
    for (j in (cp1 + 1):cp2){
      Y.tmp = c(Y.tmp, b2 %*% X[[i]][j, ] + eps[[i]][j])
    }
    Y2[[i]] = Y.tmp
  }
  
  # Computation of Y for t = cp2, ..., n.obs
  Y3    = list()
  for (i in 1:n.sim){
    Y.tmp = numeric(0)
    for (j in (cp2 + 1):n.obs){
      Y.tmp = c(Y.tmp, b3 %*% X[[i]][j, ] + eps[[i]][j])
    }
    Y3[[i]] = Y.tmp
  }
  
  Y   = list()
  for (i in 1:n.sim){
    Y[[i]] = c(Y1[[i]], Y2[[i]], Y3[[i]])
  }
  Y
}

# Simulating mutlipliers from a bounded distribution
runif.mod = function(n){
  unif1    = runif(n, min = 0, max = 12)
  unif.a   = unif1[which(unif1 <= 1)]
  index.a  = which(unif1 <= 1)
  unif.b   = unif1[which((1 < unif1) & (unif1 <= 4))]
  index.b  = which((1 < unif1) & (unif1 <= 4))
  unif.c   = (unif1[which(unif1 > 4)] - 4)/8
  index.c  = which(unif1 > 4)
  unif2    = cbind(c(index.a, index.b, index.c), c(unif.a, unif.b, unif.c))
  unif.fin = unif2[order(unif2[, 1], decreasing = FALSE), 2]
  unif.fin
}



sim.change.points = function(interval.in, s, mb.type = c("Bound", "Exp", "Pois")){
  
    b.pts          = numeric()
    ratio.sele     = numeric()
    Beta.r         = numeric()
    interval.index = numeric()
    k           = interval.in+1
    k1          = interval.in
    k2          = interval.in+1
    beta        = list()
    beta0.tmp   = list()
    a0          = list()
    lambda      = list()
    b.pts.tmp   = 0
    
    # 1.step: Fit the model for assumed homogeneous interval I_t^(1)
    X.tmp.L        = X[[s]][((k1 * K) + 1):(k2 * K), ]
    Y.tmp.L        = Y[[s]][((k1 * K) + 1):(k2 * K)]
    n.obs.tmp.L    = length(Y.tmp.L)
    object.tmp.L   = Onestep.SCAD(X.tmp.L, Y.tmp.L, a, n.obs.tmp.L)
    beta[[k]]      = object.tmp.L$beta
    a0[[k]]        = object.tmp.L$a
    lambda[[k]]    = object.tmp.L$lambda
    beta0.tmp[[k]] = object.tmp.L$beta.0
    
    while (k < M){
      # Fit the model over the next interval I_t^(k + 1) - I_t^(k)
      X.tmp.R      = X[[s]][((k2 * K) + 1):((k2 + 1) * K), ]
      Y.tmp.R      = Y[[s]][((k2 * K) + 1):((k2 + 1) * K)]
      n.obs.tmp.R  = length(Y.tmp.R)
      object.tmp.R = Onestep.SCAD(X.tmp.R, Y.tmp.R, a, n.obs.tmp.R)
      beta.tmp.R   = object.tmp.R$beta
      a.tmp.R      = object.tmp.R$a
      lambda.tmp.R = object.tmp.R$lambda
      beta0.tmp.R  = object.tmp.R$beta.0
      
      # Fit the model over the whole interval I_t^(k + 1)
      X.tmp.T      = X[[s]][((k1 * K) + 1):((k2 + 1) * K), ]
      Y.tmp.T      = Y[[s]][((k1 * K) + 1):((k2 + 1) * K)]
      n.obs.tmp.T  = length(Y.tmp.T)
      object.tmp.T = Onestep.SCAD(X.tmp.T, Y.tmp.T, a, n.obs.tmp.T)
      beta.tmp.T   = object.tmp.T$beta
      a.tmp.T      = object.tmp.T$a
      lambda.tmp.T = object.tmp.T$lambda
      beta0.tmp.T  = object.tmp.T$beta.0
      
      lik.T        = loglik.pen(a.tmp.T, beta0.tmp.T, beta.tmp.T, lambda.tmp.T, 
                                X.tmp.T, Y.tmp.T)
      
      # Simulation of multipliers u_i (i = 1, ..., n.obs.tmp.T)
      set.seed        = 20170424 * k * s * 10
      
      if (mb.type == "Bound"){
        multipliers   = matrix(runif.mod(n.boot * n.obs.tmp.T),      
                               ncol = n.obs.tmp.T, nrow = n.boot) # Bounded distribution
      }
      if (mb.type == "Exp"){
        multipliers   = matrix(rexp(n.boot * n.obs.tmp.T, rate = 1), 
                               ncol = n.obs.tmp.T, nrow = n.boot) # Exp(1) distribution
      }
      if (mb.type == "Pois"){
        multipliers   = matrix(rpois(n.boot * n.obs.tmp.T, lambda = 1), 
                               ncol = n.obs.tmp.T, nrow = n.boot) # Pois(1) distribution
      }
      
      MB.ratios      = numeric(0)
      lik.sel        = numeric(0)
      
      
        lik.ratio    = numeric(0)
        
        # 1.step: Fit the model for assumed homogeneous interval I_t^(k,s)
        X.sel.L      = X[[s]][((k1 * K) + 1):(k2 * K ), ]
        Y.sel.L      = Y[[s]][((k1 * K) + 1):(k2 * K )]
        n.obs.sel.L  = length(Y.sel.L)
        object.sel.L = Onestep.SCAD(X.sel.L, Y.sel.L, a, n.obs.sel.L)
        beta.sel.L   = object.sel.L$beta
        a.sel.L      = object.sel.L$a
        lambda.sel.L = object.sel.L$lambda
        beta0.sel.L  = object.sel.L$beta.0
        
        # 2. a) step: Fit the model over the next interval I_t^(k + 1,s) - I_t^(k,s)
        X.sel.R      = X[[s]][((k2 * K) + 1 ):((k2 + 1) * K), ]
        Y.sel.R      = Y[[s]][((k2 * K) + 1 ):((k2 + 1) * K)]
        n.obs.sel.R  = length(Y.sel.R)
        object.sel.R = Onestep.SCAD(X.sel.R, Y.sel.R, a, n.obs.sel.R)
        beta.sel.R   = object.sel.R$beta
        a.sel.R      = object.sel.R$a
        lambda.sel.R = object.sel.R$lambda
        beta0.sel.R  = object.sel.R$beta.0
        
        # 2. b) step: Fit the model over the whole interval I_t^(k + 1,s)
        X.sel.T      = X.tmp.T
        Y.sel.T      = Y.tmp.T
        n.obs.sel.T  = n.obs.tmp.T
        
        # 3.step: Evaluate the test statistic
        # Real likelihood ratio
        lik.sel.L    = loglik.pen(a.sel.L, beta0.sel.L, beta.sel.L, lambda.sel.L, 
                                  X.sel.L, Y.sel.L)
        lik.sel.R    = loglik.pen(a.sel.R, beta0.sel.R, beta.sel.R, lambda.sel.R, 
                                  X.sel.R, Y.sel.R)
        lik.sel.T    = lik.T
        
        lik.sel      = c(lik.sel, (((n.obs.sel.L/n.obs.sel.T) * lik.sel.L)
                                   + ((n.obs.sel.R/n.obs.sel.T) * lik.sel.R) 
                                   - lik.sel.T))
        
        # 4.step: Find quantiles of LR with use of MB
        for (l in 1:(n.boot)){
          # Multiplier bootstrap for left-hand interval I_t^(k)
          multipl.L      = multipliers[l, 1:n.obs.sel.L]
          
          boot.object.L  = Onestep.SCAD.MB(X.sel.L, Y.sel.L, a, n.obs.sel.L, 
                                           as.numeric(lambda.sel.L), multipl.L)
          
          boot.beta.L    = boot.object.L$beta
          boot.a.L       = boot.object.L$a
          boot.a0.L      = boot.object.L$a.0
          boot.beta0.L   = boot.object.L$beta.0
          
          lik.boot.L     = loglik.pen.MB(boot.a.L, boot.beta0.L, boot.beta.L, 
                                         as.numeric(lambda.sel.L), X.sel.L, Y.sel.L, 
                                         multipl.L)
          
          # Multiplier bootstrap for right-hand interval I_t^(k + 1) - I_t^(k)
          multipl.R      = multipliers[l, (n.obs.sel.L + 1):n.obs.sel.T]
          
          boot.object.R  = Onestep.SCAD.MB(X.sel.R, Y.sel.R, a, n.obs.sel.R, 
                                           as.numeric(lambda.sel.R), multipl.R)
          
          boot.beta.R    = boot.object.R$beta
          boot.a.R       = boot.object.R$a
          boot.a0.R      = boot.object.R$a.0
          boot.beta0.R   = boot.object.R$beta.0
          
          lik.boot.R     = loglik.pen.MB(boot.a.R, boot.beta0.R, boot.beta.R, 
                                         as.numeric(lambda.sel.R), X.sel.R, Y.sel.R, 
                                         multipl.R)
          
          # Multiplier bootstrap for the whole interval I_t^(k + 1) (with shift)
          multipl        = multipliers[l, ]
          
          boot.object.T  = Onestep.SCAD.MB.shift(X.sel.L, Y.sel.L, X.sel.R, Y.sel.R, a, 
                                                 lambda.sel.L, lambda.sel.R, multipl.L, 
                                                 multipl.R, beta.sel.L, beta.sel.R, 
                                                 a.sel.L, a.sel.R)
          
          boot.beta.T    = boot.object.T$beta
          boot.a.T       = boot.object.T$a
          boot.a0.T      = boot.object.T$a.0
          boot.lambda.T  = boot.object.T$lambda
          boot.beta0.T   = boot.object.T$beta.0
          
          Y.boot.T       = c(Y.sel.L, (Y.sel.R - rep((a.sel.R - a.sel.L), n.obs.sel.R) 
                                       - X.sel.R %*% (beta.sel.R - beta.sel.L)))
          
          lik.boot.T     = loglik.pen.MB(boot.a.T, boot.beta0.T, boot.beta.T, 
                                         boot.lambda.T, X.sel.T, Y.boot.T, multipl)
          
          lik.ratio      = c(lik.ratio, ((n.obs.sel.L/n.obs.sel.T) * lik.boot.L 
                                         + (n.obs.sel.R/n.obs.sel.T) * lik.boot.R 
                                         - lik.boot.T))
        }
        
        MB.ratios = cbind(MB.ratios, lik.ratio)
      
      
      # Find maximum over the real and bootstrapped likelihood ratios
      t.stat        = max(lik.sel)
      MB.ratios.max = apply(MB.ratios, 1, max)
      
      q90 = quantile(MB.ratios.max, probs = 1-(0.1*k/M))
      q95 = quantile(MB.ratios.max, probs = 1-(0.05*k/M))
      
      k2  = k2 + 1
      k   = k + 1
      
      # 5.step: Test for homogeneity and evaluate current beta estimator
      if (t.stat <= q95){
        for (k3 in (k1 + 1):k){
          beta[[k3]]   = beta.tmp.T
          a0[[k3]]     = a.tmp.T
          lambda[[k3]] = lambda.tmp.T
        }
      } else {
        b.pts.tmp      = (k - 1) * K + 1
        k1             = k2 - 1
        beta[[k]]      = beta.tmp.R
        a0[[k]]        = a.tmp.R
        lambda[[k]]    = lambda.tmp.R
        index          = which(beta[[(k)]]<= 0.2*max(beta[[(k)]]))
        BETA           = beta[[(k)]]
        BETA[index]    = 0
        if(k<=2){
          ratio        = 1-(length(which(BETA[6:10]!= 0))/length(b1))}
        if(k>2 && k<=3){
          ratio        = 1-(length(which(BETA[4:10]!= 0))/length(b1))}
        else{
          ratio          = 1-(length(which(BETA[6:10]!= 0))/length(b1))}
        break
      } 
    }
    
    if(b.pts.tmp == 0){
     sim.no                    = s
     b.pts[sim.no]           = 500
     index                     = which(beta.tmp.T <= 0.4*max(beta.tmp.T))
     Beta.r[sim.no]          = beta.tmp.T
     Beta.r[sim.no][index]   = 0
     ratio.sele[sim.no]      = 1-(length(which((Beta.r[[sim.no]][6:10])!= 0))/length(b1))
     interval.index[sim.no]  = k
     value                     = list(b.pts[sim.no], ratio.sele[sim.no], Beta.r[sim.no], interval.index[sim.no])
     return(value)
    }
    else{
    Sys.time()
    sim.no                   = s
    b.pts[sim.no]          = b.pts.tmp 
    ratio.sele[sim.no]     = ratio
    Beta.r[sim.no]         = BETA
    interval.index[sim.no] = k-1
    value                    = list(b.pts[sim.no], ratio.sele[sim.no], Beta.r[sim.no], interval.index[sim.no])
     return(value)
    }
}

results = function(h){
value.interval.1 = list()
value.ratio.1    = list()
a1               = matrix(0, 50, 2)
a2               = matrix(0, 50, 2)
s1 = ifelse(h == 1, 1, sum(n.simcores[1:(h - 1)],1))
s2 = sum(n.simcores[1:h])
for(i in s1:s2){
value.interval        = list()
value.ratio           = list()
values1               = sim.change.points(0, i, mb.type)
j                     = values1[[4]]
value.interval        = j
value.ratio           = values1[[2]]
while(j < 9){
values1               = sim.change.points(j, i, mb.type)
value.interval        = c(value.interval, values1[[4]])
value.ratio           = c(value.ratio, values1[[2]])
j                     = values1[[4]]}
value.interval.1[[i]] = value.interval
if(c(2, 3)%in%value.interval == c(T,T)){a1[i, ]=c(1,1)}
else{a1[i, ]=c(0,0)}
value.ratio.1[[i]]    = value.ratio
if(c(2, 3)%in%value.interval == c(T,T)){
a2[i, ] = value.ratio[c(1,2)]}
else{a2[i, ]=c(0,0)}
}
Value.final           = list(a1[s1:s2, ], a2[s1:s2, ])
names(Value.final)    = list("interval.index", "selection.v.ratio")
return(Value.final)
}

out.sum    = list()
final.list = list() 
Sys.time()
Y   = Y.sim(100, 150)
  final.chpts       = foreach(ipar = 1:n.cores, 
                              .packages = c("MASS", "bbmle", "glmnet", "doParallel", 
                                            "LaplacesDemon", "optimx", "lars", "scales", 
                                            "tilting"), 
                              .combine = 'c') %dopar% results(ipar)   
  final.list = final.chpts
Sys.time()
stopCluster(cl)

subinterval     = final.list[[1]]
for(k in 2:n.cores){
  subinterval   = rbind(subinterval, final.list[[(2*k-1)]])
}

Ratio.select    = final.list[[2]]
for(k in 2:n.cores){
  Ratio.select  = rbind(Ratio.select, final.list[[(2*k)]])
}

corr.ratio.structural.break.1 = sum(subinterval[, 1])/n.sim
corr.ratio.structural.break.2 = sum(subinterval[, 2])/n.sim

corr.ratio.v.selection.1      = mean(Ratio.select[, 1])
corr.ratio.v.selection.2      = mean(Ratio.select[, 2])

corr.ratio.structural.break.1
corr.ratio.structural.break.2
corr.ratio.v.selection.1 
corr.ratio.v.selection.2

automatically created on 2019-11-26

cpasimulations's People

Contributors

lxjue avatar quantletteam avatar

Watchers

 avatar  avatar

Forkers

michaelalthof

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.