# ========================================
# Breast Cancer Data Simulation Script - 
# Stepwise Gaussian Graphical Model
# 
# Works in conjunction with:
# StepGraph R Package
# 
# ========================================

# Clear memory: remove every visible (non-dot-prefixed) object from the
# current workspace so the script does not pick up leftovers from earlier
# work. Attached packages and options() are not affected by this call.
# NOTE(review): if this script is source()d into an interactive session it
# also deletes the caller's objects — confirm that is intended.
rm(list=ls())

# Required Libraries
library(Rlab)
library(MASS)
library(huge)
library(glmnet)
library(clime)
library(CVglasso)
library(psych)
library(StepGraph)


# =======================
# Simulation Experiment 
# =======================

# ----------------------
# Training Set Function
# ----------------------

# Draw one random training/testing split of the expression table and keep
# the genes that best separate the two label groups in the training samples.
#
# Arguments:
#   data         Table (data frame or matrix). Row 1 holds the class label of
#                each sample (0 = RD, 1 = pCR); every other row is one gene.
#                Column 1 is dropped and never treated as a sample.
#   n_train_RD   Number of label-0 (RD) samples drawn for training.
#   n_train_pCR  Number of label-1 (pCR) samples drawn for training.
#   n_genes      Maximum number of genes kept, ranked by the p-value of a
#                two-sample t-test (RD vs pCR) on the training samples. If
#                fewer genes are available, all of them are kept.
#
# Returns a list; positions are the same as before, names are new:
#   [[1]] training_set      samples x genes, all training samples
#   [[2]] testing_set       samples x genes, all remaining samples
#   [[3]] training_set_RD   samples x genes, RD training samples
#   [[4]] training_set_pCR  samples x genes, pCR training samples
#   [[5]] group_true        labels of the testing samples
#   [[6]] n_RD              number of RD training samples
#   [[7]] n_pCR             number of pCR training samples
#
# Stops with an error when a group has fewer samples than requested.
# The random draws consume the RNG exactly as sample(x, size) does for
# groups with more than one member, so a given seed gives the same split
# as the previous implementation.
training_set <- function(data, n_train_RD = 83, n_train_pCR = 29,
                         n_genes = 113) {
  expr <- data[-1, -1, drop = FALSE]
  labels <- data[1, -1]

  rd_cols <- which(labels == 0)
  pcr_cols <- which(labels == 1)

  if (length(rd_cols) < n_train_RD || length(pcr_cols) < n_train_pCR) {
    stop(
      "not enough samples: requested ", n_train_RD, " RD / ", n_train_pCR,
      " pCR but data has ", length(rd_cols), " RD / ", length(pcr_cols),
      " pCR",
      call. = FALSE
    )
  }

  # Index through sample.int() rather than sample(x, n): sample() would
  # silently draw from 1:x when a group contains a single column index.
  training_index_RD <- rd_cols[sample.int(length(rd_cols), n_train_RD)]
  training_index_pCR <- pcr_cols[sample.int(length(pcr_cols), n_train_pCR)]
  training_index <- c(training_index_RD, training_index_pCR)

  # Positive indexing stays correct even when training_index is empty
  # (x[, -integer(0)] would select nothing instead of everything).
  test_index <- setdiff(seq_len(ncol(expr)), training_index)

  # One t-test per gene, RD vs pCR, using training samples only.
  p_values <- vapply(
    seq_len(nrow(expr)),
    function(g) {
      t.test(
        unlist(expr[g, training_index_RD]),
        unlist(expr[g, training_index_pCR])
      )$p.value
    },
    numeric(1)
  )

  # Cap at the number of available genes so no NA row index is produced.
  n_keep <- min(n_genes, length(p_values))
  top_genes <- order(p_values)[seq_len(n_keep)]

  list(
    training_set = t(expr[top_genes, training_index, drop = FALSE]),
    testing_set = t(expr[top_genes, test_index, drop = FALSE]),
    training_set_RD = t(expr[top_genes, training_index_RD, drop = FALSE]),
    training_set_pCR = t(expr[top_genes, training_index_pCR, drop = FALSE]),
    group_true = labels[test_index],
    n_RD = length(training_index_RD),
    n_pCR = length(training_index_pCR)
  )
}

# -----------------------
# Generate Training Sets
# -----------------------

# Fix the RNG seed so the sequence of random splits can be reproduced
set.seed(1)

# Number of random splits to generate
R <- 100

# Semicolon-separated file with a header line and "." as decimal mark
breast_cancer <- read.table(
  "breast_cancer_data.csv",
  header = TRUE,
  sep = ";",
  dec = "."
)

# Each split is written to its own data_set_<r>.RData file and is stored
# there under the object name `data_set`
for (r in seq_len(R)) {
  data_set <- training_set(breast_cancer)
  save(data_set, file = paste0("data_set_", r, ".RData"))
}


# ---------------------------
# Generate Simulation Output
# ---------------------------

# Estimate precision matrices: number of saved splits to process
R <- 100

# CLIME: for every saved split, pick the penalty with 5-fold
# cross-validation (loss = "likelihood"), refit at that penalty, and save
# the first element of the returned Omega list as `list_clime`.
# NOTE(review): `$Omega` may be resolved through partial matching of the
# list element name — confirm the exact field name in the clime docs.
for (r in seq_len(R)) {
  load(file = paste0("data_set_", r, ".RData"))
  rho <- cv.clime(
    clime(data_set[[1]]),
    loss = "likelihood",
    fold = 5
  )$lambdaopt
  list_clime <- clime(data_set[[1]], rho)$Omega[[1]]
  save(list_clime, file = paste0("clime_opt_", r, ".RData"))
}

# CVglasso: for every saved split, run CVglasso with K = 5 on the
# column-scaled training matrix and save its Omega as `list_glasso`
for (r in seq_len(R)) {
  load(file = paste0("data_set_", r, ".RData"))
  list_glasso <- CVglasso(scale(data_set[[1]]), K = 5)$Omega
  save(list_glasso, file = paste0("glasso_opt_", r, ".RData"))
}

# StepGraph: for every saved split, tune the forward/backward thresholds
# with 5-fold cross-validation, refit on the column-scaled training matrix
# and save element 6 of the fit.
# The result has to stay in a variable called `list`: save() stores it under
# that name, so anything that loads list_opt_<r>.RData gets an object `list`.
# NOTE(review): cv.StepGraph() receives the unscaled matrix while StepGraph()
# receives the scaled one — confirm this asymmetry is intended.
for (r in seq_len(R)) {
  load(file = paste0("data_set_", r, ".RData"))
  alpha <- cv.StepGraph(
    data_set[[1]],
    fold = 5,
    alpha_f_min = 0.10,
    alpha_f_max = 0.75,
    n_alpha = 10,
    nei.max = 20
  )
  list <- StepGraph(
    scale(data_set[[1]]),
    alpha_f = alpha[[1]][[1]],
    alpha_b = alpha[[2]][[1]],
    nei.max = 20
  )[[6]]
  save(list, file = paste0("list_opt_", r, ".RData"))
}





