### This script reproduces figures and results from the paper.

# to set current directory:
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

source("KOD_functions.R")

########### Figures of toy datasets ##################
######################################################

# labels
# Label vectors for the 1000-point toy datasets: the leading block is coded 0
# and the trailing block (the contaminated fraction) is coded 1.
y5  <- rep(c(0, 1), times = c(950, 50))    # 5 percent
y10 <- rep(c(0, 1), times = c(900, 100))   # 10 percent
y20 <- rep(c(0, 1), times = c(800, 200))   # 20 percent

# Inside-Outside Figures -------------------------------------------------

# Inside-outside toy data at 10% and 20% contamination, both generated with
# seed 42.
inoutcirc10 <- cans_insideoutside(seed = 42, p.cont = 0.10)
inoutcirc20 <- cans_insideoutside(seed = 42, p.cont = 0.20)


# > Figure 1 (Left)  -----------------------------------------------------

# Scatterplot of the 20% contaminated data: label-0 points in green, the rest
# in black. Axis ticks and titles are suppressed; aspect ratio fixed at 1.
plot(
  inoutcirc20,
  col = ifelse(y20 == 0, "#649C3D", "black"),
  pch = 1, cex = 1, asp = 1,
  xlab = "", ylab = "", main = "",
  xaxt = "n", yaxt = "n"
)


# > Figure 1 (Middle) ----------------------------------------------------

# Exact SDO values

# Projection depth of every observation with respect to the data itself.
# Slow step: the recorded runtime was 129 seconds.
tic()
res1 <- depth.projection(
  x = inoutcirc20,
  data = inoutcirc20,
  method = "linearize",
  seed = 42
)
toc()

# Convert depth to outlyingness:
# depth = 1 / (1 + outlyingness)  <=>  outlyingness = 1 / depth - 1
outl1 <- 1 / res1 - 1


# Base-graphics index plot of the SDO values (Figure 1, Middle)
plot(
  outl1,
  col = ifelse(y20 == 0, "#649C3D", "black"),
  pch = 19, tck = 0,
  xlab = "Index", ylab = "SDO", main = ""
)


## To obtain the exact same plot as in the paper, we use ggplot2:
# One row per observation: its SDO value, its true label and its 1-based
# position in the dataset.
tmpdf <- data.frame(
  outlyingness = outl1,
  label = y20,
  index = seq_along(outl1)
)

# Index plot of the SDO values. `index` is already 1-based, so it is mapped to
# the x-axis unchanged; this keeps the axis identical to the base-graphics
# plot of `outl1` (previously the points were shifted one unit to the right).
p <- ggplot(tmpdf, aes(x = index, y = outlyingness,
                       color = factor(label))) +
  geom_point(size = 1.5) +
  scale_color_manual(values = c("0" = "#649C3D", "1" = "black"),
                     guide = "none") +
  labs(y = "SDO", x = "Index") +
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_rect(fill = "white", color = NA), # panel area
    plot.background = element_rect(fill = "white", color = NA), # entire image
    panel.grid = element_blank(),           # 1 - remove all grid lines
    panel.border = element_rect(color = "black", fill = NA,
                                linewidth = 0.8),  # 2 - add black border
    strip.text = element_text(size = 15),   # 3 - increase facet title size
    axis.text.x = element_text(size = 15),  # 4 - increase x-axis value size
    axis.text.y = element_text(size = 15),  # 5 - increase y-axis value size
    axis.title.x = element_text(size = 15), # 6 - increase x-axis title size
    axis.title.y = element_text(size = 15)  # 7 - increase y-axis title size
  )
p

# Write the panel to disk. Both dimensions evaluate to 5 inches, i.e. a
# square 1000 x 1000 pixel image at 200 dpi.
ggsave(
  filename = "figure1_middle.png",
  plot = p,
  width = 3500 / 200 / 3.5,   # = 17.5 / 3.5 = 5 inches
  height = 1000 / 200,  # = 5 inches
  dpi = 200,
  units = "in",
  pointsize = 15
)


# > Figure 1 (Right) ------------------------------------------------------

# This requires outl1 variable from above: Figure 1 (Middle) part

## WARNING: took 18 minutes to run (depth.projection)

# 100 x 100 evaluation grid over [-2.5, 2.5] x [-2.5, 2.5], stored as a
# two-column matrix with columns x1 and x2.
inoutcirc_grid <- as.matrix(
  expand.grid(
    x1 = seq(-2.5, 2.5, length.out = 100),
    x2 = seq(-2.5, 2.5, length.out = 100)
  )
)

# Compute outlyingness values of the full grid (for heatmap)
# Projection depth of every grid point relative to the observed data; these
# values colour the heatmap. Slow step: the recorded runtime was 18 minutes.
tic()
res1grid <- depth.projection(
  x = inoutcirc_grid,
  data = inoutcirc20,
  method = "linearize"
)
toc()

# Same depth-to-outlyingness conversion as for the observations
outl1grid <- 1 / res1grid - 1

# Use log outlyingness for coloring
# Colouring is based on log(outlyingness + 0.1), for data and grid alike
logoutl1     <- log(outl1 + 0.1)
logoutl1grid <- log(outl1grid + 0.1)

# Common colour range over grid and data values
min_overall <- min(logoutl1grid, logoutl1)
max_overall <- max(logoutl1grid, logoutl1)
# Median of the observed log-outlyingness, rescaled to [0, 1] within the
# common range; used as the middle anchor of the colour gradient
middle_observation <- quantile(logoutl1, probs = 0.5)
normalized_cutoff <-
  (middle_observation - min_overall) / (max_overall - min_overall)

value_positions <- c(0, normalized_cutoff, 1)

# Plotting frames: observed points and grid cells
origplot_df_all <- data.frame(
  x1 = inoutcirc20[, 1],
  x2 = inoutcirc20[, 2],
  outlyingness = logoutl1  # outl not needed here actually
)
gridplot_df_all <- data.frame(
  x1 = inoutcirc_grid[, 1],
  x2 = inoutcirc_grid[, 2],
  outlyingness = logoutl1grid
)

# Heatmap of the grid log-outlyingness with the observations drawn on top.
# The gradient stays light grey up to the normalized cutoff and then moves to
# red; legend, axes and titles are all hidden, and the aspect ratio is fixed.
gridplot_plot <- ggplot() +
  geom_raster(
    data = gridplot_df_all,
    aes(x = x1, y = x2, fill = outlyingness)
  ) +
  geom_point(
    data = origplot_df_all,
    aes(x = x1, y = x2),
    color = ifelse(y20 == 0, "#649C3D", "black"),
    pch = 1,
    size = 1,
    alpha = 1,
    stroke = 0.35
  ) +
  scale_fill_gradientn(
    colors = c("#F0F0F0", "#F0F0F0", "firebrick2"),
    values = value_positions,
    limits = c(min_overall, max_overall)
  ) +
  theme_void() +
  labs(title = "", fill = "Outlyingness Value", color = "True Label") +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  coord_fixed()
# gridplot_plot  ## Note: in Rstudio viewer data points look smaller

ggsave(
  "figure1_right.png",
  plot = gridplot_plot,
  width = 2,
  height = 2.5,
  dpi = 1200
)


# > Figure 2 ----------------------------------------------------------------

# Scatterplot in feature space

# RBF bandwidth: 1 / (2 * median of the squared pairwise distances)
bw <- 1 / (2 * median(stats::dist(inoutcirc20)^2))
kernel_function <- kernlab::rbfdot(sigma = bw)
# Kernel matrix of the data, then centered
Kmat <- kernlab::kernelMatrix(kernel_function, inoutcirc20)
Kmat <- center_Kmat(Kmat)
# Feature vectors derived from the centered kernel matrix; the kernel matrix
# itself is discarded afterwards
inoutcirc20_fv <- classmap::makeFV(Kmat)[["Xf"]]
rm(Kmat)

# png("figure2.png", width = 1500, height = 1500, 
#     res = 300, pointsize = 10)
# First versus third feature-vector coordinate, coloured by true label
plot(
  inoutcirc20_fv[, c(1, 3)],
  col = ifelse(y20 == 0, "#649C3D", "black"),
  pch = 1, cex = 1, asp = 1,
  xlab = "FV1", ylab = "FV3", main = ""
)
# dev.off()


# > Figure 3 (Right) -------------------------------------------------
# Same scatterplot of inoutcirc20 as the first (scatterplot) panel of Figure 1

# png("figure3_right.png", width = 1500, height = 1500,
#     res = 300, pointsize = 10)
# Scatterplot of the inside-outside data (20% contamination), coloured by
# true label, without axes and with a fixed aspect ratio.
plot(
  inoutcirc20,
  col = ifelse(y20 == 0, "#649C3D", "black"),
  pch = 1, cex = 1, asp = 1,
  xlab = "", ylab = "", main = "",
  xaxt = "n", yaxt = "n"
)
# dev.off()


#> Figure 8 ----------------------------------------------------------

# Example KOD result
# Run KOD with the RBF kernel on the 20% contaminated inside-outside data
inoutcirc20_res <- KOD(
  x = inoutcirc20,
  kernel = "rbf",
  seed = 42,
  labels = y20
)
# Plot the result and save it as figure8.png
plot_all_outl(inoutcirc20_res, labels = y20, ggsave_path = "figure8.png")


# > Figure 9 ---------------------------------------------------------
# Heatmap

# summary(inoutcirc10)
# Evaluation grid for the heatmap: 100 x 100 points over [-2.5, 2.5]^2,
# stored as a matrix
inoutcirc_grid <- as.matrix(
  expand.grid(
    x1 = seq(-2.5, 2.5, length.out = 100),
    x2 = seq(-2.5, 2.5, length.out = 100)
  )
)

### 20%
cont <- 20

# KOD on the data, additionally evaluated on the grid. The dataset and label
# vector are looked up by name from the contamination level `cont`.
grid_res <- KOD_gridcolor(
  x = get(paste0("inoutcirc", cont)),
  kernel = "rbf",
  poly_degree = NULL,
  dir_types = c("point", "two_points", "basis_vector", "random"),
  num_dirs = list(
    point = NULL, two_points = 5000,
    basis_vector = NULL, random = 1000
  ),
  seed = 42,
  tol_fv = 1e-12,
  tol_mad = 0.2,
  labels = get(paste0("y", cont)),
  expvar = 99,
  center_for_point = "L1-median",
  grid_data = inoutcirc_grid
)

grid_data <- inoutcirc_grid

# Draw the heatmap and save it as figure9.png
generate_heatmap(
  grid_res = grid_res,
  grid_data = grid_data,
  data = get(paste0("inoutcirc", cont)),
  y = get(paste0("y", cont)),
  color_scale = c("#F0F0F0", "#F0F0F0", "firebrick2"),
  ggsave_path = "figure9.png",
  ggsave_details = list(width = 10, height = 2.5, dpi = 1200),
  ggpoint_details = list(pch = 1, size = 1, alpha = 1, stroke = 0.35),
  usefinal = FALSE,
  point_colors = c("black", "#649C3D"),
  put_cutoff = FALSE,
  asp_fixed = TRUE,
  title_text = NULL
)

# Salt-Pepper Ring Figures -------------------------------------------------

# Salt-pepper ring data: 800 clean points, 20% contamination, seed 42
salty20 <- cans_saltpepper(
  nclean = 800,
  p.cont = 0.20,
  rad = 1,
  noise = 0.05,
  border = 0.3,
  sp.mult = 2,
  seed = 42
)


#> Figure 3 (Left) ------------------------------------------------

# png("figure3_left.png", width = 1500, height = 1500,
#     res = 300, pointsize = 10)
# Scatterplot of the salt-pepper ring data, coloured by true label
plot(
  salty20,
  col = ifelse(y20 == 0, "#649C3D", "black"),
  pch = 1, cex = 1, asp = 1,
  xlab = "", ylab = "", main = "",
  xaxt = "n", yaxt = "n"
)
# dev.off()


# > Figure 4 -------------------------------------------------------

# Example KOD result
# Run KOD with the RBF kernel on the salt-pepper ring data
salty20_res <- KOD(
  x = salty20,
  kernel = "rbf",
  seed = 42,
  labels = y20
)
# Plot the result and save it as figure4.png
plot_all_outl(salty20_res, labels = y20, ggsave_path = "figure4.png")



# > Figure 5 -------------------------------------------------------

# summary(salty20)
# Evaluation grid for the heatmap: 100 x 100 points over [-2.25, 2.25]^2,
# stored as a matrix
salty_grid <- as.matrix(
  expand.grid(
    x1 = seq(-2.25, 2.25, length.out = 100),
    x2 = seq(-2.25, 2.25, length.out = 100)
  )
)
cont <- 20

# KOD on the data, additionally evaluated on the grid. The dataset and label
# vector are looked up by name from the contamination level `cont`.
grid_res <- KOD_gridcolor(
  x = get(paste0("salty", cont)),
  kernel = "rbf",
  poly_degree = NULL,
  dir_types = c("point", "two_points", "basis_vector", "random"),
  num_dirs = list(
    point = NULL, two_points = 5000,
    basis_vector = NULL, random = 1000
  ),
  seed = 42,
  tol_fv = 1e-12,
  tol_mad = 0.2,
  labels = get(paste0("y", cont)),
  expvar = 99,
  center_for_point = "L1-median",
  grid_data = salty_grid
)

grid_data <- salty_grid

# Draw the heatmap and save it as figure5.png
generate_heatmap(
  grid_res = grid_res,
  grid_data = grid_data,
  data = salty20,
  y = y20,
  color_scale = c("#F0F0F0", "#F0F0F0", "firebrick2"),
  ggsave_path = "figure5.png",
  plot_the_data = TRUE,
  ggsave_details = list(width = 10, height = 2.5, dpi = 1200),
  ggpoint_details = list(pch = 1, size = 1, alpha = 1, stroke = 0.35),
  usefinal = FALSE,
  point_colors = c("black", "#649C3D"),
  put_cutoff = FALSE,
  asp_fixed = TRUE,
  title_text = NULL
)


# Circle-Cluster Figures -----------------------------------------------------------------

# Circle-cluster toy data: 900 clean points with 10% contamination and
# 800 clean points with 20% contamination, both generated with seed 42.
circlust10 <- cans_circle(900, 0.1, seed = 42)
circlust20 <- cans_circle(800, 0.2, seed = 42)


# > Figure 3 (Middle) -------------------------------------------

# Scatterplot of the circle-cluster data, coloured by true label
plot(
  circlust20,
  col = ifelse(y20 == 0, "#649C3D", "black"),
  pch = 1, cex = 1, asp = 1,
  xlab = "", ylab = "", main = "",
  xaxt = "n", yaxt = "n"
)

# > Figure 6 ----------------------------------------------------
# Example KOD result
# Run KOD with the RBF kernel on the circle-cluster data
circlust20_res <- KOD(
  x = circlust20,
  kernel = "rbf",
  seed = 42,
  labels = y20
)
# Plot the result and save it as figure6.png
plot_all_outl(circlust20_res, labels = y20, ggsave_path = "figure6.png")


# > Figure 7 ---------------------------------------------------------
# Heatmap

# summary(circlust20)
# Evaluation grid for the heatmap: 100 x 100 points over [-1.75, 1.75]^2,
# stored as a matrix
circlust_grid <- as.matrix(
  expand.grid(
    x1 = seq(-1.75, 1.75, length.out = 100),
    x2 = seq(-1.75, 1.75, length.out = 100)
  )
)

cont <- 20
# KOD on the data, additionally evaluated on the grid. The dataset and label
# vector are looked up by name from the contamination level `cont`.
grid_res <- KOD_gridcolor(
  x = get(paste0("circlust", cont)),
  kernel = "rbf",
  poly_degree = NULL,
  dir_types = c("point", "two_points", "basis_vector", "random"),
  num_dirs = list(
    point = NULL, two_points = 5000,
    basis_vector = NULL, random = 1000
  ),
  seed = 42,
  tol_fv = 1e-12,
  tol_mad = 0.2,
  labels = get(paste0("y", cont)),
  expvar = 99,
  center_for_point = "L1-median",
  grid_data = circlust_grid
)

grid_data <- circlust_grid

# Draw the heatmap and save it as figure7.png
generate_heatmap(
  grid_res = grid_res,
  grid_data = grid_data,
  data = circlust20,
  y = y20,
  color_scale = c("#F0F0F0", "#F0F0F0", "firebrick2"),
  ggsave_path = "figure7.png",
  ggsave_details = list(width = 10, height = 2.5, dpi = 1200),
  ggpoint_details = list(pch = 1, size = 1, alpha = 1, stroke = 0.35),
  usefinal = FALSE,
  point_colors = c("black", "#649C3D"),
  put_cutoff = FALSE,
  asp_fixed = TRUE,
  title_text = NULL
)


# Moons Figures ----------------------------------------------------------

# Two-moons sample (2000 points, seed 999). The first 900 points with label 1
# are stacked on the first 100 points with label 2 to form the 10%
# contaminated dataset; the temporaries are removed afterwards.
set.seed(999)
moonsx <- make_moons(n_samples = 2000, shuffle = TRUE, noise = 0.05)
data1 <- moonsx$samples[moonsx$labels == 1, ]
data2 <- moonsx$samples[moonsx$labels == 2, ]
moons10 <- rbind(
  data1[seq_len(900), ],
  data2[seq_len(100), ]
) # 10% contamination
rm(moonsx, data1, data2)


# > Figure A.1. -----------------------------------------------------------

# Scatterplot of the moons data (10% contamination), coloured by true label
plot(
  moons10,
  col = ifelse(y10 == 0, "#649C3D", "black"),
  pch = 1, cex = 1, asp = 1,
  xlab = "", ylab = "", main = "",
  xaxt = "n", yaxt = "n"
)


# > Figure A.2. -----------------------------------------------------------

# Run KOD with the RBF kernel on the moons data (10% contamination)
moons10_res <- KOD(
  x = moons10,
  kernel = "rbf",
  seed = 42,
  labels = y10
)
# Plot the result and save it as figureA2.png
plot_all_outl(moons10_res, labels = y10, ggsave_path = "figureA2.png")


# > Figure A.3. -----------------------------------------------------------

# Evaluation grid for the heatmap: 100 x 100 points over
# [-1.3, 2.25] x [-1.25, 1.75], stored as a matrix
moons_grid <- as.matrix(
  expand.grid(
    x1 = seq(-1.3, 2.25, length.out = 100),
    x2 = seq(-1.25, 1.75, length.out = 100)
  )
)
cont <- 10

# KOD on the data, additionally evaluated on the grid. The dataset and label
# vector are looked up by name from the contamination level `cont`.
grid_res <- KOD_gridcolor(
  x = get(paste0("moons", cont)),
  kernel = "rbf",
  poly_degree = NULL,
  dir_types = c("point", "two_points", "basis_vector", "random"),
  num_dirs = list(
    point = NULL, two_points = 5000,
    basis_vector = NULL, random = 1000
  ),
  seed = 42,
  tol_fv = 1e-12,
  tol_mad = 0.2,
  labels = get(paste0("y", cont)),
  expvar = 99,
  center_for_point = "L1-median",
  grid_data = moons_grid
)

grid_data <- moons_grid

# Draw the heatmap and save it as figureA3.png
generate_heatmap(
  grid_res = grid_res,
  grid_data = grid_data,
  data = moons10,
  y = y10,
  color_scale = c("#F0F0F0", "#F0F0F0", "firebrick2"),
  ggsave_path = "figureA3.png",
  ggsave_details = list(width = 10, height = 2.5, dpi = 1200),
  ggpoint_details = list(pch = 1, size = 1, alpha = 1, stroke = 0.35),
  usefinal = FALSE,
  point_colors = c("black", "#649C3D"),
  put_cutoff = FALSE,
  asp_fixed = TRUE,
  title_text = NULL
)


#############################################################################

####### SIMULATIONS: KOD's PERFORMANCE ON TOY DATASETS ######################
#############################################################################

# labels
# Label vectors for the 1000-point datasets: leading block coded 0, trailing
# (contaminated) block coded 1. Redefined here so this section runs on its own.
y5  <- rep(c(0, 1), times = c(950, 50))    # 5 percent
y10 <- rep(c(0, 1), times = c(900, 100))   # 10 percent
y20 <- rep(c(0, 1), times = c(800, 200))   # 20 percent

# > Inside-Outside performance -----------------------------

# One result slot per data-generation seed (42..51)
inoutcirc_KOD_results <- vector("list", length = 10)

tic()
for (seed in 42:51) { # takes 75 sec
  # Fresh datasets for this seed, one per contamination level
  inoutcirc5  <- cans_insideoutside(seed = seed, p.cont = 0.05)
  inoutcirc10 <- cans_insideoutside(seed = seed, p.cont = 0.10)
  inoutcirc20 <- cans_insideoutside(seed = seed, p.cont = 0.20)
  ## results (KOD always runs with its own fixed seed 42)
  inoutcirc5_res <- KOD(
    x = inoutcirc5, kernel = "rbf", seed = 42, labels = y5
  )
  inoutcirc10_res <- KOD(
    x = inoutcirc10, kernel = "rbf", seed = 42, labels = y10
  )
  inoutcirc20_res <- KOD(
    x = inoutcirc20, kernel = "rbf", seed = 42, labels = y20
  )
  # Three rows (5%, 10%, 20%) holding the mcc and patn fields of each result
  tmp <- data.frame(
    data = "inoutcirc",
    cont = c(5, 10, 20),
    seed = seed,
    mcc = c(
      inoutcirc5_res$mcc, inoutcirc10_res$mcc, inoutcirc20_res$mcc
    ),
    patn = c(
      inoutcirc5_res$patn, inoutcirc10_res$patn, inoutcirc20_res$patn
    )
  )
  # seed 42 -> slot 1, ..., seed 51 -> slot 10
  inoutcirc_KOD_results[[seed - 41]] <- tmp
}
toc() # 75 sec

rm(
  inoutcirc5, inoutcirc10, inoutcirc20,
  inoutcirc5_res, inoutcirc10_res, inoutcirc20_res
)

inoutcirc_KOD_df <- do.call(rbind, inoutcirc_KOD_results)

# Mean mcc and patn per contamination level, rounded to two decimals
inoutcirc_KOD_df %>%
  group_by(cont) %>%
  summarise(
    mcc = round(mean(mcc), 2),
    patn = round(mean(patn), 2)
  )
# cont   mcc  patn
# <dbl> <dbl> <dbl>
# 1     5  0.83     1
# 2    10  0.94     1
# 3    20  1        1


# > Salt-Pepper Ring performance -----------------------------------

# One result slot per data-generation seed (42..51)
salty_KOD_results <- vector("list", length = 10)

tic()
for (seed in 42:51) {
  # Fresh datasets for this seed, one per contamination level
  salty5  <- cans_saltpepper(nclean = 950, p.cont = 0.05, seed = seed)
  salty10 <- cans_saltpepper(nclean = 900, p.cont = 0.1, seed = seed)
  salty20 <- cans_saltpepper(nclean = 800, p.cont = 0.2, seed = seed)
  ### results (KOD always runs with its own fixed seed 42)
  salty5_res <- KOD(
    x = salty5, kernel = "rbf", seed = 42, labels = y5
  )
  salty10_res <- KOD(
    x = salty10, kernel = "rbf", seed = 42, labels = y10
  )
  salty20_res <- KOD(
    x = salty20, kernel = "rbf", seed = 42, labels = y20
  )
  # Three rows (5%, 10%, 20%) holding the mcc and patn fields of each result
  tmp <- data.frame(
    data = "salty",
    cont = c(5, 10, 20),
    seed = seed,
    mcc = c(salty5_res$mcc, salty10_res$mcc, salty20_res$mcc),
    patn = c(salty5_res$patn, salty10_res$patn, salty20_res$patn)
  )
  # seed 42 -> slot 1, ..., seed 51 -> slot 10
  salty_KOD_results[[seed - 41]] <- tmp
}
toc()

rm(salty5, salty10, salty20, salty5_res, salty10_res, salty20_res)

salty_KOD_rdf <- do.call(rbind, salty_KOD_results)

# Mean mcc and patn per contamination level, rounded to two decimals
salty_KOD_rdf %>%
  group_by(cont) %>%
  summarise(
    mcc = round(mean(mcc), 2),
    patn = round(mean(patn), 2)
  )
# cont   mcc  patn
# <dbl> <dbl> <dbl>
# 1     5  0.85  1
# 2    10  0.94  1
# 3    20  0.93  0.94


# > Circle-Cluster performance --------------------------------

# One result slot per data-generation seed (42..51)
circlust_KOD_results <- vector("list", length = 10)

tic()
for (seed in 42:51) {
  # Fresh datasets for this seed, one per contamination level
  circlust5  <- cans_circle(950, 0.05, seed = seed)
  circlust10 <- cans_circle(900, 0.1, seed = seed)
  circlust20 <- cans_circle(800, 0.2, seed = seed)
  ### results (KOD always runs with its own fixed seed 42)
  circlust5_res <- KOD(
    x = circlust5, kernel = "rbf", seed = 42, labels = y5
  )
  circlust10_res <- KOD(
    x = circlust10, kernel = "rbf", seed = 42, labels = y10
  )
  circlust20_res <- KOD(
    x = circlust20, kernel = "rbf", seed = 42, labels = y20
  )
  # Three rows (5%, 10%, 20%) holding the mcc and patn fields of each result
  tmp <- data.frame(
    data = "circlust",
    cont = c(5, 10, 20),
    seed = seed,
    mcc = c(
      circlust5_res$mcc, circlust10_res$mcc, circlust20_res$mcc
    ),
    patn = c(
      circlust5_res$patn, circlust10_res$patn, circlust20_res$patn
    )
  )
  # seed 42 -> slot 1, ..., seed 51 -> slot 10
  circlust_KOD_results[[seed - 41]] <- tmp
}
toc()

rm(
  circlust5, circlust10, circlust20,
  circlust5_res, circlust10_res, circlust20_res
)

circlust_KOD_rdf <- do.call(rbind, circlust_KOD_results)

# Mean mcc and patn per contamination level, rounded to two decimals
circlust_KOD_rdf %>%
  group_by(cont) %>%
  summarise(
    mcc = round(mean(mcc), 2),
    patn = round(mean(patn), 2)
  )
#    cont   mcc  patn
#   <dbl> <dbl> <dbl>
# 1     5  0.92     1
# 2    10  0.98     1
# 3    20  1        1


# > Moons Performance ---------------------------------------

# One result slot per data-generation seed (999..1008)
moons_KOD_results <- vector("list", length = 10)

tic()
for (seed in 999:1008) {
  # Draw one two-moons sample per seed and split it by generator label
  set.seed(seed)
  moonsx <- make_moons(n_samples = 2000, shuffle = TRUE, noise = 0.05)
  data1 <- moonsx$samples[moonsx$labels == 1, ]
  data2 <- moonsx$samples[moonsx$labels == 2, ]
  # Leading rows of label-1 points stacked on leading rows of label-2 points
  moons5  <- rbind(data1[seq_len(950), ], data2[seq_len(50), ])  # 5%
  moons10 <- rbind(data1[seq_len(900), ], data2[seq_len(100), ]) # 10%
  moons20 <- rbind(data1[seq_len(800), ], data2[seq_len(200), ]) # 20%
  rm(moonsx, data1, data2)
  # KOD always runs with its own fixed seed 42
  moons5_res <- KOD(
    x = moons5, kernel = "rbf", seed = 42, labels = y5
  )
  moons10_res <- KOD(
    x = moons10, kernel = "rbf", seed = 42, labels = y10
  )
  moons20_res <- KOD(
    x = moons20, kernel = "rbf", seed = 42, labels = y20
  )
  # Three rows (5%, 10%, 20%) holding the mcc and patn fields of each result
  tmp <- data.frame(
    data = "moons",
    cont = c(5, 10, 20),
    seed = seed,
    mcc = c(moons5_res$mcc, moons10_res$mcc, moons20_res$mcc),
    patn = c(moons5_res$patn, moons10_res$patn, moons20_res$patn)
  )
  # seed 999 -> slot 1, ..., seed 1008 -> slot 10
  moons_KOD_results[[seed - 998]] <- tmp
}
toc() # 71 sec

rm(moons5, moons10, moons20, moons5_res, moons10_res, moons20_res)

moons_KOD_df <- do.call(rbind, moons_KOD_results)

# Mean mcc and patn per contamination level, rounded to two decimals
moons_KOD_df %>%
  group_by(cont) %>%
  summarise(
    mcc = round(mean(mcc), 2),
    patn = round(mean(patn), 2)
  )
# cont   mcc  patn
# <dbl> <dbl> <dbl>
# 1     5  0.94  1
# 2    10  0.99  1
# 3    20  0.91  0.99


#####################################################################
## SIMULATIONS: COMPETING METHODS' PERFORMANCE ON TOY DATASETS ######
#####################################################################

# labels
# Label vectors for the 1000-point datasets: leading block coded 0, trailing
# (contaminated) block coded 1. Redefined here so this section runs on its own.
y5  <- rep(c(0, 1), times = c(950, 50))    # 5 percent
y10 <- rep(c(0, 1), times = c(900, 100))   # 10 percent
y20 <- rep(c(0, 1), times = c(800, 200))   # 20 percent


# Competing Methods Hyperparameters ---------------------------------
## knn
# Neighbourhood sizes for kNN distance
# NOTE(review): presumably ~sqrt(n) and ~log(n) for n = 1000 - confirm
ksqrt <- 32
klog  <- 7
## local outlier factor
lof_minPts_a <- 21 # 20 + 1
lof_minPts_b <- seq(11, 51, by = 10)
## isolation forest
# Depth-scored forest with 100 trees
iso_default_params <- list(
  scoring_metric = "depth",
  ndim = 1,
  ntrees = 100,
  sample_size = 256,
  max_depth = 8,
  prob_pick_pooled_gain = 0,
  prob_pick_avg_gain = 0,
  prob_pick_col_by_var = 0,
  penalize_range = FALSE,
  coefs = "uniform"
)
# Density-scored variant: identical settings except the scoring metric and
# the number of trees (200)
iso_density_params <- utils::modifyList(
  iso_default_params,
  list(scoring_metric = "density", ntrees = 200)
)
## one-class svm
ocsvm_kernel <- "radial"
ocsvm_nu <- 0.5
## kernel random projection depth
krpd_kernel <- "rbf"
krpd_num_dirs <- 8000


# Inside-Outside performance -----------------------------

# One result slot per (seed, contamination level) pair: 10 seeds x 3 levels
inoutcirc_competing_results <- vector("list", length = 30)
list_counter <- 1

tic()
for (seed in 42:51) { # takes 90 sec
  for (p.cont in c(5, 10, 20)) {
    # data generation
    x <- cans_insideoutside(p.cont = p.cont / 100, seed = seed)
    y <- get(paste0("y", p.cont))  # true labels
    ### results
    # Distance object computed once and shared by knn and lof
    dm <- dist(x, method = "manhattan")
    ## knn (neighbourhood sizes come from the hyperparameter section, so
    ## changing ksqrt / klog there actually changes this run)
    knn_res_sqrt <- dbscan::kNNdist(dm, k = ksqrt)
    knn_patn_sqrt <- prec_n(true_labels = y, outl = knn_res_sqrt,
                            top_n = sum(y))
    #
    knn_res_log <- dbscan::kNNdist(dm, k = klog)
    knn_patn_log <- prec_n(true_labels = y, outl = knn_res_log,
                           top_n = sum(y))

    ## local outlier factor (minPts values from the hyperparameter section)
    lof_res_a <- dbscan::lof(dm, minPts = lof_minPts_a)
    lof_patn_a <- prec_n(true_labels = y, outl = lof_res_a,
                         top_n = sum(y))
    #
    lof_res_b <- lof_pmax(dm, minPts = lof_minPts_b)
    lof_patn_b <- prec_n(true_labels = y, outl = lof_res_b,
                         top_n = sum(y))

    ## isolation forest (seeded with the data-generation seed)
    iso_default_params$seed <- seed
    iso_model_default <- do.call(isotree::isolation.forest,
                                 c(list(data = x), iso_default_params))
    iso_res_default <- predict(iso_model_default, x)
    iso_patn_default <- prec_n(true_labels = y, outl = iso_res_default,
                               top_n = sum(y))
    #
    iso_density_params$seed <- seed
    iso_model_density <- do.call(isotree::isolation.forest,
                                 c(list(data = x), iso_density_params))
    iso_res_density <- predict(iso_model_density, x)
    iso_patn_density <- prec_n(true_labels = y, outl = iso_res_density,
                               top_n = sum(y))

    ## one-class svm
    # gamma = 1 / (2 * median squared pairwise distance)
    bw <- 1 / (2 * median(as.numeric(stats::dist(x))^2))
    ocsvm_model <- svm(x = x, type = "one-classification",
                       kernel = ocsvm_kernel,
                       gamma = bw, nu = ocsvm_nu, scale = FALSE)
    # Decision values are negated before being passed as outlyingness scores
    ocsvm_patn <- prec_n(true_labels = y,
                         outl = -ocsvm_model$decision.values,
                         top_n = sum(y))
    # Observations with fitted == FALSE are the ones predicted as outliers
    ocsvm_mcc <- mltools::mcc(preds = !ocsvm_model$fitted,
                              actuals = (y == 1))

    ## kernel random projection depth: KOD restricted to random directions
    krpd_res <- KOD(
      x = x,
      kernel = krpd_kernel,
      poly_degree = NULL,
      dir_types = "random",
      num_dirs = list(two_points = 0, random = krpd_num_dirs),
      seed = 42,  # Using fixed seed for KRPD algorithm itself
      expvar = 99,
      tol_fv = 1e-12,
      tol_mad = 0,
      labels = y)

    ## create results data frame (mcc is only recorded for ocsvm and krpd)
    tmp <- data.frame(
      data = "insideoutside",
      cont = p.cont,
      seed = seed,
      method = c("knn_sqrt", "knn_log", "lof_a", "lof_b", "iso_default",
                 "iso_density", "ocsvm", "krpd"),
      patn = c(knn_patn_sqrt, knn_patn_log, lof_patn_a, lof_patn_b,
               iso_patn_default, iso_patn_density, ocsvm_patn,
               krpd_res$patn),
      mcc  = c(NA, NA, NA, NA, NA, NA, ocsvm_mcc, krpd_res$mcc)
    )
    inoutcirc_competing_results[[list_counter]] <- tmp
    list_counter <- list_counter + 1
  }
}
toc()


inoutcirc_competing_df <- do.call(rbind, inoutcirc_competing_results)

# Mean mcc and patn per method and contamination level, rounded to two
# decimals; mcc is NA for methods where it was not recorded
inoutcirc_competing_df %>%
  group_by(method, cont) %>%
  summarise(mcc = round(mean(mcc), 2), patn = round(mean(patn), 2)) %>%
  print(n = Inf)
# method       cont   mcc  patn
# <chr>       <dbl> <dbl> <dbl>
# 1 iso_default     5 NA     0.62
# 2 iso_default    10 NA     0.58
# 3 iso_default    20 NA     0.5
# 4 iso_density     5 NA     1
# 5 iso_density    10 NA     0.92
# 6 iso_density    20 NA     0.58
# 7 knn_log         5 NA     0.8
# 8 knn_log        10 NA     0.77
# 9 knn_log        20 NA     0.69
# 10 knn_sqrt        5 NA     1
# 11 knn_sqrt       10 NA     0.92
# 12 knn_sqrt       20 NA     0.69
# 13 krpd            5  0.58  0.47
# 14 krpd           10  0.66  0.5
# 15 krpd           20  0.66  0.54
# 16 lof_a           5 NA     0.5
# 17 lof_a          10 NA     0.66
# 18 lof_a          20 NA     0.78
# 19 lof_b           5 NA     0.99
# 20 lof_b          10 NA     0.62
# 21 lof_b          20 NA     0.77
# 22 ocsvm           5  0     0.5
# 23 ocsvm          10  0     0.5
# 24 ocsvm          20  0     0.5


# Salt-Pepper Ring performance -----------------------------

# One result slot per (seed, contamination level) pair: 10 seeds x 3 levels
salty_competing_results <- vector("list", length = 30)
list_counter <- 1

# Simulation loop. Without it the result list holds only NULLs, so
# do.call(rbind, ...) returns NULL and the summary below fails. The loop
# mirrors the other competing-method sections of this script.
tic()
for (seed in 42:51) {
  for (p.cont in c(5, 10, 20)) {
    # data generation (nclean = 950 / 900 / 800 for 5% / 10% / 20%)
    x <- cans_saltpepper(nclean = 1000 - p.cont * 10, p.cont = p.cont / 100,
                         seed = seed)
    y <- get(paste0("y", p.cont))  # true labels
    ### results
    # Distance object computed once and shared by knn and lof
    dm <- dist(x, method = "manhattan")
    ## knn
    knn_res_sqrt <- dbscan::kNNdist(dm, k = ksqrt)
    knn_patn_sqrt <- prec_n(true_labels = y, outl = knn_res_sqrt,
                            top_n = sum(y))
    #
    knn_res_log <- dbscan::kNNdist(dm, k = klog)
    knn_patn_log <- prec_n(true_labels = y, outl = knn_res_log,
                           top_n = sum(y))

    ## local outlier factor
    lof_res_a <- dbscan::lof(dm, minPts = lof_minPts_a)
    lof_patn_a <- prec_n(true_labels = y, outl = lof_res_a,
                         top_n = sum(y))
    #
    lof_res_b <- lof_pmax(dm, minPts = lof_minPts_b)
    lof_patn_b <- prec_n(true_labels = y, outl = lof_res_b,
                         top_n = sum(y))

    ## isolation forest (seeded with the data-generation seed)
    iso_default_params$seed <- seed
    iso_model_default <- do.call(isotree::isolation.forest,
                                 c(list(data = x), iso_default_params))
    iso_res_default <- predict(iso_model_default, x)
    iso_patn_default <- prec_n(true_labels = y, outl = iso_res_default,
                               top_n = sum(y))
    #
    iso_density_params$seed <- seed
    iso_model_density <- do.call(isotree::isolation.forest,
                                 c(list(data = x), iso_density_params))
    iso_res_density <- predict(iso_model_density, x)
    iso_patn_density <- prec_n(true_labels = y, outl = iso_res_density,
                               top_n = sum(y))

    ## one-class svm
    # gamma = 1 / (2 * median squared pairwise distance)
    bw <- 1 / (2 * median(as.numeric(stats::dist(x))^2))
    ocsvm_model <- svm(x = x, type = "one-classification",
                       kernel = ocsvm_kernel,
                       gamma = bw, nu = ocsvm_nu, scale = FALSE)
    ocsvm_patn <- prec_n(true_labels = y,
                         outl = -ocsvm_model$decision.values,
                         top_n = sum(y))
    ocsvm_mcc <- mltools::mcc(preds = !ocsvm_model$fitted,
                              actuals = (y == 1))

    ## kernel random projection depth: KOD restricted to random directions
    krpd_res <- KOD(
      x = x,
      kernel = krpd_kernel,
      poly_degree = NULL,
      dir_types = "random",
      num_dirs = list(two_points = 0, random = krpd_num_dirs),
      seed = 42,  # Using fixed seed for KRPD algorithm itself
      expvar = 99,
      tol_fv = 1e-12,
      tol_mad = 0,
      labels = y)

    ## create results data frame (mcc is only recorded for ocsvm and krpd)
    tmp <- data.frame(
      data = "saltpepper",
      cont = p.cont,
      seed = seed,
      method = c("knn_sqrt", "knn_log", "lof_a", "lof_b", "iso_default",
                 "iso_density", "ocsvm", "krpd"),
      patn = c(knn_patn_sqrt, knn_patn_log, lof_patn_a, lof_patn_b,
               iso_patn_default, iso_patn_density, ocsvm_patn,
               krpd_res$patn),
      mcc  = c(NA, NA, NA, NA, NA, NA, ocsvm_mcc, krpd_res$mcc)
    )
    salty_competing_results[[list_counter]] <- tmp
    list_counter <- list_counter + 1
  }
}
toc()


salty_competing_df <- do.call(rbind, salty_competing_results)

# Mean mcc and patn per method and contamination level, rounded to two
# decimals; mcc is NA for methods where it was not recorded
salty_competing_df %>%
  group_by(method, cont) %>%
  summarise(mcc = round(mean(mcc), 2), patn = round(mean(patn), 2)) %>%
  print(n = Inf)
# method       cont   mcc  patn
# <chr>       <dbl> <dbl> <dbl>
# 1 iso_default     5 NA     0.9
# 2 iso_default    10 NA     0.94
# 3 iso_default    20 NA     0.96
# 4 iso_density     5 NA     0.98
# 5 iso_density    10 NA     0.98
# 6 iso_density    20 NA     0.99
# 7 knn_log         5 NA     1
# 8 knn_log        10 NA     1
# 9 knn_log        20 NA     1
# 10 knn_sqrt        5 NA     1
# 11 knn_sqrt       10 NA     1
# 12 knn_sqrt       20 NA     1
# 13 krpd            5  0.81  0.78
# 14 krpd           10  0.78  0.77
# 15 krpd           20  0.74  0.81
# 16 lof_a           5 NA     1
# 17 lof_a          10 NA     0.97
# 18 lof_a          20 NA     0.75
# 19 lof_b           5 NA     1
# 20 lof_b          10 NA     1
# 21 lof_b          20 NA     0.98
# 22 ocsvm           5  0.17  0.87
# 23 ocsvm          10  0.24  0.86
# 24 ocsvm          20  0.38  0.88


# > Circle-Cluster performance --------------------------------

circlust_competing_results <- vector("list", length = 30)
list_counter = 1

tic()
for (seed in c(42:51)) { # takes 100 sec
  for (p.cont in c(5, 10, 20)) {
    # data generation
    x = cans_circle(nclean = 1000 - p.cont*10, p.cont = p.cont/100,  
                    seed = seed)
    y = get(paste0("y", p.cont))  # true labels
    ### results
    dm <- dist(x, method = "manhattan") 
    # Computes distance once for knn and lof
    ## knn
    knn_res_sqrt <- dbscan::kNNdist(dm, k = 32)
    knn_patn_sqrt <- prec_n(true_labels = y, outl = knn_res_sqrt, 
                            top_n = sum(y))
    #
    knn_res_log <- dbscan::kNNdist(dm, k = 7)
    knn_patn_log <- prec_n(true_labels = y, outl = knn_res_log, 
                           top_n = sum(y))

    ## local outlier factor
    lof_res_a <- dbscan::lof(dm, minPts = 1 + 20)
    lof_patn_a <- prec_n(true_labels = y, outl = lof_res_a, 
                         top_n = sum(y))
    #
    lof_res_b <- lof_pmax(dm, minPts = 1 + seq(10, 50, by = 10))
    lof_patn_b <- prec_n(true_labels = y, outl = lof_res_b, 
                         top_n = sum(y))

    ## isolation forest
    iso_default_params$seed <- seed
    iso_model_default <- do.call(isotree::isolation.forest, 
                                 c(list(data = x), iso_default_params))
    iso_res_default <- predict(iso_model_default, x)
    iso_patn_default <- prec_n(true_labels = y, outl = iso_res_default, 
                               top_n = sum(y))
    #
    iso_density_params$seed <- seed
    iso_model_density <- do.call(isotree::isolation.forest, 
                                 c(list(data = x), iso_density_params))
    iso_res_density <- predict(iso_model_density, x)
    iso_patn_density <- prec_n(true_labels = y, outl = iso_res_density, 
                               top_n = sum(y))

    ## one-class svm
    bw <- 1/(2*median(as.numeric(stats::dist(x))^2))
    # model <- suppressWarnings(svm(x = x, type = "one-classification",
    #  kernel = ocsvm_kernel, gamma = bw, nu = ocsvm_nu, scale = FALSE))
    ocsvm_model <- svm(x = x, type = "one-classification", 
                       kernel = ocsvm_kernel,
                       gamma = bw, nu = ocsvm_nu, scale = FALSE)
    ocsvm_patn = prec_n(true_labels = y, outl = -ocsvm_model$decision.values, 
                        top_n = sum(y))
    ocsvm_mcc <- mltools::mcc(preds = !ocsvm_model$fitted, actuals=(y == 1))

    ## kernel random projection depth
    krpd_res <- KOD(
      x = x,
      kernel = krpd_kernel,
      poly_degree = NULL,
      dir_types = "random",
      num_dirs = list(two_points = 0, random = krpd_num_dirs),
      seed = 42,  # Using fixed seed for KRPD algorithm itself
      expvar = 99,
      tol_fv = 1e-12,
      tol_mad = 0,
      labels = y)

    ## create results data frame
    tmp = data.frame(
      data = "circle_cluster",
      cont = p.cont,
      seed = seed,
      method = c("knn_sqrt", "knn_log", "lof_a", "lof_b", "iso_default",
                 "iso_density", "ocsvm", "krpd"),
      patn = c(knn_patn_sqrt, knn_patn_log, lof_patn_a, lof_patn_b,
               iso_patn_default, iso_patn_density, ocsvm_patn, 
               krpd_res$patn),
      mcc  = c(NA, NA, NA, NA, NA, NA, ocsvm_mcc, krpd_res$mcc)
    )

    circlust_competing_results[[list_counter]] = tmp
    list_counter <- list_counter + 1
  }

}
toc()


# Stack the per-run result data frames into one long table
# (one row per seed x contamination level x method).
circlust_competing_df <- do.call(rbind, circlust_competing_results)

# Average MCC and patn over the seeds for each method / contamination level.
# mcc is only filled in for "ocsvm" and "krpd"; for the other methods it is
# NA in every run, so their mean stays NA.
# `.groups = "drop"` returns an ungrouped tibble and silences the
# "summarise() has grouped output by 'method'" message.
circlust_competing_df %>%
  group_by(method, cont) %>%
  summarise(
    mcc = round(mean(mcc), 2),
    patn = round(mean(patn), 2),
    .groups = "drop"
  ) %>%
  print(n = Inf)
# method       cont   mcc  patn
# <chr>       <dbl> <dbl> <dbl>
# 1 iso_default      5 NA     0.12
# 2 iso_default     10 NA     0.03
# 3 iso_default     20 NA     0
# 4 iso_density      5 NA     0.98
# 5 iso_density     10 NA     0.6
# 6 iso_density     20 NA     0.14
# 7 knn_log          5 NA     0.5
# 8 knn_log         10 NA     0.39
# 9 knn_log         20 NA     0.23
# 10 knn_sqrt        5 NA     0.83
# 11 knn_sqrt       10 NA     0.41
# 12 knn_sqrt       20 NA     0.2
# 13 krpd            5 -0.03  0
# 14 krpd           10 -0.03  0
# 15 krpd           20 -0.03  0
# 16 lof_a           5 NA     0.3
# 17 lof_a          10 NA     0.52
# 18 lof_a          20 NA     0.6
# 19 lof_b           5 NA     0.27
# 20 lof_b          10 NA     0.46
# 21 lof_b          20 NA     0.66
# 22 ocsvm           5 -0.23  0
# 23 ocsvm          10 -0.33  0
# 24 ocsvm          20 -0.5   0


# Moons performance --------------------------------

# Benchmark all competing detectors on the two-moons data.
# Each (seed, contamination) run stores one data frame in the list:
# 10 seeds x 3 contamination levels = 30 slots.
moons_competing_results <- vector(mode = "list", length = 30)
list_counter <- 1

tic()
for (seed in 999:1008) {  # the full sweep takes about 100 seconds
  for (p.cont in c(5, 10, 20)) {
    # -- data generation ----------------------------------------------------
    # Re-seeding here means make_moons() starts from the same RNG state
    # for every contamination level of a given seed.
    set.seed(seed)
    moonsx <- make_moons(n_samples = 2000, shuffle = TRUE, noise = 0.05)
    data1 <- moonsx$samples[moonsx$labels == 1, ]
    data2 <- moonsx$samples[moonsx$labels == 2, ]

    # 1000 rows in total: the leading rows of the label-1 moon followed by
    # p.cont * 10 rows of the label-2 moon (p.cont percent of the sample).
    x <- rbind(
      data1[seq_len(1000 - p.cont * 10), ],
      data2[seq_len(p.cont * 10), ]
    )
    # true labels: one of y5 / y10 / y20 defined at the top of the script
    y <- get(paste0("y", p.cont))
    # number of true outliers; used as top_n in every prec_n() call below
    moons_top_n <- sum(y)

    # -- distance-based methods ---------------------------------------------
    # Manhattan distances are computed once and shared by kNN and LOF.
    dm <- dist(x, method = "manhattan")

    # kNN distance with k = 32
    knn_res_sqrt <- dbscan::kNNdist(dm, k = 32)
    knn_patn_sqrt <- prec_n(
      true_labels = y,
      outl = knn_res_sqrt,
      top_n = moons_top_n
    )
    # kNN distance with k = 7
    knn_res_log <- dbscan::kNNdist(dm, k = 7)
    knn_patn_log <- prec_n(
      true_labels = y,
      outl = knn_res_log,
      top_n = moons_top_n
    )

    # LOF with a single neighbourhood size
    lof_res_a <- dbscan::lof(dm, minPts = 1 + 20)
    lof_patn_a <- prec_n(
      true_labels = y,
      outl = lof_res_a,
      top_n = moons_top_n
    )
    # lof_pmax() over neighbourhood sizes 10, 20, ..., 50 (each plus one)
    lof_res_b <- lof_pmax(dm, minPts = 1 + seq(10, 50, by = 10))
    lof_patn_b <- prec_n(
      true_labels = y,
      outl = lof_res_b,
      top_n = moons_top_n
    )

    # -- isolation forest ---------------------------------------------------
    # Both parameter lists get the current seed before the model is fitted.
    iso_default_params$seed <- seed
    iso_model_default <- do.call(
      isotree::isolation.forest,
      c(list(data = x), iso_default_params)
    )
    iso_res_default <- predict(iso_model_default, x)
    iso_patn_default <- prec_n(
      true_labels = y,
      outl = iso_res_default,
      top_n = moons_top_n
    )

    iso_density_params$seed <- seed
    iso_model_density <- do.call(
      isotree::isolation.forest,
      c(list(data = x), iso_density_params)
    )
    iso_res_density <- predict(iso_model_density, x)
    iso_patn_density <- prec_n(
      true_labels = y,
      outl = iso_res_density,
      top_n = moons_top_n
    )

    # -- one-class SVM ------------------------------------------------------
    # gamma = 1 / (2 * median of the squared pairwise Euclidean distances)
    bw <- 1 / (2 * median(as.numeric(stats::dist(x))^2))
    ocsvm_model <- svm(
      x = x,
      type = "one-classification",
      kernel = ocsvm_kernel,
      gamma = bw,
      nu = ocsvm_nu,
      scale = FALSE
    )
    # Decision values are negated before being passed as the outlyingness
    # score.
    ocsvm_patn <- prec_n(
      true_labels = y,
      outl = -ocsvm_model$decision.values,
      top_n = moons_top_n
    )
    # Points with fitted == FALSE are the predicted outliers.
    ocsvm_mcc <- mltools::mcc(
      preds = !ocsvm_model$fitted,
      actuals = (y == 1)
    )

    # -- kernel random projection depth -------------------------------------
    krpd_res <- KOD(
      x = x,
      kernel = krpd_kernel,
      poly_degree = NULL,
      dir_types = "random",
      num_dirs = list(two_points = 0, random = krpd_num_dirs),
      seed = 42,  # fixed seed for the KRPD algorithm itself
      expvar = 99,
      tol_fv = 1e-12,
      tol_mad = 0,
      labels = y
    )

    # -- collect results: one row per method --------------------------------
    # mcc is only available for the one-class SVM and KRPD.
    tmp <- data.frame(
      data = "moons",
      cont = p.cont,
      seed = seed,
      method = c(
        "knn_sqrt", "knn_log", "lof_a", "lof_b",
        "iso_default", "iso_density", "ocsvm", "krpd"
      ),
      patn = c(
        knn_patn_sqrt, knn_patn_log, lof_patn_a, lof_patn_b,
        iso_patn_default, iso_patn_density, ocsvm_patn, krpd_res$patn
      ),
      mcc = c(NA, NA, NA, NA, NA, NA, ocsvm_mcc, krpd_res$mcc)
    )

    moons_competing_results[[list_counter]] <- tmp
    list_counter <- list_counter + 1
  }
}
toc()

# Stack the per-run result data frames into one long table
# (one row per seed x contamination level x method).
moons_competing_df <- do.call(rbind, moons_competing_results)

# Average MCC and patn over the seeds for each method / contamination level.
# mcc is only filled in for "ocsvm" and "krpd"; for the other methods it is
# NA in every run, so their mean stays NA.
# `.groups = "drop"` returns an ungrouped tibble and silences the
# "summarise() has grouped output by 'method'" message.
moons_competing_df %>%
  group_by(method, cont) %>%
  summarise(
    mcc = round(mean(mcc), 2),
    patn = round(mean(patn), 2),
    .groups = "drop"
  ) %>%
  print(n = Inf)
# method       cont   mcc  patn
# <chr>       <dbl> <dbl> <dbl>
# 1  iso_default     5 NA     0.9
# 2  iso_default    10 NA     0.89
# 3  iso_default    20 NA     0.85
# 4  iso_density     5 NA     1
# 5  iso_density    10 NA     1
# 6  iso_density    20 NA     0.97
# 7  knn_log         5 NA     0.97
# 8  knn_log        10 NA     0.89
# 9  knn_log        20 NA     0.76
# 10 knn_sqrt        5 NA     1
# 11 knn_sqrt       10 NA     1
# 12 knn_sqrt       20 NA     0.98
# 13 krpd            5  0.29  0.6
# 14 krpd           10  0.17  0.7
# 15 krpd           20  0.04  0.85
# 16 lof_a           5 NA     0.56
# 17 lof_a          10 NA     0.1
# 18 lof_a          20 NA     0.11
# 19 lof_b           5 NA     0.99
# 20 lof_b          10 NA     0.73
# 21 lof_b          20 NA     0.21
# 22 ocsvm           5  0.15  0.72
# 23 ocsvm          10  0.2   0.69
# 24 ocsvm          20  0.25  0.67

######################################################
