library(sn) library(perm) # I. Shamsudheen, C. Hennig: Should we test the model assumptions # before running a model-based test? # This reproduces the results of Example 1, Table 1. # Example 2 has been computed by straightforward arithmetic from these results. # Note that here power is computed whereas Table 1 gives type 2 error # probabilities, i.e., 1-power. # Note also that some more experimental tests were computed, which # means that results with the same random seed may not reproduce precisely, # but differences with 100000 replicates should be very small. combinetwo <- function(x1,x2,alphams=0.05,alphafinal=0.05,mstest="SW"){ shiftedx <- c(x1-median(x1),x2-median(x2)) if(mstest=="SW") msr <- shapiro.test(shiftedx)$p.value sum(tresults<0.05)/simruns [1] 0.04981 > sum(wresults<0.05)/simruns [1] 0.04754 > sum(cresults<0.05)/simruns [1] 0.0512 > sum(cmsresults)/simruns [1] 0.05658 > sum(presults<0.05)/simruns [1] 0.04869 # plain normal, power set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 cent1 <- 1 cent2 <- 2 scale <- 1 presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- rnorm(n1,cent1,scale) x2 <- rnorm(n2,cent2,scale) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.92153 > sum(wresults<0.05)/simruns [1] 0.90758 > sum(cresults<0.05)/simruns [1] 0.92181 > sum(cmsresults)/simruns [1] 0.05658 > sum(presults<0.05)/simruns [1] 0.92225 # t3, level set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 cent1 <- 1 cent2 <- 1 scale <- 1 presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- cent1+scale*rt(n1,3) x2 <- cent2+scale*rt(n2,3) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.04528 > sum(wresults<0.05)/simruns [1] 0.04828 > sum(cresults<0.05)/simruns [1] 0.05151 > sum(cmsresults)/simruns [1] 0.65184 > sum(presults<0.05)/simruns [1] 0.04502 # t3. power set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 cent1 <- 1 cent2 <- 2 scale <- 1 presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- cent1+scale*rt(n1,3) x2 <- cent2+scale*rt(n2,3) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.58731 > sum(wresults<0.05)/simruns [1] 0.74148 > sum(cresults<0.05)/simruns [1] 0.72996 > sum(cmsresults)/simruns [1] 0.65184 > sum(presults<0.05)/simruns [1] 0.58575 # exp, level set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 cent1 <- 1 cent2 <- 1 scale <- 1 presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- rexp(n1,1/cent1) x2 <- rexp(n2,1/cent2) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.0474 > sum(wresults<0.05)/simruns [1] 0.04705 > sum(cresults<0.05)/simruns [1] 0.04759 > sum(cmsresults)/simruns [1] 0.99322 > sum(presults<0.05)/simruns [1] 0.0455 # exp, power set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 cent1 <- 1 cent2 <- 2 scale <- 1 presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- rexp(n1,1/cent1) x2 <- rexp(n2,1/cent2) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.66106 > sum(wresults<0.05)/simruns [1] 0.5137 > sum(cresults<0.05)/simruns [1] 0.51511 > sum(cmsresults)/simruns [1] 0.98507 > sum(presults<0.05)/simruns [1] 0.55277 # sn, level set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 alpha <- 3 delta <- alpha/sqrt(1+alpha^2) alphavar <- (1-2*delta^2/pi) omega <- sqrt(1/alphavar) alphamean <- alpha/sqrt(1+alpha^2)*sqrt(2/pi)*omega xi1 <- 5-alphamean xi2 <- 5-alphamean omegavar <- omega^2*alphavar presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- rsn(n1,xi1,omega,alpha) x2 <- rsn(n2,xi2,omega,alpha) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.04931 > sum(wresults<0.05)/simruns [1] 0.04807 > sum(cresults<0.05)/simruns [1] 0.0531 > sum(cmsresults)/simruns [1] 0.39082 > sum(presults<0.05)/simruns [1] 0.04775 # sn power set.seed(6543287) simruns <- 100000 n1 <- 20 n2 <- 30 alpha <- 3 delta <- alpha/sqrt(1+alpha^2) alphavar <- (1-2*delta^2/pi) omega <- sqrt(1/alphavar) alphamean <- alpha/sqrt(1+alpha^2)*sqrt(2/pi)*omega xi1 <- 5-alphamean xi2 <- 6-alphamean omegavar <- omega^2*alphavar presults <- tresults <- wresults <- cresults <- cmsresults <- numeric(0) for(i in 1:simruns){ if(i %% 2500==0) print(i) x1 <- rsn(n1,xi1,omega,alpha) x2 <- rsn(n2,xi2,omega,alpha) tresults[i] <- t.test(x1,x2)$p.value wresults[i] <- wilcox.test(x1,x2)$p.value ct <- combinetwo(x1,x2) cresults[i] <- ct$finalp cmsresults[i] <- ct$msr presults[i] <- permTS(as.vector(x1),as.vector(x2))$p.value } > sum(tresults<0.05)/simruns [1] 0.91321 > sum(wresults<0.05)/simruns [1] 0.9222 > sum(cresults<0.05)/simruns [1] 0.92649 > sum(cmsresults)/simruns [1] 0.39082 > sum(presults<0.05)/simruns [1] 0.92229 # Test of some differences > binom.test(5151,100000,p=0.05, alternative="greater") # 5% level of combined, normal Exact binomial test data: 5151 and 1e+05 number of successes = 5151, number of trials = 1e+05, p-value = 0.01479 alternative hypothesis: true probability of success is greater than 0.05 95 percent confidence interval: 0.05036491 1.00000000 sample estimates: probability of success 0.05151 > binom.test(5120,100000,p=0.05, alternative="greater") # 5% level of combined, t3 Exact binomial test data: 5120 and 1e+05 number of successes = 5120, number of trials = 1e+05, p-value = 0.04185 alternative hypothesis: true probability of success is greater than 0.05 95 percent confidence interval: 0.05005819 1.00000000 sample estimates: probability of success 0.0512 > prop.test(c(92153,92225),c(100000,100000)) # Powers against t-test on normal 2-sample test for equality of proportions with continuity correction data: c(92153, 92225) out of c(1e+05, 1e+05) X-squared = 0.35003, df = 1, p-value = 0.5541 alternative hypothesis: two.sided 95 percent confidence interval: -0.003082099 0.001642099 sample estimates: prop 1 prop 2 0.92153 0.92225 > prop.test(c(91321,92220),c(100000,100000)) # Power t-test vs WMW, skew norm 2-sample test for equality of proportions with continuity correction data: c(91321, 92220) out of c(1e+05, 1e+05) X-squared = 53.388, df = 1, p-value = 2.737e-13 alternative hypothesis: two.sided 95 percent confidence interval: -0.011408481 -0.006571519 sample estimates: prop 1 prop 2 0.91321 0.92220 > prop.test(c(92649,92220),c(100000,100000)) # Power WMW vs combined, skew norm 2-sample test for equality of proportions with continuity correction data: c(92649, 92220) out of c(1e+05, 1e+05) X-squared = 13.097, df = 1, p-value = 0.0002957 alternative hypothesis: two.sided 95 percent confidence interval: 0.001962154 0.006617846 sample estimates: prop 1 prop 2 0.92649 0.92220 > prop.test(c(58575,58731),c(100000,100000)) # t3 permutation vs t 2-sample test for equality of proportions with continuity correction data: c(58575, 58731) out of c(1e+05, 1e+05) X-squared = 0.49534, df = 1, p-value = 0.4816 alternative hypothesis: two.sided 95 percent confidence interval: -0.005886479 0.002766479 sample estimates: prop 1 prop 2 0.58575 0.58731