From 0e2965c224932947d57ada7a5be7fe98900a493e Mon Sep 17 00:00:00 2001 From: Jaroslav Borodavka <df2994@kit.edu> Date: Fri, 12 Jul 2024 18:16:17 +0200 Subject: [PATCH] New scripts for runtime calculations. --- R/T_Statistic_Powertests.R | 118 +--------------------------------- R/T_statistic_Functions.R | 128 +++++++++++++++++++++++++++++++++++++ R/T_statistic_Runtimes.R | 28 ++++++++ 3 files changed, 158 insertions(+), 116 deletions(-) create mode 100644 R/T_statistic_Functions.R create mode 100644 R/T_statistic_Runtimes.R diff --git a/R/T_Statistic_Powertests.R b/R/T_Statistic_Powertests.R index 04c3137..1f15109 100644 --- a/R/T_Statistic_Powertests.R +++ b/R/T_Statistic_Powertests.R @@ -19,121 +19,7 @@ setwd("/home/jaroslav-borodavka/Schreibtisch/Dissertation/Arbeit/Bernoulli Submi # load additionally required script source("Legendre_Polynomial_Alternative.R") - -################################################################################################# -## reuqired functions and test statistic ## - -# test decision -Decision <- function(teststatistic_value, critical){ - - decision <- 0 - - if (teststatistic_value >= critical) - decision = 1 - else - decision = 0 - - return(decision) -} - -# uniformly distributed vectors on S^(d-1) -runif_sphere <- function(n, d){ - X = mvrnorm(n, rep(0,d), diag(1,d)) - if (n == 1){ - temp = as.vector(crossprod(X)) - U = X/sqrt(temp) - } - else{ - temp = diag(X %*% t(X)) - U = matrix(0, nrow = n, ncol = d) - for (j in 1:n){ - U[j,] = X[j,]/sqrt(temp[j]) - } - } - return(U) -} - -# expectations Psi_d as per section 1 -psi_d <- function(d, beta){ - return(if (beta%%2 == 0) gamma((beta+1)/2)*gamma(d/2)/(sqrt(pi)*gamma((beta+d)/2)) else 0) -} - -# squared and centered projections, used in T_stat_value -projection <- function(unif_point_cover, data, beta){ - n = dim(data)[1] - d = dim(data)[2] - scalarproducts = data %*% t(unif_point_cover) - sums = colSums((scalarproducts)^beta) - return((1/n*sums - psi_d(d, beta))^2) -} - -# test statistic T_n,beta, cover_n is the number of uniformly distributed points on the sphere -T_stat_value <- function(data, beta){ - n = dim(data)[1] - d = dim(data)[2] - cover_n = 0 - if (d < 5) cover_n = 5000 else cover_n = 20000 - unif_points = runif_sphere(cover_n, d) - projections = projection(unif_point_cover = unif_points, data = data, beta = beta) - return(n*max(projections)) -} - -# uniformly distributed data -T_unif <- function(n, dim, beta){ - return(list("realization" = T_stat_value(data = runif_sphere(n, dim), - beta = beta))) -} - -# von Mises-Fisher alternative data -T_vMF <- function(n, dim, kappa, beta){ - return(list("realization" = T_stat_value(data = rvmf(n, c(1,rep(0, dim-1)), kappa), - beta = beta))) -} - -# other alternative data, cf. section 6 -T_mixvmf_1 <- function(n, dim, p, beta){ - U = runif(n, 0, 1) - sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1) + (U>=p)*rvmf(n, c(1,rep(0,dim-1)), 1) - return(list("realization" = T_stat_value(data = sample, - beta = beta))) -} - -T_mixvmf_2 <- function(n, dim, p, beta){ - U = runif(n, 0, 1) - sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1)+(U>=p)*rvmf(n, c(1,rep(0,dim-1)), 4) - return(list("realization" = T_stat_value(data = sample, - beta = beta))) -} - -T_mixvmf_3 <- function(n, dim, beta){ - p = 0.25 - U = runif(n, 0, 1) - sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 3) - return(list("realization" = T_stat_value(data = sample, - beta = beta))) -} - -T_mixvmf_4 <- function(n, dim, beta){ - p = 1/3 - U = runif(n, 0, 1) - sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 4) - return(list("realization" = T_stat_value(data = sample, - beta = beta))) -} - -T_bing_1 <- function(n, dim, tau, beta){ - A = tau*diag(seq(1,dim)) - return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta))) -} - -T_bing_2 <- function(n, dim, tau, beta){ - A = tau*diag(c(-dim, rep(0, dim-2), dim)) - return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta))) -} - -T_LP <- function(n, dim, m, kappa, beta){ - return(list("realization" = T_stat_value(data = rLP(n, m, c(1,rep(0,dim-1)), kappa), beta = beta))) -} +source("T_statistic_Functions.R") ################################################################################################# ## calculation of empirical critical values of T_n,beta under the null hypothesis ## @@ -179,7 +65,7 @@ print(DT_T_unif) # replication number for all powertests a = 5000 -Test <- MonteCarlo::MonteCarlo(T_unif, nrep = a, param_list = parameter, ncpus = numCores, export_also = export_list) +Test <- MonteCarlo::MonteCarlo(T_unif, nrep = a, param_list = parameter, ncpus = numCores, export_also = export_list) DF_Test <- MonteCarlo::MakeFrame(Test) DT_Test <- setDT(DF_Test) diff --git a/R/T_statistic_Functions.R b/R/T_statistic_Functions.R new file mode 100644 index 0000000..4e1ae12 --- /dev/null +++ b/R/T_statistic_Functions.R @@ -0,0 +1,128 @@ +################################################################################################# +################################################################################################# +## This script contains relevant functions for the computation of the test statistic T_n,beta. +################################################################################################# +################################################################################################# + +# required packages +#library(Directional) +library(MASS) +library(MonteCarlo) +library(data.table) + +setwd("/home/jaroslav-borodavka/Schreibtisch/Dissertation/Arbeit/Bernoulli Submission/codes/codes_git_project/R") + +################################################################################################# +## required functions and test statistic ## + +# test decision +Decision <- function(teststatistic_value, critical){ + + decision <- 0 + + if (teststatistic_value >= critical) + decision = 1 + else + decision = 0 + + return(decision) +} + +# uniformly distributed vectors on S^(d-1) +runif_sphere <- function(n, d){ + X = mvrnorm(n, rep(0,d), diag(1,d)) + if (n == 1){ + temp = as.vector(crossprod(X)) + U = X/sqrt(temp) + } + else{ + temp = diag(X %*% t(X)) + U = matrix(0, nrow = n, ncol = d) + for (j in 1:n){ + U[j,] = X[j,]/sqrt(temp[j]) + } + } + return(U) +} + +# expectations Psi_d as per section 1 +psi_d <- function(d, beta){ + return(if (beta%%2 == 0) gamma((beta+1)/2)*gamma(d/2)/(sqrt(pi)*gamma((beta+d)/2)) else 0) +} + +# squared and centered projections, used in T_stat_value +projection <- function(unif_point_cover, data, beta){ + n = dim(data)[1] + d = dim(data)[2] + scalarproducts = data %*% t(unif_point_cover) + sums = colSums((scalarproducts)^beta) + return((1/n*sums - psi_d(d, beta))^2) +} + +# test statistic T_n,beta, cover_n is the number of uniformly distributed points on the sphere +T_stat_value <- function(data, beta){ + n = dim(data)[1] + d = dim(data)[2] + cover_n = 0 + if (d < 5) cover_n = 5000 else cover_n = 20000 + unif_points = runif_sphere(cover_n, d) + projections = projection(unif_point_cover = unif_points, data = data, beta = beta) + return(n*max(projections)) +} + +# uniformly distributed data +T_unif <- function(n, dim, beta){ + return(list("realization" = T_stat_value(data = runif_sphere(n, dim), + beta = beta))) +} + +# von Mises-Fisher alternative data +T_vMF <- function(n, dim, kappa, beta){ + return(list("realization" = T_stat_value(data = rvmf(n, c(1,rep(0, dim-1)), kappa), + beta = beta))) +} + +# other alternative data, cf. section 6 +T_mixvmf_1 <- function(n, dim, p, beta){ + U = runif(n, 0, 1) + sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1) + (U>=p)*rvmf(n, c(1,rep(0,dim-1)), 1) + return(list("realization" = T_stat_value(data = sample, + beta = beta))) +} + +T_mixvmf_2 <- function(n, dim, p, beta){ + U = runif(n, 0, 1) + sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1)+(U>=p)*rvmf(n, c(1,rep(0,dim-1)), 4) + return(list("realization" = T_stat_value(data = sample, + beta = beta))) +} + +T_mixvmf_3 <- function(n, dim, beta){ + p = 0.25 + U = runif(n, 0, 1) + sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 3) + return(list("realization" = T_stat_value(data = sample, + beta = beta))) +} + +T_mixvmf_4 <- function(n, dim, beta){ + p = 1/3 + U = runif(n, 0, 1) + sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 4) + return(list("realization" = T_stat_value(data = sample, + beta = beta))) +} + +T_bing_1 <- function(n, dim, tau, beta){ + A = tau*diag(seq(1,dim)) + return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta))) +} + +T_bing_2 <- function(n, dim, tau, beta){ + A = tau*diag(c(-dim, rep(0, dim-2), dim)) + return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta))) +} + +T_LP <- function(n, dim, m, kappa, beta){ + return(list("realization" = T_stat_value(data = rLP(n, m, c(1,rep(0,dim-1)), kappa), beta = beta))) +} \ No newline at end of file diff --git a/R/T_statistic_Runtimes.R b/R/T_statistic_Runtimes.R new file mode 100644 index 0000000..4bc55c0 --- /dev/null +++ b/R/T_statistic_Runtimes.R @@ -0,0 +1,28 @@ +################################################################################################# +################################################################################################# +## This script calculates running times of a test result with T_n,beta under the null hypothesis +## with the R benchmarking tool microbenchmarking. +################################################################################################# +################################################################################################# + +# required packages +library(microbenchmark) +library(MASS) +library(MonteCarlo) +library(data.table) +library(ggplot2) + +setwd("/home/jaroslav-borodavka/Schreibtisch/Dissertation/Arbeit/Bernoulli Submission/codes/codes_git_project/R") + +# load additionally required script +source("T_statistic_Functions.R") + +# benchmark tests + +n = 100 +d = 5 + +res <- microbenchmark(T_unif(n, d, 1), T_unif(n, d, 2), T_unif(n, d, 3), T_unif(n, d, 4), T_unif(n, d, 5), T_unif(n, d, 6), times=100, unit="s") +print(res) +ggplot2::autoplot(res) +boxplot(res) -- GitLab