From 0e2965c224932947d57ada7a5be7fe98900a493e Mon Sep 17 00:00:00 2001
From: Jaroslav Borodavka <df2994@kit.edu>
Date: Fri, 12 Jul 2024 18:16:17 +0200
Subject: [PATCH] New scripts for runtime calculations.

---
 R/T_Statistic_Powertests.R | 118 +---------------------------------
 R/T_statistic_Functions.R  | 128 +++++++++++++++++++++++++++++++++++++
 R/T_statistic_Runtimes.R   |  28 ++++++++
 3 files changed, 158 insertions(+), 116 deletions(-)
 create mode 100644 R/T_statistic_Functions.R
 create mode 100644 R/T_statistic_Runtimes.R

diff --git a/R/T_Statistic_Powertests.R b/R/T_Statistic_Powertests.R
index 04c3137..1f15109 100644
--- a/R/T_Statistic_Powertests.R
+++ b/R/T_Statistic_Powertests.R
@@ -19,121 +19,7 @@ setwd("/home/jaroslav-borodavka/Schreibtisch/Dissertation/Arbeit/Bernoulli Submi
 
 # load additionally required script
 source("Legendre_Polynomial_Alternative.R")
-
-#################################################################################################
-## reuqired functions and test statistic ## 
-
-# test decision  
-Decision <- function(teststatistic_value, critical){              
-  
-  decision <- 0
-  
-  if (teststatistic_value >= critical)         
-    decision = 1
-  else        
-    decision = 0
-  
-  return(decision)
-}
-
-# uniformly distributed vectors on S^(d-1)
-runif_sphere <- function(n, d){
-  X = mvrnorm(n, rep(0,d), diag(1,d))
-  if (n == 1){
-    temp = as.vector(crossprod(X))
-    U = X/sqrt(temp)
-  }
-  else{
-    temp = diag(X %*% t(X))
-    U = matrix(0, nrow = n, ncol = d)
-    for (j in 1:n){ 
-      U[j,] = X[j,]/sqrt(temp[j])             
-    }
-  }
-  return(U)
-}
-
-# expectations Psi_d as per section 1
-psi_d <- function(d, beta){
-  return(if (beta%%2 == 0) gamma((beta+1)/2)*gamma(d/2)/(sqrt(pi)*gamma((beta+d)/2)) else 0) 
-}
-
-# squared and centered projections, used in T_stat_value
-projection <- function(unif_point_cover, data, beta){
-  n = dim(data)[1]
-  d = dim(data)[2]
-  scalarproducts = data %*% t(unif_point_cover)
-  sums = colSums((scalarproducts)^beta)
-  return((1/n*sums - psi_d(d, beta))^2)
-}
-
-# test statistic T_n,beta, cover_n is the number of uniformly distributed points on the sphere
-T_stat_value <- function(data, beta){
-  n = dim(data)[1]
-  d = dim(data)[2]
-  cover_n = 0
-  if (d < 5) cover_n = 5000 else cover_n = 20000
-  unif_points = runif_sphere(cover_n, d)
-  projections = projection(unif_point_cover = unif_points, data = data, beta = beta)
-  return(n*max(projections))
-}
-
-# uniformly distributed data
-T_unif <- function(n, dim, beta){
-  return(list("realization" = T_stat_value(data = runif_sphere(n, dim), 
-                                            beta = beta)))
-}
-
-# von Mises-Fisher alternative data
-T_vMF <- function(n, dim, kappa, beta){
-  return(list("realization" = T_stat_value(data = rvmf(n, c(1,rep(0, dim-1)), kappa), 
-                                            beta = beta)))
-}
-
-# other alternative data, cf. section 6
-T_mixvmf_1 <- function(n, dim, p, beta){
-  U = runif(n, 0, 1)
-  sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1) + (U>=p)*rvmf(n, c(1,rep(0,dim-1)), 1)
-  return(list("realization" = T_stat_value(data = sample, 
-                                            beta = beta)))
-}
-
-T_mixvmf_2 <- function(n, dim, p, beta){
-  U = runif(n, 0, 1)
-  sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1)+(U>=p)*rvmf(n, c(1,rep(0,dim-1)), 4)
-  return(list("realization" = T_stat_value(data = sample, 
-                                            beta = beta)))
-}
-
-T_mixvmf_3 <- function(n, dim, beta){
-  p = 0.25
-  U = runif(n, 0, 1)
-  sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 3)
-  return(list("realization" = T_stat_value(data = sample, 
-                                            beta = beta)))
-}
-
-T_mixvmf_4 <- function(n, dim, beta){
-  p = 1/3
-  U = runif(n, 0, 1)
-  sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 4)
-  return(list("realization" = T_stat_value(data = sample,
-                                            beta = beta)))
-}
-
-T_bing_1 <- function(n, dim, tau, beta){
-  A = tau*diag(seq(1,dim))
-  return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta)))
-}
-
-T_bing_2 <- function(n, dim, tau, beta){
-  A = tau*diag(c(-dim, rep(0, dim-2), dim))
-  return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta)))
-}
-
-T_LP <- function(n, dim, m, kappa, beta){
-  return(list("realization" = T_stat_value(data = rLP(n, m, c(1,rep(0,dim-1)), kappa), beta = beta)))
-}
+source("T_statistic_Functions.R")
 
 #################################################################################################
 ## calculation of empirical critical values of T_n,beta under the null hypothesis ## 
@@ -179,7 +65,7 @@ print(DT_T_unif)
 # replication number for all powertests
 a = 5000
 
-Test <- MonteCarlo::MonteCarlo(T_unif, nrep = a, param_list = parameter, ncpus = numCores, export_also = export_list)      
+Test <- MonteCarlo::MonteCarlo(T_unif, nrep = a, param_list = parameter, ncpus = numCores, export_also = export_list)
 DF_Test <- MonteCarlo::MakeFrame(Test)                   
 
 DT_Test <- setDT(DF_Test)
diff --git a/R/T_statistic_Functions.R b/R/T_statistic_Functions.R
new file mode 100644
index 0000000..4e1ae12
--- /dev/null
+++ b/R/T_statistic_Functions.R
@@ -0,0 +1,128 @@
+#################################################################################################
+#################################################################################################
+## This script contains relevant functions for the computation of the test statistic T_n,beta.
+#################################################################################################
+#################################################################################################
+
+# required packages
+#library(Directional)
+library(MASS)
+library(MonteCarlo)
+library(data.table)
+
+setwd("/home/jaroslav-borodavka/Schreibtisch/Dissertation/Arbeit/Bernoulli Submission/codes/codes_git_project/R")
+
+#################################################################################################
+## required functions and test statistic ## 
+
+# test decision  
+Decision <- function(teststatistic_value, critical){              
+  
+  decision <- 0
+  
+  if (teststatistic_value >= critical)         
+    decision = 1
+  else        
+    decision = 0
+  
+  return(decision)
+}
+
+# uniformly distributed vectors on S^(d-1)
+runif_sphere <- function(n, d){
+  X = mvrnorm(n, rep(0,d), diag(1,d))
+  if (n == 1){
+    temp = as.vector(crossprod(X))
+    U = X/sqrt(temp)
+  }
+  else{
+    temp = diag(X %*% t(X))
+    U = matrix(0, nrow = n, ncol = d)
+    for (j in 1:n){ 
+      U[j,] = X[j,]/sqrt(temp[j])             
+    }
+  }
+  return(U)
+}
+
+# expectations Psi_d as per section 1
+psi_d <- function(d, beta){
+  return(if (beta%%2 == 0) gamma((beta+1)/2)*gamma(d/2)/(sqrt(pi)*gamma((beta+d)/2)) else 0) 
+}
+
+# squared and centered projections, used in T_stat_value
+projection <- function(unif_point_cover, data, beta){
+  n = dim(data)[1]
+  d = dim(data)[2]
+  scalarproducts = data %*% t(unif_point_cover)
+  sums = colSums((scalarproducts)^beta)
+  return((1/n*sums - psi_d(d, beta))^2)
+}
+
+# test statistic T_n,beta, cover_n is the number of uniformly distributed points on the sphere
+T_stat_value <- function(data, beta){
+  n = dim(data)[1]
+  d = dim(data)[2]
+  cover_n = 0
+  if (d < 5) cover_n = 5000 else cover_n = 20000
+  unif_points = runif_sphere(cover_n, d)
+  projections = projection(unif_point_cover = unif_points, data = data, beta = beta)
+  return(n*max(projections))
+}
+
+# uniformly distributed data
+T_unif <- function(n, dim, beta){
+  return(list("realization" = T_stat_value(data = runif_sphere(n, dim), 
+                                           beta = beta)))
+}
+
+# von Mises-Fisher alternative data
+T_vMF <- function(n, dim, kappa, beta){
+  return(list("realization" = T_stat_value(data = rvmf(n, c(1,rep(0, dim-1)), kappa), 
+                                           beta = beta)))
+}
+
+# other alternative data, cf. section 6
+T_mixvmf_1 <- function(n, dim, p, beta){
+  U = runif(n, 0, 1)
+  sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1) + (U>=p)*rvmf(n, c(1,rep(0,dim-1)), 1)
+  return(list("realization" = T_stat_value(data = sample, 
+                                           beta = beta)))
+}
+
+T_mixvmf_2 <- function(n, dim, p, beta){
+  U = runif(n, 0, 1)
+  sample = (U<p)*rvmf(n, c(-1,rep(0,dim-1)), 1)+(U>=p)*rvmf(n, c(1,rep(0,dim-1)), 4)
+  return(list("realization" = T_stat_value(data = sample, 
+                                           beta = beta)))
+}
+
+T_mixvmf_3 <- function(n, dim, beta){
+  p = 0.25
+  U = runif(n, 0, 1)
+  sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 3)
+  return(list("realization" = T_stat_value(data = sample, 
+                                           beta = beta)))
+}
+
+T_mixvmf_4 <- function(n, dim, beta){
+  p = 1/3
+  U = runif(n, 0, 1)
+  sample = (U<p)*rvmf(n, rep(-1,dim), 2) + (U>=p)*(U<2*p)*rvmf(n, c(-1,rep(1,dim-1)), 3) + (U>=2*p)*rvmf(n, c(1,rep(0,dim-1)), 4)
+  return(list("realization" = T_stat_value(data = sample,
+                                           beta = beta)))
+}
+
+T_bing_1 <- function(n, dim, tau, beta){
+  A = tau*diag(seq(1,dim))
+  return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta)))
+}
+
+T_bing_2 <- function(n, dim, tau, beta){
+  A = tau*diag(c(-dim, rep(0, dim-2), dim))
+  return(list("realization" = T_stat_value(data = rbingham(n, A), beta = beta)))
+}
+
+T_LP <- function(n, dim, m, kappa, beta){
+  return(list("realization" = T_stat_value(data = rLP(n, m, c(1,rep(0,dim-1)), kappa), beta = beta)))
+}
\ No newline at end of file
diff --git a/R/T_statistic_Runtimes.R b/R/T_statistic_Runtimes.R
new file mode 100644
index 0000000..4bc55c0
--- /dev/null
+++ b/R/T_statistic_Runtimes.R
@@ -0,0 +1,28 @@
+#################################################################################################
+#################################################################################################
+## This script calculates running times of a test result with T_n,beta under the null hypothesis
+## with the R benchmarking tool microbenchmarking.
+#################################################################################################
+#################################################################################################
+
+# required packages
+library(microbenchmark)
+library(MASS)
+library(MonteCarlo)
+library(data.table)
+library(ggplot2)
+
+setwd("/home/jaroslav-borodavka/Schreibtisch/Dissertation/Arbeit/Bernoulli Submission/codes/codes_git_project/R")
+
+# load additionally required script
+source("T_statistic_Functions.R")
+
+# benchmark tests
+
+n = 100
+d = 5
+
+res <- microbenchmark(T_unif(n, d, 1), T_unif(n, d, 2), T_unif(n, d, 3), T_unif(n, d, 4), T_unif(n, d, 5), T_unif(n, d, 6), times=100, unit="s")
+print(res)
+ggplot2::autoplot(res)
+boxplot(res)
-- 
GitLab