combined-new-simulation-procedure-v12.0.knit


Purpose: This document showcases the process of combining all of the simulated P3 amplitude data. Visualizations and preliminary statistics are produced to check the quality of the simulation procedure.

This simulation model uses an unstandardized effect size that varies within ±2 of the nominal effect size, termed the random constant effect size (RCES).
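
A minimal sketch of how an RCES might be applied is shown below. This is an assumption for illustration, not the original simulation code: a single random offset within ±2 microvolts of the nominal effect is drawn per simulated trial and added to every sample inside the time window of interest. The function name, argument names, and the 300-500 ms window are all hypothetical.

#Illustrative sketch: apply a random constant effect size to one simulated trial
apply_rces = function(amplitudes, times, nominal_effect, window = c(300, 500)) {
  #One random constant per trial, within +-2 microvolts of the nominal effect
  effect = nominal_effect + runif(1, min = -2, max = 2)
  
  #Adding that constant to every sample inside the time window of interest
  in_window = times >= window[1] & times <= window[2]
  amplitudes[in_window] = amplitudes[in_window] + effect
  amplitudes
}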

NOTES

#   "Stim3" & "Ride3" contains data where a constant effect size (+-2 of the actual effect size) was applied to the time window of interest.
# 
#   "Stim4" &"Ride4" contains data were a smaller effect size was applied to the beginning and end of the time window of interest
# 
#   "Stim5" contains data were an incremental effect size was applied to the time window of interest
# 
#   "stim IES" & "ride IES" contains 100 simulations with the incremental effect size
# 
#   "stim RCES" & "ride RCES" contains 100 simulations with the random constant effect size
# 
#   5000 simulations were produced using the RCES. The data has been split across 5 folders. After the data was loaded into R, it was saved in 2 different formats. The first format (e.g., "combined simulation data (RCES) 1.RData") contains all of the trial-level data before the peak-picking methodologies were extracted. The second format (e.g., "combined simulation data (RCES) 1b.RData") contains the summarized data where only P3 amplitudes remain.
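# 
#   A minimal sketch of re-loading the five summarized "b" files and binding them into one data frame is given below. The file paths are illustrative assumptions based on this note (the actual data are split across 5 folders), not verified locations.
# 
#   rces_files = paste0(files_path, "combined simulation data (RCES) ", 1:5, "b.RData")
#   combined_rces = do.call('rbind', lapply(rces_files, function(f) {
#     e = new.env()
#     load(f, envir = e)          #load() restores the object saved in the .RData file
#     get(ls(e)[1], envir = e)    #return the (single) object stored in that file
#   }))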

LOADING IN THE NECESSARY PACKAGES

options(scipen=999) #stopping scientific notation

#Check if the packages are already installed, install them if not, and load each library
pkgTest <- function(...) {
  pkg <- list(...)
  for (i in 1:length(pkg)) {
    if (!require(pkg[[i]], character.only = TRUE)) {
      install.packages(pkg[[i]], dep = TRUE)
      if(!require(pkg[[i]], character.only = TRUE)) stop("Package not found")
    } 
  }
}

pkgTest("here")
pkgTest("dplyr")
pkgTest("data.table")
pkgTest("plyr")
pkgTest("gganimate")
pkgTest("ggplot2")
pkgTest("plotly")
pkgTest("ggthemes")
pkgTest("tidyverse")
pkgTest("lme4")
pkgTest("openxlsx")
pkgTest("knitr")
pkgTest("car")
pkgTest("emmeans")
pkgTest("sjPlot")
pkgTest("pander")
pkgTest("kableExtra")
pkgTest("ggsci")
pkgTest("rms")
pkgTest("blorr")
pkgTest("stargazer")
pkgTest("effects")
pkgTest("questionr")
pkgTest("sjPlot")
pkgTest("sjmisc")
pkgTest("broom")
pkgTest("effsize")

PATHS & VARIABLES

#Specifying the path to the simulation data
path = "C:/Users/rjobrien/Desktop/Personal Projects/Thesis Data/combined output "
code_path = "C:/Users/rjobr/OneDrive/Desktop/Personal Projects/Thesis Code/"
files_path = "C:/Users/rjobr/OneDrive/Desktop/Personal Projects/Thesis Code/simulation results data/"


animated_plot = TRUE

simulation_count = 5000

theme = theme_bw(base_size = 14, base_family = "Times New Roman")

COMBINING P3 AMPLITUDE .RDS DATA TOGETHER

#Loading all of the peak simulation data and appending it together
combined_data = do.call('rbind',
                        lapply(list.files(code_path, pattern = "peak simulation",
                               full.names = TRUE), readRDS))


saveRDS(combined_data, paste0(code_path, "combined simulation data.Rds"))

#Checking to make sure there are 5000 samples
#length(unique(combined_data$sample))

CHANGING LEVELS IN EFFECT SIZE COLUMN

new_effect_size_labels = c("0" = "0 microvolts",
                           "0.5" = "0.5 microvolts", 
                           "1" = "1 microvolts",
                           "1.5" = "1.5 microvolts")

ANIMATED HISTOGRAMS OF P3 AMPLITUDES

if (animated_plot == TRUE){
  
  #ANIMATED UNSTACKED HISTOGRAM OF P3 AMPLITUDES ACROSS FACTORS
  p = ggplot(combined_data, aes(x = PZ, fill = as.factor(condition))) +
    geom_histogram(alpha = 0.6, position = "identity", colour = "black") +
    facet_grid(method~data) +
    labs(title = "Effect Size: {closest_state} microvolts", fill = "Condition", x = "P3 Amplitude (?V)") +
    theme +
    scale_fill_manual(labels = c("Control", "Experimental"), values = c("1" = "#474747",
                                  "2"  = "darkred")) +
    transition_states(effect_size)
  
  p = animate(p, fps = 10, width = 900, height = 600)
  
  p
  
}  

#Only save the animation when it was produced above
if (animated_plot == TRUE) {
  anim_save(paste0(code_path, "Results/", "P3 Amplitude Histograms.gif"), animation = p)
}

CALCULATING THE TYPE I ERROR RATE BY PEAK-PICKING METHODOLOGIES


These visuals indicate how the peak-picking methodologies' type-I error rates vary depending on the averaging methodology selected. As you can see in this visualization, the type-I error rates remain relatively stable around 5% regardless of the number of trials and averaging methodology used.
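
As a rough calibration check (a minimal sketch, not part of the original analysis), the Monte Carlo uncertainty around a nominal 5% rate with 5000 simulations per cell can be computed directly; observed rates between roughly 4.4% and 5.6% are consistent with the intended alpha level.

#Approximate 95% Monte Carlo interval (in percent) around a nominal 5% type-I error rate
mc_se = sqrt(0.05 * 0.95 / simulation_count)
round(100 * (0.05 + c(-1.96, 1.96) * mc_se), 2)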

typeI_by_method <-
  plyr::ddply(subset(combined_data, effect_size == 0), 
              c("method", "sample", "sample_size", "effect_size", "data"), 
              function(df)
              t.test(PZ ~ condition, data = df)$p.value)


typeI_by_method = typeI_by_method %>% dplyr::rename(condition_pvalues = V1)

typeI_by_method$condition_typeIerror = ifelse(typeI_by_method$condition_pvalues <= 0.05, 1, 0)



#Finding the type-I error rate
typeI_by_method2 = typeI_by_method

typeI_by_method2 = setDT(typeI_by_method2)[, (sum(condition_typeIerror)/simulation_count)*100, by = list(sample_size, effect_size, method, data)]

typeI_by_method2 = typeI_by_method2 %>% dplyr::rename(condition_typeIerror = V1)

typeI_by_method2 = melt(typeI_by_method2, id.vars = c("method", "sample_size", "effect_size", "data"), measure.vars = "condition_typeIerror")




print(ggplot(typeI_by_method2, aes(x = sample_size, 
                                   y = value, 
                                   colour = method, 
                                   group = method)) +
  facet_grid(~ data) +
  geom_point() +
  geom_line(size = 2) +
  ylim(0, 10) +
  labs(title = "Type-I Error Rates", 
       y = "Percentage (%)",
       x = "Number of Trials",
       colour = "Peak-Picking Methodology") +
  scale_color_jco() +
  theme)

CALCULATING THE POWER BY PEAK-PICKING & AVERAGING METHODOLOGIES


These visuals indicate how the peak-picking methodologies' power changes depending on the number of trials, effect size, and averaging methodology selected. As you can see in these visuals, every peak-picking methodology's power increases as the number of trials and the effect size increase. Stimulus-locked averaging appears to produce higher power overall than RIDE averaging, and the mean amplitude produces higher power overall than the peak and adaptive mean amplitudes.

models_by_method <-
  plyr::ddply(combined_data, c("method", "sample", "sample_size", "effect_size", "data"), function(df)
  t.test(PZ ~ condition, data = df)$p.value)



models_by_method = models_by_method %>% dplyr::rename(condition_pvalues = V1)

#Flagging non-significant results (p > .05) as type-II errors, consistent with the .05 cutoff used above
models_by_method$condition_typeIIerror = ifelse(models_by_method$condition_pvalues > 0.05, 1, 0)



#Finding the type-II error rate
models_by_method2 = models_by_method

models_by_method2 = setDT(models_by_method2)[, sum(condition_typeIIerror), by = list(sample_size, effect_size, method, data)]

models_by_method2 = models_by_method2 %>% dplyr::rename(condition_typeIIerror = V1)

models_by_method2$power = (simulation_count - (models_by_method2$condition_typeIIerror))/simulation_count*100

models_by_method2 = melt(models_by_method2, id.vars = c("method", "sample_size", "effect_size", "data"), measure.vars = c("condition_typeIIerror", "power"))




for (i in c("Stimulus-Locked Average", "RIDE Average")){
  print(ggplot(subset(models_by_method2, 
                      variable == "power" &
                      data == i &
                      effect_size != 0), 
               aes(x = sample_size, 
                  y = value, 
                  colour = method, 
                  group = method)) +
  facet_grid(data ~ effect_size,
             labeller = labeller(effect_size = new_effect_size_labels)) +
  geom_point() +
  geom_line(size = 2) +
  labs(title = paste0(i, " Power (%)"), 
       y = "Percentage (%)", 
       x = "Number of Trials",
       colour = "Peak-Picking Methodology") +
  scale_color_jco() +
  geom_hline(yintercept=80, linetype="dashed", color = "black", size = 1) +
  theme)
}

print(ggplot(subset(models_by_method2, 
                      variable == "power" & effect_size != 0), 
               aes(x = sample_size, 
                  y = value, 
                  colour = data, 
                  group = data)) +
  facet_grid(method ~ effect_size,
             labeller = labeller(effect_size = new_effect_size_labels)) +
  geom_point() +
  geom_line(size = 2) +
  labs(title = paste0("Averaging Methodologies Power (%)"), 
       y = "Percentage (%)", 
       x = "Number of Trials",
       colour = "Averaging Methodology"
       ) +
  scale_color_jco() +
  geom_hline(yintercept=80, linetype="dashed", color = "black", size = 1) +
  theme)

# 
# write.xlsx(models_by_method, paste0(code_path, "ride_power_pvalues.xlsx"))
# 
# 
# write.xlsx(models_by_method2, paste0(code_path, "ride_power_data.xlsx"))

LOOKING AT MY CURRENT SIMULATIONS' EFFECT SIZES


These visuals indicate how the peak-picking methodologies' Cohen's d changes depending on the number of trials, effect size, and averaging methodology selected. As with the power visuals, stimulus-locked averaging and the mean amplitude produce the highest Cohen's d values.
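
By default, effsize::cohen.d (used below) estimates d as the mean difference divided by the pooled standard deviation. A minimal hand-rolled check of that formula is sketched here; it is illustrative and not part of the original analysis.

#Hand calculation of Cohen's d using the pooled standard deviation
cohens_d_check = function(x, y) {
  pooled_sd = sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
                     (length(x) + length(y) - 2))
  (mean(x) - mean(y)) / pooled_sd
}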

#Calculating the cohen's d effect sizes for each grouping
test2 <-
  plyr::ddply(combined_data, c("sample_size", "effect_size", "method", "data"), function(df)
  effsize::cohen.d(
    df[df$condition == "2", ]$PZ,
    df[df$condition == "1", ]$PZ,
    paired = FALSE)$estimate)
  

#Producing a plot
ggplot(test2, 
      aes(x = sample_size, y = V1, colour = data, group = data)) +
      geom_point() +
      geom_line(size = 2) +
      facet_grid(method ~ effect_size,
                 labeller = labeller(effect_size = new_effect_size_labels)) +
      labs(title = "Cohen's d", 
           y = "Cohen's d", 
           x = "Number of Trials",
           colour = "Peak-Picking Methodology") +
      scale_color_jco() +
      theme

#Producing a plot
ggplot(test2, 
      aes(x = sample_size, y = V1, colour = method, group = method)) +
      geom_point() +
      geom_line(size = 2) +
      facet_grid(data ~ effect_size,
                 labeller = labeller(effect_size = new_effect_size_labels)) +
      labs(title = "Cohen's d", 
           y = "Cohen's d", 
           x = "Number of Trials",
           colour = "Peak-Picking Methodology") +
      scale_color_jco() +
      theme

CHECKING THAT THE DIFFERENCE IN MEANS ACROSS ALL SIMULATIONS MATCHES THE INTENDED EFFECT SIZE


Checking the mean difference between conditions across all simulations provides an indication of how well the effect size manipulation worked (e.g., the unstandardized effect size ±2, the random constant effect size (RCES)). Here, it's important to see whether the average difference between the control and experimental conditions matches the intended effect size.

mean_table = plyr::ddply(combined_data,
            c("sample_size", "effect_size", "method", "data"), function(df)
              mean(df[df$condition == "2", ]$PZ) - mean(df[df$condition == "1", ]$PZ))
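
#A minimal sketch (not part of the original analysis) of flagging any grouping where the
#observed control vs. experimental difference misses the intended effect size by more than
#0.1 microvolts; the 0.1 tolerance is an arbitrary illustrative choice
subset(mean_table, abs(V1 - effect_size) > 0.1)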


mean_table = dcast(mean_table, data + sample_size + effect_size ~ method, value.var = "V1")

#Round metrics
mean_table[, c(4:6)] = lapply(mean_table[, c(4:6)], function(x){round(x, 4)})

flextable::flextable(mean_table)
# mean_table %>%
#   kable(booktabs = T, caption = "Control vs. Experimental Mean Differences", digits = 2) %>% 
#   kable_styling(
#     bootstrap_options = c("striped","hover", "bordered", "condensed"),
#     latex_options = c("striped"),
#     full_width = F
#   )

SAVING WORKSPACE

save.image(paste0(code_path, "final combined simulation data.RData"))

EXPORTING DATA

write.xlsx(combined_data, paste0(code_path, "Results/", "final_p3_data.xlsx"))

write.xlsx(models_by_method, paste0(code_path, "Results/", "final_power_data.xlsx"))

write.xlsx(typeI_by_method, paste0(code_path, "Results/", "final_typeIerror_data.xlsx"))

RENDERING RMARKDOWN

# rmarkdown::render(paste0(
#       code_path,
#       "combined new simulation procedure v12.0.Rmd"
#     ))