This simulation code adds 0.5 microvolts to the 300 - 500 ms window for each 0.1 increase in effect size. 1000 simulations are run.
%Control condition = 1
%Experimental condition = 2
%Call the eeglab function to open it at the start. Datasets are loaded
%through eeglab.
eeglab
%====================================%
%SETTING UP PATHS
%====================================%
%Setting the import and export paths
import_path = ('C:\Users\rjobrien\Desktop\Personal Projects\Thesis Data');
export_path = ('C:\Users\rjobrien\Desktop\Personal Projects\Thesis Data\avg output1');
%Listing all of the files in the folder
filelist = dir(fullfile(import_path, '*.set'));
%====================================%
%PARAMETERS
%====================================%
sample_sizes = {20, 30, 40, 50}; %Sample sizes range from 10 to 50 by 10
effect_sizes = {0, 0.1, 0.2, 0.3}; %Effect sizes range from 0 to 0.5 by 0.1
simulations = 1:1000; %Running tests with 500 simulations
participants = 1:20; %Using 20 participants based on literature
%====================================%
%INITIATING VARIABLES
%====================================%
%Making an empty variable to place all of the individual datasets into
combined_data = [];
%Making an empty variable to place all of the avg datasets into
combined_avg_data = [];
%====================================%
%RANDOMLY SELECTING FILES
%====================================%
%Only a few files are randomly selected from the files list because
%otherwise the population dataset would become too large and the results
%would be uninterpretable.
temp_files = randperm(size(filelist, 1), 8); %TESTING different sized population datasets (e.g., 1000 - 2500 individual trials)
%====================================%
%LOADING IN THE INDIVIDUAL DATASETS
%====================================%
% for f = 1:length(filelist)
for f = temp_files
%Load each preprocessed/ICA dataset
try
EEG = pop_loadset(strcat(import_path, '\', filelist(f).name));
catch
"missing file";
end
%Combining data together into one large array
combined_data = cat(3, combined_data, extracting_correct_responses(EEG));
end
%Finding the standard deviation of the PZ electrode amplitudes
%Need to reshape the 3D array to 1D array
standard_deviation = std(unique(reshape(combined_data(29,:,:), [], 1), 'rows'));
%====================================%
%CALLING THE SIMULATION FUNCTION AND EXPORTING THE FINAL DATA
%====================================%
for s = 1:length(simulations)
for e = 1:length(effect_sizes)
for n = 1:length(sample_sizes)
for p = 1:length(participants)
trials = sample_sizes{n};
effect_size = effect_sizes{e};
simulation_num = simulations(s);
participants_num = participants(p);
final_data = run_sim(combined_data, simulation_num, effect_size, trials, participants_num);
%Exporting the final dataset
csvwrite(strcat(export_path, '\', num2str(simulations(s)), '_', num2str(effect_sizes{e}), '_', num2str(sample_sizes{n}), '_', num2str(participants(p)), '_stimulus_locked_avg_data.csv'), final_data);
end
end
end
end
%Plotting the average waveforms
% plot(squeeze(combined_avg_data(250:400, 29, :)));
%====================================%
%FUNCTIONS
%====================================%
%====================================%
%EXTRACTING THE CORRECT RESPONSES FROM THE EEG DATA
%====================================%
function a = extracting_correct_responses(data)
%To add even more randomness to the data I made it so that random
%groups of correct responses would be selected out. This allows for
%more files to be included.
flanker_event_type = [1,4,5];
temp = flanker_event_type(randi(length(flanker_event_type)));
%subset out all of the EEG event numbered 1, 4, and 5
% a = data.event(([data.event.type] == 1 | [data.event.type] == 4 | [data.event.type] == 5) & [data.event.response_status] == 2);
a = data.event(([data.event.type] == temp) & [data.event.response_status] == 2);
a = struct2cell(a);
a = squeeze(a)';
%Subsetting out a vector of epochs
epochs = num2cell(unique(cell2mat(a(:, 12))));
%Subsetting the data based on the correct responses
a = data.data(:,:, cell2mat(epochs));
end
%====================================%
%RUNNING THE MONTE-CARLO SIMULATION
%====================================%
function b = run_sim(data, simulation_num, effect_size, trials, participants_num)
combined_avg_data = [];
%====================================%
%RESAMPLING WITHOUT REPLACEMENT FROM THE POPULATION DATASET
%====================================%
% standard_deviation = std(unique(reshape(data(29,:,:), [], 1), 'rows')); %USE THIS IF YOU WANT THE STANDARD DEVIATION TO BE PULLED FROM THE POPULATION DATASET
standard_deviation = 5;
%Randomly select n * 2 number of trials
sample_first = randperm(size(data, 3), trials*2);
%Finding n trials indices to subset from the first sample
sample_second = randperm(length(sample_first), trials);
%Subsetting out the second samples values
sample_three = sample_first(:, sample_second);
%Removing the second samples values from the first sample
sample_first(sample_second) = [];
%====================================%
%GENERATING THE CONTROL AND EXPERIMENTAL CONDITIONS
%====================================%
control_condition = data(:,:, sample_first);
%I think I need to sample from the combined data again*
% experimental_condition = combined_data(:,:, s3);
experimental_condition = data(:,:, sample_second);
%====================================%
%ADDING EFFECT SIZES TO SINGLE TRIALS IN THE EXPERIMENTAL CONDITIONS
%====================================%
%Plotting the before and after of the effect size manipulation
% plot(experimental_condition(29, 250:400, 1));
% hold on;
%Adding the effect size voltage to the 300ms - 500ms window of each
%trial
%Use the population standard deviation and multiply it by the
%effect size
experimental_condition(:, 325:375, :) = experimental_condition(:, 325:375, :) + (standard_deviation * effect_size);
% plot(experimental_condition(29, 250:400, 1));
%====================================%
%CALCULATING THE STIMULUS-LOCKED AVERAGE ERP WAVEFORM
%====================================%
%Calculating the average ERPs across trials
control_condition_mean = mean(control_condition,3);
%Making the electrodes the columns
control_condition_mean = permute(control_condition_mean,[2 1]);
%Adding sample size as a column
control_condition_mean(:, 35) = trials;
%Adding effect size as a column
control_condition_mean(:, 36) = effect_size;
%Adding condition as a column
control_condition_mean(:, 37) = 1;
%Adding simulation number as a column
control_condition_mean(:, 38) = simulation_num;
%Adding participant number as a column
control_condition_mean(:, 39) = participants_num;
%Calculating the average ERPs across trials
experimental_condition_mean = mean(experimental_condition,3);
%Making the electrodes the columns
experimental_condition_mean = permute(experimental_condition_mean,[2 1]);
%Adding sample size as a column
experimental_condition_mean(:, 35) = trials;
%Adding effect size as a column
experimental_condition_mean(:, 36) = effect_size;
%Adding condition as a column
experimental_condition_mean(:, 37) = 2;
%Adding simulation number as a column
experimental_condition_mean(:, 38) = simulation_num;
%Adding participant number as a column
experimental_condition_mean(:, 39) = participants_num;
%====================================%
%COMBINING ALL AVERAGE EXPERIMENTAL AND CONTROL CONDITION DATASETS
%====================================%
%Combining data together into one large array
b = cat(3, combined_avg_data, control_condition_mean, experimental_condition_mean);
%====================================%
%PLOTTING THE STIMULUS-LOCKED AVERAGE ERP WAVEFORMS
%====================================%
%Visually depict 0 - 600 ms to see if there could be latency differences in
%the P3
% plot(experimental_condition_mean(250:400, 29));
% hold on;
% plot(control_condition_mean(250:400, 29));
%I subset the data between 250:400 because I know that represents 0 -
%600 ms
%Reshaping the data into a long format so that I can work with it in R
b = reshape(permute(b(250:400, [29, 35:39], :),[2 1 3]), size(b(250:400, [29, 35:39], :),2),[])';
end