% score_based_LRs_2025
% 
% Runs synthetic-data-based demonstrations and experiments described in:
%   Morrison G.S. (2025). Taking account of typicality in calculation of likelihood ratios. Accepted for publication in Law, Probability & Risk.
% 
% Available at https://forensic-data-science.net/likelihood-ratio-calculation/
% 
% version 2025-07-04a
% 
% tested on Matlab R2024b and Matlab R2025a
%   requires Statistics Toolbox

% start from a clean console, no open figures, and an empty workspace
clc
close all
clearvars % clears workspace variables only ("clear all" would also purge cached functions and breakpoints)

% add the helper-function folder, resolved relative to this script's own
% location so that the script also runs when the current folder is elsewhere
functions_dir = fullfile(fileparts(mfilename('fullpath')), 'functions');
if ~isfolder(functions_dir)
    % fall back to the original behaviour (path relative to the current folder),
    % e.g., when the code is not being run from a file
    functions_dir = './functions/';
end
addpath(functions_dir);

% fixed seed so that the random draws below are repeatable from run to run
rng(0)

% settings for simple examples
mu_bw = 0;      % mean of the relevant-population model
sigma_b = 10;   % between-source standard deviation
sigma_w = 2;    % within-source standard deviation

mu_a = 1;            % mean of the first specific-known-source model (legend: "typical")
mu_b = 21;           % mean of the second specific-known-source model (legend: "atypical")
delta = 2;           % offset of each questioned-item value below its source mean
x_a = mu_a - delta;
x_b = mu_b - delta;

% variances, and the standard deviation of the combined between-plus-within distribution
sigma2_b = sigma_b^2;
sigma2_w = sigma_w^2;
sigma2_bw = sigma2_b + sigma2_w;
sigma_bw = sqrt(sigma2_bw);

plot_range = 30; % plots span -plot_range to +plot_range on the x axes


% settings for synthetic data
num_sample_sets = 1000;  % number of Monte Carlo training sets

num_sources_train = 100; % sources per training set
num_items_train = 10;    % items per training source

num_sources_test = 1000; % sources in the (single) test set
num_items_test = 2;      % items per test source

% NOTE(review): kappa and df are not referenced anywhere else in this script - confirm whether they are still needed
kappa = 0.01;
df = num_sources_train;


%% specific-source
% Draws the relevant-population density and the two specific-known-source
% densities, marks the likelihoods of x_a and x_b under each model, and
% displays the resulting specific-source likelihood ratios.

% 1D plot
figure(11);
xx = -plot_range:0.1:plot_range;
yy_bw = normpdf(xx, mu_bw, sigma_bw);
plot(xx, yy_bw, '-r', 'LineWidth', 1);
ax1 = gca;
ax1.PlotBoxAspectRatio = [2 1 1];
xlabel('\itx')
ylabel('probability density')
grid on
hold on

% specific-known-source models: same within-source sd, different means
yy_a = normpdf(xx, mu_a, sigma_w);
yy_b = normpdf(xx, mu_b, sigma_w);
plot(xx, yy_a, '-b', 'LineWidth', 1);
plot(xx, yy_b, '-b', 'LineWidth', 1);

% likelihoods of each questioned-item value under its known-source model (_w)
% and under the relevant-population model (_bw)
L_a_w = normpdf(x_a, mu_a, sigma_w);
L_a_bw = normpdf(x_a, mu_bw, sigma_bw);
L_b_w = normpdf(x_b, mu_b, sigma_w);
L_b_bw = normpdf(x_b, mu_bw, sigma_bw);

% x_a: vertical line, dashed guide at the population likelihood, and markers
% (the dashed guide at L_a_w is not drawn separately: x_a and x_b are both
% delta below their source means, so L_a_w equals L_b_w and the guide drawn
% for x_b below passes through it)
plot([x_a x_a], [0 max([L_a_w L_a_bw])], '-g', 'LineWidth', 1);
plot([xx(1) x_a], [L_a_bw L_a_bw], '--g', 'LineWidth', 1);
plot([x_a x_a], [L_a_w L_a_bw], 'og', 'MarkerFaceColor', 'g', 'MarkerSize', 8, 'LineWidth', 2);

% x_b: vertical line, dashed guides at both likelihoods, and markers
plot([x_b x_b], [0 max([L_b_w L_b_bw])], '-g', 'LineWidth', 1);
plot([xx(1) x_b], [L_b_w L_b_w], '--g', 'LineWidth', 1);
plot([xx(1) x_b], [L_b_bw L_b_bw], '--g', 'LineWidth', 1);
plot([x_b x_b], [L_b_w L_b_bw], 'og', 'MarkerFaceColor', 'none', 'MarkerSize', 8, 'LineWidth', 2);

% dummy plots for legend
% (gobjects preallocates a real graphics-object array; the previous NaN(4,0)
% was an empty array, so nothing was preallocated)
h = gobjects(1,4);
h(1) = plot(NaN, NaN, '-r', 'LineWidth',1);
h(2) = plot(NaN, NaN, '-b', 'LineWidth',1);
h(3) = plot(NaN, NaN, 'og', 'MarkerFaceColor','g', 'MarkerSize',8, 'LineWidth',2);
h(4) = plot(NaN, NaN, 'og', 'MarkerFaceColor','none', 'MarkerSize',8, 'LineWidth',2);
legend_text = {'relevant-population model' 'specific-known-source models', '\it{x}\rm_q (typical)', '\it{x}\rm_q (atypical)'};
legend(h, legend_text, 'Location','NorthOutside', 'FontSize',10);

% likelihood ratios (left unterminated on purpose so the values are displayed)
LR_a_specific = L_a_w / L_a_bw
LR_b_specific = L_b_w / L_b_bw


%% common-source
% Figure 21 shows the relevant-population density with the two item pairs on
% the x axis; figure 22 shows the bivariate same-source and different-source
% densities with the likelihoods of each pair marked. The common-source
% likelihood ratios are displayed at the end.

% 1D plot
figure(21);
% legend handles, filled in from both figure 21 and figure 22
% (gobjects preallocates a real graphics-object array; the previous NaN(5,0)
% was an empty array, so nothing was preallocated)
h = gobjects(1,5);
h(1) = plot(xx, yy_bw, '-r', 'LineWidth', 1);
ax21 = gca;
ax21.PlotBoxAspectRatio = [2 1 1];
xlabel('\itx')
ylabel('probability density')
grid on
hold on

% each pair: [questioned-item value, known-item value]
x_a_pair = [x_a mu_a];
x_b_pair = [x_b mu_b];

h(4) = plot(x_a_pair, [0 0], 'og', 'MarkerFaceColor', 'g', 'MarkerSize', 8, 'LineWidth', 2);
h(5) = plot(x_b_pair, [0 0], 'og', 'MarkerFaceColor', 'none', 'MarkerSize', 8, 'LineWidth', 2);


% 2D plot
mu_vector = [mu_bw, mu_bw];

% same-source model: the two items share the between-source variance
cov_s = [sigma2_bw, sigma2_b;...
         sigma2_b,  sigma2_bw];

% different-source model: the two items are uncorrelated
cov_d = [sigma2_bw, 0;...
         0,         sigma2_bw];

% fine grid (step 0.1) used for the translucent surfaces
[xx1,xx2] = meshgrid(xx);
xx12 = [xx1(:), xx2(:)];
length_xx = length(xx);

zz_s = mvnpdf(xx12, mu_vector, cov_s);
zz_s = reshape(zz_s, [length_xx, length_xx]);

zz_d = mvnpdf(xx12, mu_vector, cov_d);
zz_d = reshape(zz_d, [length_xx, length_xx]);

% coarse grid (step 1) used for the wireframes (also reused by later sections)
XX = -plot_range:1:plot_range;
[XX1,XX2] = meshgrid(XX);
XX12 = [XX1(:), XX2(:)];
length_XX = length(XX);
ZZ_s = mvnpdf(XX12, mu_vector, cov_s);
ZZ_s = reshape(ZZ_s, [length_XX, length_XX]);
ZZ_d = mvnpdf(XX12, mu_vector, cov_d);
ZZ_d = reshape(ZZ_d, [length_XX, length_XX]);

% likelihoods of each pair under the same-source and different-source models
L_a_s = mvnpdf(x_a_pair, mu_vector, cov_s);
L_a_d = mvnpdf(x_a_pair, mu_vector, cov_d);

L_b_s = mvnpdf(x_b_pair, mu_vector, cov_s);
L_b_d = mvnpdf(x_b_pair, mu_vector, cov_d);

figure(22);
mesh(XX1, XX2, ZZ_d, 'FaceColor', 'none', 'EdgeColor', [1 .5 .5]);
hold on
h(2) = mesh(xx1, xx2, zz_d, 'FaceColor', 'r', 'FaceAlpha', .1, 'EdgeColor', 'none');
mesh(XX1, XX2, ZZ_s, 'FaceColor', 'none', 'EdgeColor', [.75 .75 1]);
h(3) = mesh(xx1, xx2, zz_s, 'FaceColor', 'b', 'FaceAlpha', .1, 'EdgeColor', 'none');

axis tight
ax22 = gca;
ax22.PlotBoxAspectRatio = [1 1 0.75];
view(ax22, [-11.25 5.625]);

% pair a: markers at both likelihoods, vertical line, and dashed guides from the grid corner
plot3([x_a_pair(1);x_a_pair(1)], [x_a_pair(2);x_a_pair(2)], [L_a_s;L_a_d], 'og', 'MarkerFaceColor', 'g', 'MarkerSize', 8, 'LineWidth', 2);
plot3([x_a_pair(1);x_a_pair(1)], [x_a_pair(2);x_a_pair(2)], [0, max([L_a_s;L_a_d])], '-g', 'LineWidth', 2);
plot3([xx(1);x_a_pair(1)], [xx(end);x_a_pair(2)], [L_a_s;L_a_s], '--g', 'LineWidth', 2);
plot3([xx(1);x_a_pair(1)], [xx(end);x_a_pair(2)], [L_a_d;L_a_d], '--g', 'LineWidth', 2);

% pair b: same elements, hollow markers
plot3([x_b_pair(1);x_b_pair(1)], [x_b_pair(2);x_b_pair(2)], [L_b_s;L_b_d], 'og', 'MarkerFaceColor', 'none', 'MarkerSize', 8, 'LineWidth', 2);
plot3([x_b_pair(1);x_b_pair(1)], [x_b_pair(2);x_b_pair(2)], [0, max([L_b_s;L_b_d])], '-g', 'LineWidth', 2);
plot3([xx(1);x_b_pair(1)], [xx(end);x_b_pair(2)], [L_b_s;L_b_s], '--g', 'LineWidth', 2);
plot3([xx(1);x_b_pair(1)], [xx(end);x_b_pair(2)], [L_b_d;L_b_d], '--g', 'LineWidth', 2);

xlabel('\it{x}\rm{_k}')
ylabel('\it{x}\rm{_q}')
zlabel('probability density')
box on

% legend (placed on figure 21, using handles collected from both figures)
figure(21);
legend_text = {'relevant-population model' 'different-source model' 'same-source model' '\it{x}\rm_q and \it{x}\rm_k (typical)' '\it{x}\rm_q and \it{x}\rm_k (atypical)'};
legend(h, legend_text, 'Location','NorthOutside', 'FontSize',10);


% likelihood ratios (left unterminated on purpose so the values are displayed)
LR_a_common = L_a_s / L_a_d

LR_b_common = L_b_s / L_b_d


%% similarity-score
% Generates synthetic items, computes absolute-difference scores for
% same-source and different-source pairs, fits a half-normal distribution to
% each set of scores, and evaluates the score-based likelihood ratio at delta.

% generate items from sources
num_sources = 1000;
num_items = 2;
num_items_total = num_items * num_sources; % only referenced by the commented-out empirical-rank code further down

mu_sources = normrnd(mu_bw, sigma_b, [num_sources, 1]);
x_items = normrnd(repmat(mu_sources, [1,num_items]), sigma_w);

% indices for different-source combinations
% (strict lower triangle of the source-by-source index grids, i.e., each unordered pair of distinct sources once)
[ID_d1_mtrx,ID_d2_mtrx] = meshgrid(1:num_sources);
ID_d1 = nonzeros(tril(ID_d1_mtrx,-1));
ID_d2 = nonzeros(tril(ID_d2_mtrx,-1));

% scores
score_s = abs(x_items(:,1) - x_items(:,2));
score_d = abs(x_items(ID_d1, 1) - x_items(ID_d2, 2));

% score plot
figure(31);
% legend handles
% (gobjects preallocates a real graphics-object array; the previous NaN(5,0)
% was an empty array, so nothing was preallocated)
h = gobjects(1,5);
ss_plot = 0:0.05:plot_range;

% Half-normal distributions
M_s = fitdist(score_s, 'Half Normal');
M_d = fitdist(score_d, 'Half Normal');

yy_s = pdf(M_s, ss_plot);
yy_d = pdf(M_d, ss_plot);

% histograms are drawn same-source first, then different-source (this fixes
% which default colour each one receives); handles are stored in legend order
h(3) = histogram(score_s, 'Normalization','pdf', 'FaceAlpha',0.2, 'LineStyle','none');
hold on
h(1) = histogram(score_d, 'Normalization','pdf', 'FaceAlpha',0.2, 'LineStyle','none');

h(2) = plot(ss_plot, yy_d, '-r', 'LineWidth', 1);
h(4) = plot(ss_plot, yy_s, '-b', 'LineWidth', 1);
xlim([0,ss_plot(end)])

ax3 = gca;
ax3.PlotBoxAspectRatio = [2 1 1];
xlabel('\delta(\it{x}\rm{_q},\it{x}\rm{_k})')
ylabel('probability density')

% likelihoods of a score equal to delta under each fitted model
y_s_delta = pdf(M_s, delta);
y_d_delta = pdf(M_d, delta);

plot([delta delta], [0 max([y_d_delta y_s_delta])], '-g', 'LineWidth', 1);
plot([0 delta], [y_d_delta y_d_delta], '--g', 'LineWidth', 1);
plot([0 delta], [y_s_delta y_s_delta], '--g', 'LineWidth', 1);
h(5) = plot([delta delta], [y_d_delta y_s_delta], 'og', 'MarkerFaceColor', 'g', 'MarkerSize', 8);

grid on

% legend
legend_text = {'different-source histogram' 'different-source model' 'same-source histogram' 'same-source model' '\delta(\it{x}\rm{_q},\it{x}\rm{_k}) (typical & atypical)'};
legend(h, legend_text, 'Location','NorthOutside', 'FontSize',10);


% likelihood ratio (left unterminated on purpose so the value is displayed)
LR_score = y_s_delta / y_d_delta

% % score to logLR mapping
% figure(32);
% plot([0 plot_range],[0 0], '-k', 'LineWidth', 0.5)
% hold on
% yy_lnLR = log10(yy_s) - log10(yy_d);
% plot(ss_plot, yy_lnLR, '-b', 'LineWidth', 1);
% xlabel('\delta(\it{x}\rm{_q},\it{x}\rm{_k})')
% ylabel('log_{10}(\Lambda)')
% grid on
% 
% xlim([0,plot_range])
% xlim([0,plot_range/3])

% % what are default colors in histograms?
% cm = colormap(lines);
% round(cm(1:2,:)*255)


%% rank-similarity scores
% Converts the synthetic items to ranks via the population normal CDF,
% computes absolute rank-difference scores, fits a Beta(1,b) distribution to
% the same-source and to the different-source scores, and evaluates the
% rank-similarity likelihood ratios for the two example pairs.

% % rank items and put in range 0 to 100 exclusive - empirical solution
% [x_items_sorted, ID_x_items_sorted] = sort(x_items(:));
% ranks = (1:num_items_total)/(num_items_total+1)*100;
% rank_items_vector(ID_x_items_sorted) = ranks;
% rank_items = reshape(rank_items_vector,num_sources,num_items);
% 
% figure(33);
% plot(x_items_sorted, ranks, '-b', 'LineWidth', 1);
% xlabel('\it{x}')
% ylabel('percentile rank')

% rank items and put in range 0 to 1 exclusive - parameter based solution
rank_items = normcdf(x_items, mu_bw, sigma_bw);

% rank scores
rank_s = abs(rank_items(:,1) - rank_items(:,2));
rank_d = abs(rank_items(ID_d1, 1) - rank_items(ID_d2, 2));

% rank plot
figure(34);
% legend handles
% (gobjects preallocates a real graphics-object array; the previous NaN(6,0)
% was an empty array, so nothing was preallocated)
h = gobjects(1,6);
ss_plot_rank = 0:0.005:1;

% beta distributions with a = 1
% (custom pdf so that only the b parameter is estimated by mle)
Beta_a1_pdf = @(x,b) ((1-x).^(b-1))/beta(1,b);
b_s = mle(rank_s, 'pdf', Beta_a1_pdf, 'start',1);
b_d = mle(rank_d, 'pdf', Beta_a1_pdf, 'start',1);

M_s_rank = makedist('Beta',1,b_s);
M_d_rank = makedist('Beta',1,b_d);

yy_s_rank = pdf(M_s_rank, ss_plot_rank);
yy_d_rank = pdf(M_d_rank, ss_plot_rank);

% histograms are drawn same-source first, then different-source; handles are stored in legend order
h(3) = histogram(rank_s, 'Normalization','pdf', 'FaceAlpha',0.2, 'LineStyle','none');
hold on
h(1) = histogram(rank_d, 'Normalization','pdf', 'FaceAlpha',0.2, 'LineStyle','none');

h(2) = plot(ss_plot_rank, yy_d_rank, '-r', 'LineWidth', 1);
h(4) = plot(ss_plot_rank, yy_s_rank, '-b', 'LineWidth', 1);
xlim([0,ss_plot_rank(end)])

ax3 = gca;
ax3.PlotBoxAspectRatio = [2 1 1];
xlabel('\delta(\rho_q,\rho_k)');
ylabel('probability density');
xticks(0:0.1:1);

% convert test values to ranks
% rank_ab_pair = interp1(x_items_sorted, ranks, [x_a_pair; x_b_pair], 'linear', 'extrap');
rank_ab_pair = normcdf([x_a_pair; x_b_pair], mu_bw, sigma_bw);
delta_ab_pair = abs(rank_ab_pair(:,1) - rank_ab_pair(:,2)); % row 1: pair a, row 2: pair b

y_s_delta_rank = pdf(M_s_rank, delta_ab_pair);
y_d_delta_rank = pdf(M_d_rank, delta_ab_pair);

% mark each pair: filled markers for pair a, hollow markers for pair b
marker_face_color = {'g', 'none'};
for I_delta = 1:2
    plot([delta_ab_pair(I_delta) delta_ab_pair(I_delta)], [0 max([y_d_delta_rank(I_delta) y_s_delta_rank(I_delta)])], '-g', 'LineWidth', 1);
    plot([0 delta_ab_pair(I_delta)], [y_d_delta_rank(I_delta) y_d_delta_rank(I_delta)], '--g', 'LineWidth', 1);
    plot([0 delta_ab_pair(I_delta)], [y_s_delta_rank(I_delta) y_s_delta_rank(I_delta)], '--g', 'LineWidth', 1);
    h(4+I_delta) = plot([delta_ab_pair(I_delta) delta_ab_pair(I_delta)], [y_d_delta_rank(I_delta) y_s_delta_rank(I_delta)], 'og', 'MarkerFaceColor', marker_face_color{I_delta}, 'MarkerSize', 8, 'LineWidth', 1);
end

grid on

% legend
legend_text = {'different-source histogram' 'different-source model' 'same-source histogram' 'same-source model' '\delta(\rho_q,\rho_k) (typical)' '\delta(\rho_q,\rho_k) (atypical)'};
legend(h, legend_text, 'Location','NorthOutside', 'FontSize',10);


% likelihood ratios, one per pair (left unterminated on purpose so the values are displayed)
LR_rank = y_s_delta_rank ./ y_d_delta_rank

% % score to logLR mapping
% figure(35);
% plot([0 plot_range],[0 0], '-k', 'LineWidth', 0.5)
% hold on
% yy_lnLR_rank = log10(yy_s_rank) - log10(yy_d_rank);
% plot(ss_plot_rank, yy_lnLR_rank, '-b', 'LineWidth', 1);
% xlim([0,ss_plot_rank(end)])
% xlabel('\delta(\rho_q,\rho_k)')
% ylabel('log_{10}(\Lambda)')
% grid on
% 
% xlim([0,ss_plot_rank(end)])
% xlim([0,0.4])


%% over a grid of xq and xk values
% For every point of the coarse grid (XX1, XX2), computes log10 likelihood
% ratios using four models and draws each as a surface (figures 41, 42, 43,
% and 45) above a translucent reference plane at log10(LR) = 0.
% The surfaces are computed first; the figures, which share identical
% formatting, are then drawn in a single loop.

% reference plane at log10(LR) = 0
XX0 = -plot_range:10:plot_range;
[XX01,XX02] = meshgrid(XX0);
ZZ0 = zeros(size(XX02));

% common-source LRs based on population parameters
logLR_synth_common_pop = log10(ZZ_s) - log10(ZZ_d);

% similarity-score LRs based on population parameters
delta_pop = abs(XX1(:) - XX2(:));

y_s_delta_pop = reshape(pdf(M_s, delta_pop), [length_XX, length_XX]);
y_d_delta_pop = reshape(pdf(M_d, delta_pop), [length_XX, length_XX]);
logLR_synth_delta_pop = log10(y_s_delta_pop) - log10(y_d_delta_pop);

% similarity-rank-score LRs based on population parameters
XX1_rank = normcdf(XX1, mu_bw, sigma_bw);
XX2_rank = normcdf(XX2, mu_bw, sigma_bw);

delta_pop_rank = abs(XX1_rank(:) - XX2_rank(:));

y_s_delta_pop_rank = reshape(pdf(M_s_rank, delta_pop_rank), [length_XX, length_XX]);
y_d_delta_pop_rank = reshape(pdf(M_d_rank, delta_pop_rank), [length_XX, length_XX]);
logLR_synth_delta_pop_rank = log10(y_s_delta_pop_rank) - log10(y_d_delta_pop_rank);

% specific-source LRs based on population parameters
L_specific_w = normpdf(XX12(:,2), XX12(:,1), sigma_w); % note which XX12 column is x_q and which is mu_k (this model is not symmetrical)
L_specific_bw = normpdf(XX12(:,2), mu_bw, sigma_bw);
Z_specific = log10(L_specific_w) - log10(L_specific_bw);
ZZ_specific = reshape(Z_specific, [length_XX, length_XX]);

% one row per figure: figure number, surface to draw, x-axis label
% (the specific-source plot has the source mean, not an item value, on its x axis)
grid_plots = { ...
    41, logLR_synth_common_pop,     '\it{x}\rm{_k}'; ...
    42, logLR_synth_delta_pop,      '\it{x}\rm{_k}'; ...
    43, logLR_synth_delta_pop_rank, '\it{x}\rm{_k}'; ...
    45, ZZ_specific,                '\it{\mu}\rm{_k}'};

for I_grid_plot = 1:size(grid_plots, 1)
    figure(grid_plots{I_grid_plot, 1});

    mesh(XX01, XX02, ZZ0, 'FaceColor', [.5 1 .5], 'FaceAlpha', .2, 'EdgeColor', 'g');
    hold on
    mesh(XX1, XX2, grid_plots{I_grid_plot, 2}, 'FaceColor', [.9 .9 .9], 'EdgeColor', 'k');
    xlim([-plot_range plot_range])
    ylim([-plot_range plot_range])
    zlim([-3 3])
    view(-22.5, 22.5)
    xticks(-plot_range:10:plot_range)
    yticks(-plot_range:10:plot_range)
    xlabel(grid_plots{I_grid_plot, 3})
    ylabel('\it{x}\rm{_q}')
    zlabel('log_{10}(\Lambda)')
    box on
end


%% synthetic data
% Generates the single test set, computes the "true" log likelihood ratios and
% Cllr from the population parameters, builds the index pairs used to form
% training scores, and preallocates the Monte Carlo result arrays.

% test data
mu_sources_test = normrnd(mu_bw, sigma_b, [num_sources_test, 1]);
x_items_test = normrnd(repmat(mu_sources_test, [1,num_items_test]), sigma_w);

% indices for different-source combinations
% (strict lower triangle of the source-by-source index grids, i.e., each unordered pair of distinct sources once)
[ID_d1_mtrx_test,ID_d2_mtrx_test] = meshgrid(1:num_sources_test);
ID_d1_test_all = nonzeros(tril(ID_d1_mtrx_test,-1));
ID_d2_test_all = nonzeros(tril(ID_d2_mtrx_test,-1));

% randomly pick the same number of items as for same-source
II_rand = randperm(length(ID_d1_test_all), num_sources_test);
ID_d1_test = ID_d1_test_all(II_rand);
ID_d2_test = ID_d2_test_all(II_rand);

% different-source pairs: item 1 of one source with item 2 of another source
x_items_test_combinations_d = [x_items_test(ID_d1_test,1), x_items_test(ID_d2_test,2)];

% true lnLR values given parameter values
L_num_synth_true_s = mvnpdf(x_items_test, mu_vector, cov_s);
L_dem_synth_true_s = mvnpdf(x_items_test, mu_vector, cov_d);
lnLR_synth_true_s = log(L_num_synth_true_s) - log(L_dem_synth_true_s);

L_num_synth_true_d = mvnpdf(x_items_test_combinations_d, mu_vector, cov_s);
L_dem_synth_true_d = mvnpdf(x_items_test_combinations_d, mu_vector, cov_d);
lnLR_synth_true_d = log(L_num_synth_true_d) - log(L_dem_synth_true_d);

Cllr_synth_true = cllr(lnLR_synth_true_s, lnLR_synth_true_d);

% test scores
score_test_s = abs(x_items_test(:,1) - x_items_test(:,2));
score_test_d = abs(x_items_test(ID_d1_test,1) - x_items_test(ID_d2_test,2));

% indices for training scores
% Each row is one unordered pair [i j] with i < j, rows in lexicographic order.
% nchoosek(1:n, 2) yields exactly the rows that the previous
% combinations / delete-diagonal / sort / unique sequence produced.
% (requires num_items_train >= 2 and num_sources_train >= 2)
ID_combinations_train_s = nchoosek(1:num_items_train, 2);   % pairs of items within a source (column indices)
ID_combinations_train_d = nchoosek(1:num_sources_train, 2); % pairs of distinct sources (row indices)

% prep for loop
% one Cllr value per Monte Carlo sample set and per method
Cllr_synth_common = NaN(num_sample_sets,1);
Cllr_synth_score = NaN(num_sample_sets,1);
Cllr_synth_score_rank = NaN(num_sample_sets,1);

% lnLR values for every test pair (rows) in every sample set (columns)
lnLR_synth_common_s_saved = NaN(num_sources_test, num_sample_sets);
lnLR_synth_common_d_saved = NaN(num_sources_test, num_sample_sets);

lnLR_synth_score_s_saved = NaN(num_sources_test, num_sample_sets);
lnLR_synth_score_d_saved = NaN(num_sources_test, num_sample_sets);

lnLR_synth_score_s_saved_rank = NaN(num_sources_test, num_sample_sets);
lnLR_synth_score_d_saved_rank = NaN(num_sources_test, num_sample_sets);


% Monte Carlo loop: for each sample set, draw a fresh training set, train the
% common-source, similarity-score, and rank-similarity models on it, apply each
% model to the fixed test pairs, and store the resulting lnLR values and Cllr.
% note: current code not set up to give same results each run if parallel processing is used

h_wait = waitbar(0, 'Monte Carlo samples');
for I_sample_set = 1:num_sample_sets
% (disabled parallel variant of the loop header and progress reporting)
% h_wait = waitbar(0,'Monte Carlo samples on sythetic data');
% h_wait.UserData = [0 num_sample_sets];
% par_DataQueue = parallel.pool.DataQueue;
% afterEach(par_DataQueue, @(varargin) increment_waitbar(h_wait));
% parfor I_sample_set = 1:num_sample_sets
    % generate training samples
    % (rows of x_items_train are sources, columns are items)
    mu_sources_train = normrnd(mu_bw, sigma_b, [num_sources_train, 1]);
    x_items_train = normrnd(repmat(mu_sources_train, [1,num_items_train]), sigma_w);

    % COMMON-SOURCE METHOD
    % NOTE(review): the mean and between-source variance are estimated from the
    % generated source means (mu_sources_train), not from the item values - confirm this is intended
    mu_hat_bw = mean(mu_sources_train);
    sigma2_hat_b = var(mu_sources_train);
    % within-source variance: per-source variance across items, averaged over sources
    sigma2_hat_w = mean(var(x_items_train, 0, 2));
    sigma2_hat_bw = sigma2_hat_b + sigma2_hat_w;
    
    mu_hat_vector = [mu_hat_bw, mu_hat_bw];
    
    % same-source covariance (off-diagonal = between-source variance)
    cov_hat_s = [sigma2_hat_bw, sigma2_hat_b;...
                 sigma2_hat_b,  sigma2_hat_bw];
    
    % different-source covariance (off-diagonal = 0)
    cov_hat_d = [sigma2_hat_bw, 0;...
                 0,             sigma2_hat_bw];
    
    % lnLRs for same-source test pairs
    L_num_synth_common_s = mvnpdf(x_items_test, mu_hat_vector, cov_hat_s);
    L_dem_synth_common_s = mvnpdf(x_items_test, mu_hat_vector, cov_hat_d);
    lnLR_synth_common_s = log(L_num_synth_common_s) - log(L_dem_synth_common_s);

    % lnLRs for different-source test pairs
    L_num_synth_common_d = mvnpdf(x_items_test_combinations_d, mu_hat_vector, cov_hat_s);
    L_dem_synth_common_d = mvnpdf(x_items_test_combinations_d, mu_hat_vector, cov_hat_d);
    lnLR_synth_common_d = log(L_num_synth_common_d) - log(L_dem_synth_common_d);
    
    % save common-source results
    lnLR_synth_common_s_saved(:,I_sample_set) = lnLR_synth_common_s;
    lnLR_synth_common_d_saved(:,I_sample_set) = lnLR_synth_common_d;

    Cllr_synth_common(I_sample_set) = cllr(lnLR_synth_common_s, lnLR_synth_common_d);
    
    % SIMILARITY-SCORE METHOD
    % same-source scores: absolute differences between pairs of item columns (within each source row)
    % different-source scores: absolute differences between pairs of source rows (column by column)
    score_train_s = abs(x_items_train(:, ID_combinations_train_s(:,1)) - x_items_train(:, ID_combinations_train_s(:,2)));
    score_train_d = abs(x_items_train(ID_combinations_train_d(:,1), :) - x_items_train(ID_combinations_train_d(:,2), :));
    
    % distributions
    M_synth_s = fitdist(score_train_s(:), 'Half Normal');
    M_synth_d = fitdist(score_train_d(:), 'Half Normal');

    % lnLRs for same-source test scores
    L_num_synth_score_s = pdf(M_synth_s, score_test_s);
    L_dem_synth_score_s = pdf(M_synth_d, score_test_s);
    lnLR_synth_score_s = log(L_num_synth_score_s) - log(L_dem_synth_score_s);
    
    % lnLRs for different-source test scores
    L_num_synth_score_d = pdf(M_synth_s, score_test_d);
    L_dem_synth_score_d = pdf(M_synth_d, score_test_d);
    lnLR_synth_score_d = log(L_num_synth_score_d) - log(L_dem_synth_score_d);

    % save similarity-score results
    lnLR_synth_score_s_saved(:,I_sample_set) = lnLR_synth_score_s;
    lnLR_synth_score_d_saved(:,I_sample_set) = lnLR_synth_score_d;
    
    Cllr_synth_score(I_sample_set) = cllr(lnLR_synth_score_s, lnLR_synth_score_d);

    % RANK-SIMILARITY METHOD
    % ranks are CDF values of a kernel distribution fitted to all training items pooled
    M_kernel = fitdist(x_items_train(:), 'Kernel');
    x_items_train_rank = cdf(M_kernel, x_items_train);
    x_items_test_rank = cdf(M_kernel, x_items_test);
    x_items_test_combinations_d_rank = cdf(M_kernel, x_items_test_combinations_d);

    % absolute rank differences, formed with the same index pairs as the similarity scores
    score_train_s_rank = abs(x_items_train_rank(:, ID_combinations_train_s(:,1)) - x_items_train_rank(:, ID_combinations_train_s(:,2)));
    score_train_d_rank = abs(x_items_train_rank(ID_combinations_train_d(:,1), :) - x_items_train_rank(ID_combinations_train_d(:,2), :));
    score_test_s_rank = abs(x_items_test_rank(:,1) - x_items_test_rank(:,2));
    score_test_d_rank = abs(x_items_test_combinations_d_rank(:,1) - x_items_test_combinations_d_rank(:,2));
    
    % beta distributions with a = 1
    % (Beta_a1_pdf is the custom pdf handle defined in the rank-similarity section above)
    b_synth_s = mle(score_train_s_rank(:), 'pdf', Beta_a1_pdf, 'start',1);
    b_synth_d = mle(score_train_d_rank(:), 'pdf', Beta_a1_pdf, 'start',1);
    
    M_synth_s_rank = makedist('Beta',1,b_synth_s);
    M_synth_d_rank = makedist('Beta',1,b_synth_d);
    
    % lnLRs for same-source test rank scores
    L_num_synth_score_s_rank = pdf(M_synth_s_rank, score_test_s_rank);
    L_dem_synth_score_s_rank = pdf(M_synth_d_rank, score_test_s_rank);
    lnLR_synth_score_s_rank = log(L_num_synth_score_s_rank) - log(L_dem_synth_score_s_rank);

    % lnLRs for different-source test rank scores
    L_num_synth_score_d_rank = pdf(M_synth_s_rank, score_test_d_rank);
    L_dem_synth_score_d_rank = pdf(M_synth_d_rank, score_test_d_rank);
    lnLR_synth_score_d_rank = log(L_num_synth_score_d_rank) - log(L_dem_synth_score_d_rank);

    % save rank-similarity results
    lnLR_synth_score_s_saved_rank(:,I_sample_set) = lnLR_synth_score_s_rank;
    lnLR_synth_score_d_saved_rank(:,I_sample_set) = lnLR_synth_score_d_rank;
    
    Cllr_synth_score_rank(I_sample_set) = cllr(lnLR_synth_score_s_rank, lnLR_synth_score_d_rank);


    % update the progress bar
    waitbar(I_sample_set/num_sample_sets, h_wait);
    % send(par_DataQueue, I_sample_set);
end
close(h_wait);

% violin plots
% distribution of Cllr over the Monte Carlo sample sets, one violin per method
figure(51);
violin([Cllr_synth_common, Cllr_synth_score, Cllr_synth_score_rank], 'xlabel',{'common','similarity','rank'}, 'facecolor',[0.9 0.9 0.9], 'mc',[], 'plotlegend',false, 'same_area',true);
ylim([0 1]);
yticks(0:0.1:1);
ylabel('\itC\rm_{llr}');

% horizontal reference line at the Cllr of the "true" likelihood ratios
% NOTE(review): no "hold on" is issued here, so this presumably relies on violin() leaving the axes held - confirm
lim_x = get(gca,'XLim');
plot(lim_x, [Cllr_synth_true Cllr_synth_true], '-k')

% zoom in on the y axis (overrides the [0 1] limits and ticks set above)
ylim([0.52 0.63]);
yticks(0.52:0.01:0.63);

% dummy plots for legend
% (gobjects preallocates a real graphics-object array; the previous NaN(3,0)
% was an empty array, so nothing was preallocated)
h = gobjects(1,3);
h(1) = plot(NaN, NaN, '-k', 'LineWidth',0.5);
h(2) = plot(NaN, NaN, '--k', 'LineWidth',1);
h(3) = plot(NaN, NaN, ':k', 'LineWidth',1);
legend_text = {'\itC\rm_{llr} value for "true" \Lambda values', 'median', 'quartile'};
legend(h, legend_text, 'Location','NorthWest', 'FontSize',10);


% Tippett plots for selected sample set
selected_set = 1;

max_plot = 3.5; % x-axis range in Tippett plots constrained to [-max_plot max_plot]

% lnLR values of the selected sample set, per method
% (common-source, similarity-score, rank-similarity; _s same-source, _d different-source)
lnLR_synth_common_s_plot = lnLR_synth_common_s_saved(:,selected_set);
lnLR_synth_common_d_plot = lnLR_synth_common_d_saved(:,selected_set);
lnLR_synth_score_s_plot = lnLR_synth_score_s_saved(:,selected_set);
lnLR_synth_score_d_plot = lnLR_synth_score_d_saved(:,selected_set);
lnLR_synth_rank_s_plot = lnLR_synth_score_s_saved_rank(:,selected_set);
lnLR_synth_rank_d_plot = lnLR_synth_score_d_saved_rank(:,selected_set);

% line style and eighth plot_tippett argument for each method, in drawing order
% (the eighth argument is true for the first curve only)
tippett_line_style = {'-', '--', ':'};
tippett_flag_8 = {true, false, false};

% figure 53: Tippett plot of the LRs as output by the Monte Carlo loop
tippett_s = {lnLR_synth_common_s_plot, lnLR_synth_score_s_plot, lnLR_synth_rank_s_plot};
tippett_d = {lnLR_synth_common_d_plot, lnLR_synth_score_d_plot, lnLR_synth_rank_d_plot};

h_fig_53 = figure(53);
for I_method = 1:3
    plot_tippett(exp(tippett_s{I_method}), [], exp(tippett_d{I_method}), [], [], false, tippett_line_style{I_method}, tippett_flag_8{I_method}, [], [], false);
end
xlim([-max_plot max_plot]);
box on
h_fig_53.Position(3) = h_fig_53.Position(3)*1.5; % widen the figure by 50%


% for selected sample set, apply bi-Gaussianized calibration and draw Tippett plot
% (each call receives the method's lnLR values and its Cllr value)
[~, sigma2_target_true, lnLR_synth_true_biGauss_s, lnLR_synth_true_biGauss_d] = ...
    biGaussianized_calibration([], lnLR_synth_true_s, lnLR_synth_true_d, Cllr_synth_true);

[~, sigma2_target_common, lnLR_synth_common_biGauss_s_plot, lnLR_synth_common_biGauss_d_plot] = ...
    biGaussianized_calibration([], lnLR_synth_common_s_plot, lnLR_synth_common_d_plot, Cllr_synth_common(selected_set));

[~, sigma2_target_score, lnLR_synth_score_biGauss_s_plot, lnLR_synth_score_biGauss_d_plot] = ...
    biGaussianized_calibration([], lnLR_synth_score_s_plot, lnLR_synth_score_d_plot, Cllr_synth_score(selected_set));

[~, sigma2_target_rank, lnLR_synth_rank_biGauss_s_plot, lnLR_synth_rank_biGauss_d_plot] = ...
    biGaussianized_calibration([], lnLR_synth_rank_s_plot, lnLR_synth_rank_d_plot, Cllr_synth_score_rank(selected_set));

% figure 54: Tippett plot of the bi-Gaussianized LRs
tippett_biGauss_s = {lnLR_synth_common_biGauss_s_plot, lnLR_synth_score_biGauss_s_plot, lnLR_synth_rank_biGauss_s_plot};
tippett_biGauss_d = {lnLR_synth_common_biGauss_d_plot, lnLR_synth_score_biGauss_d_plot, lnLR_synth_rank_biGauss_d_plot};

h_fig_54 = figure(54);
for I_method = 1:3
    plot_tippett(exp(tippett_biGauss_s{I_method}), [], exp(tippett_biGauss_d{I_method}), [], [], false, tippett_line_style{I_method}, tippett_flag_8{I_method}, [], [], false);
end
xlim([-max_plot max_plot]);
box on
h_fig_54.Position(3) = h_fig_54.Position(3)*1.5; % widen the figure by 50%


% plot calculated logLR versus true logLR
% Six scatter plots (figures 60 to 65) with identical formatting: for each of
% the three methods, one plot of the LRs as output by the Monte Carlo loop
% and one plot of the bi-Gaussianized LRs, each against the corresponding
% "true" values. All values are converted from natural log to log base 10.
%
% one row per figure:
%   figure number, true _d, true _s, calculated _d, calculated _s, y-axis label, title
% NOTE(review): the 'LogReg' titles are on the plots of the LRs taken directly from the loop;
% no logistic-regression step is visible in this script - confirm the intended label
scatter_specs = { ...
    60, lnLR_synth_true_d,         lnLR_synth_true_s,         lnLR_synth_common_d_plot,         lnLR_synth_common_s_plot,         'common-source log_{10}(\Lambda)',    'LogReg'; ...
    61, lnLR_synth_true_biGauss_d, lnLR_synth_true_biGauss_s, lnLR_synth_common_biGauss_d_plot, lnLR_synth_common_biGauss_s_plot, 'common-source log_{10}(\Lambda)',    'biGauss'; ...
    62, lnLR_synth_true_d,         lnLR_synth_true_s,         lnLR_synth_score_d_plot,          lnLR_synth_score_s_plot,          'similarity-score log_{10}(\Lambda)', 'LogReg'; ...
    63, lnLR_synth_true_biGauss_d, lnLR_synth_true_biGauss_s, lnLR_synth_score_biGauss_d_plot,  lnLR_synth_score_biGauss_s_plot,  'similarity-score log_{10}(\Lambda)', 'biGauss'; ...
    64, lnLR_synth_true_d,         lnLR_synth_true_s,         lnLR_synth_rank_d_plot,           lnLR_synth_rank_s_plot,           'rank-similarity log_{10}(\Lambda)',  'LogReg'; ...
    65, lnLR_synth_true_biGauss_d, lnLR_synth_true_biGauss_s, lnLR_synth_rank_biGauss_d_plot,   lnLR_synth_rank_biGauss_s_plot,   'rank-similarity log_{10}(\Lambda)',  'biGauss'};

for I_scatter = 1:size(scatter_specs, 1)
    [scatter_fig, true_d, true_s, calc_d, calc_s, scatter_ylabel, scatter_title] = scatter_specs{I_scatter, :};

    figure(scatter_fig);
    % identity line: points on it have calculated logLR equal to true logLR
    plot([-max_plot max_plot], [-max_plot max_plot], '-k', 'LineWidth',1);
    hold on
    % different-source pairs in red, same-source pairs in blue
    scatter(true_d/log(10), calc_d/log(10), 10, 'or', 'filled');
    scatter(true_s/log(10), calc_s/log(10), 10, 'ob', 'filled');
    alpha(0.1)
    axis square
    grid on
    xlabel('true log_{10}(\Lambda)')
    ylabel(scatter_ylabel)
    xlim([-max_plot max_plot]);
    ylim([-max_plot max_plot]);
    xticks(yticks); % use the same tick positions on both axes
    title(scatter_title);
end



% display summary results
% (statements are left unterminated on purpose so the values are displayed)

% Cllr of the "true" likelihood ratios
Cllr_synth_true

% median Cllr over all Monte Carlo sample sets, per method
Cllr_synth_common_median = median(Cllr_synth_common)
Cllr_synth_score_median = median(Cllr_synth_score)
Cllr_synth_score_median_rank = median(Cllr_synth_score_rank)

% Cllr of the sample set selected for the Tippett plots, per method
Cllr_synth_common_Tippett = Cllr_synth_common(selected_set)
Cllr_synth_score_Tippett = Cllr_synth_score(selected_set)
Cllr_synth_score_Tippett_rank = Cllr_synth_score_rank(selected_set)

% the variable above was previously misspelled "Cllr_synth_common_Tippet";
% the old name is kept as an alias so that anything relying on it still works
Cllr_synth_common_Tippet = Cllr_synth_common_Tippett;


%% cleanup
% rmpath('./functions/');