Last active
December 27, 2017 13:04
-
-
Save keckelt/f268ad0d99217c8a77fc7edd24ede461 to your computer and use it in GitHub Desktop.
Split normal distributions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
close all

% Load test set:
%load('female.mat')
%load('male.mat')

% Draw normally distributed ages and floor them to whole years.
female = floor(normrnd(66,8,1,100));
male = floor(normrnd(58,5,1,80));

% Age range covered by either group, widened by one bin on each side so the
% charts get an empty margin. The lower bound is clamped to 1 because it is
% used as an array index below (index 0 would raise an error).
first = max(1, min(min(female), min(male)) - 1); % lowest relevant age
last = max(max(female), max(male)) + 1;          % highest relevant age

% Count each age: element k holds the number of samples with age k.
% Passing the output size makes both vectors exactly `last` elements long
% (zero padded), so no separate padding step is needed.
% Note: accumarray requires all ages to be positive integers.
binnedFemale = accumarray(female(:), 1, [last 1]);
binnedMale = accumarray(male(:), 1, [last 1]);

% Remove the leading bins (indexes below the lowest relevant age).
binnedFemale = binnedFemale(first:last);
binnedMale = binnedMale(first:last);

x = first:last; % x axis (ages matching the bins)
% Cumulative counts in ascending age order:
% entry k = number of samples with age <= x(k).
aggHistFemale = cumsum(binnedFemale);
aggHistMale = cumsum(binnedMale);

% Cumulative counts in descending age order: the bins are reversed before
% summing, so entry k = number of samples with age >= x(end-k+1).
% These two vectors stay in reversed (last down to first) order.
aggHistFemale2 = cumsum(binnedFemale(end:-1:1));
aggHistMale2 = cumsum(binnedMale(end:-1:1));

% ---------------------------------------------------------------- female
% Jaccard score of the female group against a region R of the age axis:
%   |F and R| / |F or R| = females in R / (all females + males in R)
% Variant 1 uses R = "age <= x", variant 2 uses R = "age >= x".
femaleJaccards1 = aggHistFemale ./ (max(aggHistFemale) + aggHistMale);
femaleJaccards2 = aggHistFemale2 ./ (max(aggHistFemale2) + aggHistMale2);
femaleJaccards2 = femaleJaccards2(end:-1:1); % back to first:last ordering

[femaleJaccard1, femaleSplitIndex1] = max(femaleJaccards1);
[femaleJaccard2, femaleSplitIndex2] = max(femaleJaccards2);

% Default to the region that starts at the lowest age; switch to the one
% that starts at the highest age only when it scores strictly better.
femaleJaccard = femaleJaccard1;
femaleStart = first;
femaleSplit = x(femaleSplitIndex1);
if ~(femaleJaccard1 >= femaleJaccard2)
    femaleJaccard = femaleJaccard2;
    femaleStart = last;
    femaleSplit = x(femaleSplitIndex2);
end

msg = sprintf('Female: Jaccard Score of %f with region from %d to %d.', femaleJaccard, femaleStart, femaleSplit);
disp(msg)

% ------------------------------------------------------------------ male
% Same computation with the roles of the two groups swapped.
maleJaccards1 = aggHistMale ./ (max(aggHistMale) + aggHistFemale);
maleJaccards2 = aggHistMale2 ./ (max(aggHistMale2) + aggHistFemale2);
maleJaccards2 = maleJaccards2(end:-1:1); % back to first:last ordering

[maleJaccard1, maleSplitIndex1] = max(maleJaccards1);
[maleJaccard2, maleSplitIndex2] = max(maleJaccards2);

maleJaccard = maleJaccard1;
maleStart = first;
maleSplit = x(maleSplitIndex1);
if ~(maleJaccard1 >= maleJaccard2)
    maleJaccard = maleJaccard2;
    maleStart = last;
    maleSplit = x(maleSplitIndex2);
end

msg = sprintf('Male: Jaccard Score of %f with region from %d to %d.', maleJaccard, maleStart, maleSplit);
disp(msg)
% Figure 1: per-age histograms, one subplot per group. Each subplot shades
% the group's own selected region and marks the other group's split age
% with a dotted line.
figure('pos',[0 500 900 500])

femaleTop = max(binnedFemale); % tallest female bar
maleTop = max(binnedMale);     % tallest male bar

% Corner points (x, y) of the shaded rectangles, listed around the outline.
femaleBox = [femaleStart, 0; femaleSplit, 0; femaleSplit, femaleTop; femaleStart, femaleTop];
maleBox = [maleStart, 0; maleSplit, 0; maleSplit, maleTop; maleStart, maleTop];

% --- female histogram
subplot(2,1,1)
bar(x, binnedFemale,'g')
title('female')
% (the female split itself is not drawn as a line here; the patch shows it)
line([maleSplit maleSplit], [0 femaleTop], 'Color','blue','LineStyle',':');
patch('vertices', femaleBox, ...
    'faces', [1, 2, 3, 4], ...
    'FaceColor', 'g', ...
    'FaceAlpha', 0.15);

% --- male histogram
subplot(2,1,2)
bar(x, binnedMale, 'b')
title('male')
% (the male split itself is not drawn as a line here; the patch shows it)
line([femaleSplit femaleSplit], [0 maleTop], 'Color','green','LineStyle',':');
patch('vertices', maleBox, ...
    'faces', [1, 2, 3, 4], ...
    'FaceColor', 'b', ...
    'FaceAlpha', 0.15);
% Figure 2: cumulative histograms of both groups with the selected split
% ages of both groups marked as dotted vertical lines.
figure('pos',[0 0 900 450])

% Common height for the split markers: the larger of the two group totals.
yTop = max(max(aggHistFemale), max(aggHistMale));

% --- accumulated from the lowest age upwards
subplot(2,1,1)
plot(x, aggHistFemale, 'g', x, aggHistMale, 'b')
title('aggregated histograms (age increasing)')
line([femaleSplit femaleSplit], [0 yTop], 'Color','green','LineStyle',':');
line([maleSplit maleSplit], [0 yTop], 'Color','blue','LineStyle',':');

% --- accumulated from the highest age downwards
subplot(2,1,2)
% aggHistFemale2/aggHistMale2 are cumulative sums over the flipped bins, so
% their first entry belongs to the highest age. Flip them back so that each
% value is plotted at its own age in x (first:last); otherwise the curves
% would not line up with the split markers. The reversed x direction then
% makes the curves grow from left to right.
plot(x, flipud(aggHistFemale2), 'g', x, flipud(aggHistMale2), 'b')
set(gca, 'xdir', 'reverse')
title('aggregated histograms (age decreasing)')
line([femaleSplit femaleSplit], [0 yTop], 'Color','green','LineStyle',':');
line([maleSplit maleSplit], [0 yTop], 'Color','blue','LineStyle',':');
% Figure 3: Jaccard score curves of both groups for the two region types.
% A dotted marker is drawn only in the subplot whose region type was
% actually selected for the group, using the same rule as the selection:
% "age <= x" wins ties, "age >= x" must be strictly better.
figure('pos',[910 0 1000 1000])
diffMin = 0; % lower end of the split markers

% --- regions "age <= x"
subplot(2,1,1)
diffMax = max([max(femaleJaccards1), max(maleJaccards1)]); % upper end of the markers
plot(x, femaleJaccards1, 'g', x, maleJaccards1, 'b')
title('jaccard scores "Age <= x"')
if (femaleJaccard1 >= femaleJaccard2)
    line([x(femaleSplitIndex1) x(femaleSplitIndex1)], [diffMin diffMax], 'Color','green','LineStyle',':');
end
if (maleJaccard1 >= maleJaccard2)
    line([x(maleSplitIndex1) x(maleSplitIndex1)], [diffMin diffMax], 'Color','blue','LineStyle',':');
end

% --- regions "age >= x"
subplot(2,1,2)
diffMax = max([max(femaleJaccards2), max(maleJaccards2)]);
plot(x, femaleJaccards2, 'g', x, maleJaccards2, 'b')
set ( gca, 'xdir', 'reverse' )
title('jaccard scores "Age >= x"')
if (femaleJaccard2 > femaleJaccard1)
    line([x(femaleSplitIndex2) x(femaleSplitIndex2)], [diffMin diffMax], 'Color','green','LineStyle',':');
end
% Strict comparison: on a tie the "age <= x" region is the selected one, so
% the male marker must not also appear here.
if (maleJaccard2 > maleJaccard1)
    line([x(maleSplitIndex2) x(maleSplitIndex2)], [diffMin diffMax], 'Color','blue','LineStyle',':');
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment