Commit b4fd2255 authored by Jens Behrmann's avatar Jens Behrmann

ADD: CNN Paper Code

parents
%% Script to analyze a trained CNN model
% Steps:
% 1.) Specification of dataset
% 2.) Specification of results file
% 3.) Show performance ranking (in order to select best model)
% 4.) Compute sensitivity of training dataset
%% Specification of dataset
% TODO: change to loading a ready mat-file
% setup_dataADvsSQ is a project helper; it returns the dataset, its class
% partition and the cross-validation fold assignment.
[data, partition, kFold] = setup_dataADvsSQ();
%% Specification of results file
% Loads resultsADSQ (saved by the IsotopeNet training script).
load('resultsIsotopeNet-ADSQ.mat')
results = resultsADSQ;
%% Show performance ranking (in order to select best model)
% Rank all training runs by average sensitivity (balanced accuracy).
results.performanceRanking(@(x) x.avgSensitivity)
%% Compute sensitivity of training dataset
% Select the best CNN model from several training runs by using the
% performance ranking above (index 1 = top-ranked configuration).
indexes = 1;
[cnnModels, ~] = results.findObject(indexes, 'model', 'cv');
% From the selected training run, take the model trained on the last
% cross-validation fold.
cnnModel = cnnModels{1}{end};
% Normalization of data (same normalization as used for training).
data.setNormalization('tic')
% Training labels: copy the class labels and zero out every item that is
% not in the training part of the last fold.
% NOTE(review): this assumes kFold encodes 2 = test item and 0 = unused
% item — confirm against the MSClassifyLib partition convention.
trainingLabels = copy(partition.classes);
trainingLabels.data(kFold(:,end) == 2) = 0;
trainingLabels.data(kFold(:,end) == 0) = 0;
% Compute per-class sensitivity of the model on the training data
scaledSensBoth = cnnModel.sensitivityAnalysis(...
data, trainingLabels, 'perClass');
% Select first class
scaledSens = squeeze(scaledSensBoth(1,:,:));
% Compute mean sensitivity over dataset
meanTrainingSensitivity = mean(scaledSens);
% Plot mean sensitivity against the m/z axis of the dataset
figure;
plot(data.mzVector, meanTrainingSensitivity)
%% Script to evaluate results of classification model:
% 1.) Evaluation of median balanced accuracy over cross-validation
% 2.) Preparation for box-plots over variances of several runs of CNNs
% 3.) Generation of boxplot
%% Median Balanced Accuracy and boxplot preparation for ROC/LDA results
% Requires resultsADSQ, resultsLP and kFold in the workspace.
results = resultsADSQ;
% get table with balanced accuracies for all cv-runs
% (rows: cv-folds, columns: the first 20 feature-count settings)
numCVs = size(kFold,2);
balAccTable = zeros(numCVs, 20);
for i = 1: numCVs
cellResults = results.performance(1, i);
balAccTable(i,:) = cellResults{1}.avgSensitivity(1:20);
end
% compute median per number of features
medianBalAccADSQ = median(balAccTable);
% BUGFIX: this previously read max(maxCVaccADSQ), using maxCVaccADSQ before
% it is assigned (errors on a fresh workspace). Mirror the LP branch below:
% pick the feature count with the best median balanced accuracy.
[max1, arg1] = max(medianBalAccADSQ);
maxCVaccADSQ = balAccTable(:,arg1);
results = resultsLP;
% get table with balanced accuracies for all cv-runs
numCVs = size(kFold,2);
balAccTable = zeros(numCVs, 20);
for i = 1: numCVs
cellResults = results.performance(1, i);
balAccTable(i,:) = cellResults{1}.avgSensitivity(1:20);
end
% compute median per number of features
medianBalAccLP = median(balAccTable);
[max2, arg2] = max(medianBalAccLP);
maxCVaccLP = balAccTable(:,arg2);
% Box plot of the per-fold accuracies at each task's best feature count
concatACC = [maxCVaccADSQ, maxCVaccLP];
boxplot(concatACC)
%% Evaluate results on Task ADSQ for CNN-model
% (either IsotopeNet or ResidualNet)
% Specify folder with saved results from IsotopeNet or ResidualNet
% BUGFIX: the file name had a duplicated extension ('...-ADSQ.mat.mat');
% the training script saves to 'resultsIsotopeNet-ADSQ.mat'.
load('resultsIsotopeNet-ADSQ.mat')
numCVruns = size(kFold, 2);
% accCV: per cv-fold accuracy for each of the 4 repeated training runs
accCV = zeros(numCVruns, 4);
% BUGFIX: medianACCcv holds one median per repeated run (indexes 1:4);
% it was previously sized zeros(numCVruns,1).
medianACCcv = zeros(4,1);
for indexes = 1:4
[cnnModels, ~] = resultsADSQ.findObject(indexes, 'model', 'cv');
% get performance for each cv-run
for i = 1:numCVruns
accCV(i, indexes) = resultsADSQ.performance(indexes,i,'once').avgSensitivity;
end
medianACCcv(indexes) = median(accCV(:, indexes));
end
% keep the per-fold accuracies of the best of the 4 repeated runs
[~, arg] = max(medianACCcv);
maxCVaccADSQiso = accCV(:, arg);
% Repeat the evaluation on results aggregated per individual
% (presumably per patient/tissue core — verify against MSClassifyLib).
aggregateResults = resultsADSQ.aggregate(resultsADSQ.dataPartition{1}.individuals);
numCVruns = size(kFold, 2);
accCV = zeros(numCVruns, 4);
medianACCcv = zeros(4,1);
for indexes = 1:4
[cnnModels, ~] = aggregateResults.findObject(indexes, 'model', 'cv');
% get performance for each cv-run
for i = 1:numCVruns
accCV(i, indexes) = aggregateResults.performance(indexes,i,'once').avgSensitivity;
end
medianACCcv(indexes) = median(accCV(:, indexes));
end
%% Evaluate results on Task LP for CNN-model
% (either IsotopeNet or ResidualNet)
% Specify folder with saved results from IsotopeNet or ResidualNet
load('resultsIsotopeNet-LP.mat')
numCVruns = size(kFold, 2);
% accCV: per cv-fold accuracy for each of the 4 repeated training runs
accCV = zeros(numCVruns, 4);
% BUGFIX: medianACCcv holds one median per repeated run (indexes 1:4);
% it was previously sized zeros(numCVruns,1).
medianACCcv = zeros(4,1);
for indexes = 1:4
[cnnModels, ~] = resultsLP.findObject(indexes, 'model', 'cv');
% get performance for each cv-run
for i = 1:numCVruns
accCV(i, indexes) = resultsLP.performance(indexes,i,'once').avgSensitivity;
end
medianACCcv(indexes) = median(accCV(:, indexes));
end
% keep the per-fold accuracies of the best of the 4 repeated runs
[~, arg] = max(medianACCcv);
maxCVaccLPiso = accCV(:, arg);
% Repeat the evaluation on results aggregated per individual
% (presumably per patient/tissue core — verify against MSClassifyLib).
aggregateResults = resultsLP.aggregate(resultsLP.dataPartition{1}.individuals);
numCVruns = size(kFold, 2);
accCV = zeros(numCVruns, 4);
medianACCcv = zeros(4,1);
for indexes = 1:4
[cnnModels, ~] = aggregateResults.findObject(indexes, 'model', 'cv');
% get performance for each cv-run
for i = 1:numCVruns
accCV(i, indexes) = aggregateResults.performance(indexes,i,'once').avgSensitivity;
end
medianACCcv(indexes) = median(accCV(:, indexes));
end
%% Generate boxplot
% Note that results for ROC/LDA can be generated by the first part of this
% script. Results for IsotopeNet and ResidualNet can be generated by the
% second part of this script by exchanging the loading .mat-file and
% changing variable names.
% NOTE(review): accCVresADSQ, accCVisoADSQ, accCVresLP and accCVisoLP are
% not produced above — per the note they must be created manually by
% re-running the evaluation with renamed variables.
maxCVaccADSQrepeat = [maxCVaccADSQ; maxCVaccADSQ; maxCVaccADSQ;maxCVaccADSQ];
maxCVaccLPrepeat = [maxCVaccLP; maxCVaccLP; maxCVaccLP;maxCVaccLP];
concatACC = [maxCVaccADSQrepeat, accCVresADSQ(:), accCVisoADSQ(:), ...
maxCVaccLPrepeat, accCVresLP(:), accCVisoLP(:)];
% BUGFIX: label typo 'ADSQ / ReslNet' corrected to 'ADSQ / ResNet'
% (consistent with 'LP / ResNet').
labels = {'ADSQ / ROC', 'ADSQ / ResNet', 'ADSQ / IsoNet', 'LP / ROC', 'LP / ResNet', 'LP / IsoNet' };
boxplot(concatACC, 'Labels', labels, 'LabelOrientation', 'inline')
%% Script to train an IsotopeNet on Task ADSQ and Task LP
% Steps:
% 1.) Specification of dataset
% (Note: Path has to be adjusted)
% 2.) Specification: CNN-architecture file and training parameters
% (Note: Path has to be adjusted)
% 3.) Training on Task ADSQ
% 4.) Training on Task LP
%% Specification of dataset for Task ADSQ
% Change path to saving folder of dataset
load('../DataTaskADSQ.mat')
%% Specification: CNN-architecture file and training parameters (Task ADSQ)
% Note: Set path to classification folder of MSClassifyLib
lC1 = '../MSClassifyLib/Classification/NNLocallyConnected.py';
% Create Tables:
% Create preprocessing table (TIC normalization)
tPP.string={'tic'};
tPP.object={MSNormalizationTrigger('tic')};
% Create feature extraction tables (no extraction; raw spectra go to the net)
tFE.string={'noFE'};
tFE.object={MSNeuralNetFeatureExtraction};
% Create map modifier table
tMM.string={'noMM'};
tMM.object={MSDummyMapModifier};
% Create classifier table: four identically configured classifiers =
% four repeated training runs of the same architecture.
% NOTE(review): this is the ADSQ section, but 'neuralNetFile' is 'LP_lC'
% (while the LP section below uses 'ADSQ_lC', epochs 30 and l2 .01 versus
% epochs 300 and l2 .05 here) — the two parameter sets look swapped;
% confirm against the paper's training setup before reuse.
tCM.string={'lC1','lC2','lC3','lC4'};
tCM.object={...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',300,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',lC1,'neuralNetFile','LP_lC'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',300,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',lC1,'neuralNetFile','LP_lC'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',300,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',lC1,'neuralNetFile','LP_lC'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',300,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',lC1,'neuralNetFile','LP_lC')};
%% Training on Task ADSQ
% Create classification task and run cross-validated training:
ctADSQ = MSClassificationTask(tPP,tFE,tMM,tCM, trainingData.dataLength);
resultsADSQ = ctADSQ.apply(trainingData, trainingPartition,kFold,...
'trainAllData',false,...
'saveMaps',true,'saveModels',true,...
'saveFeatureData',true,'predictionType', 'labels');
save('resultsIsotopeNet-ADSQ.mat', 'resultsADSQ', '-v7.3')
%% clear data for task ADSQ
clear trainingData trainingPartition kFold
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Specification of dataset for Task LP
% Change path to saving folder of dataset
load('../DataTaskLP.mat')
%% Specification: CNN-architecture file and training parameters (Task LP)
% Note: Set path to classification folder of MSClassifyLib
lC1 = '../MSClassifyLib/Classification/NNLocallyConnected.py';
% Create Tables:
% Create preprocessing table
tPP.string={'tic'};
tPP.object={MSNormalizationTrigger('tic')};
% Create feature extraction tables
tFE.string={'noFE'};
tFE.object={MSNeuralNetFeatureExtraction};
% Create map modifier table
tMM.string={'noMM'};
tMM.object={MSDummyMapModifier};
% Create classifier table (see NOTE(review) in the ADSQ section above about
% the 'ADSQ_lC' name / hyperparameters possibly being swapped with ADSQ).
tCM.string={'lC1','lC2','lC3','lC4'};
tCM.object={...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',30,'useTimeStamp',1,'learningRate',.0005,...
'l2',.01,'neuralNetArchitecture',lC1,'neuralNetFile','ADSQ_lC'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',30,'useTimeStamp',1,'learningRate',.0005,...
'l2',.01,'neuralNetArchitecture',lC1,'neuralNetFile','ADSQ_lC'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',30,'useTimeStamp',1,'learningRate',.0005,...
'l2',.01,'neuralNetArchitecture',lC1,'neuralNetFile','ADSQ_lC'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',256,...
'epochs',30,'useTimeStamp',1,'learningRate',.0005,...
'l2',.01,'neuralNetArchitecture',lC1,'neuralNetFile','ADSQ_lC')};
%% Training on Task LP
% Create classification task and run cross-validated training:
ctLP = MSClassificationTask(tPP,tFE,tMM,tCM, trainingData.dataLength);
resultsLP = ctLP.apply(trainingData, trainingPartition,kFold, ...
'trainAllData',false,...
'saveMaps',true,'saveModels',true,...
'saveFeatureData',true,'predictionType', 'labels');
save('resultsIsotopeNet-LP.mat', 'resultsLP', '-v7.3')
%% Script to train a ROC/LDA classification model on Task ADSQ and Task LP
% Steps:
% 1.) Specification of dataset
% 2.) Define number of features and processing steps
% 3.) Training on Task ADSQ
% 4.) Training on Task LP
%% Specification of dataset for Task ADSQ
% Change path to saving folder of dataset
load('../DataTaskADSQ.mat')
%% Define number of features and processing steps
% Evaluate LDA on 5, 10, ..., 400 ROC-picked features.
numFeatures = 5:5:400;
maxNumFeatures = max(numFeatures);
% Create Tables:
% Create preprocessing table (TIC normalization)
tPP.string={'tic'};
tPP.object={MSNormalizationTrigger('tic')};
% Create feature extraction tables (ROC-based feature picking)
tFE.string={'roc'};
tFE.object={MSROCPicking(maxNumFeatures)};
% Create map modifier table
tMM.string={'noMM'};
tMM.object={MSDummyMapModifier};
% Create classifier table (linear discriminant analysis)
tCM.string={'lda'};
tCM.object={MSLDAClassifier};
%% Training on Task ADSQ
% Create classification task and run cross-validated training:
ctADSQ = MSClassificationTask(tPP,tFE,tMM,tCM, numFeatures);
resultsADSQ = ctADSQ.apply(trainingData, trainingPartition,kFold,'trainAllData',true,...
'saveMaps',true,'saveModels',true,...
'saveFeatureData',true,'predictionType', 'labels');
%% clear stuff
clear trainingData trainingPartition kFold
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Specification of dataset for Task LP
% Change path to saving folder of dataset
load('../DataTaskLP.mat')
%% Training on Task LP
% Reuses the tables defined above; create classification task and train:
ctLP = MSClassificationTask(tPP,tFE,tMM,tCM, numFeatures);
resultsLP = ctLP.apply(trainingData, trainingPartition,kFold,'trainAllData',true,...
'saveMaps',true,'saveModels',true,...
'saveFeatureData',true,'predictionType', 'labels');
%% Script to train a ResidualNet on Task ADSQ and Task LP
% Steps:
% 1.) Specification of dataset
% 2.) Specification: CNN-architecture file and training parameters
% 3.) Training on Task ADSQ
% 4.) Training on Task LP
%% Specification of dataset for Task ADSQ
% Change path to saving folder of dataset
load('../DataTaskADSQ.mat')
%% Specification: CNN-architecture file and training parameters (Task ADSQ)
% Note: Set path to classification folder of MSClassifyLib
resnet = '../NNResidualArchitecture.py';
% Create Tables:
% Create preprocessing table (TIC normalization)
tPP.string={'tic'};
tPP.object={MSNormalizationTrigger('tic')};
% Create feature extraction tables (no extraction; raw spectra go to the net)
tFE.string={'noFE'};
tFE.object={MSNeuralNetFeatureExtraction};
% Create map modifier table
tMM.string={'noMM'};
tMM.object={MSDummyMapModifier};
% Create classifier table: four identically configured classifiers =
% four repeated training runs of the same architecture.
% NOTE(review): 'epochs',2 looks like a debugging value compared to the
% IsotopeNet script (30/300 epochs) — confirm the intended epoch count.
tCM.string={'resnet1','resnet2','resnet3','resnet4'};
tCM.object={...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',resnet,'neuralNetFile','ADSQ_resnet'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',resnet,'neuralNetFile','ADSQ_resnet'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',resnet,'neuralNetFile','ADSQ_resnet'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.05,'neuralNetArchitecture',resnet,'neuralNetFile','ADSQ_resnet')};
%% Training on Task ADSQ
% Create classification task and run cross-validated training:
ctADSQ = MSClassificationTask(tPP,tFE,tMM,tCM, trainingData.dataLength);
resultsADSQ = ctADSQ.apply(trainingData, trainingPartition,kFold,'trainAllData',false,...
'saveMaps',true,'saveModels',true,...
'saveFeatureData',true,'predictionType', 'labels');
save('resultsResidualNet-ADSQ.mat', 'resultsADSQ', '-v7.3')
%% clear stuff
clear trainingData trainingPartition kFold
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Specification of dataset for Task LP
% Change path to saving folder of dataset
load('../DataTaskLP.mat')
%% Specification: CNN-architecture file and training parameters (Task LP)
% Note: Set path to classification folder of MSClassifyLib
resnet = '../NNResidualArchitecture.py';
% Create Tables:
% Create preprocessing table
tPP.string={'tic'};
tPP.object={MSNormalizationTrigger('tic')};
% Create feature extraction tables
tFE.string={'noFE'};
tFE.object={MSNeuralNetFeatureExtraction};
% Create map modifier table
tMM.string={'noMM'};
tMM.object={MSDummyMapModifier};
% Create classifier table (LP uses a weaker l2 of .001 versus .05 for ADSQ)
tCM.string={'resnet1','resnet2','resnet3','resnet4'};
tCM.object={...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.001,'neuralNetArchitecture',resnet,'neuralNetFile','LP_resnet'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.001,'neuralNetArchitecture',resnet,'neuralNetFile','LP_resnet'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.001,'neuralNetArchitecture',resnet,'neuralNetFile','LP_resnet'),...
MSNeuralNetClassifier('useValidationSet',0,'batchSize',64,...
'epochs',2,'useTimeStamp',1,'learningRate',.0005,...
'l2',.001,'neuralNetArchitecture',resnet,'neuralNetFile','LP_resnet')};
%% Training on Task LP
% Create classification task and run cross-validated training:
ctLP = MSClassificationTask(tPP,tFE,tMM,tCM, trainingData.dataLength);
resultsLP = ctLP.apply(trainingData, trainingPartition,kFold,'trainAllData',false,...
'saveMaps',true,'saveModels',true,...
'saveFeatureData',true,'predictionType', 'labels');
save('resultsResidualNet-LP.mat', 'resultsLP', '-v7.3')
% Post-processing snippet (appears detached from the training flow above):
% replace individual labels by running numbers 1..N, then drop all items
% whose class label in the first column is zero.
t = cell(length(trainingPartition.individuals.labels), 1);
for i = 1: length(trainingPartition.individuals.labels)
t{i} = num2str(i);
end
trainingPartition.individuals.labels = t;
% NOTE(review): trainingData.reduce's return value is discarded while
% trainingPartition.reduce's is reassigned — presumably reduce mutates the
% (handle-semantic) MSData in place but returns a new partition; confirm
% against the MSClassifyLib API.
trainingData.reduce(trainingPartition.classes.data(:,1) ~= 0);
trainingPartition = trainingPartition.reduce(trainingPartition.classes.data(:,1) ~= 0);
\ No newline at end of file
classdef MSLDAClassifier < MSClassifier
  % Linear discriminant classifier (LDA)
  %
  % This class implements a linear discriminant classifier (LDA) for
  % multiple training classes. The generated classification model is an
  % MSLDAModel object.
  %
  % Methods:
  %   MSLDAClassifier: Constructor
  %   trainModel_impl: Implementation of LDA training algorithm
  %
  % MSLDAClassifier uses the handle semantic, i.e. when assigning an object
  % of this class to a variable, only a reference to the original object is
  % copied. Use the copy method to create a deep copy.
  properties
    params = {};  % Unused; kept for backward compatibility of the interface
    ldaParams;    % Struct of user-supplied name-value pairs that are
                  % forwarded verbatim to fitcdiscr during training
  end
  methods
    function obj = MSLDAClassifier (varargin)
      % Constructor
      % obj = MSLDAClassifier: Create new MSLDAClassifier object
      % obj = MSLDAClassifier(Name, Value, ...): Specify optional
      %   name-value-pair arguments that are passed to fitcdiscr.
      %   Possible parameters are:
      %   'Prior': string 'empirical' (according to the frequency of
      %            classes) or 'uniform'
      obj@MSClassifier;
      % inputParser is used for validation only; the raw pairs are stored
      % below so that only explicitly given (non-default) parameters are
      % forwarded to fitcdiscr.
      argChecker = inputParser;
      argChecker.addParameter('Prior', 'uniform', @checkPriorParam);
      argChecker.parse(varargin{:});
      % BUGFIX: the loop previously ran for i=1:(length(varargin)-1)/2 and
      % read varargin{2*i}/varargin{2*i+1}; with a single pair the loop body
      % never executed (1:0.5 is empty), and with more pairs a value was
      % misused as a field name. Pairs are (varargin{2*i-1}, varargin{2*i}).
      for i = 1:floor(length(varargin)/2)
        obj.ldaParams.(varargin{2*i-1}) = varargin{2*i};
      end
    end
  end
  methods (Access = protected)
    function model = trainModel_impl (obj, msData, L, numFeatures)
      % model = obj.trainModel_impl(msData, L, numFeatures): Create a
      % classification model for input data msData (MSData) and classes
      % defined by the label vector L (numeric). Only the first numFeatures
      % components (columns) of the input data are used; items with a
      % label <= 0 are excluded from training.
      nonZeroLabels = (L > 0);
      if ~isempty(obj.ldaParams)
        % Re-expand the stored parameter struct into name-value pairs
        % (renamed from 'varargin' to avoid shadowing the keyword).
        paramNames = fields(obj.ldaParams);
        nParams = length(paramNames);
        fitArgs = cell(1, 2*nParams);
        for i = 1:nParams
          fitArgs{2*i-1} = paramNames{i};
          fitArgs{2*i} = obj.ldaParams.(paramNames{i});
        end
        lda = fitcdiscr(msData.data(nonZeroLabels, 1:numFeatures), ...
          L(nonZeroLabels), fitArgs{:});
      else
        lda = fitcdiscr(msData.data(nonZeroLabels, 1:numFeatures), ...
          L(nonZeroLabels));
      end
      model = MSLDAModel(lda, numFeatures, class(obj));
    end
  end
end
function bool = checkPriorParam(param)
% Validate the 'Prior' argument for fitcdiscr. Accepted forms:
%   - a character array (e.g. 'uniform', 'empirical'),
%   - a numeric vector of class probabilities,
%   - a struct carrying both 'ClassProbs' and 'ClassNames' fields.
if ischar(param)
    bool = true;
elseif isnumeric(param)
    bool = isvector(param);
elseif isstruct(param)
    bool = isfield(param, 'ClassProbs') && isfield(param, 'ClassNames');
else
    bool = false;
end
end
classdef MSLDAModel < MSClassificationModel
  % Linear discriminant analysis (LDA) classification model
  %
  % Wraps a discriminant model produced by fitcdiscr() together with the
  % number of features it was trained on, and exposes it through the
  % MSClassificationModel interface.
  %
  % Properties:
  %   lda:         LDA model as created by fitcdiscr()
  %   numFeatures: Number of features used for training
  %
  % Methods:
  %   MSLDAModel:    Constructor
  %   classify_impl: Classification model implementation
  %
  % MSLDAModel uses the handle semantic, i.e. when assigning an object
  % of this class to a variable, only a reference to the original object is
  % copied. Use the copy method to create a deep copy.
  properties (SetAccess = immutable)
    lda;          % LDA model as created by fitcdiscr()
    numFeatures;  % Number of features used for training
  end
  methods
    function obj = MSLDAModel (lda, numFeatures, creator)
      % Constructor: store the fitted discriminant and its feature count
      obj@MSClassificationModel(creator);
      obj.lda = lda;
      obj.numFeatures = numFeatures;
    end
  end
  methods (Access = protected)
    function [prediction, scores] = classify_impl (obj, msData, itemMask)
      % Predict class labels (and, if requested, scores) for the items
      % selected by itemMask, restricted to the first numFeatures columns.
      features = msData.data(itemMask, 1:obj.numFeatures);
      if nargout > 1
        [prediction, scores] = obj.lda.predict(features);
      else
        prediction = obj.lda.predict(features);
      end
    end
  end
end
classdef MSNeuralNetClassifier < MSClassifier
% Neural network classifier
properties
neuralNetFile
% Serialized Neural Net. Also contains class
% dictionaries that can translate between the class
% labels used by Python (0,..,N-1) and the class
% labels transmitted by MSClassifyLib (e.g. 1,3 and 4).
neuralNetArchitecture
% An external network architecture can be supplied.
% If neuralNetArchitecture is 'auto', the standard architecture
% defined in NNInterface.py is used.
dataFile
% File used for data exchange between Python and
% Matlab. Contains spectra (and class labels during
% training).
% Default: 'neuralNetTemp'
resultFile
% File used for data exchange between Python and
% Matlab. Contains predicted class labels and
% predicted class membership probabilites.
% Default: 'neuralNetTemp'
continueTraining
% false if the neuralNetFile should be created anew
% every time the neural network is trained,
% otherwise true.
% Default: false
keepTemporaryFiles
% true if temporary files should be kept, else false
useValidationSet
% If true, 10% of the training data is used for validation.
hyperparameters
% Struct of hyperparameters.
useTimeStamp
% If 1, a timestamp is added to the neuralNetFile-name upon the
% beginning of the training. If for example an