diff --git a/docs-src/content/tutorials/_index.md b/docs-src/content/tutorials/_index.md index ce80d1a..8e9066c 100644 --- a/docs-src/content/tutorials/_index.md +++ b/docs-src/content/tutorials/_index.md @@ -1,16 +1,16 @@ +++ -title = "Tutorials" +title = "Tutorials and example data" weight = 3 +++ -Here you can find complete examples of common analysis tasks using freely available example data. +Here you can find details on different ways of using SpectralAnalysis and freely available MSI data that can be used for testing. #### Example Data ##### Mouse Brain (MALDI MSI QTOF) This mouse brain dataset is available alongside [SpectralAnalysis](https://github.com/AlanRace/SpectralAnalysis/tree/master/example-data/mouse-brain). It was acquired using MALDI MSI with a QSTAR XL mass spectrometer. -![MALDI mouse brain data]() +![MALDI mouse brain data](/images/SpectralAnalysis-data-mousebrain.png?width=30pc) ##### Mouse Bladder (MALDI MSI Orbitrap) This mouse bladded dataset is available from the [PRIDE repository](https://www.ebi.ac.uk/pride/archive/projects/PXD001283). It was acquired using MALDI MSI with a AP-SMALDI imaging source coupled to an Orbitrap mass spectrometer. diff --git a/docs-src/content/tutorials/taylor-workflow.md b/docs-src/content/tutorials/taylor-workflow.md new file mode 100644 index 0000000..8ec9eb0 --- /dev/null +++ b/docs-src/content/tutorials/taylor-workflow.md @@ -0,0 +1,110 @@ ++++ +title = "Scripting - datacube / kmeans" +weight = 3 ++++ + +This script was originally written by Adam Taylor and can be used to automatically generate a total spectrum, detect peaks, reduce the data to the peaks with signal-to-noise greater than 3, perform *k*-means clustering on the reduced data, generate mean spectra for each cluster, and save out all variables. + +This script shows how SpectralAnalysis can be used without the interface to perform more complex and automatable analysis routines.
+
+```matlab
+spectralAnalysisPath = 'C:\path\to\SpectralAnalysis'; % root of the SpectralAnalysis checkout - edit to match your machine
+
+inputFolder = fullfile(spectralAnalysisPath, 'example-data', 'mouse-brain'); % location of imzML files to process
+outputFolder = fullfile(spectralAnalysisPath, 'example-data', 'mouse-brain'); % location the .mat results are written to
+filesToProcess = dir(fullfile(inputFolder, '*.imzML')); % all imzML files in inputFolder
+
+% Set up datacube generation variables
+preprocessingWorkflowFile = fullfile(spectralAnalysisPath, 'example-data', 'mouse-brain', 'mouse-brain-preprocessingWorkflow.sap'); % location of preprocessing file
+nzm_multiple = 3; % multiple of non zero median
+
+% Add SpectralAnalysis to the path - this only needs to be done once per MATLAB session
+disp('Setting up ');
+addpath(genpath(spectralAnalysisPath));
+addJARsToClassPath();
+
+% Generate preprocessing workflow
+preprocessing = PreprocessingWorkflow();
+preprocessing.loadWorkflow(preprocessingWorkflowFile);
+
+peakPicking = GradientPeakDetection();
+medianPeakFilter = PeakThresholdFilterMedian(1, nzm_multiple); % NOTE(review): meaning of the first argument is not visible here - confirm
+peakPicking.addPeakFilter(medianPeakFilter);
+
+%% Process each imzML file in turn
+for i = 1:numel(filesToProcess)
+    disp(['Processing ' filesToProcess(i).name]);
+
+    input_file = fullfile(filesToProcess(i).folder, filesToProcess(i).name);
+
+    %% Get the filename (without folder or extension) from the path
+    [~, filename, ~] = fileparts(input_file);
+
+    %% Make a datacube from this dataset
+
+    % obtain total spectrum
+    disp(['Generating Total Spectrum for ' ,input_file]);
+    parser = ImzMLParser(input_file);
+    parser.parse;
+    data = DataOnDisk(parser);
+
+    spectrumGeneration = TotalSpectrum();
+    spectrumGeneration.setPreprocessingWorkflow(preprocessing);
+
+    totalSpectrum = spectrumGeneration.process(data);
+    totalSpectrum = totalSpectrum.get(1);
+
+    %% Peak picking
+    disp('Peak picking ');
+    peaks = peakPicking.process(totalSpectrum);
+
+    spectralChannels_all = totalSpectrum.spectralChannels;
+    spectralChannels = [peaks.centroid];
+
+    %% Make datacube restricted to the detected peaks
+    disp(['! Generating data cube with ' num2str(length(peaks)) ' peaks...'])
+
+    peakTolerance = -1; % NOTE(review): the meaning of -1 is defined by DatacubeReduction - confirm
+
+    reduction = DatacubeReduction(peakTolerance);
+    reduction.setPeakList(peaks);
+
+    addlistener(reduction, 'FastMethods', @(~, canUseFastMethods)disp(['! Using fast Methods? ' num2str(canUseFastMethods.bool)]));
+
+    dataRepresentation = reduction.process(data);
+    dataRepresentation = dataRepresentation.get(1);
+    dataRepresentation_struct = dataRepresentation.saveobj();
+
+    datacube = dataRepresentation.data;
+    pixels = dataRepresentation.pixels; % not used below; left in the workspace for inspection
+
+    %% K means clustering
+    disp('Performing k-means clustering on top 1000 peaks with k = 2 and cosine distance')
+
+    [~, top1000idx] = maxk([peaks.intensity], 1000);
+    datacube_small = datacube(:,top1000idx);
+
+    [kmeans_idx, kmeans_c] = kmeans(datacube_small, 2, 'distance', 'cosine');
+
+    %% Make mean spectrum of each cluster and of the whole datacube
+    disp('Saving cluster mean spectra')
+
+    datacube_clust1 = datacube(kmeans_idx == 1,:);
+    datacube_clust2 = datacube(kmeans_idx == 2,:);
+
+    mean_intensity_clust1 = mean(datacube_clust1, 1); % explicit dim: a one-row cluster must still give a spectrum, not a scalar
+    mean_intensity_clust2 = mean(datacube_clust2, 1);
+    mean_intensity_all = mean(datacube, 1);
+
+    %% Save all
+    disp('Saving files')
+
+    save(fullfile(outputFolder, [filename '.mat']), '-struct', 'dataRepresentation_struct', '-v7.3')
+    save(fullfile(outputFolder, [filename '.mat']), ...
+        'peaks', 'spectralChannels_all', 'spectralChannels', 'kmeans_idx', 'kmeans_c', ...
+        'top1000idx', 'mean_intensity_clust1', 'mean_intensity_clust2', 'mean_intensity_all',...
+        '-append')
+
+    disp([input_file ' complete']);
+end
+```
\ No newline at end of file
diff --git a/docs-src/static/images/SpectralAnalysis-data-mousebrain.png b/docs-src/static/images/SpectralAnalysis-data-mousebrain.png
new file mode 100644
index 0000000..2221de7
Binary files /dev/null and b/docs-src/static/images/SpectralAnalysis-data-mousebrain.png differ
diff --git a/example-data/mouse-brain/mouse-brain-preprocessingWorkflow.sap b/example-data/mouse-brain/mouse-brain-preprocessingWorkflow.sap
new file mode 100644
index 0000000..cd681a3
--- /dev/null
+++ b/example-data/mouse-brain/mouse-brain-preprocessingWorkflow.sap
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/datacube_kmeans.m b/examples/datacube_kmeans.m
new file mode 100644
index 0000000..ce9d5a2
--- /dev/null
+++ b/examples/datacube_kmeans.m
@@ -0,0 +1,110 @@
+% DATACUBE_KMEANS  Batch-convert imzML files to peak-reduced datacubes and cluster them.
+%
+% For each *.imzML file in inputFolder this script generates a total spectrum,
+% picks peaks, reduces the data to those peaks, runs k-means (k = 2, cosine
+% distance) on up to the 1000 most intense peaks, computes per-cluster mean
+% spectra and saves everything to <outputFolder>/<input name>.mat.
+%
+% Edit spectralAnalysisPath to point at your SpectralAnalysis checkout before running.
+
+spectralAnalysisPath = '/home/alan/Documents/GitProjects/SpectralAnalysis'; % root of the SpectralAnalysis checkout - edit to match your machine
+
+inputFolder = fullfile(spectralAnalysisPath, 'example-data', 'mouse-brain'); % location of imzML files to process
+outputFolder = fullfile(spectralAnalysisPath, 'example-data', 'mouse-brain'); % location the .mat results are written to
+filesToProcess = dir(fullfile(inputFolder, '*.imzML')); % all imzML files in inputFolder
+
+% Set up datacube generation variables
+preprocessingWorkflowFile = fullfile(spectralAnalysisPath, 'example-data', 'mouse-brain', 'mouse-brain-preprocessingWorkflow.sap'); % location of preprocessing file
+nzm_multiple = 3; % multiple of non zero median
+
+% Add SpectralAnalysis to the path - this only needs to be done once per MATLAB session
+disp('Setting up ');
+addpath(genpath(spectralAnalysisPath));
+addJARsToClassPath();
+
+% Generate preprocessing workflow
+preprocessing = PreprocessingWorkflow();
+preprocessing.loadWorkflow(preprocessingWorkflowFile);
+
+peakPicking = GradientPeakDetection();
+medianPeakFilter = PeakThresholdFilterMedian(1, nzm_multiple); % NOTE(review): meaning of the first argument is not visible here - confirm
+peakPicking.addPeakFilter(medianPeakFilter);
+
+%% Process each imzML file in turn
+for i = 1:numel(filesToProcess)
+    disp(['Processing ' filesToProcess(i).name]);
+
+    input_file = fullfile(filesToProcess(i).folder, filesToProcess(i).name);
+
+    %% Get the filename (without folder or extension) from the path
+    [~, filename, ~] = fileparts(input_file);
+
+    %% Make a datacube from this dataset
+
+    % obtain total spectrum
+    disp(['Generating Total Spectrum for ' ,input_file]);
+    parser = ImzMLParser(input_file);
+    parser.parse;
+    data = DataOnDisk(parser);
+
+    spectrumGeneration = TotalSpectrum();
+    spectrumGeneration.setPreprocessingWorkflow(preprocessing);
+
+    totalSpectrum = spectrumGeneration.process(data);
+    totalSpectrum = totalSpectrum.get(1);
+
+    %% Peak picking
+    disp('Peak picking ');
+    peaks = peakPicking.process(totalSpectrum);
+
+    spectralChannels_all = totalSpectrum.spectralChannels;
+    spectralChannels = [peaks.centroid];
+
+    %% Make datacube restricted to the detected peaks
+    disp(['! Generating data cube with ' num2str(length(peaks)) ' peaks...'])
+
+    peakTolerance = -1; % NOTE(review): the meaning of -1 is defined by DatacubeReduction - confirm
+
+    reduction = DatacubeReduction(peakTolerance);
+    reduction.setPeakList(peaks);
+
+    addlistener(reduction, 'FastMethods', @(~, canUseFastMethods)disp(['! Using fast Methods? ' num2str(canUseFastMethods.bool)]));
+
+    dataRepresentation = reduction.process(data);
+    dataRepresentation = dataRepresentation.get(1);
+    dataRepresentation_struct = dataRepresentation.saveobj();
+
+    datacube = dataRepresentation.data;
+    pixels = dataRepresentation.pixels; % not used below; left in the workspace for inspection
+
+    %% K means clustering
+    disp('Performing k-means clustering on top 1000 peaks with k = 2 and cosine distance')
+
+    [~, top1000idx] = maxk([peaks.intensity], 1000);
+    datacube_small = datacube(:,top1000idx);
+
+    [kmeans_idx, kmeans_c] = kmeans(datacube_small, 2, 'distance', 'cosine');
+
+    %% Make mean spectrum of each cluster and of the whole datacube
+    disp('Saving cluster mean spectra')
+
+    datacube_clust1 = datacube(kmeans_idx == 1,:);
+    datacube_clust2 = datacube(kmeans_idx == 2,:);
+
+    mean_intensity_clust1 = mean(datacube_clust1, 1); % explicit dim: a one-row cluster must still give a spectrum, not a scalar
+    mean_intensity_clust2 = mean(datacube_clust2, 1);
+    mean_intensity_all = mean(datacube, 1);
+
+    %% Save all
+    disp('Saving files')
+
+    save(fullfile(outputFolder, [filename '.mat']), '-struct', 'dataRepresentation_struct', '-v7.3')
+    save(fullfile(outputFolder, [filename '.mat']), ...
+        'peaks', 'spectralChannels_all', 'spectralChannels', ...
+        'kmeans_idx', 'kmeans_c', ...
+        'mean_intensity_clust1', 'mean_intensity_clust2', 'mean_intensity_all',...
+        'top1000idx',...
+        '-append')
+
+    disp([input_file ' complete']);
+end
\ No newline at end of file