From ba65a87cb89a8fa587a96a784914b865ee611087 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 14:51:58 -0700 Subject: [PATCH 01/26] MNT: Abstract+simplify sm barplots/getObsCountsBy still need to document/test the new functions, but this is nice --- empress/support_files/js/biom-table.js | 93 +++++++++++++++++++------- empress/support_files/js/empress.js | 32 ++------- 2 files changed, 77 insertions(+), 48 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index bd6c08eaf..e5cbaeb5a 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -199,6 +199,36 @@ define(["underscore", "util"], function (_, util) { return _.indexOf(arr, num, true) >= 0; }; + + BIOMTable.prototype._getObsCountAndTotalBy = function(colIdx, fIdx) { + var scope = this; + var countMap = {}; + var containingSampleCount = 0; + var cVal; + // Iterate through each sample of the BIOM table + _.each(this._tbl, function (presentFeatureIndices, sIdx) { + // Figure out what metadata value this sample has at the column. + // If we haven't recorded it as a key in our output Object yet, do + // so and set it to default to 0. + cVal = scope._sm[sIdx][colIdx]; + if (!_.has(countMap, cVal)) { + countMap[cVal] = 0; + } + // Now, we check if we need to update the cVal entry by 1 + // (indicating that one more sample with cVal contains the + // specified feature). + if (scope._sortedArrayHasNumber(presentFeatureIndices, fIdx)) { + // This sample actually contains the feature! + cVal = scope._sm[sIdx][colIdx]; + // Update our output Object's count info accordingly. + countMap[cVal] += 1; + // And update the count of samples containing this feature + containingSampleCount++; + } + }); + return {countMap: countMap, containingSampleCount: containingSampleCount}; + }; + /** * Return the feature IDs shared by the BIOM table and input array * @@ -296,7 +326,7 @@ define(["underscore", "util"], function (_, util) { * @param {String} col Sample metadata column * @param {String} fID Feature (aka observation) ID * - * @return {Object} valueToCountOfSampleWithObs + * @return {Object} countMap * * @throws {Error} If the sample metadata column is unrecognized. * If the feature ID is unrecognized. @@ -305,28 +335,7 @@ define(["underscore", "util"], function (_, util) { var scope = this; var colIdx = this._getSampleMetadataColIndex(col); var fIdx = this._getFeatureIndexFromID(fID); - var valueToCountOfSampleWithObs = {}; - var cVal, fIdxPos; - // Iterate through each sample of the BIOM table - _.each(this._tbl, function (presentFeatureIndices, sIdx) { - // Figure out what metadata value this sample has at the column. - // If we haven't recorded it as a key in our output Object yet, do - // so and set it to default to 0. - cVal = scope._sm[sIdx][colIdx]; - if (!_.has(valueToCountOfSampleWithObs, cVal)) { - valueToCountOfSampleWithObs[cVal] = 0; - } - // Now, we check if we need to update the cVal entry by 1 - // (indicating that one more sample with cVal contains the - // specified feature). - if (scope._sortedArrayHasNumber(presentFeatureIndices, fIdx)) { - // This sample actually contains the feature! - cVal = scope._sm[sIdx][colIdx]; - // Update our output Object's count info accordingly. - valueToCountOfSampleWithObs[cVal] += 1; - } - }); - return valueToCountOfSampleWithObs; + return this._getObsCountAndTotalBy(colIdx, fIdx).countMap; }; /** @@ -480,5 +489,43 @@ define(["underscore", "util"], function (_, util) { return valueToSampleCount; }; + /** + * Returns a object that maps the unique values of a sample metadata column + * to the *proportion* of samples with that metadata value containing a + * given feature. + * + * Besides the fact that this returns proportions (e.g. + * {a: 0.25, b: 0.5, c: 0.25} rather than {a: 1, b: 2, c: 1}), this + * function differs from getObsCountsBy() in that it doesn't raise an error + * if the input feature ID is unrecognized -- rather, it'll just return + * null in that case. (This is because this function is designed to be + * used with sample metadata tip barplots, and it's understood that some + * of the tips in the tree may not be present in the table.) + * + * @param {String} col Sample metadata column + * @param {String} fID Feature (aka observation) ID + * + * @return {Object} valueToFreq Maps unique values in the sample metadata + * column to their proportion of the total + * number of samples containing the specified + * feature. If no samples contain the + * specified feature, this will return {}. + * + * @throws {Error} If the sample metadata column is unrecognized. + */ + BIOMTable.prototype.getObsFrequencyBy = function (col, fID) { + var fIdx = this._fID2Idx[fID]; + if (_.isUndefined(fIdx)) { + return {}; + } + var colIdx = this._getSampleMetadataColIndex(col); + var countInfo = this._getObsCountAndTotalBy(colIdx, fIdx); + var countMap = countInfo.countMap; + var containingSampleCount = countInfo.containingSampleCount; + return _.mapObject(countMap, function (count) { + return count / containingSampleCount; + }); + }; + return BIOMTable; }); diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index 68c7a7d7e..fd95c8db7 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1145,44 +1145,26 @@ define([ if (this._tree.isleaf(this._tree.postorderselect(i))) { var node = this._treeData[i]; var name = this.getNodeInfo(node, "name"); + var freqs = this._biom.getObsFrequencyBy( + layer.colorBySMField, name + ); // Don't draw bars for tips that aren't in the BIOM table // (Note that this is only for the sample metadata barplots -- // these tips could still ostensibly have associated // feature metadata) - if (this._biom.getObsIDsDifference([name]).length > 0) { + if (_.isEmpty(freqs)) { continue; } - // Figure how many samples across each unique value in the - // selected sample metadata field contain this tip. (This is - // computed the same way as the information shown in the - // selected node menu's "Sample Presence Information" section.) - var spi = this.computeTipSamplePresence(name, [ - layer.colorBySMField, - ])[layer.colorBySMField]; - - // Sum the values of the sample presence information, getting - // us the total number of samples containing this tip. - // JS doesn't have a built-in sum() function, so I couldn't - // think of a better way to do this. Taken from - // https://underscorejs.org/#reduce. - var totalSampleCt = _.reduce( - _.values(spi), - function (a, b) { - return a + b; - }, - 0 - ); var prevSectionMaxX = prevLayerMaxX; for (var v = 0; v < sortedUniqueValues.length; v++) { var smVal = sortedUniqueValues[v]; - var ct = spi[smVal]; - if (ct > 0) { + var freq = freqs[smVal]; + if (freq > 0) { var sectionColor = sm2color[smVal]; // Assign each unique sample metadata value a length // proportional to its, well, proportion within the sample // presence information for this tip. - var barSectionLen = - layer.lengthSM * (ct / totalSampleCt); + var barSectionLen = layer.lengthSM * freq; var thisSectionMaxX = prevSectionMaxX + barSectionLen; var y = this.getY(node); var ty = y + halfyrscf; From c0e023c575659ae1e8f8b28bc555561a127ba764 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 14:53:02 -0700 Subject: [PATCH 02/26] STY: prettier --- empress/support_files/js/biom-table.js | 8 +++++--- empress/support_files/js/empress.js | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index e5cbaeb5a..18a40df30 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -199,8 +199,7 @@ define(["underscore", "util"], function (_, util) { return _.indexOf(arr, num, true) >= 0; }; - - BIOMTable.prototype._getObsCountAndTotalBy = function(colIdx, fIdx) { + BIOMTable.prototype._getObsCountAndTotalBy = function (colIdx, fIdx) { var scope = this; var countMap = {}; var containingSampleCount = 0; @@ -226,7 +225,10 @@ define(["underscore", "util"], function (_, util) { containingSampleCount++; } }); - return {countMap: countMap, containingSampleCount: containingSampleCount}; + return { + countMap: countMap, + containingSampleCount: containingSampleCount, + }; }; /** diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index fd95c8db7..65f564d1c 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1146,7 +1146,8 @@ define([ var node = this._treeData[i]; var name = this.getNodeInfo(node, "name"); var freqs = this._biom.getObsFrequencyBy( - layer.colorBySMField, name + layer.colorBySMField, + name ); // Don't draw bars for tips that aren't in the BIOM table // (Note that this is only for the sample metadata barplots -- From 5a5479b8d5fee39ebbfaad2f726ff00eb9eca61e Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 15:44:10 -0700 Subject: [PATCH 03/26] MNT: ++ instead of += 1 consistently in biom table --- empress/support_files/js/biom-table.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 18a40df30..11a4f8104 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -220,7 +220,7 @@ define(["underscore", "util"], function (_, util) { // This sample actually contains the feature! cVal = scope._sm[sIdx][colIdx]; // Update our output Object's count info accordingly. - countMap[cVal] += 1; + countMap[cVal]++; // And update the count of samples containing this feature containingSampleCount++; } @@ -483,7 +483,7 @@ define(["underscore", "util"], function (_, util) { var sampleIdx = scope._getSampleIndexFromID(sID); var cVal = scope._sm[sampleIdx][colIdx]; if (_.has(valueToSampleCount, cVal)) { - valueToSampleCount[cVal] += 1; + valueToSampleCount[cVal]++; } else { valueToSampleCount[cVal] = 1; } From 13e02723b5ffea64f185d19b1898449ea86212aa Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 15:57:56 -0700 Subject: [PATCH 04/26] MNT: document+rename getObsCountsAndTotalBy() --- empress/support_files/js/biom-table.js | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 11a4f8104..cebac1c47 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -199,7 +199,26 @@ define(["underscore", "util"], function (_, util) { return _.indexOf(arr, num, true) >= 0; }; - BIOMTable.prototype._getObsCountAndTotalBy = function (colIdx, fIdx) { + /** + * Returns information about the amounts of samples per unique value in a + * metadata column containing a given feature. + * + * Note that this function is designed for internal use, so it doesn't do + * explicit validation -- it assumes that the input indices are sane. + * + * @param {String} colIdx Sample metadata column index + * @param {String} fIdx Feature index + * + * @return {Object} An object with two keys: + * -countMap: maps to an Object mapping the unique values + * in the sample metadata column to the number of samples + * with each value containing the specified feature. + * -containingSampleCount: maps to a Number describing the + * total number of samples containing the specified + * feature. Equivalently, this is the sum of the values + * of countMap. + */ + BIOMTable.prototype._getObsCountsAndTotalBy = function (colIdx, fIdx) { var scope = this; var countMap = {}; var containingSampleCount = 0; @@ -337,7 +356,7 @@ define(["underscore", "util"], function (_, util) { var scope = this; var colIdx = this._getSampleMetadataColIndex(col); var fIdx = this._getFeatureIndexFromID(fID); - return this._getObsCountAndTotalBy(colIdx, fIdx).countMap; + return this._getObsCountsAndTotalBy(colIdx, fIdx).countMap; }; /** @@ -521,7 +540,7 @@ define(["underscore", "util"], function (_, util) { return {}; } var colIdx = this._getSampleMetadataColIndex(col); - var countInfo = this._getObsCountAndTotalBy(colIdx, fIdx); + var countInfo = this._getObsCountsAndTotalBy(colIdx, fIdx); var countMap = countInfo.countMap; var containingSampleCount = countInfo.containingSampleCount; return _.mapObject(countMap, function (count) { From 898a0b3110595ebae0b9562a8f583143850e083f Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 16:43:45 -0700 Subject: [PATCH 05/26] MNT: add sped up? freq map code need to like document and test and style and make it less horrendous buuut i think this might be decently faster, will hafta test --- empress/support_files/js/biom-table.js | 55 ++++++++++++++++++++++++++ empress/support_files/js/empress.js | 8 ++-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index cebac1c47..0b273ae24 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -548,5 +548,60 @@ define(["underscore", "util"], function (_, util) { }); }; + BIOMTable.prototype.getFrequencyMap = function (col) { + var scope = this; + var colIdx = _.indexOf(this._smCols, col); + // note could make these faster by doing full on 2d arrays + var fIdx2counts = {}; + var fIdx2sampleCt = {}; + var containingSampleCount; + var cVal; + + // Find unique (sorted) values in this sample metadata column + var uniqueSMVals = this.getUniqueSampleValues(col); + var numUniqueSMVals = uniqueSMVals.length; + var smVal2Idx = {}; + _.each(uniqueSMVals, function (smVal, c) { + smVal2Idx[smVal] = c; + }); + // Assign each feature an empty frequency array, soon to be filled in + var i, emptyCounts; + _.each(this._fIDs, function (fID, fIdx) { + emptyCounts = []; + for (i = 0; i < numUniqueSMVals; i++) { + emptyCounts.push(0); + } + fIdx2counts[fIdx] = emptyCounts; + fIdx2sampleCt[fIdx] = 0; + }); + // Iterate through each sample of the BIOM table, storing group counts + // and total sample counts for each feature + var cValIdx; + _.each(this._tbl, function (presentFeatureIndices, sIdx) { + // Figure out what metadata value this sample has at the column. + cVal = scope._sm[sIdx][colIdx]; + cValIdx = smVal2Idx[cVal]; + // Increment group counts for each feature present in this sample + _.each(presentFeatureIndices, function (fIdx) { + fIdx2counts[fIdx][cValIdx]++; + fIdx2sampleCt[fIdx]++; + }); + }); + // Convert counts to frequencies + var feature2freqs = {}; + var freqs, fIdx, totalSampleCount; + _.each(this._fIDs, function (fID, fIdx) { + totalSampleCount = fIdx2sampleCt[fIdx]; + feature2freqs[fID] = {}; + _.each(fIdx2counts[fIdx], function (count, smValIdx) { + if (count > 0) { + feature2freqs[fID][uniqueSMVals[smValIdx]] = count / totalSampleCount; + } + }); + }); + console.log(feature2freqs); + return feature2freqs; + }; + return BIOMTable; }); diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index ddf889e1c..ab1cbc7e5 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1139,21 +1139,19 @@ define([ ); var colorer = new Colorer(layer.colorBySMColorMap, sortedUniqueValues); var sm2color = colorer.getMapRGB(); + var feature2freqs = this._biom.getFrequencyMap(layer.colorBySMField); // Bar thickness var halfyrscf = this._yrscf / 2; for (i = 1; i < this._tree.size; i++) { if (this._tree.isleaf(this._tree.postorderselect(i))) { var node = this._treeData[i]; var name = this.getNodeInfo(node, "name"); - var freqs = this._biom.getObsFrequencyBy( - layer.colorBySMField, - name - ); + var freqs = feature2freqs[name]; // Don't draw bars for tips that aren't in the BIOM table // (Note that this is only for the sample metadata barplots -- // these tips could still ostensibly have associated // feature metadata) - if (_.isEmpty(freqs)) { + if (_.isUndefined(freqs)) { continue; } var prevSectionMaxX = prevLayerMaxX; From 32174c0d11f14a17f7c5c6742d8e2d3b7b398878 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 16:44:44 -0700 Subject: [PATCH 06/26] STY: prettify --- empress/support_files/js/biom-table.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 0b273ae24..5d660bd30 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -595,7 +595,8 @@ define(["underscore", "util"], function (_, util) { feature2freqs[fID] = {}; _.each(fIdx2counts[fIdx], function (count, smValIdx) { if (count > 0) { - feature2freqs[fID][uniqueSMVals[smValIdx]] = count / totalSampleCount; + feature2freqs[fID][uniqueSMVals[smValIdx]] = + count / totalSampleCount; } }); }); From d10c4ed534ad1ae8de2e142abc7e07037334ea14 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 18:24:05 -0700 Subject: [PATCH 07/26] MNT: use 2D arrays internally for freq map comp should make sm barplots even faster >:) --- empress/support_files/js/biom-table.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 5d660bd30..84a7a69a5 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -551,9 +551,8 @@ define(["underscore", "util"], function (_, util) { BIOMTable.prototype.getFrequencyMap = function (col) { var scope = this; var colIdx = _.indexOf(this._smCols, col); - // note could make these faster by doing full on 2d arrays - var fIdx2counts = {}; - var fIdx2sampleCt = {}; + var fIdx2counts = []; + var fIdx2sampleCt = []; var containingSampleCount; var cVal; @@ -571,8 +570,8 @@ define(["underscore", "util"], function (_, util) { for (i = 0; i < numUniqueSMVals; i++) { emptyCounts.push(0); } - fIdx2counts[fIdx] = emptyCounts; - fIdx2sampleCt[fIdx] = 0; + fIdx2counts.push(emptyCounts); + fIdx2sampleCt.push(0); }); // Iterate through each sample of the BIOM table, storing group counts // and total sample counts for each feature @@ -588,20 +587,21 @@ define(["underscore", "util"], function (_, util) { }); }); // Convert counts to frequencies - var feature2freqs = {}; - var freqs, fIdx, totalSampleCount; + // Also, return an Object where the keys are feature IDs, rather than + // an Array using feature indices + var fID2freqs = {}; + var totalSampleCount; _.each(this._fIDs, function (fID, fIdx) { totalSampleCount = fIdx2sampleCt[fIdx]; - feature2freqs[fID] = {}; + fID2freqs[fID] = {}; _.each(fIdx2counts[fIdx], function (count, smValIdx) { if (count > 0) { - feature2freqs[fID][uniqueSMVals[smValIdx]] = + fID2freqs[fID][uniqueSMVals[smValIdx]] = count / totalSampleCount; } }); }); - console.log(feature2freqs); - return feature2freqs; + return fID2freqs; }; return BIOMTable; From b59ba6ec818b66d001a346cfd6825a354fd59c47 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 18:39:18 -0700 Subject: [PATCH 08/26] MNT: remove now-unused code i added earlier --- empress/support_files/js/biom-table.js | 114 +++++-------------------- 1 file changed, 23 insertions(+), 91 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 84a7a69a5..800beff6d 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -199,57 +199,6 @@ define(["underscore", "util"], function (_, util) { return _.indexOf(arr, num, true) >= 0; }; - /** - * Returns information about the amounts of samples per unique value in a - * metadata column containing a given feature. - * - * Note that this function is designed for internal use, so it doesn't do - * explicit validation -- it assumes that the input indices are sane. - * - * @param {String} colIdx Sample metadata column index - * @param {String} fIdx Feature index - * - * @return {Object} An object with two keys: - * -countMap: maps to an Object mapping the unique values - * in the sample metadata column to the number of samples - * with each value containing the specified feature. - * -containingSampleCount: maps to a Number describing the - * total number of samples containing the specified - * feature. Equivalently, this is the sum of the values - * of countMap. - */ - BIOMTable.prototype._getObsCountsAndTotalBy = function (colIdx, fIdx) { - var scope = this; - var countMap = {}; - var containingSampleCount = 0; - var cVal; - // Iterate through each sample of the BIOM table - _.each(this._tbl, function (presentFeatureIndices, sIdx) { - // Figure out what metadata value this sample has at the column. - // If we haven't recorded it as a key in our output Object yet, do - // so and set it to default to 0. - cVal = scope._sm[sIdx][colIdx]; - if (!_.has(countMap, cVal)) { - countMap[cVal] = 0; - } - // Now, we check if we need to update the cVal entry by 1 - // (indicating that one more sample with cVal contains the - // specified feature). - if (scope._sortedArrayHasNumber(presentFeatureIndices, fIdx)) { - // This sample actually contains the feature! - cVal = scope._sm[sIdx][colIdx]; - // Update our output Object's count info accordingly. - countMap[cVal]++; - // And update the count of samples containing this feature - containingSampleCount++; - } - }); - return { - countMap: countMap, - containingSampleCount: containingSampleCount, - }; - }; - /** * Return the feature IDs shared by the BIOM table and input array * @@ -347,7 +296,7 @@ define(["underscore", "util"], function (_, util) { * @param {String} col Sample metadata column * @param {String} fID Feature (aka observation) ID * - * @return {Object} countMap + * @return {Object} valueToCountOfSampleWithObs * * @throws {Error} If the sample metadata column is unrecognized. * If the feature ID is unrecognized. @@ -356,7 +305,28 @@ define(["underscore", "util"], function (_, util) { var scope = this; var colIdx = this._getSampleMetadataColIndex(col); var fIdx = this._getFeatureIndexFromID(fID); - return this._getObsCountsAndTotalBy(colIdx, fIdx).countMap; + var valueToCountOfSampleWithObs = {}; + var cVal, fIdxPos; + // Iterate through each sample of the BIOM table + _.each(this._tbl, function (presentFeatureIndices, sIdx) { + // Figure out what metadata value this sample has at the column. + // If we haven't recorded it as a key in our output Object yet, do + // so and set it to default to 0. + cVal = scope._sm[sIdx][colIdx]; + if (!_.has(valueToCountOfSampleWithObs, cVal)) { + valueToCountOfSampleWithObs[cVal] = 0; + } + // Now, we check if we need to update the cVal entry by 1 + // (indicating that one more sample with cVal contains the + // specified feature). + if (scope._sortedArrayHasNumber(presentFeatureIndices, fIdx)) { + // This sample actually contains the feature! + cVal = scope._sm[sIdx][colIdx]; + // Update our output Object's count info accordingly. + valueToCountOfSampleWithObs[cVal]++; + } + }); + return valueToCountOfSampleWithObs; }; /** @@ -510,44 +480,6 @@ define(["underscore", "util"], function (_, util) { return valueToSampleCount; }; - /** - * Returns a object that maps the unique values of a sample metadata column - * to the *proportion* of samples with that metadata value containing a - * given feature. - * - * Besides the fact that this returns proportions (e.g. - * {a: 0.25, b: 0.5, c: 0.25} rather than {a: 1, b: 2, c: 1}), this - * function differs from getObsCountsBy() in that it doesn't raise an error - * if the input feature ID is unrecognized -- rather, it'll just return - * null in that case. (This is because this function is designed to be - * used with sample metadata tip barplots, and it's understood that some - * of the tips in the tree may not be present in the table.) - * - * @param {String} col Sample metadata column - * @param {String} fID Feature (aka observation) ID - * - * @return {Object} valueToFreq Maps unique values in the sample metadata - * column to their proportion of the total - * number of samples containing the specified - * feature. If no samples contain the - * specified feature, this will return {}. - * - * @throws {Error} If the sample metadata column is unrecognized. - */ - BIOMTable.prototype.getObsFrequencyBy = function (col, fID) { - var fIdx = this._fID2Idx[fID]; - if (_.isUndefined(fIdx)) { - return {}; - } - var colIdx = this._getSampleMetadataColIndex(col); - var countInfo = this._getObsCountsAndTotalBy(colIdx, fIdx); - var countMap = countInfo.countMap; - var containingSampleCount = countInfo.containingSampleCount; - return _.mapObject(countMap, function (count) { - return count / containingSampleCount; - }); - }; - BIOMTable.prototype.getFrequencyMap = function (col) { var scope = this; var colIdx = _.indexOf(this._smCols, col); From e5416863cf2980329a7216cf03db6da4cf4a761d Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 18:39:59 -0700 Subject: [PATCH 09/26] STY: prettify --- empress/support_files/js/biom-table.js | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 800beff6d..864f172f5 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -306,26 +306,26 @@ define(["underscore", "util"], function (_, util) { var colIdx = this._getSampleMetadataColIndex(col); var fIdx = this._getFeatureIndexFromID(fID); var valueToCountOfSampleWithObs = {}; - var cVal, fIdxPos; - // Iterate through each sample of the BIOM table - _.each(this._tbl, function (presentFeatureIndices, sIdx) { - // Figure out what metadata value this sample has at the column. - // If we haven't recorded it as a key in our output Object yet, do - // so and set it to default to 0. - cVal = scope._sm[sIdx][colIdx]; - if (!_.has(valueToCountOfSampleWithObs, cVal)) { - valueToCountOfSampleWithObs[cVal] = 0; - } - // Now, we check if we need to update the cVal entry by 1 - // (indicating that one more sample with cVal contains the - // specified feature). - if (scope._sortedArrayHasNumber(presentFeatureIndices, fIdx)) { - // This sample actually contains the feature! - cVal = scope._sm[sIdx][colIdx]; - // Update our output Object's count info accordingly. - valueToCountOfSampleWithObs[cVal]++; - } - }); + var cVal, fIdxPos; + // Iterate through each sample of the BIOM table + _.each(this._tbl, function (presentFeatureIndices, sIdx) { + // Figure out what metadata value this sample has at the column. + // If we haven't recorded it as a key in our output Object yet, do + // so and set it to default to 0. + cVal = scope._sm[sIdx][colIdx]; + if (!_.has(valueToCountOfSampleWithObs, cVal)) { + valueToCountOfSampleWithObs[cVal] = 0; + } + // Now, we check if we need to update the cVal entry by 1 + // (indicating that one more sample with cVal contains the + // specified feature). + if (scope._sortedArrayHasNumber(presentFeatureIndices, fIdx)) { + // This sample actually contains the feature! + cVal = scope._sm[sIdx][colIdx]; + // Update our output Object's count info accordingly. + valueToCountOfSampleWithObs[cVal]++; + } + }); return valueToCountOfSampleWithObs; }; From c1440e6d0066fcad3f683b689c874e8fe120323e Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 19:04:17 -0700 Subject: [PATCH 10/26] DOC: improve documentation for freq map #298 --- empress/support_files/js/biom-table.js | 49 +++++++++++++++++++++----- empress/support_files/js/empress.js | 1 + 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 864f172f5..6a408ea3d 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -480,22 +480,52 @@ define(["underscore", "util"], function (_, util) { return valueToSampleCount; }; + /** + * Maps each feature ID in the table to a "frequencies" Object for a sample + * metadata field. + * + * Each "frequencies" Object contains information on the number of samples + * from each unique sample metadata value that contain the feature ID in + * question. Keys in these objects are unique sample metadata values, and + * values in these objects are the proportion of samples containing the + * feature that have this unique value. Only frequency information for + * unique values where at least 1 sample with this value contains the + * feature is included in a given "frequencies" Object. + * + * This function is designed to be reasonably fast, which is a big part of + * why this works on the order of "each feature ID in the table" rather + * than on a feature-per-feature basis. (The reason for this design is that + * this is used for generating sample metadata barplots, and that was + * previously very slow on large trees: see issue #298 on GitHub. Thanks + * to Yoshiki for discussing this with me.) + * + * @param {String} col Sample metadata column + * + * @return {Object} fID2freqs + * + * @throws {Error} If the sample metadata column is unrecognized. + */ BIOMTable.prototype.getFrequencyMap = function (col) { var scope = this; - var colIdx = _.indexOf(this._smCols, col); + var colIdx = this._getSampleMetadataColIndex(col); var fIdx2counts = []; var fIdx2sampleCt = []; - var containingSampleCount; - var cVal; + var containingSampleCount, cVal, cValIdx; - // Find unique (sorted) values in this sample metadata column + // Find unique (sorted) values in this sample metadata column; map + // sample metadata values to a consistent index. (Using an index to + // store this data means we can store the sample metadata values for + // each feature in an Array rather than in an Object for now.) var uniqueSMVals = this.getUniqueSampleValues(col); var numUniqueSMVals = uniqueSMVals.length; var smVal2Idx = {}; _.each(uniqueSMVals, function (smVal, c) { smVal2Idx[smVal] = c; }); - // Assign each feature an empty frequency array, soon to be filled in + + // Assign each feature an empty counts array with all 0s. Also set + // things up so we can keep track of the total number of samples + // containing each feature easily. var i, emptyCounts; _.each(this._fIDs, function (fID, fIdx) { emptyCounts = []; @@ -505,19 +535,22 @@ define(["underscore", "util"], function (_, util) { fIdx2counts.push(emptyCounts); fIdx2sampleCt.push(0); }); - // Iterate through each sample of the BIOM table, storing group counts - // and total sample counts for each feature + + // Iterate through each sample of the BIOM table, storing unique s.m. + // value counts and total sample counts for each feature var cValIdx; _.each(this._tbl, function (presentFeatureIndices, sIdx) { // Figure out what metadata value this sample has at the column. cVal = scope._sm[sIdx][colIdx]; cValIdx = smVal2Idx[cVal]; - // Increment group counts for each feature present in this sample + // Increment s.m. value counts for each feature present in this + // sample _.each(presentFeatureIndices, function (fIdx) { fIdx2counts[fIdx][cValIdx]++; fIdx2sampleCt[fIdx]++; }); }); + // Convert counts to frequencies // Also, return an Object where the keys are feature IDs, rather than // an Array using feature indices diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index ab1cbc7e5..86eeebce4 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1139,6 +1139,7 @@ define([ ); var colorer = new Colorer(layer.colorBySMColorMap, sortedUniqueValues); var sm2color = colorer.getMapRGB(); + // Do most of the hard work: compute the frequencies for each tip var feature2freqs = this._biom.getFrequencyMap(layer.colorBySMField); // Bar thickness var halfyrscf = this._yrscf / 2; From 3cb4771f3bd05bab48e6bd3f936894dd1ff29af8 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 19:19:47 -0700 Subject: [PATCH 11/26] DOC: add extra context to sm barplot drawing func --- empress/support_files/js/empress.js | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index 86eeebce4..f01d2e8f3 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1156,10 +1156,26 @@ define([ continue; } var prevSectionMaxX = prevLayerMaxX; + // NOTE: currently we iterate through all of sortedUniqueValues + // once for every tip in the table, detecting unique values + // where no samples contain this tip using the + // !_.isUndefined() check. The reason we do things this way is + // that we want to ensure that unique values are processed in + // the same order for every tip. + // + // Ideally we'd skip having to do this full iteration, though, + // and only look at the unique values containing this tip from + // the start (saving time). This might require refactoring the + // output of BiomTable.getFrequencyMap(), though. for (var v = 0; v < sortedUniqueValues.length; v++) { var smVal = sortedUniqueValues[v]; var freq = freqs[smVal]; - if (freq > 0) { + // Ignore sample metadata values where no sample with this + // value contains this tip. We can detect this using + // !_.isUndefined() because freqs should only include + // entries for metadata values where this feature is + // present in at least one sample with that value. + if (!_.isUndefined(freq)) { var sectionColor = sm2color[smVal]; // Assign each unique sample metadata value a length // proportional to its, well, proportion within the sample From 65dfcc2ab4715367a0bbf5e8873d01158ccf0889 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Thu, 6 Aug 2020 19:41:28 -0700 Subject: [PATCH 12/26] STY: fix redundant variable declaration --- empress/support_files/js/biom-table.js | 1 - 1 file changed, 1 deletion(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 6a408ea3d..4ac9a23e6 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -538,7 +538,6 @@ define(["underscore", "util"], function (_, util) { // Iterate through each sample of the BIOM table, storing unique s.m. // value counts and total sample counts for each feature - var cValIdx; _.each(this._tbl, function (presentFeatureIndices, sIdx) { // Figure out what metadata value this sample has at the column. cVal = scope._sm[sIdx][colIdx]; From f08638d6febd9e617a7f622c538e4b37f79d0368 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 7 Aug 2020 14:11:37 -0700 Subject: [PATCH 13/26] TST: test getFrequencyMap() - close #298 --- tests/test-biom-table.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test-biom-table.js b/tests/test-biom-table.js index 1da2e65f8..a3b094ea8 100644 --- a/tests/test-biom-table.js +++ b/tests/test-biom-table.js @@ -690,5 +690,19 @@ require(["jquery", "underscore", "BiomTable"], function ($, _, BiomTable) { "Test: error thrown if unrecognized metadata col passed" ); }); + test("Test getFrequencyMap", function () { + deepEqual(this.biomTable.getFrequencyMap("f1"), { + o1: { a: 1 }, + o2: { a: 0.5, c: 0.5 }, + o3: { a: 0.5, c: 0.5 }, + o4: { a: 0.5, b: 0.5 }, + o5: { a: 2 / 3, b: 1 / 3 }, + o6: { a: 0.5, c: 0.5 }, + o7: { a: 1 }, + o8: { b: 1 }, + o9: { a: 1 }, + o10: { a: 1 }, + }); + }); }); }); From e91538aaa43a7ddc12f1b4f16756e86f160c726e Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 7 Aug 2020 14:11:52 -0700 Subject: [PATCH 14/26] DOC: document getfrequencymap output a bit more --- empress/support_files/js/biom-table.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 4ac9a23e6..125cd9c08 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -551,8 +551,9 @@ define(["underscore", "util"], function (_, util) { }); // Convert counts to frequencies - // Also, return an Object where the keys are feature IDs, rather than - // an Array using feature indices + // Also, return an Object where the keys are feature IDs pointing to + // other Objects where the keys are sample metadata values, rather than + // a 2D array (which is how fIdx2counts has been stored) var fID2freqs = {}; var totalSampleCount; _.each(this._fIDs, function (fID, fIdx) { From 7083d6715b9b20f085182ee0db3ea531d7ac6ca7 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 7 Aug 2020 17:41:00 -0700 Subject: [PATCH 15/26] grammar --- empress/support_files/js/empress.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index f01d2e8f3..5245ca2c5 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -682,7 +682,7 @@ define([ this.getNodeInfo(rNode, "highestchildyr") ); } - // iterate throught the tree in postorder, skip root + // iterate through the tree in postorder, skip root for (var i = 1; i < tree.size; i++) { // name of current node var nodeInd = i; From 373583263532adc7e760e0d8e34960c6e5d217e5 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 7 Aug 2020 18:01:58 -0700 Subject: [PATCH 16/26] Add extra freqmap test --- tests/test-biom-table.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test-biom-table.js b/tests/test-biom-table.js index a3b094ea8..5d7ba392c 100644 --- a/tests/test-biom-table.js +++ b/tests/test-biom-table.js @@ -703,6 +703,18 @@ require(["jquery", "underscore", "BiomTable"], function ($, _, BiomTable) { o9: { a: 1 }, o10: { a: 1 }, }); + deepEqual(this.biomTable.getFrequencyMap("f4"), { + o1: { 4: 0.5, 3: 0.5 }, + o2: { 4: 0.5, 1: 0.5 }, + o3: { 3: 0.5, 1: 0.5 }, + o4: { 4: 0.5, 2: 0.5 }, + o5: { 4: 1 / 3, 3: 1 / 3, 5: 1 / 3 }, + o6: { 3: 0.5, 1: 0.5 }, + o7: { 4: 0.5, 3: 0.5 }, + o8: { 2: 0.5, 5: 0.5 }, + o9: { 3: 1 }, + o10: { 4: 1 }, + }); }); }); }); From 1ece062459a81f744c5249ef0c4bd3b250090263 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 7 Aug 2020 18:16:05 -0700 Subject: [PATCH 17/26] DOC: improve uniqueVal docs in sm barplot drawing --- empress/support_files/js/empress.js | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index 5245ca2c5..f71016fa0 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1157,11 +1157,14 @@ define([ } var prevSectionMaxX = prevLayerMaxX; // NOTE: currently we iterate through all of sortedUniqueValues - // once for every tip in the table, detecting unique values - // where no samples contain this tip using the - // !_.isUndefined() check. The reason we do things this way is - // that we want to ensure that unique values are processed in - // the same order for every tip. + // once for every tip in the table, detecting and skipping + // unique values where no samples contain this tip. + // The reason we do things this way, rather than just + // iterating directly over the keys of this tip's Object within + // the frequency map, is that we want to ensure that unique + // values are processed in the same order for every tip (so for + // a "body site" barplot you'd always see e.g. gut, left palm, + // right palm, tongue in that order). // // Ideally we'd skip having to do this full iteration, though, // and only look at the unique values containing this tip from From f685f9981bf8c815c009b809bf12aee7b86dcfe3 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 7 Aug 2020 18:16:30 -0700 Subject: [PATCH 18/26] TST: expand getFrequencyMap() tests --- tests/test-biom-table.js | 108 ++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 24 deletions(-) diff --git a/tests/test-biom-table.js b/tests/test-biom-table.js index 5d7ba392c..f76837d03 100644 --- a/tests/test-biom-table.js +++ b/tests/test-biom-table.js @@ -691,30 +691,90 @@ require(["jquery", "underscore", "BiomTable"], function ($, _, BiomTable) { ); }); test("Test getFrequencyMap", function () { - deepEqual(this.biomTable.getFrequencyMap("f1"), { - o1: { a: 1 }, - o2: { a: 0.5, c: 0.5 }, - o3: { a: 0.5, c: 0.5 }, - o4: { a: 0.5, b: 0.5 }, - o5: { a: 2 / 3, b: 1 / 3 }, - o6: { a: 0.5, c: 0.5 }, - o7: { a: 1 }, - o8: { b: 1 }, - o9: { a: 1 }, - o10: { a: 1 }, - }); - deepEqual(this.biomTable.getFrequencyMap("f4"), { - o1: { 4: 0.5, 3: 0.5 }, - o2: { 4: 0.5, 1: 0.5 }, - o3: { 3: 0.5, 1: 0.5 }, - o4: { 4: 0.5, 2: 0.5 }, - o5: { 4: 1 / 3, 3: 1 / 3, 5: 1 / 3 }, - o6: { 3: 0.5, 1: 0.5 }, - o7: { 4: 0.5, 3: 0.5 }, - o8: { 2: 0.5, 5: 0.5 }, - o9: { 3: 1 }, - o10: { 4: 1 }, - }); + deepEqual( + this.biomTable.getFrequencyMap("f1"), + { + o1: { a: 1 }, + o2: { a: 0.5, c: 0.5 }, + o3: { a: 0.5, c: 0.5 }, + o4: { a: 0.5, b: 0.5 }, + o5: { a: 2 / 3, b: 1 / 3 }, + o6: { a: 0.5, c: 0.5 }, + o7: { a: 1 }, + o8: { b: 1 }, + o9: { a: 1 }, + o10: { a: 1 }, + }, + "Test frequency map for field f1" + ); + deepEqual( + this.biomTable.getFrequencyMap("f4"), + { + o1: { 4: 0.5, 3: 0.5 }, + o2: { 4: 0.5, 1: 0.5 }, + o3: { 3: 0.5, 1: 0.5 }, + o4: { 4: 0.5, 2: 0.5 }, + o5: { 4: 1 / 3, 3: 1 / 3, 5: 1 / 3 }, + o6: { 3: 0.5, 1: 0.5 }, + o7: { 4: 0.5, 3: 0.5 }, + o8: { 2: 0.5, 5: 0.5 }, + o9: { 3: 1 }, + o10: { 4: 1 }, + }, + "Test frequency map for field f4" + ); + + var smolTable = new BiomTable( + ["s1", "s2", "s3"], + ["o1", "o2", "o3", "o4"], + { s1: 0, s2: 1, s3: 2 }, + { o1: 0, o2: 1, o3: 2, o4: 3 }, + [ + [0, 1], + [2, 3], + [0, 3], + ], + ["f1"], + [["m"], ["m"], ["m"]] + ); + deepEqual( + smolTable.getFrequencyMap("f1"), + { + o1: { m: 1 }, + o2: { m: 1 }, + o3: { m: 1 }, + o4: { m: 1 }, + }, + "Test frequency map when all features unique to same group" + ); + + var funkyTable = new BiomTable( + ["s1", "s2", "s3"], + ["o1", "o2", "o3"], + { s1: 0, s2: 1, s3: 2 }, + { o1: 0, o2: 1, o3: 2 }, + [[0], [1], [2]], + ["f1"], + [["x"], ["y"], ["z"]] + ); + deepEqual( + funkyTable.getFrequencyMap("f1"), + { + o1: { x: 1 }, + o2: { y: 1 }, + o3: { z: 1 }, + }, + "Test frequency map when all features unique to different group" + ); + + var scope = this; + throws( + function () { + scope.biomTable.getFrequencyMap("badfield"); + }, + /Sample metadata column "badfield" not in BIOM table./, + "Test error thrown if unrecognized metadata col passed" + ); }); }); }); From 7890a17d47a9fd261802e36d46276b7000d49dc7 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 11:59:00 -0700 Subject: [PATCH 19/26] Update empress/support_files/js/biom-table.js Co-authored-by: kwcantrell --- empress/support_files/js/biom-table.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 125cd9c08..e14db85b6 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -508,8 +508,8 @@ define(["underscore", "util"], function (_, util) { BIOMTable.prototype.getFrequencyMap = function (col) { var scope = this; var colIdx = this._getSampleMetadataColIndex(col); - var fIdx2counts = []; - var fIdx2sampleCt = []; + var fIdx2Counts = []; + var fIdx2SampleCt = []; var containingSampleCount, cVal, cValIdx; // Find unique (sorted) values in this sample metadata column; map From 2b8626d4ec7b7578c432436cc0bc4c51a5928d19 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 11:59:51 -0700 Subject: [PATCH 20/26] MNT: rename remaining fIdx2... vars --- empress/support_files/js/biom-table.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index e14db85b6..f4c972fe2 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -532,8 +532,8 @@ define(["underscore", "util"], function (_, util) { for (i = 0; i < numUniqueSMVals; i++) { emptyCounts.push(0); } - fIdx2counts.push(emptyCounts); - fIdx2sampleCt.push(0); + fIdx2Counts.push(emptyCounts); + fIdx2SampleCt.push(0); }); // Iterate through each sample of the BIOM table, storing unique s.m. @@ -545,21 +545,21 @@ define(["underscore", "util"], function (_, util) { // Increment s.m. value counts for each feature present in this // sample _.each(presentFeatureIndices, function (fIdx) { - fIdx2counts[fIdx][cValIdx]++; - fIdx2sampleCt[fIdx]++; + fIdx2Counts[fIdx][cValIdx]++; + fIdx2SampleCt[fIdx]++; }); }); // Convert counts to frequencies // Also, return an Object where the keys are feature IDs pointing to // other Objects where the keys are sample metadata values, rather than - // a 2D array (which is how fIdx2counts has been stored) + // a 2D array (which is how fIdx2Counts has been stored) var fID2freqs = {}; var totalSampleCount; _.each(this._fIDs, function (fID, fIdx) { - totalSampleCount = fIdx2sampleCt[fIdx]; + totalSampleCount = fIdx2SampleCt[fIdx]; fID2freqs[fID] = {}; - _.each(fIdx2counts[fIdx], function (count, smValIdx) { + _.each(fIdx2Counts[fIdx], function (count, smValIdx) { if (count > 0) { fID2freqs[fID][uniqueSMVals[smValIdx]] = count / totalSampleCount; From 5fa827b88159470c10928c4a9748f4c8d7795c21 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 12:00:31 -0700 Subject: [PATCH 21/26] MNT: fID2freqs -> fID2Freqs --- empress/support_files/js/biom-table.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index f4c972fe2..842564fee 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -501,7 +501,7 @@ define(["underscore", "util"], function (_, util) { * * @param {String} col Sample metadata column * - * @return {Object} fID2freqs + * @return {Object} fID2Freqs * * @throws {Error} If the sample metadata column is unrecognized. */ @@ -554,19 +554,19 @@ define(["underscore", "util"], function (_, util) { // Also, return an Object where the keys are feature IDs pointing to // other Objects where the keys are sample metadata values, rather than // a 2D array (which is how fIdx2Counts has been stored) - var fID2freqs = {}; + var fID2Freqs = {}; var totalSampleCount; _.each(this._fIDs, function (fID, fIdx) { totalSampleCount = fIdx2SampleCt[fIdx]; - fID2freqs[fID] = {}; + fID2Freqs[fID] = {}; _.each(fIdx2Counts[fIdx], function (count, smValIdx) { if (count > 0) { - fID2freqs[fID][uniqueSMVals[smValIdx]] = + fID2Freqs[fID][uniqueSMVals[smValIdx]] = count / totalSampleCount; } }); }); - return fID2freqs; + return fID2Freqs; }; return BIOMTable; From dc6476b3711e7a3d64d9120fdee6337e589df53d Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 12:23:33 -0700 Subject: [PATCH 22/26] DOC: more explicit about smb proportions in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 24bd76499..160bb7572 100644 --- a/README.md +++ b/README.md @@ -171,17 +171,17 @@ We have a new layer to work with! One thing we might be interested in doing is seeing what types of samples contain each tip. This is possible using the _Sample Metadata Coloring_ functionality described above, but this only lets us see information about tips that are unique to a given sample metadata category -- and in practice many tips are often shared between multiple metadata categories, complicating things. -Let's revisit our analysis above of which tips are unique to which body sites in this dataset -- now, we'll instead be asking the related question of "which tips are most common in the body sites in this dataset?" To investigate this, we'll use our new barplot layer to show this information. +Let's revisit our analysis above of which tips are unique to which body sites in this dataset -- now, we'll instead be asking the related question of "which tips are most frequently seen in which body sites in this dataset?" To investigate this, we'll use our new barplot layer to show this information. In order to do this, we'll need to change our new layer ("Layer 2") from a feature metadata layer to a sample metadata layer. You can do this by clicking on the _Sample Metadata_ button underneath the text "Layer 2". The controls available for this barplot layer should change; in order to show sample presence information for body sites, change the _Show sample info for..._ drop-down menu to `body-site`. Try clicking _Update_ to see what our new Layer 2 looks like. ![empress barplots: class coloring layer 1, bodysite layer 2, and tree phylum coloring](docs/moving-pictures/img/empress_barplots_6.png) -Layer 2 now shows a stacked barplot for each tip. The colors used in this barplot are the same as with sample metadata coloring -- red corresponds to gut samples, green corresponds to tongue samples, etc. When we zoom in, we can see things in detail (you may want to re-color the tree using the _Sample Metadata Coloring_ controls and `body-site` so you can see what these colors mean): +Layer 2 now shows a stacked barplot for each tip, based on the proportions of sample groups containing a given tip. The colors used in this barplot are the same as with sample metadata coloring -- red corresponds to gut samples, green corresponds to tongue samples, etc. When we zoom in, we can see things in detail (you may want to re-color the tree using the _Sample Metadata Coloring_ controls and `body-site` so you can see what these colors mean): ![empress barplots: zoomed in on barplots: class coloring layer 1, bodysite layer 2](docs/moving-pictures/img/empress_barplots_7.png) -The top-most tip is only present in right palm samples, the second-from-the-top tip is only present in gut samples, and so on. The length taken up by a "block" for a given tip is proportional to how many samples of that type contain the tip -- so the fourth-from-the-top tip, for example, is present in mostly tongue samples but also present (albeit in fewer) left palm samples. When we click on this tip (name `9f1913b781d2cde1c8a4c57b7dc2ab83`) in the tree, we can see that this matches up with the _Sample Presence Information_ `body-site` summary for this tip: it's present in 2 tongue samples and 1 left palm sample. +The top-most tip is only present in right palm samples, the second-from-the-top tip is only present in gut samples, and so on. The length taken up by a "block" for a given tip is proportional to how many samples of that type contain the tip (relative to the total number of samples containing the tip; it's [not absolute](https://github.com/biocore/empress/issues/322)) -- so the fourth-from-the-top tip, for example, is present in mostly tongue samples but also present (albeit in fewer) left palm samples. When we click on this tip (name `9f1913b781d2cde1c8a4c57b7dc2ab83`) in the tree, we can see that this matches up with the _Sample Presence Information_ `body-site` summary for this tip: it's present in 2 tongue samples and 1 left palm sample. This was a brief introduction to some of the barplot functionality available in Empress. There's a lot more that hasn't been documented here -- scaling bars' lengths by a continuous feature metadata field, coloring bars by a continuous feature metadata field (e.g. an importance score or [Songbird](https://github.com/biocore/songbird/)/[ALDEx2](https://www.bioconductor.org/packages/release/bioc/html/ALDEx2.html)-style feature differential), adjusting the default colors or lengths of bars, and so on. We encourage you to try things out; feel free to contact us if you have any questions! From 3aeec27a0e8569ddc96bc600d57c5408ea6792cf Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 12:26:13 -0700 Subject: [PATCH 23/26] DOC: clarify presentFeatureIndices usage a bit? --- empress/support_files/js/biom-table.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index 842564fee..b6fa7f131 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -536,8 +536,9 @@ define(["underscore", "util"], function (_, util) { fIdx2SampleCt.push(0); }); - // Iterate through each sample of the BIOM table, storing unique s.m. - // value counts and total sample counts for each feature + // Iterate through each the feature presence data for each sample in + // the BIOM table, storing unique s.m. value counts and total sample + // counts for each feature _.each(this._tbl, function (presentFeatureIndices, sIdx) { // Figure out what metadata value this sample has at the column. cVal = scope._sm[sIdx][colIdx]; From d2b7d37147e9ba73d1382de0a8ce40fdd25370b8 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 12:37:47 -0700 Subject: [PATCH 24/26] DOC: add note re a future optimization #313 --- empress/support_files/js/biom-table.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/empress/support_files/js/biom-table.js b/empress/support_files/js/biom-table.js index b6fa7f131..db51a4ec3 100644 --- a/empress/support_files/js/biom-table.js +++ b/empress/support_files/js/biom-table.js @@ -554,7 +554,13 @@ define(["underscore", "util"], function (_, util) { // Convert counts to frequencies // Also, return an Object where the keys are feature IDs pointing to // other Objects where the keys are sample metadata values, rather than - // a 2D array (which is how fIdx2Counts has been stored) + // a 2D array (which is how fIdx2Counts has been stored). + // + // TODO: It should be possible to return a 2D array without + // constructing an Object, which would save some space. This would + // require decently substantial refactoring of the tests / of + // Empress.addSMBarplotLayerCoords(), but if this gets to be too + // inefficient for large trees it's an option. var fID2Freqs = {}; var totalSampleCount; _.each(this._fIDs, function (fID, fIdx) { From 80770a8755c0a03a77f46627b9d6c08b3d0ecff6 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 13:26:01 -0700 Subject: [PATCH 25/26] MNT: Simplify iteration in SM barplot drawing Addresses @kwcantrell comment in #313 --- empress/support_files/js/empress.js | 115 ++++++++++++++-------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index f71016fa0..2eb087c65 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1139,67 +1139,70 @@ define([ ); var colorer = new Colorer(layer.colorBySMColorMap, sortedUniqueValues); var sm2color = colorer.getMapRGB(); - // Do most of the hard work: compute the frequencies for each tip + // Do most of the hard work: compute the frequencies for each tip (only + // the tips present in the BIOM table, that is) var feature2freqs = this._biom.getFrequencyMap(layer.colorBySMField); // Bar thickness var halfyrscf = this._yrscf / 2; - for (i = 1; i < this._tree.size; i++) { - if (this._tree.isleaf(this._tree.postorderselect(i))) { - var node = this._treeData[i]; - var name = this.getNodeInfo(node, "name"); - var freqs = feature2freqs[name]; - // Don't draw bars for tips that aren't in the BIOM table - // (Note that this is only for the sample metadata barplots -- - // these tips could still ostensibly have associated - // feature metadata) - if (_.isUndefined(freqs)) { - continue; - } - var prevSectionMaxX = prevLayerMaxX; - // NOTE: currently we iterate through all of sortedUniqueValues - // once for every tip in the table, detecting and skipping - // unique values where no samples contain this tip. - // The reason we do things this way, rather than just - // iterating directly over the keys of this tip's Object within - // the frequency map, is that we want to ensure that unique - // values are processed in the same order for every tip (so for - // a "body site" barplot you'd always see e.g. gut, left palm, - // right palm, tongue in that order). - // - // Ideally we'd skip having to do this full iteration, though, - // and only look at the unique values containing this tip from - // the start (saving time). This might require refactoring the - // output of BiomTable.getFrequencyMap(), though. - for (var v = 0; v < sortedUniqueValues.length; v++) { - var smVal = sortedUniqueValues[v]; - var freq = freqs[smVal]; - // Ignore sample metadata values where no sample with this - // value contains this tip. We can detect this using - // !_.isUndefined() because freqs should only include - // entries for metadata values where this feature is - // present in at least one sample with that value. - if (!_.isUndefined(freq)) { - var sectionColor = sm2color[smVal]; - // Assign each unique sample metadata value a length - // proportional to its, well, proportion within the sample - // presence information for this tip. - var barSectionLen = layer.lengthSM * freq; - var thisSectionMaxX = prevSectionMaxX + barSectionLen; - var y = this.getY(node); - var ty = y + halfyrscf; - var by = y - halfyrscf; - var corners = { - tL: [prevSectionMaxX, ty], - tR: [thisSectionMaxX, ty], - bL: [prevSectionMaxX, by], - bR: [thisSectionMaxX, by], - }; - this._addTriangleCoords(coords, corners, sectionColor); - prevSectionMaxX = thisSectionMaxX; - } + // For each tip in the BIOM table... + // (We implicitly ignore [and don't draw anything for] tips that + // *aren't* in the BIOM table.) + _.each(feature2freqs, function(freqs, tipName) { + // Get the tree data for this tip. + // We can just get the 0-th key because tip names are guaranteed to + // be unique, so the nameToKeys entry for a tip name should be an + // array with 1 element. + var node = scope._treeData[scope._nameToKeys[tipName][0]]; + + // This variable defines the left x-coordinate for drawing the next + // "section" of the stacked barplot. It'll be updated as we iterate + // through the unique values in this sample metadata field below. + var prevSectionMaxX = prevLayerMaxX; + + // For each unique value for this sample metadata field... + // NOTE: currently we iterate through all of sortedUniqueValues + // once for every tip in the table, detecting and skipping + // unique values where no samples contain this tip. + // The reason we do things this way, rather than just + // iterating directly over the keys of this tip's Object within + // the frequency map, is that we want to ensure that unique + // values are processed in the same order for every tip (so for + // a "body site" barplot you'd always see e.g. gut, left palm, + // right palm, tongue in that order). + // + // Ideally we'd skip having to do this full iteration, though, + // and only look at the unique values containing this tip from + // the start (saving time). This might require refactoring the + // output of BiomTable.getFrequencyMap(), though. + for (var v = 0; v < sortedUniqueValues.length; v++) { + var smVal = sortedUniqueValues[v]; + var freq = freqs[smVal]; + // Ignore sample metadata values where no sample with this + // value contains this tip. We can detect this using + // !_.isUndefined() because freqs should only include + // entries for metadata values where this feature is + // present in at least one sample with that value. + if (!_.isUndefined(freq)) { + var sectionColor = sm2color[smVal]; + // Assign each unique sample metadata value a length + // proportional to its, well, proportion within the sample + // presence information for this tip. + var barSectionLen = layer.lengthSM * freq; + var thisSectionMaxX = prevSectionMaxX + barSectionLen; + var y = scope.getY(node); + var ty = y + halfyrscf; + var by = y - halfyrscf; + var corners = { + tL: [prevSectionMaxX, ty], + tR: [thisSectionMaxX, ty], + bL: [prevSectionMaxX, by], + bR: [thisSectionMaxX, by], + }; + scope._addTriangleCoords(coords, corners, sectionColor); + prevSectionMaxX = thisSectionMaxX; } } - } + }); // The bar lengths are identical for all tips in this layer, so no need // to do anything fancy to compute the maximum X coordinate. return prevLayerMaxX + layer.lengthSM; From 472a24824461d3e44331e00af95b7b5f3ba6f43a Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Tue, 11 Aug 2020 13:52:44 -0700 Subject: [PATCH 26/26] STY: prettify --- empress/support_files/js/empress.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/empress/support_files/js/empress.js b/empress/support_files/js/empress.js index 2eb087c65..6544c7447 100644 --- a/empress/support_files/js/empress.js +++ b/empress/support_files/js/empress.js @@ -1147,7 +1147,7 @@ define([ // For each tip in the BIOM table... // (We implicitly ignore [and don't draw anything for] tips that // *aren't* in the BIOM table.) - _.each(feature2freqs, function(freqs, tipName) { + _.each(feature2freqs, function (freqs, tipName) { // Get the tree data for this tip. // We can just get the 0-th key because tip names are guaranteed to // be unique, so the nameToKeys entry for a tip name should be an