diff --git a/appendix.Rmd b/appendix.Rmd index 349ed9a..a057d7a 100644 --- a/appendix.Rmd +++ b/appendix.Rmd @@ -1,76 +1,125 @@ # (APPENDIX) Appendix {-} -# Details of MGS model {#model-details} +# Model details {#model-details} -- Give the model in terms of arbitrary taxa -- Consider effect of variation in efficiency within a taxon -- Discuss assumption about efficiencies being constant at the species level +This appendix provides a more detailed description of our deterministic model of the microbiome measurement process, which it uses to derive some of the additional claims of the main text. -# Methods for assaying absolute densities {#review-absolute-methods} +## MGS measurement -# Taxonomic bias, counting noise, and zeros +This section expands the description of the model in the main text to more explicitly lay out our assumptions and to consider the effect of variation in measurement efficiencies within a taxonomic group on its MGS measurement. -- Motivation/intro P +A taxon can be any group of organisms. +For any taxon $I$ and sample $a$, we define the absolute efficiency of that taxon and that sample as the number of reads assigned to that taxon divided by the number of cells of that taxon in the given sample. +We assume that these assigned reads really do come from that particular taxon and sample (though this assumption is often violated in practice). -Simplest model: deterministic bias model + multinomial sampling. -Number of reads assigned to a species as multinomial with a given total read count for the sample, and a the biased proportions as input. -(Can treat this as a heuristic model, or can give an actual justification as to why this makes sense even for bioinformatic bias.) +The efficiency of a taxon $I$ is the average number of reads produced by each of its constituent cells. +This number varies due to + +1. Genetic variation among cells +1. Non-genetic variation among cells +1. Randomness in the experimental process + +Variation encoded in the genome of cells determines properties that affect its efficiency, such as how easy it is to lyse, how easily PCR primers will bind to their target locus, how many copies of a marker gene are in the genome, and whether its sequencing reads can be taxonomically assigned. +This variation created by genetically encoded traits is what we refer to as 'taxonomic bias' and is the primary focus of this article. +In principle, even a single nucleotide change (e.g. in a primer-binding site) could affect the efficiency of a cell. +Nevertheless, cells will more similar genomes will tend to have more similar genetically-encoded efficiencies. +For the purposes of our analysis, we suppose that cells within the same species have approximately identical genetically-encoded efficiencies. +The impact of violations in this assumption can be understood using the approach we describe below for taxa above the species level. +More generally, our results can be read as applying to the finest taxonomic unit that is reported by the given MGS method. + +Genetically identical cells can differ in cell state such that their efficiencies vary significantly. +An extreme example is the effect of sporulation: Spores are generally harder to lyse than vegetative cells, and @vaninsberghe2020diar found that the efficiency of a common DNA extraction protocol was 800X on vegetative cells vs spores of a strain of _Clostridium difficile_. +Physiological differences might also arise simply due where they are in the cell cycle. +To the extent that the cells of a given genotype tend to have a similar distribution of cell states across samples, the efficiency of the genotype will remain stable. +We assume this stability throughout our analysis. +Violations in this assumption can again be understood using an approach similar to that we use to considering taxa above the species level. + +The final number of reads also varies due to idiosyncratic events that we typically think of (and model) as 'random', such as the precise handling of a sample during pipetting or loading of a sequencing flow cell. +Such variation may or may not be dwarfed by the 'random' variation in the sample composition (i.e., the variation not explained by the covariates in a regression analysis). +For simplicity, we ignore this random variation when considering the impact of taxonomic bias on MGS measurement. +The effect of random variation in regression analysis is addressed in Appendix +\@ref(appendix-regression) and the effect of random counting error is discussed in Section \@ref(implications). + +The absolute efficiency of a taxon can also vary simply because more effort is put towards sequencing that sample, either intentionally or unintentionally. +The sequencing effort might depend on the taxonomic composition and bias of a sample. +For example, a sample with higher output DNA will often be diluted more prior to amplification or sequencing. +The critical assumption we make regarding sequencing effort is that it affects all species in the sample equally and hence doesn't affect their relative abundances. + +With these assumptions in place, we can partition the multiplicative difference between the reads assigned to a taxon and its density in the source sample into a taxon-specific factor and a sample-specific factor. +There is an extra degree of freedom, which we settle by equating the taxon-specific factors with relative measurement efficiencies and arbitrarily choose the relative efficiency of a particular species to equal 1. + + +Our model for the read counts of a species $i$ in sample $a$ can thus be expressed mathematically as \begin{align} -\text{reads}_i(a) - &= \text{Multinomial}\left( \text{total reads}(a), \text{prop}_{i}(a) \times \frac{\text{efficiency}_{i}}{\text{mean efficiency}(a)} \right). + \text{reads}_{i}(a) + = \text{density}_{i}(a) \cdot \text{efficiency}_{i} \cdot \text{effort}(a). \end{align} -The expected read count (conditional on read total) is +Note that the species' efficiencies are properties of the species $i$ but not the sample $a$. +Similarly, let $\text{density}_{I}(a)$, $\text{reads}_{I}(a)$, and $\text{efficiency}_{I}(a)$ denote the density, read count, and relative efficiency of an arbitrary group of species $I$ in sample $a$. +The read count for taxon $I$ in sample $a$ can be written \begin{align} -E\left[\text{reads}_i(a)\right] = \text{total reads}(a) \times \text{prop}_{i}(a) \times \frac{\text{efficiency}_{i}}{\text{mean efficiency}(a)}. + (\#eq:measurement-model-general) + \text{reads}_{I}(a) + = \text{density}_{I}(a) \cdot \text{efficiency}_{I}(a) \cdot \text{effort}(a). \end{align} - -(Note, might be simpler to use a Poisson model, especially for overdispersion case.) - -Zero counts are likely when the expected read count is $\lesssim 1$. -The expected read count is lower and hence the likelihood of zeros is much higher for species with efficiencies substantially below the sample mean. -Whereas in the absence of taxonomic bias, an observation of zero is taken as evidence that the actual proportion of a species is similar or less than the reciprocal of the total read count, with taxonomic bias, it instead implies only that +The efficiency of taxon $I$ in sample $a$ equals the weighted average of its constituent species, \begin{align} - \text{prop}_{i}(a) - \lesssim \frac{1}{ \text{total reads}(a) } - \times \frac{\text{mean efficiency}(a)}{ \text{efficiency}_{i}}. + (\#eq:efficiency-general) + \text{efficiency}_I(a) + \equiv \frac{\sum_{i\in I}\text{density}_i(a)\cdot \text{efficiency}_i}{\text{density}_I(a)}. \end{align} +The ratio between the read counts of two taxa $I$ and $J$ is +\begin{align} + (\#eq:ratio-error-general) + \widehat{\text{ratio}}_{I/J}(a) + &= \frac{\text{density}_{I}(a)}{\text{density}_{J}(a)} \cdot \frac{\text{efficiency}_{I}(a)}{\text{efficiency}_{J}(a)}. +\end{align} +Equation \@ref(eq:prop-error) for the error in a species' proportion and Equation \@ref(eq:ratio-error) for the error in the ratio between two species both follow directly from this more general expression. -The likelihood of a zero for a given taxon varies with the mean efficiency of a sample. -For example, when the vaginal microbiome shifts from Lactobacillus to Gardnerella dominance and the mean efficiency drops by 10X in figure x, it becomes ... -This becomes problematic for presence/absense analysis. -But it is also an issue for analysis of log proportions, log ratios, or log densities, in a manner that depends on how the DA method handles zeros. -A simple and common approach is to ignore the uncertainty from the counting process, and opt to replace zeros with some defined, small value (either in the counts or in the proportions). -(TODO: implications) -Some statistical packages (e.g. corncob, fido) take a more sophisticated approach and directly account for the uncertainty by modeling the counting process. -For these methods, bias against an organism can cause reduced precision, in a sample-dependent manner. - -- Maybe: implications of overdispersion / multiplicative noise prior to counting noise. -- Note, the above is for regression analysis, but looking at two-sample case is also illustrative. Can see a case where having a zero or very small proportion (if overdispersed) makes it very hard to -- Consider case of a zero observation; how to interpret, in the light of taxonomic bias? -- Limit of zero efficiency -> a value of zero reveals no information about the taxon. -- Maybe: Implications for presence/absense analysis +### Revised main text equations -# Taxonomic bias in total-density measurements {#appendix-total-density} +Let $S$ be the set of all species. +Then +\begin{align} + \text{reads}_S(a) + = \text{density}_S(a) \cdot \text{efficiency}_S(a) \cdot \text{effort}(a), +\end{align} +where +\begin{align} + \text{efficiency}_S(a) + \equiv \frac{\sum_{j\in S}\text{density}_j(a)\cdot \text{efficiency}_j}{\text{density}_S(a)} +\end{align} +Also, +\begin{align} + \widehat{\text{prop}}_{i}(a) = \frac{\text{reads}_i(a)}{\text{reads}_S(a)}. +\end{align} +and +\begin{align} + \widehat{\text{prop}}_{i}(a) + &= \text{prop}_{i}(a) \cdot \frac{\text{efficiency}_{i}}{\text{efficiency}_S(a)}. +\end{align} -A noted in the main text, we should expect species-specific efficiencies for total density estimates. -State the model: Assume constant absolute measurement efficiencies, $\text{efficiency}^{\text{tot}}_i$, which denote the absolute efficiency with which a cell of species $i$ contributes to the measurement of total density in sample $a$. -This implies consistent relative efficiencies and that density measurement is linear, in the sense that a doubling of the actual density when species composition is fixed always leads to a doubling in the measured density. -Absolute because we're not considering a compositional measurement; the scale of the species' efficiencies does impact the measured density, though not the fold variation across samples. +## Taxonomic bias in total-density measurement {#total-density-bias} -How do these species specific efficiencies impact the total density measurement? -We can write the measured total density in terms of the species' efficiencies as +To understand the impact of taxonomic bias in the total-density measurement on community normalization, we model error in total-density measurements similarly to that for MGS measurements. +Let $\widehat{\text{density}}_{S}(a)$ be the measurement of the density of all species $S$. +We suppose that this number is the sum of the (unobserved) contributions from each species. +The contribution of a species $i$ is proportional to its actual density $\text{density}_{i}(a)$ and its _absolute efficiency for the total-density measurement_ $\text{efficiency}_{i}^{\text{tot}}(a)$. +The contributions may also be multiplied by a common sample-specific error factor to account for non-linearity of extraction and/or random error, which we ignore for now. +The measured total density therefore equals \begin{align} - \widehat{\text{total density}}(a) - &= \sum_i \text{density}_i(a) \cdot \text{efficiency}^{\text{tot}}_i -\\&= \text{total density}(a) \cdot \text{mean efficiency}^{\text{tot}}(a) + \widehat{\text{density}}_S(a) + &= \sum_{i\in S} \text{density}_i(a) \cdot \text{efficiency}^{\text{tot}}_i +\\&= \text{density}_S(a) \cdot \text{efficiency}^{\text{tot}}_S(a) \end{align} where \begin{align} (\#eq:app-total-mean-efficiency) - \text{mean efficiency}^{\text{tot}}(a) - \equiv \frac{\sum_{j}\text{density}_j(a)\cdot \text{efficiency}^{\text{tot}}_j}{\text{total density}(a)} + \text{efficiency}^{\text{tot}}_S(a) + \equiv \frac{\sum_{i \in S}\text{density}_j(a)\cdot \text{efficiency}^{\text{tot}}_i}{\text{density}_S(a)} \end{align} is the mean efficiency in the sample with respect to the total-density measurement. + The error in the total density depends on the species composition through the mean efficiency of the sample; hence spurious changes in measured density can occur simply due to shifts from high- to low-efficiency species (or vice versa). How does this error affect the measured densities of individual species when the total is used for normalization? @@ -78,30 +127,214 @@ Substituting the mean efficiency of the total measurement for the error term in \begin{align} (\#eq:app-density-prop-error) \widehat{\text{density}}_{i}(a) - = \text{density}_{i}(a) \cdot \frac{\text{efficiency}_{i} \cdot \text{mean efficiency}^{\text{tot}}(a)}{\text{mean efficiency}(a)}. + = \text{density}_{i}(a) \cdot \frac{\text{efficiency}_{i} \cdot \text{efficiency}^{\text{tot}}_S(a)}{\text{efficiency}_S(a)}. +\end{align} +The mean efficiency of the total measurement appears in the numerator, while the mean efficiency of the MGS measurement is in the denominator. +To the extent that these two are positively associated across samples (specifically, their logarithms are positively linearly correlated), then the variation in one will tend to be offset by the variation in the other. +In this case the (geometric) variation in the ratio will be reduced, and the fold error in the species density measurement will tend to be more consistent than if the total density measurement was taxonomically unbiased (i.e. $\text{efficiency}^\text{tot}_S(a) = 1$ for all samples). +The error in species densities for individual samples may not be reduced; but the error in fold changes and regression analysis across samples will be. + +The extent to which the log mean efficiency of the MGS measurement and the total measurement are correlated depends on the species efficiencies as well as the changes in species composition across samples, making it difficult to make general statements about this effect. +We can get a sense for the effect by considering the correlation between log efficiencies across species between the two measurement types. +Suppose that distribution over species of log efficiencies of the MGS measurement has variance $\sigma^{2}$, the log efficiencies for the total density measurement has variance $\sigma_{\text{tot}}^{2}$, and the correlation between the two is $\rho$. +The variance in the difference $\log \text{efficiency}_{i}^{\text{tot}} - \log \text{efficiency}_{i}$ is thus $\sigma^{2} + \sigma_{\text{tot}}^{2} - \rho \sigma \sigma_{\text{tot}}$. +All else equal, the smaller this variance, the smaller we expect the variation in the error factor $\text{efficiency}^{\text{tot}}_S(a)/ \text{efficiency}_S(a)$. + +**Unclassified reads:** It is common that a given MGS protocol generates sequencing reads from particular taxa that the bioinformatics portion entirely discards, due to an inability to taxonomically assign the reads to the chosen taxonomic units. +For instance, 16S data that is analyzed with closed-reference OTU assignment and shotgun data analyzed by mapping to a database of reference genomes will be unable to assign sequences that are less than a chosen similarity cutoff (e.g. 97%) from the reference sequences. +These taxa may form an appreciable fraction of the community and thus lead to a large fraction of the total sequenced reads for a given sample may remain unassigned. + +So far, we have defined the efficiency of the MGS protocol in terms of taxonomically classified reads and so have treated such taxa as having an MGS efficiency of 0. +If such species are included in the total density measurement, then we have a mismatch between the two measurement types. +However, we may be able to remove this mismatch by altering the equation we use to form species density measurements. +In particular, we can compute the proportion of the focal species from the ratio of its assigned reads to the total sequenced reads, rather than to the total set of assigned reads. +This approach raises additional questions around how to treat reads from assignable species that are filtered during quality control steps. + +Suppose that the set of species that can be classified by our bioinformatics pipeline, $S$, is a proper subset of a larger set of species $S'$ that are present and yield sequencing reads in our sample. +Suppose that there is no additional taxonomic bias in our MGS measurement; that is, all species in $S$ have an efficiency of 1, and the species in $S' \setminus S$ have an efficiency of 0. +Moreover, suppose that all reads from the $S$ species are classified; that is, there is no loss of reads due to routine QC filtering. +Further suppose that we are capable of perfectly measuring the total density of all cells from the full set of species $S'$; that is, $\text{efficiency}^{\text{tot}}_{i} = 1$ for all species $i \in S'$. + +In this case, we can overcome the mismatch between MGS and total measurement when we use them to measure species densities, by using the total reads rather than the assigned reads to compute the species proportions. +That is, instead of +\begin{align} + \widehat{\text{prop}}_{i}(a) = \frac{\text{reads}_i(a)}{\text{reads}_S(a)}, +\end{align} +we take +\begin{align} + \widehat{\text{prop}}_{i}(a) + &= \frac{\text{reads}_i(a)}{\text{total reads}(a)} +\\&= \frac{\text{reads}_i(a)}{\text{reads}_{S'}(a)}. +\end{align} +By accounting for the contribution of unknown species when computing proportions, we are able to resolve the mismatch and obtain accurate species densities. + +## Using reference species for total-density normalization {#total-density-ref} + +Constant reference species are sometimes used to measure total density of $S$ by the ratio of $S$ reads to $R$ reads. +For example, a study of *Arabidopsis* microbiomes used the ratio of bacterial to host reads in shotgun sequencing as a proxy for total bacterial density, which they then used for total-community normalization of 16S amplicon sequencing measurements (@karasov2020ther, @regalado2019comb). +@chng2020meta similarly used the ratio of bacterial to host or diet reads in shotgun sequencing of mouse fecal samples as a proxy for total bacterial density (though they did not use this measurement for community normalization). +@smets2016amet similarly used the ratio of non-spike-in to spike-in reads to estimate total density. + +What is the impact of taxonomic bias on these total density estimates and the species densities derived from them? +The measured density of $S$ is +\begin{align} + \widehat{\text{density}}_{S}(a) + &= \frac{\text{reads}_S(a)}{\text{reads}_{R}(a)} +\\&= \frac{\text{density}_S(a)}{\text{density}_{R}} \cdot \frac{\text{efficiency}_S(a)}{\text{efficiency}_{R}} +\\&= \text{density}_S(a) \cdot \text{efficiency}_S(a) \cdot \text{constant}. +\end{align} +Hence variation in the mean efficiency among the species $S$ across samples creates a variable fold error in the density measurement, similar to direct measurements of total density. + +The impact of this variable error for species density measurement via Equation \@ref(eq:density-prop-meas) depends on whether the species proportions are derived from the same or a different MGS measurement. + +If proportions from the same MGS measurement are used, then the measured species densities are +\begin{align} + \widehat{\text{density}}_{i}(a) + &= \frac{\text{reads}_i(a)}{\text{reads}_{S}(a)} \cdot \widehat{\text{density}}_{S}(a) +\\&= \frac{\text{reads}_{i}(a)}{\text{reads}_{R}(a)}. +\end{align} +where the second line follows by substitution of the previous equation. +The variable error in the total density cancels with that in the proportion, yielding a constant fold error. +In fact, the measurement we obtain is exactly the same as that from reference normalization (Equation \@ref(eq:density-ratio-meas)). + +The situation differs when the total density and community composition are estimated using different sequencing and/or bioinformatic methods, as in the plant microbiome example above. +In this case, the mean efficiency of the total density measurement will not equal that in the proportions, and the more general case of total-density normalization discussed in Appendix \@ref(total-density-bias) applies. + +Although we only consider constant reference species, similar behavior occurs if for varying reference species assayed by targeted measurements. + +## Linearity of extraction + +Some popular absolute-abundance methods use post-extraction DNA (or RNA) density as a proxy for cell density in the original sample; in particular, using florescence-based total DNA quantification or using qPCR or ddPCR to quantify a marker-gene. +In addition, some have suggested normalization of species to spike-ins of extraneous DNA added after extraction. +Both of these approaches presuppose that DNA extraction is linear in the sense that the DNA concentration is proportional to the cell concentration in the original sample (possibly after correction by known dilution or concentration factors). +Deviations from linearity can occur due to random and systematic variation in DNA yields. +Here we consider the impact of these deviations on species density measurements. + +One source of systematic variation in yield is taxonomic bias. +We already explored the impact of taxonomic bias in extraction. + +NOTE: This might not be a correct way to talk about this; should perhaps make a more explicit model to verify this argument. + +To understand the impact of non-linearity after bias has been accounted for, let $C(a)$ be a sample-specific factor that accounts for DNA extraction non-linearity after controlling for bias. +From Equation \@ref(eq:density-prop-error), we have the more general equation +\begin{align} + (\#eq:app-density-prop-error) + \widehat{\text{density}}_{i}(a) + = \text{density}_{i}(a) \cdot \frac{\text{efficiency}_{i} \cdot \text{efficiency}^{\text{tot}}_S(a)}{\text{efficiency}_S(a)} \cdot C(a). \end{align} - - - - - -Note, having more similar efficiencies between MGS and total measurements does not necessarily improve the accuracy of the individual sample density estimates (possible misconception of other article?), since even if they perfectly cancel the species-specific error remains. -Yet it does improve fold change estimates. -Equation \@ref(eq:app-density-prop-error) shows that the variation in the fold error across samples is driven by the variation in the ratio of the mean efficiency of the total measurement and that of the sequencing measurement. -To the extent that these two are positively associated (specifically, their logarithms are positively linearly correlated), then the variation across samples will be lower than if a perfectly accurate total density were used. +$C(a)$ might fluctuate randomly among samples, or might vary systematically --- for example, if DNA yield saturates at high concentrations then $C(a)$ will be smaller in higher-concentration samples. +If DNA extraction is not linear, then measures of cell density might give more accurate fold changes even if they share less bias with the MGS measurement. + +Post-extraction spike-ins and targeted measurements face a similar problem. +These perfectly control for bias in fold change estimation _if_ DNA extraction is linear. +However, if not then the fold error in $\widehat{\text{density}_r}(a)$ for a species with a targeted measurement will vary with $C(a)$ and cause error in fold changes. +The same problem applies to DNA spike-ins added after DNA extraction (although different accounting is needed since $\text{density}_r(a)$ no longer has meaning). + +# Review of experimental methods for obtaining absolute densities {#review-absolute-methods} + +There are many experimental techniques to be able to add absolute-density information to MGS measurements. +Here we review the experimental techniques; the next section considers the implications for systematic error. + +_NOTE: Right now I don't consistently address why various targeted methods might be expected to produce constant fold errors. In revision, seek to connect each method with the relevant theory._ + +## Measurement of total cell density + +Total cell density in the original sample can be directly measured by cell counting, either via microscopy (@kevorkian2018esti, @lloyd2020evid) or flow cytometry (@props2017abso, @vandeputte2017quan). +Total cell density or biomass can also be measured via properties assumed to be proportional to cell density, such as fluorescence (as in fluorescence spectroscopy, @wang2021curr), components of microbial cell membranes (as in PLFA analysis, @smets2016amet), and the rate of microbial respiration (as in SIR method, @smets2016amet). +So far, it is primarily the cell counting methods that have been used for species-density measurement (rather than simply for the total density), by multiplying the estimated total density by the MGS proportions. + +## Measurement of total DNA density post extraction + +It is also common to use density of bulk DNA or a marker gene as a proxy for total community density. +Marker-gene density is typically measured with qPCR or ddPCR using 'universal primers' that target the marker gene of interest (typically the 16S gene for bacterial microbiome experiments) (@tettamantiboshier2020comp, @jian2020quan, @galazzo2020howt). +Bulk DNA density can be measured using fluorescence-based DNA quantification assays (@contijoch2019gutm; @korpela2018inte). +In either case, the DNA density is measured after DNA extraction and so is affected by taxonomic bias in the extraction process, such as variation in lysis efficiency among species. +Other sources of bias that affect the DNA density measurement include variation in marker-gene copy number (for marker-gene density) and variation in genome size (for bulk DNA density). + +The measured DNA density is typically used as a direct proxy for cell density in the original sample. +In particular, it is assumed that a doubling of cell density in the original sample leads to a doubling of DNA density in the extraction (possibly after adjustment for known dilution factors). +This _linearity assumption_ may be violated for several reasons. +First, because of taxonomic bias. +For example, samples dominated by easy-to-lyse species will give more DNA per cell than samples dominated by hard-to-lyse species. +Second, systematic non-linearity may occur in the DNA yield as a function of input, even if species composition is held fixed. +For example, DNA yield may saturate at high sample inputs. +Third, the DNA yield may vary apparently randomly due to subtle differences in sample chemistry or handling during the experiment. + +## Equivolumetric protocol + +A large part of the reason that there is not a direct correspondence between total density in the sample and total reads sequenced is that MGS experiments are typically intentionally designed to yield a similar number of sequencing reads from each sample, regardless of total density. +@cruz2021equi propose instead designing the MGS experiment so as to make total reads proportional total density. +The 'equivolumetric protocol' they develop represents a first attempt in this direction. +In their protocol, total reads is a saturating function of total density; this function can be measured with a calibration experiment, and the calibration curve used to predict the total density in the source sample. +This total density estimate is then used to scale the read counts to estimate species densities in a manner equivalent to the total-community density method. + +## Housekeeping species + +We use _housekeeping species_ (by analogy with housekeeping genes used for normalization in RNAseq experiments) to refer to species whose density is assumed to be constant, either in the MGS sample or in the source ecosystem it is derived from. + +Housekeeping species can sometimes be identified from prior scientific knowledge. +Several studies that have employed shotgun sequencing of host-associated microbiomes have use the plant or animal host for this purpose. +A study of *Arabidopsis* microbiomes used the ratio of bacterial to host reads in shotgun sequencing as a proxy for total bacterial density, which they then used for total-community normalization of 16S amplicon sequencing measurements (@karasov2020ther, @regalado2019comb). +@chng2020meta similarly used the ratio of bacterial to host reads in shotgun sequencing of mouse fecal samples as a proxy for total bacterial density (though they did not use this measurement for community normalization). +They also use reads from dietary plants for the same purpose. +@wallace2021thed used shotgun sequencing to study the virome of _Drosophila_, and normalized virus reads to _Drosophila_ reads to measure viral abundance per fly. +Organelle reads can also be used. +@diener2021nonr use mitochondria reads in 16S sequencing of mouse fecal pellets to assess total microbial load, though in a qualitative fashion (as mitochondrial reads were only non-zero at very low bacterial densities induced by antibiotics). + +In some cases, there may also be microbes or viruses thought to have stable densities. +A recent example is that the most abundant DNA virus in human feces, crAssphage, and the most abundant RNA virus in human feces, Pepper Mild Mottle Virus, have been treated as stable reference species in wastewater monitoring for SARS-CoV-2. +Although primarily used in the context of qPCR measurements, these viruses could also be used as references in RNA and DNA shotgun sequencing experiments. + +Housekeeping species may not be known _a priori_; to address this case, several methods have been put forward to computationally identify unchanging microbes species directly from MGS measurements. +These studies are often focused on mammalian gut bacterial communities. +It is perhaps unreasonable to expect bacterial species to be unchanging across hosts, but weaker assumptions can be made to develop normalization methods for the MGS measurements with a similar spirit to reference-based normalization. +These studies have instead developed normalization methods based on assumptions such as that most species do not change between any pair of samples (@david2014host) or that the mean (log) abundance between two sample conditions is unchanged for at least some species (@mandal2015anal, @kumar2018anal). + +When housekeeping species are sequenced along with the primary MGS measurement, they can be used to obtain species densities via reference-normalization (Equation \@ref(eq:density-ratio-meas). +In this case, the only relevant taxonomic bias is that of the primary MGS measurement; if it is constant then it will cancel in fold-change calculations. +Because the density of the housekeeping species is unknown, we can consider either that the density of focal species has a constant error, or is in units of the housekeeping species. +Non-constant error might arise if the species is treated as constant when it is in fact not. + +Housekeeping species have also been used to estimate total community density by $\text{reads}_{S} / \text{reads}_{R}$. +This estimate has been used to study variation in total density across samples, or for total-density normalization. + +## Spike-ins + +Spike-in methods differ by the biological spike-in material: +Cellular spike-ins can be added prior to DNA extraction, and DNA spike-ins can be added prior to or following DNA extraction. +In either case, a variety of methods can be used to actually leverage the spike-ins for absolute density analysis. + +**Cellular spike-ins:** +Cellular spike-ins are added to the sample prior to DNA extraction. +Some sample processing has typically occurred prior to spiking, for the purposes of storage (e.g. a freeze thaw cycle) and homogenization. +We should expect there to be taxonomic bias between the spike-in species and those naturally in the sample due to genetic differences and because of physiological differences induced by the sample processing prior to spiking and the experimental procedure used to grow and prepare the spike-in cells. +Our analysis acknowledges this bias, but assumes that it is consistent across samples. +The nominal density of the spike-in species added to each sample is subject to random and systematic fold error; but systematic fold error that is shared across samples will induce a constant fold error in species densities and so not impact DA analysis. +For instance, if source stock is actually 1.5X higher concentration than thought, the true spike-in concentration will be 1.5X greater than nominal in all samples and not pose a problem for accurate DA inference beyond leading to a greater than intended sequencing effort being expended on the spike-in. -Applications of these ideas: +Example studies include @stammler2016adju, @ji2019quan, and @rao2021mult. -- example: 16S sequencing may be better paired with 16S qPCR than flow cytometry -- example: shotgun sequencing may be best paired with bulk DNA measurement (depending on bioinformatics protocol); can even handle a large fraction of unassigned reads _if_ total reads are used in the denominator of the proportion calculation (rather than assigned reads) -- example: estimating total density from a spike-in; mathematically reduces to the reference-species measurement; here the error completely offsets. +**DNA spike-ins:** +Another possibility is to add DNA spike-ins, derived from natural or artificial sequence. +DNA spike-ins can be added the samples before DNA extraction (e.g. @smets2016amet, @tkacz2018abso, @zemb2020abso) or after DNA extraction (e.g. @hardwick2018synt, @tkacz2018abso). +Adding spike-ins prior to extraction is thought to be preferable as it makes it possible to detect and correct for variation in DNA extraction yield among samples +(@tkacz2018abso, @zemb2020abso, @harrison2021theq). +Below, we consider the distinction between pre- and post-extraction spike-ins in the light of taxonomic bias coupled with other sources of variation in extraction efficiency. -Extra assumptions in this section: -- Linearity of DNA extraction -- Thinking that log efficiencies between MGS and total are positively correlated comes from the idea that we can treat bias from different steps and/or sources as approximately independent +**How spike-ins are used:** +Like housekeeping species, spike-ins have been used in a variety of ways to analyze absolute abundances. +Let $R$ (for reference) be the spike-in species and $S$ be the native species. +@smets2016amet used the ratio of $S$ to $R$ reads as an estimate of total density, which they were interested in for its own sake, though one could imagine then also using this total density estimate in community normalization (Equation \@ref(eq:density-prop-meas)). +@zemb2020abso used the spike-ins to measure total density from the ratio of $S$ to $R$ qPCR abundance estimates and then used Equation \@ref(eq:density-prop-meas). +@stammler2016adju and others used the ratio-based method of Equation \@ref(eq:density-ratio-meas). -# Taxonomic bias in spike-ins and targeted measurements +## Targeted measurements -- Distinction between cellular vs DNA and pre- vs post-extraction spike-ins and targeted measurements -- Pull from Section 5 of the theory notebook +A variety of methods exist for targeted measurement of absolute density of a specific species (or higher-order taxon). +The most common approach is to use qPCR or ddPCR to measure the concentration of a marker gene in the extracted DNA, using primers scoped to the target taxon. +This approach is therefore subject to sources of taxonomic bias including extraction, marker-gene copy number, and primer-binding. +It is also subject to non-species-specific variation in extraction yields unless these are otherwise controlled for. +It is also possible to directly measure cell density using methods. +Some species can be measured by CFU counting after plating on selective media (REFs), and ddPCR has been used to direct measure cells (@dreo2014opti, @morella2018rapi) and viruses (@pavsic2016digi, @morella2018rapi) without first performing an extraction. +Species-specific florescent probes also make it possible to measure individual species via microscopy or flow cytometry (REFs). diff --git a/main.bib b/main.bib index 7a6cd7e..21d8914 100644 --- a/main.bib +++ b/main.bib @@ -1,4 +1,4 @@ -@article{brooks2015thet, +@raarticle{brooks2015thet, author = {Brooks, J Paul and Edwards, David J and Harwich, Michael D and Rivera, Maria C and Fettweis, Jennifer M and Serrano, Myrna G and Reris, Robert A and Sheth, Nihar U and Huang, Bernice and Girerd, Philippe and Strauss, Jerome F and Jefferson, Kimberly K and Buck, Gregory A}, doi = {10.1186/s12866-015-0351-6}, journal = {BMC Microbiol.}, @@ -45,6 +45,58 @@ @article{contijoch2019gutm volume = {8}, year = {2019} } +@article{cruz2021equi, +author = {Cruz, Giuliano Netto Flores and Christoff, Ana Paula and de Oliveira, Luiz Felipe Valter}, +doi = {10.3389/fmicb.2021.638231}, +journal = {Front. Microbiol.}, +keywords = {Absolute abundances,Bacteria,Compositional,Hospital,Microbiome,NGS,Next-generation sequencing,Total microbial load}, +month = {feb}, +number = {February}, +pages = {1--16}, +title = {{Equivolumetric Protocol Generates Library Sizes Proportional to Total Microbial Load in 16S Amplicon Sequencing}}, +url = {https://www.frontiersin.org/articles/10.3389/fmicb.2021.638231/full}, +volume = {12}, +year = {2021} +} +@article{david2014host, +abstract = {Disturbance to human microbiota may underlie several pathologies. Yet, we lack a comprehensive understanding of how lifestyle affects the dynamics of human-associated microbial communities. Here, we link over 10,000 longitudinal measurements of human wellness and action to the daily gut and salivary microbiota dynamics of two individuals over the course of one year. These time series show overall microbial communities to be stable for months. However, rare events in each subjects' life rapidly and broadly impacted microbiota dynamics. Travel from the developed to the developing world in one subject led to a nearly two-fold increase in the Bacteroidetes to Firmicutes ratio, which reversed upon return. Enteric infection in the other subject resulted in the permanent decline of most gut bacterial taxa, which were replaced by genetically similar species. Still, even during periods of overall community stability, the dynamics of select microbial taxa could be associated with specific host behaviors. Most prominently, changes in host fiber intake positively correlated with next-day abundance changes among 15{\%} of gut microbiota members. Our findings suggest that although human-associated microbial communities are generally stable, they can be quickly and profoundly altered by common human actions and experiences.}, +author = {David, Lawrence A and Materna, Arne C and Friedman, Jonathan and Campos-Baptista, Maria I and Blackburn, Matthew C and Perrotta, Allison and Erdman, Susan E and Alm, Eric J}, +doi = {10.1186/gb-2014-15-7-r89}, +journal = {Genome Biol.}, +month = {jul}, +number = {7}, +pages = {R89}, +publisher = {BioMed Central}, +title = {{Host lifestyle affects human microbiota on daily timescales}}, +url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2014-15-7-r89}, +volume = {15}, +year = {2014} +} +@article{diener2021nonr, +author = {Diener, Christian and Hoge, Anna C.H. and Kearney, Sean M. and Kusebauch, Ulrike and Patwardhan, Sushmita and Moritz, Robert L. and Erdman, Susan E. and Gibbons, Sean M.}, +doi = {10.1038/s42003-021-01841-8}, +journal = {Commun. Biol.}, +number = {1}, +pmid = {33750910}, +publisher = {Springer US}, +title = {{Non-responder phenotype reveals apparent microbiome-wide antibiotic tolerance in the murine gut}}, +url = {http://dx.doi.org/10.1038/s42003-021-01841-8}, +volume = {4}, +year = {2021} +} +@article{dreo2014opti, +author = {Dreo, Tanja and Pirc, Manca and Ram{\v{s}}ak, {\v{Z}}iva and Pav{\v{s}}i{\v{c}}, Jernej and Milavec, Mojca and {\v{Z}}el, Jana and Gruden, Kristina}, +doi = {10.1007/s00216-014-8084-1}, +journal = {Anal. Bioanal. Chem.}, +month = {oct}, +number = {26}, +pages = {6513--6528}, +pmid = {25173868}, +title = {{Optimising droplet digital PCR analysis approaches for detection and quantification of bacteria: a case study of fire blight and potato brown rot}}, +url = {http://link.springer.com/10.1007/s00216-014-8084-1}, +volume = {406}, +year = {2014} +} @article{fettweis2019thev, author = {Fettweis, Jennifer M. and Serrano, Myrna G. and Brooks, Jamie Paul and Edwards, David J. and Girerd, Philippe H. and Parikh, Hardik I. and Huang, Bernice and Arodz, Tom J. and Edupuganti, Laahirie and Glascock, Abigail L. and Xu, Jie and Jimenez, Nicole R. and Vivadelli, Stephany C. and Fong, Stephen S. and Sheth, Nihar U. and Jean, Sophonie and Lee, Vladimir and Bokhari, Yahya A. and Lara, Ana M. and Mistry, Shreni D. and Duckworth, Robert A. and Bradley, Steven P. and Koparde, Vishal N. and Orenda, X. Valentine and Milton, Sarah H. and Rozycki, Sarah K. and Matveyev, Andrey V. and Wright, Michelle L. and Huzurbazar, Snehalata V. and Jackson, Eugenie M. and Smirnova, Ekaterina and Korlach, Jonas and Tsai, Yu-Chih and Dickinson, Molly R. and Brooks, Jamie L. and Drake, Jennifer I. and Chaffin, Donald O. and Sexton, Amber L. and Gravett, Michael G. and Rubens, Craig E. and Wijesooriya, N. Romesh and Hendricks-Mu{\~{n}}oz, Karen D. and Jefferson, Kimberly K. and Strauss, Jerome F. and Buck, Gregory A.}, doi = {10.1038/s41591-019-0450-2}, @@ -108,6 +160,57 @@ @article{gloor2017micr volume = {8}, year = {2017} } +@article{hardwick2018synt, +author = {Hardwick, Simon A. and Chen, Wendy Y. and Wong, Ted and Kanakamedala, Bindu S. and Deveson, Ira W. and Ongley, Sarah E. and Santini, Nadia S. and Marcellin, Esteban and Smith, Martin A. and Nielsen, Lars K. and Lovelock, Catherine E. and Neilan, Brett A. and Mercer, Tim R.}, +doi = {10.1038/s41467-018-05555-0}, +journal = {Nat. Commun.}, +month = {dec}, +number = {1}, +pages = {3096}, +publisher = {Nature Publishing Group}, +title = {{Synthetic microbe communities provide internal reference standards for metagenome sequencing and analysis}}, +url = {http://www.nature.com/articles/s41467-018-05555-0}, +volume = {9}, +year = {2018} +} +@article{harrison2021theq, +author = {Harrison, Joshua G. and {John Calder}, W. and Shuman, Bryan and {Alex Buerkle}, C.}, +doi = {10.1111/1755-0998.13247}, +journal = {Mol. Ecol. Resour.}, +month = {jan}, +number = {1}, +pages = {30--43}, +title = {{The quest for absolute abundance: The use of internal standards for DNA‐based community ecology}}, +url = {https://onlinelibrary.wiley.com/doi/10.1111/1755-0998.13247}, +volume = {21}, +year = {2021} +} +@article{ji2019quan, +author = {Ji, Brian W. and Sheth, Ravi U. and Dixit, Purushottam D. and Huang, Yiming and Kaufman, Andrew and Wang, Harris H. and Vitkup, Dennis}, +doi = {10.1038/s41592-019-0467-y}, +journal = {Nat. Methods}, +number = {8}, +pages = {731--736}, +pmid = {31308552}, +publisher = {Springer US}, +title = {{Quantifying spatiotemporal variability and noise in absolute microbiota abundances using replicate sampling}}, +url = {http://dx.doi.org/10.1038/s41592-019-0467-y}, +volume = {16}, +year = {2019} +} +@article{jian2020quan, +author = {Jian, Ching and Luukkonen, Panu and Yki-J{\"{a}}rvinen, Hannele and Salonen, Anne and Korpela, Katri}, +doi = {10.1371/journal.pone.0227285}, +editor = {Vaz-Moreira, Ivone}, +journal = {PLoS One}, +month = {jan}, +number = {1}, +pages = {e0227285}, +title = {{Quantitative PCR provides a simple and accessible method for quantitative microbiota profiling}}, +url = {https://dx.plos.org/10.1371/journal.pone.0227285}, +volume = {15}, +year = {2020} +} @article{jian2021comm, author = {Jian, Ching and Salonen, Anne and Korpela, Katri}, doi = {10.3389/fcimb.2021.627910}, @@ -142,6 +245,30 @@ @article{kevorkian2018esti volume = {84}, year = {2018} } +@article{korpela2018inte, +author = {Korpela, Katri and Blakstad, Elin W. and Moltu, Sissel J. and Str{\o}mmen, Kenneth and Nakstad, Britt and R{\o}nnestad, Arild E. and Br{\ae}kke, Kristin and Iversen, Per O. and Drevon, Christian A. and de Vos, Willem}, +doi = {10.1038/s41598-018-20827-x}, +journal = {Sci. Rep.}, +number = {1}, +pages = {1--9}, +pmid = {29410448}, +title = {{Intestinal microbiota development and gestational age in preterm neonates}}, +volume = {8}, +year = {2018} +} +@article{kumar2018anal, +author = {Kumar, M. Senthil and Slud, Eric V. and Okrah, Kwame and Hicks, Stephanie C. and Hannenhalli, Sridhar and {Corrada Bravo}, H{\'{e}}ctor}, +doi = {10.1186/s12864-018-5160-5}, +journal = {BMC Genomics}, +month = {dec}, +number = {1}, +pages = {799}, +publisher = {BioMed Central}, +title = {{Analysis and correction of compositional bias in sparse sequencing count data}}, +url = {https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-018-5160-5}, +volume = {19}, +year = {2018} +} @article{leopold2020host, author = {Leopold, Devin R and Busby, Posy E}, doi = {10.1016/j.cub.2020.06.011}, @@ -177,6 +304,18 @@ @article{lozupone2013meta url = {http://www.genome.org/cgi/doi/10.1101/gr.151803.112}, year = {2013} } +@article{mandal2015anal, +author = {Mandal, Siddhartha and {Van Treuren}, Will and White, Richard A. and Eggesb{\o}, Merete and Knight, Rob and Peddada, Shyamal D.}, +doi = {10.3402/mehd.v26.27663}, +journal = {Microb. Ecol. Heal. Dis.}, +month = {may}, +number = {1}, +pages = {27663}, +title = {{Analysis of composition of microbiomes: a novel method for studying microbial composition}}, +url = {http://www.microbecolhealthdis.net/index.php/mehd/article/view/27663}, +volume = {26}, +year = {2015} +} @article{martin2020mode, abstract = {Using a sample from a population to estimate the proportion of the population with a certain category label is a broadly important problem. In the context of microbiome studies, this problem arises when researchers wish to use a sample from a population of microbes to estimate the population proportion of a particular taxon, known as the taxon's relative abundance.Inthis paper, we propose a beta-binomial model for this task. Like existing models, our model allows for a taxon's relative abundance to be associated with co-variates of interest. However, unlike existing models, our proposal also allows for the overdispersion in the taxon's counts to be associated with covariates of interest. We exploit this model in order to propose tests not only for differential relative abundance, but also for differential variability. The latter is particularly valuable in light of speculation that dysbiosis, the perturbation from a normal microbiome that can occur in certain disease conditions, may manifest as a loss of stability, or increase in variability, of the counts associated with each taxon. We demonstrate the performance of our proposed model using a simulation study and an application to soil microbial data.}, author = {Martin, Bryan D. and Witten, Daniela and Willis, Amy D.}, @@ -219,6 +358,32 @@ @misc{mclaren2021esti title = {{Estimating bias from mock communities using the fido R package}}, url = {https://microbiomemeasurement.org/posts/bias-estimation-from-mocks-with-fido/} } +@article{morella2018rapi, +author = {Morella, Norma M. and Yang, Shangyang Christopher and Hernandez, Catherine A. and Koskella, Britt}, +doi = {10.1016/j.jviromet.2018.05.007}, +journal = {J. Virol. Methods}, +number = {May}, +pages = {18--24}, +pmid = {29859196}, +publisher = {Elsevier}, +title = {{Rapid quantification of bacteriophages and their bacterial hosts in vitro and in vivo using droplet digital PCR}}, +url = {https://doi.org/10.1016/j.jviromet.2018.05.007}, +volume = {259}, +year = {2018} +} +@article{pavsic2016digi, +author = {Pav{\v{s}}i{\v{c}}, Jernej and {\v{Z}}el, Jana and Milavec, Mojca}, +doi = {10.1007/s00216-015-9109-0}, +journal = {Anal. Bioanal. Chem.}, +month = {jan}, +number = {1}, +pages = {67--75}, +pmid = {26483186}, +title = {{Digital PCR for direct quantification of viruses without DNA extraction}}, +url = {http://link.springer.com/10.1007/s00216-015-9109-0}, +volume = {408}, +year = {2016} +} @article{props2017abso, author = {Props, Ruben and Kerckhof, Frederiek-Maarten and Rubbens, Peter and {De Vrieze}, Jo and {Hernandez Sanabria}, Emma and Waegeman, Willem and Monsieurs, Pieter and Hammes, Frederik and Boon, Nico}, doi = {10.1038/ismej.2016.117}, @@ -282,6 +447,17 @@ @article{rao2021mult volume = {591}, year = {2021} } +@article{regalado2019comb, +author = {Regalado, Julian and Lundberg, Derek S. and Deusch, Oliver and Kersten, Sonja and Karasov, Talia and Poersch, Karin and Shirsekar, Gautam and Weigel, Detlef}, +doi = {10.1038/s41396-020-0665-8}, +journal = {ISME J.}, +month = {may}, +pages = {823492}, +publisher = {Springer US}, +title = {{Combining whole-genome shotgun sequencing and rRNA gene amplicon analyses to improve detection of microbe–microbe interaction networks in plant leaves}}, +url = {http://www.nature.com/articles/s41396-020-0665-8}, +year = {2020} +} @article{riverapinto2018bala, author = {Rivera-Pinto, J. and Egozcue, J. J. and Pawlowsky-Glahn, V. and Paredes, R. and Noguera-Julian, M. and Calle, M. L.}, doi = {10.1128/mSystems.00053-18}, @@ -320,6 +496,30 @@ @article{silverman2017aphy volume = {6}, year = {2017} } +@article{smets2016amet, +author = {Smets, Wenke and Leff, Jonathan W. and Bradford, Mark A. and McCulley, Rebecca L. and Lebeer, Sarah and Fierer, Noah}, +doi = {10.1016/j.soilbio.2016.02.003}, +journal = {Soil Biol. Biochem.}, +pages = {145--151}, +publisher = {Elsevier Ltd}, +title = {{A method for simultaneous measurement of soil bacterial abundances and community composition via 16S rRNA gene sequencing}}, +url = {http://dx.doi.org/10.1016/j.soilbio.2016.02.003}, +volume = {96}, +year = {2016} +} +@article{stammler2016adju, +author = {St{\"{a}}mmler, Frank and Gl{\"{a}}sner, Joachim and Hiergeist, Andreas and Holler, Ernst and Weber, Daniela and Oefner, Peter J. and Gessner, Andr{\'{e}} and Spang, Rainer}, +doi = {10.1186/s40168-016-0175-0}, +journal = {Microbiome}, +month = {dec}, +number = {1}, +pages = {28}, +publisher = {BioMed Central}, +title = {{Adjusting microbiome profiles for differences in microbial load by spike-in bacteria}}, +url = {http://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-016-0175-0}, +volume = {4}, +year = {2016} +} @article{tettamantiboshier2020comp, author = {{Tettamanti Boshier}, Florencia A. and Srinivasan, Sujatha and Lopez, Anthony and Hoffman, Noah G. and Proll, Sean and Fredricks, David N. and Schiffer, Joshua T.}, doi = {10.1128/mSystems.00777-19}, @@ -332,6 +532,19 @@ @article{tettamantiboshier2020comp volume = {5}, year = {2020} } +@article{tkacz2018abso, +author = {Tkacz, Andrzej and Hortala, Marion and Poole, Philip S.}, +doi = {10.1186/s40168-018-0491-7}, +journal = {Microbiome}, +month = {dec}, +number = {1}, +pages = {110}, +publisher = {BioMed Central}, +title = {{Absolute quantitation of microbiota abundance in environmental samples}}, +url = {https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-018-0491-7}, +volume = {6}, +year = {2018} +} @book{vandenboogaart2013anal, address = {Berlin, Heidelberg}, author = {van den Boogaart, K. Gerald and Tolosana-Delgado, Raimon}, @@ -354,6 +567,19 @@ @article{vandeputte2017quan volume = {551}, year = {2017} } +@article{vaninsberghe2020diar, +author = {VanInsberghe, David and Elsherbini, Joseph A. and Varian, Bernard and Poutahidis, Theofilos and Erdman, Susan and Polz, Martin F.}, +doi = {10.1038/s41564-020-0668-2}, +journal = {Nat. Microbiol.}, +number = {4}, +pages = {642--650}, +pmid = {32042128}, +publisher = {Springer US}, +title = {{Diarrhoeal events can trigger long-term Clostridium difficile colonization with recurrent blooms}}, +url = {http://dx.doi.org/10.1038/s41564-020-0668-2}, +volume = {5}, +year = {2020} +} @article{vieirasilva2019quan, author = {Vieira-Silva, Sara and Sabino, Jo{\~{a}}o and Valles-Colomer, Mireia and Falony, Gwen and Kathagen, Gunter and Caenepeel, Clara and Cleynen, Isabelle and van der Merwe, Schalk and Vermeire, S{\'{e}}verine and Raes, Jeroen}, doi = {10.1038/s41564-019-0483-9}, @@ -367,6 +593,30 @@ @article{vieirasilva2019quan volume = {4}, year = {2019} } +@article{wallace2021thed, +author = {Wallace, Megan A. and Coffman, Kelsey A. and Gilbert, Cl{\'{e}}ment and Ravindran, Sanjana and Albery, Gregory F. and Abbott, Jessica and Argyridou, Eliza and Bellosta, Paola and Betancourt, Andrea J. and Colinet, Herv{\'{e}} and Eric, Katarina and Glaser-Schmitt, Amanda and Grath, Sonja and Jelic, Mihailo and Kankare, Maaria and Kozeretska, Iryna and Loeschcke, Volker and Montchamp-Moreau, Catherine and Ometto, Lino and Onder, Banu Sebnem and Orengo, Dorcas J. and Parsch, John and Pascual, Marta and Patenkovic, Aleksandra and Puerma, Eva and Ritchie, Michael G. and Rota-Stabelli, Omar and Schou, Mads Fristrup and Serga, Svitlana V. and Stamenkovic-Radak, Marina and Tanaskovic, Marija and Veselinovic, Marija Savic and Vieira, Jorge and Vieira, Cristina P. and Kapun, Martin and Flatt, Thomas and Gonz{\'{a}}lez, Josefa and Staubach, Fabian and Obbard, Darren J.}, +doi = {10.1093/ve/veab031}, +journal = {Virus Evol.}, +month = {jan}, +number = {1}, +pages = {1--23}, +title = {{The discovery, distribution, and diversity of DNA viruses associated with Drosophila melanogaster in Europe}}, +url = {https://academic.oup.com/ve/article/doi/10.1093/ve/veab031/6207981}, +volume = {7}, +year = {2021} +} +@article{wang2021curr, +author = {Wang, Xiaofan and Howe, Samantha and Deng, Feilong and Zhao, Jiangchao}, +doi = {10.3390/microorganisms9091797}, +journal = {Microorganisms}, +month = {aug}, +number = {9}, +pages = {1797}, +title = {{Current Applications of Absolute Bacterial Quantification in Microbiome Studies and Decision-Making Regarding Different Biological Questions}}, +url = {https://www.mdpi.com/2076-2607/9/9/1797}, +volume = {9}, +year = {2021} +} @article{washburne2017phyl, author = {Washburne, Alex D. and Silverman, Justin D. and Leff, Jonathan W. and Bennett, Dominic J. and Darcy, John L. and Mukherjee, Sayan and Fierer, Noah and David, Lawrence A.}, doi = {10.7717/peerj.2969}, @@ -424,6 +674,19 @@ @article{yeh2018taxo volume = {3}, year = {2018} } +@article{zemb2020abso, +author = {Zemb, Olivier and Achard, Caroline S and Hamelin, Jerome and {De Almeida}, Marie‐L{\'{e}}a and Gabinaud, B{\'{e}}atrice and Cauquil, Laurent and Verschuren, Lisanne M.G. and Godon, Jean-jacques}, +doi = {10.1002/mbo3.977}, +journal = {Microbiologyopen}, +keywords = {absolute abundance,blagnac cedex,france,inra,lallemand sas,lbe,spike-in,university of montpellier}, +month = {mar}, +number = {3}, +pages = {1--21}, +title = {{Absolute quantitation of microbes using 16S rRNA gene metabarcoding: A rapid normalization of relative abundances by quantitative PCR targeting a 16S rRNA gene spike‐in standard}}, +url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/mbo3.977}, +volume = {9}, +year = {2020} +} @incollection{zhao2021alog, author = {Zhao, Ni and Satten, Glen A}, booktitle = {Stat. Anal. Microbiome Data},