diff --git a/phylonco-lphy/examples/gt16ReadCountModel.lphy b/phylonco-lphy/examples/gt16ReadCountModel.lphy new file mode 100644 index 0000000..e07e9c8 --- /dev/null +++ b/phylonco-lphy/examples/gt16ReadCountModel.lphy @@ -0,0 +1,28 @@ +n = 4; +l = 5; + +Θ ~ LogNormal(meanlog=-2.0, sdlog=1.0); +ψ ~ Coalescent(n=n, theta=Θ); +π ~ Dirichlet(conc=[3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0]); +rates ~ Dirichlet(conc=[1.0, 2.0, 1.0, 1.0, 2.0, 1.0]); +Q = gt16(freq=π, rates=rates); // construct the GT16 instantaneous rate matrix +A ~ PhyloCTMC(L=l, Q=Q, tree=ψ, dataType=phasedGenotype()); +epsilon ~ Beta(alpha=2, beta=18); +delta ~ Beta(alpha=1.5, beta=4.5); +//epsilon = 0.01; +//delta = 0.24; +// cov = [rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5)]; +meanT = 2.3; // average coverage = lognormal(meanT, sdT) +sdT = 0.1; +meanV = 0.1; // variance coverage = lognormal(meanV, sdV) +sdV = 0.05; +meanS = 0.04; // cell-specific scaling +sdS = 0.001; +t ~ LogNormal(meanlog= meanT, sdlog= sdT); +v ~ LogNormal(meanlog= meanV, sdlog= sdV); +s ~ LogNormal(meanlog= meanS, sdlog= sdS,repe); +alpha ~ AlphaSimulator(l= l, n=n, delta= delta); +coverage ~ CoverageSimulator(alpha= alpha, t= t, v= v, s= s); + + +r ~ ReadCountModel(D=A, epsilon=epsilon, alpha=alpha, coverage=coverage); \ No newline at end of file diff --git a/phylonco-lphy/src/main/java/module-info.java b/phylonco-lphy/src/main/java/module-info.java index ef3ae14..432fad0 100644 --- a/phylonco-lphy/src/main/java/module-info.java +++ b/phylonco-lphy/src/main/java/module-info.java @@ -8,6 +8,7 @@ exports phylonco.lphy.evolution.alignment; exports phylonco.lphy.evolution.datatype; exports phylonco.lphy.evolution.substitutionmodel; + exports phylonco.lphy.evolution.readcountmodel; // declare what service interface the provider intends to use diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/datatype/PhasedGenotype.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/datatype/PhasedGenotype.java index 0656ba0..e088663 100644 --- a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/datatype/PhasedGenotype.java +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/datatype/PhasedGenotype.java @@ -149,6 +149,15 @@ public String toString() { return "GT16"; // trimmed in studio if too long } + /** + * + * @param index index of state + * @return canonical state at index + */ + public static PhasedGenotypeState getCanonicalState(int index) { + return CANONICAL_STATES[index]; + } + /** * * @param stateIndex the phased genotype state index diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/AlphaSimulator.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/AlphaSimulator.java new file mode 100644 index 0000000..f7c1732 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/AlphaSimulator.java @@ -0,0 +1,75 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.core.model.GenerativeDistribution; +import lphy.core.model.RandomVariable; +import lphy.core.model.Value; +import lphy.core.model.annotation.ParameterInfo; + +import java.util.Map; + +public class AlphaSimulator implements GenerativeDistribution { + private Value l; + private Value n; + private Value delta; + + private RandomVariable alpha; + + + + public static final String lParamName = "l"; + public static final String nParamName = "n"; + public static final String deltaParamName = "delta"; + + + + + private AlphaSimulator( + @ParameterInfo(name = lParamName, description = "the length of sequencing") Value l, + @ParameterInfo(name = nParamName, description = "the number of cells.") Value n, + @ParameterInfo(name = deltaParamName, description = "allelic dropout error probability.") Value delta + + ){ + super(); + this.l = l; + this.n = n; + this.delta = delta; + + + + } + + @Override + public RandomVariable sample() { + Multinomial multinomialAlpha; + Double[] proAlpha = {this.delta.value(), 1-this.delta.value()}; + Value proA = new Value<>("proA", proAlpha); + Value numberA = new Value<>("numberA", 1); + multinomialAlpha = new Multinomial(numberA, proA); + + Integer[][] alp = new Integer[n.value()][l.value()]; + for (int i = 0; i < n.value(); i++) { + for (int j = 0; j < l.value(); j++) { + Value alpha1 = multinomialAlpha.sample(); + if (alpha1.value()[0] == 1) { + alp[i][j] = 1; + } else alp[i][j] = 2; + } + } + + alpha = new RandomVariable<>("alpha", alp, this); + return alpha; + } + + + @Override + public Map getParams() { + return Map.of( + lParamName,l, + nParamName,n, + deltaParamName,delta + ); + } + + + +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/CoverageSimulator.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/CoverageSimulator.java new file mode 100644 index 0000000..3dcf33d --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/CoverageSimulator.java @@ -0,0 +1,80 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.base.distribution.NegativeBinomial; +import lphy.base.evolution.alignment.Alignment; +import lphy.core.model.GenerativeDistribution; +import lphy.core.model.RandomVariable; +import lphy.core.model.Value; +import lphy.core.model.annotation.ParameterInfo; + +import java.util.Map; + +public class CoverageSimulator implements GenerativeDistribution { + private NegativeBinomial negativeBinomial; + private Value alpha; + private Value t; + private Value v; + private Value s; + private RandomVariable coverage; + + + + + public static final String alphaParamName = "alpha"; + public static final String tParamName = "t"; + public static final String vParamName = "v"; + public static final String sParamName = "s"; + + + public CoverageSimulator( + @ParameterInfo(name = alphaParamName, description = "alpha, allelic dropout events for each cell at each site.") Value alpha, + @ParameterInfo(name = tParamName, description = "t") Value t, + @ParameterInfo(name = vParamName, description = "v") Value v, + @ParameterInfo(name = sParamName, description = "s") Value s + + ) { + super(); + this.alpha = alpha; + this.t = t; + this.v = v; + this.s = s; + + + } + + @Override + public RandomVariable sample(){ + int n = alpha.value().length; + int l = alpha.value()[0].length; + Value r; + Value p; + Integer[][] cov = new Integer[n][l]; + + for (int i = 0; i < n; i++) { + for (int j = 0; j < l; j++) { + Double mean = (double) (this.alpha.value()[i][j] * this.t.value() * this.s.value()[i]); + Double variance = mean + (Math.pow((double) alpha.value()[i][j], 2.0) * v.value() * Math.pow(this.s.value()[i], 2.0)); + double pValue = mean / variance; + float rFloat = (float) (Math.pow(mean, 2) / (variance - mean)); + int rValue = Math.round(rFloat); + r = new Value<>("r", rValue); + p = new Value<>("p", pValue); + NegativeBinomial negativeBinomial = new NegativeBinomial(r, p); + cov[i][j] = negativeBinomial.sample().value(); + } + } + coverage = new RandomVariable<>("coverage", cov, this); + return coverage; + } + + @Override + public Map getParams() { + return Map.of( + alphaParamName, alpha, + tParamName, t, + vParamName, v, + sParamName, s + ); + } + +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/Multinomial.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/Multinomial.java new file mode 100644 index 0000000..eb50454 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/Multinomial.java @@ -0,0 +1,97 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.base.distribution.Binomial; +import lphy.base.distribution.DistributionConstants; +import lphy.core.model.GenerativeDistribution; +import lphy.core.model.RandomVariable; +import lphy.core.model.Value; +import lphy.core.model.annotation.ParameterInfo; + +import java.util.Arrays; +import java.util.Map; +import java.util.TreeMap; + +public class Multinomial implements GenerativeDistribution { + + private Value n; + private Value p; + private Value q; + + public Multinomial( + @ParameterInfo(name = DistributionConstants.nParamName, description = "number of trials.") Value n, + @ParameterInfo(name = DistributionConstants.pParamName, description = "event probabilities.") Value p) { + super(); + this.n = n; + this.p = p; + + + } + + public Multinomial(){} + + + @Override + public RandomVariable sample() { + //org.apache.mahout.math.random.Multinomial multinomial = new org.apache.mahout.math.random.Multinomial(); + Double[] q1 = new Double[this.p.value().length]; + double cum_prob = 1.0; + for (int i = 0; i < this.p.value().length; i++) { + if (p.value()[i] == 0.0) + q1[i] = 0.0; + else { + q1[i] = this.p.value()[i] / cum_prob; + cum_prob = cum_prob - this.p.value()[i]; + } + } + q = new Value<>("q", q1); + int sampleSize = n.value(); + Integer[] result = new Integer[p.value().length]; + for (int i = 0; i < p.value().length-1; i++) { + Value pro = new Value("pro", this.q.value()[i]); + Value sampleS = new Value<>("sample", sampleSize); + Binomial binomial = new Binomial(pro, sampleS); + int rand = binomial.sample().value(); + result[i] = rand; + sampleSize -= rand; + + if (sampleSize == 0) { + Arrays.fill(result, i + 1, result.length, 0); + break; + } + } + + result[p.value().length-1] = sampleSize; + return new RandomVariable<>(null, result, this); + } + + + @Override + public Map getParams() { + return new TreeMap<>() {{ + put(DistributionConstants.nParamName, n); + put(DistributionConstants.pParamName, p); + }}; + } + + @Override + public void setParam(String paramName, Value value) { + switch (paramName) { + case DistributionConstants.nParamName: + n = value; + break; + case DistributionConstants.pParamName: + p = value; + break; + default: + throw new RuntimeException("Unrecognised parameter name: " + paramName); + } + } + + + + + + public String toString() { + return getName(); + } + } diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCount.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCount.java new file mode 100644 index 0000000..a847af2 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCount.java @@ -0,0 +1,34 @@ +package phylonco.lphy.evolution.readcountmodel; + +public class ReadCount { + + public final static int NUM_NUCLEOTIDES = 4; + + private final String nucleotides = "ACGT"; + + private int[] readCounts; + + public ReadCount(int[] readCounts) { + this.readCounts = readCounts; + } + + public int[] getReadCounts() { + return readCounts; + } + + public int getCount(String nucleotide) { + String capitalizedNucleotide = nucleotide.toUpperCase(); + int index = nucleotides.indexOf(capitalizedNucleotide); + if (index != -1) { + return readCounts[index]; + } else { + return 0; + } + } + + public int getNumStates() { + return NUM_NUCLEOTIDES; + } + + +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountData.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountData.java new file mode 100644 index 0000000..d2d28e5 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountData.java @@ -0,0 +1,67 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.base.evolution.Taxa; +import lphy.base.evolution.alignment.TaxaCharacterMatrix; +import lphy.core.model.datatype.Array2DUtils; + +public class ReadCountData implements TaxaCharacterMatrix { + + ReadCount[][] readCountDataMatrix; // taxa, position + Taxa taxa; + + public ReadCountData(Taxa taxa, ReadCount[][] readCountDataMatrix) { + this.taxa = taxa; + this.readCountDataMatrix = readCountDataMatrix; + } + + @Override + public ReadCount getState(String taxon, int column) { + return readCountDataMatrix[taxa.indexOfTaxon(taxon)][column]; + } + + public ReadCount getState(int taxonIndex, int column) { + return readCountDataMatrix[taxonIndex][column]; + } + + @Override + public Class getComponentType() { + return ReadCount.class; + } + + @Override + public String toJSON() { + return ""; + } + + @Override + public Taxa getTaxa() { + return taxa; + } + + @Override + public Integer nchar() { + return readCountDataMatrix[0].length; + } + + @Override + public int getDimension() { + return nchar() * taxa.getDimension() * ReadCount.NUM_NUCLEOTIDES; + } + + @Override + public String toString() { + String result = ""; + int n = getTaxa().getDimension();; + int l = nchar(); + for (int i = 0; i < n; i++) { + for (int j = 0; j < l; j++) { + int countA = readCountDataMatrix[i][j].getCount("A"); + int countC = readCountDataMatrix[i][j].getCount("C"); + int countG = readCountDataMatrix[i][j].getCount("G"); + int countT = readCountDataMatrix[i][j].getCount("T"); + result += String.format("A: %d, C: %d, G: %d, T: %d; \t", countA, countC, countG, countT); + } + } + return result; + } +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountDataType.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountDataType.java new file mode 100644 index 0000000..39f8d9e --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountDataType.java @@ -0,0 +1,27 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.core.model.Value; + +public class ReadCountDataType extends Value { + + public ReadCountDataType(String id, ReadCountData value) { + super(id, value); + } + + @Override + public String toString() { + String result = ""; + int n = value.getTaxa().getDimension();; + int l = value.nchar(); + for (int i = 0; i < n; i++) { + for (int j = 0; j < l; j++) { + int countA = value.readCountDataMatrix[i][j].getCount("A"); + int countC = value.readCountDataMatrix[i][j].getCount("C"); + int countG = value.readCountDataMatrix[i][j].getCount("G"); + int countT = value.readCountDataMatrix[i][j].getCount("T"); + result += String.format("A: %d, C: %d, G: %d, T: %d; \t", countA, countC, countG, countT); + } + } + return result; + } +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountModel.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountModel.java new file mode 100644 index 0000000..5ae12c0 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountModel.java @@ -0,0 +1,384 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.base.distribution.LogNormal; +import lphy.base.distribution.NegativeBinomial; +import lphy.base.evolution.Taxa; +import lphy.base.evolution.alignment.Alignment; +import lphy.core.model.GenerativeDistribution; +import lphy.core.model.RandomVariable; +import lphy.core.model.Value; +import lphy.core.model.annotation.ParameterInfo; +import phylonco.lphy.evolution.datatype.PhasedGenotype; +import phylonco.lphy.evolution.datatype.PhasedGenotypeState; + +import java.util.Map; +// lphy script +// n = 40; +// l = 200; +// t ~ LogNormal(mean, sd); +// v ~ LogNormal(mean2, sd2); +// ones ~ rep(1, n*l); +// alpha ~ Multinomial(n=1, p=[delta], replicates=n*l) + ones; // alpha_ij = 1 or 2 +// sPrime ~ Lognormal(sMean, sStdDev, replicates=n); +// s ~ transpose(rep(sPrime, l)); +// mean ~ alpha * rep(t, n*l) * s; +// variance ~ +// p ~ // NegativeBinomial p +// r ~ // NegativeBinomial r +// cov ~ NegativeBinomial(...); +// cov ~ reshape(cov, size=[n, l]); +// data ~ PhyloCTMC(); +// Reads ~ ReadCountModel(data, cov, delta, epsilon); + +public class ReadCountModel implements GenerativeDistribution { + + private Value coverage; + private Value data; + //private Value readCount; + private Multinomial multinomial = new Multinomial(); + private Value epsilon; +// private Value delta; + private RandomVariable readCountData; + private Value alpha; +// private Multinomial multinomialAlpha; +// private Value t; +// private Value meanT; +// private Value sdT; +// private Value v; +// private Value meanV; +// private Value sdV; +// private Value s; +// private Value meanS; +// private Value sdS; +// private LogNormal logNormalT; +// private LogNormal logNormalV; +// private LogNormal logNormalS; + + + + public static final String dParamName = "D"; + public static final String covParamName = "coverage"; + public static final String alphaParamName = "alpha"; +// public static final String deltaParamName = "delta"; + public static final String epsilonParamName = "epsilon"; +// public static final String meanTParamName = "meanT"; +// public static final String sdTParamName = "sdT"; +// public static final String meanVParamName = "meanV"; +// public static final String sdVParamName = "sdV"; +// public static final String meanSParamName = "meanS"; +// public static final String sdSParamName = "sdS"; + + + public ReadCountModel( + @ParameterInfo(name = dParamName, description = "genotype alignment.") Value data, + @ParameterInfo(name = covParamName, description = "coverage.") Value coverage, + @ParameterInfo(name = alphaParamName, description = "allelic dropout events for each cell at each site.") Value alpha, +// @ParameterInfo(name = deltaParamName, description = "allelic dropout error probability.") Value delta, + @ParameterInfo(name = epsilonParamName, description = "sequencing and amplification error probability.") Value epsilon +// @ParameterInfo(name = meanTParamName, description = "sequencing and amplification error probability.") Value meanT, +// @ParameterInfo(name = sdTParamName, description = "sequencing and amplification error probability.") Value sdT, +// @ParameterInfo(name = meanVParamName, description = "sequencing and amplification error probability.") Value meanV, +// @ParameterInfo(name = sdVParamName, description = "sequencing and amplification error probability.") Value sdV, +// @ParameterInfo(name = meanSParamName, description = "sequencing and amplification error probability.") Value meanS, +// @ParameterInfo(name = sdSParamName, description = "sequencing and amplification error probability.") Value sdS + ) { + super(); + this.data = data; + this.coverage = coverage; + this.alpha = alpha; +// this.delta = delta; + this.epsilon = epsilon; +// this.meanT = meanT; +// this.sdT = sdT; +// this.meanV = meanV; +// this.sdV = sdV; +// this.meanS = meanS; +// this.sdS = sdS; + + + } + + // D ~ GT16 genotypes (Alignment) + // t ~ ... + // return ACGT {{1,2,3,4}, {4,5,6,7}, ... } + + @Override + public RandomVariable sample() { + int n = data.value().getTaxa().length(); + int l = data.value().nchar(); +// Value r; +// Value p; + Double eps = epsilon.value(); + Double pA; + Double pC; + Double pG; + Double pT; + Integer[][][] readC = new Integer[n][l][4]; + Integer[][] cov = new Integer[n][l]; + Integer[][] alp = new Integer[n][l]; +// Double[] proAlpha = {this.delta.value(), 1-this.delta.value()}; +// Value proA = new Value<>("proA", proAlpha); +// Value numberA = new Value<>("numberA", 1); +// multinomialAlpha = new Multinomial(numberA, proA); +// logNormalT = new LogNormal(this.meanT, this.sdT, null); +// logNormalV = new LogNormal(this.meanV, this.sdV, null); +// logNormalS = new LogNormal(this.meanS, this.sdS, null); +// t = logNormalT.sample(); +// v = logNormalV.sample(); + + + +/* for (int i = 0; i < n; i++) { + this.s = logNormalS.sample(); + for (int j = 0; j < l; j++) { + Value alpha1 = multinomialAlpha.sample(); + if (alpha1.value()[0] == 1){ + alp[i][j] = 1; + }else alp[i][j] = 2; + + Double mean = alp[i][j] * this.t.value() * this.s.value(); + Double variance = mean + (Math.pow((double) alp[i][j], 2.0) * v.value() * Math.pow((this.s.value()), 2.0)); + double pValue = mean / variance; + float rFloat = (float) (Math.pow(mean, 2) / (variance - mean)); + int rValue = Math.round(rFloat); + r = new Value<>("r", rValue); + p = new Value<>("p", pValue); + NegativeBinomial negativeBinomial = new NegativeBinomial(r, p); + cov[i][j] = negativeBinomial.sample().value(); + } + } + coverage = new Value<>("coverage", cov); + alpha = new Value<>("alpha", alp); +**/ + + for (int i = 0; i < n; i++) { + for (int j = 0; j < l; j++) { + int stateIndex = data.value().getState(i, j); + PhasedGenotypeState genotypeState = PhasedGenotype.getCanonicalState(stateIndex); + String genotype = genotypeState.getFullName();//get genotype + Integer cov_ij = coverage.value()[i][j]; + Double[] proMatrix; + Value cover = new Value<>("cover", cov_ij); + multinomial.setParam("n", cover); + if (genotype.equals("AA")){ + pA = 1-eps; pC = eps/3; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("AC") || genotype.equals("CA")){ + if (alpha.value()[i][j] == 1){ + if (Math.random() < 0.5){ + pA = 1-eps; pC = eps/3; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + }else { + pA = eps/3; pC = 1-eps; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + }else { + pA = 0.5 - eps / 3; pC = 0.5 - eps / 3; pG = eps / 3; pT = eps / 3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + } else if (genotype.equals("AG") || genotype.equals("GA")){ + if (alpha.value()[i][j] == 1){ + if (Math.random() < 0.5){ + pA = 1-eps; pC = eps/3; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + }else { + pA = eps/3; pC = eps/3; pG = 1-eps; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + }else { + pA = 0.5-eps/3; pC = eps/3; pG = 0.5-eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + } else if (genotype.equals("AT") || genotype.equals("TA")){ + if (alpha.value()[i][j] == 1){ + if (Math.random() < 0.5){ + pA = 1-eps; pC = eps/3; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + }else { + pA = eps/3; pC = eps/3; pG = eps/3; pT = 1-eps; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + }else { + pA = 0.5 - eps / 3; pC = eps / 3; pG = eps / 3; pT = 0.5 - eps / 3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + } else if (genotype.equals("CC")){ + pA = eps/3; pC = 1-eps; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + else if (genotype.equals("CG") || genotype.equals("GC")){ + if (alpha.value()[i][j] == 1){ + if (Math.random() < 0.5){ + pA = eps/3; pC = 1-eps; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + }else { + pA = eps/3; pC = eps/3; pG = 1-eps; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + } else { + pA = eps / 3; pC = 0.5 - eps / 3; pG = 0.5 - eps / 3; pT = eps / 3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + } else if (genotype.equals("CT") || genotype.equals("TC")){ + if (alpha.value()[i][j] == 1){ + if (Math.random() < 0.5){ + pA = eps/3; pC = 1-eps; pG = eps/3; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + }else { + pA = eps/3; pC = eps/3; pG = eps/3; pT = 1-eps; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + } else { + pA = eps / 3; pC = 0.5 - eps / 3; pG = eps / 3; pT = 0.5 - eps / 3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + } else if (genotype.equals("GG")){ + pA = eps/3; pC = eps/3; pG = 1-eps; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + else if (genotype.equals("GT") || genotype.equals("TG")){ + if (alpha.value()[i][j] == 1){ + if (Math.random() < 0.5){ + pA = eps/3; pC = eps/3; pG = 1-eps; pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + }else { + pA = eps/3; pC = eps/3; pG = eps/3; pT = 1-eps; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + }else { + pA = eps / 3; pC = eps / 3; pG = 0.5 - eps / 3; pT = 0.5 - eps / 3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + + } else if (genotype.equals("TT")){ + pA = eps/3; pC = eps/3; pG = eps/3; pT = 1-eps; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + } + + } + //readCount = new Value<>("readCount", readC); + Taxa taxa = data.value().getTaxa(); + ReadCount[][] readCountMatrix = new ReadCount[n][l]; + for (int i = 0; i < n; i++) { + for (int j = 0; j < l; j++) { + System.out.println("cell = " + j + ", site = " + i); + System.out.println("coverage = " + coverage.value()[i][j]); + int stateIndex = data.value().getState(i, j); + PhasedGenotypeState genotypeState = PhasedGenotype.getCanonicalState(stateIndex); + String genotype = genotypeState.getFullName(); + System.out.println("genotype = " + genotype); + System.out.println("alpha = " + alpha.value()[i][j]); + int[] values = new int[ReadCount.NUM_NUCLEOTIDES]; + for (int k = 0; k < ReadCount.NUM_NUCLEOTIDES; k++) { + values[k] = (int) readC[i][j][k]; + System.out.print(values[k] + "\t"); + } + System.out.println(); + readCountMatrix[i][j] = new ReadCount(values); + + } + } + readCountData = new RandomVariable<>("readCountData", new ReadCountData(taxa, readCountMatrix), this); + + return readCountData; + } + + @Override + public Map getParams() { + return Map.of( + dParamName, data, + covParamName, coverage, + alphaParamName, alpha, +// deltaParamName, delta, + epsilonParamName, epsilon +// meanTParamName,meanT, +// sdTParamName,sdT, +// meanVParamName,meanV, +// sdVParamName,sdV, +// meanSParamName,meanS, +// sdSParamName,sdS + ); + } +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountSimulator.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountSimulator.java new file mode 100644 index 0000000..3870a09 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/ReadCountSimulator.java @@ -0,0 +1,353 @@ +package phylonco.lphy.evolution.readcountmodel; + + +import lphy.base.distribution.LogNormal; +import lphy.base.distribution.NegativeBinomial; +import lphy.base.distribution.Normal; +import lphy.base.evolution.alignment.Alignment; +import lphy.base.evolution.coalescent.Coalescent; +import lphy.base.evolution.likelihood.PhyloCTMC; +import lphy.base.evolution.tree.TimeTree; +import lphy.core.model.Value; +import phylonco.lphy.evolution.datatype.PhasedGenotype; +import phylonco.lphy.evolution.datatype.PhasedGenotypeState; +import phylonco.lphy.evolution.substitutionmodel.GT16; + +public class ReadCountSimulator { + private Value n; + private Value l; + private Value r; + private Value p; + private Value M; + private Value S; + private Value theta; + private Value psi; + private Value coverage; + private Value data; + private Value readCount; + private Multinomial multinomial = new Multinomial(); + private Value delta; + //private Value meanDelta; + //private Value sdDelta; + private Value epsilon; + //private Value meanEpsilon; + //private Value sdEpsilon; + private Value t; + private Value meanT; + private Value sdT; + private Value v; + private Value meanV; + private Value sdV; + private Value s; + private Value meanS; + private Value sdS; + //private Normal normalDelta; + //private Normal normalEpsilon; + private LogNormal logNormalT; + private LogNormal logNormalV; + private LogNormal logNormalS; + private Value alpha; + private Multinomial multinomialAlpha; + //Maybe generate all the Alphas one time. + + + + public ReadCountSimulator(Integer n, Integer l, Double M, Double S, Double delta, Double epsilon, + Number meanT, Number sdT, Number meanV, Number sdV, Number meanS, Number sdS){ + this.n = new Value<>("n", n); + this.l = new Value<>("l", l); + this.M = new Value<>("M", M); + this.S = new Value<>("S", S); + //this.meanDelta = new Value<>("meanDelta", meanDelta); + //this.sdDelta = new Value<>("sdDelta", sdDelat); + //normalDelta = new Normal(this.meanDelta, this.sdDelta); + //delta = normalDelta.sample(); + //this.meanEpsilon = new Value<>("meanEpsilon", meanEpsilon); + //this.sdEpsilon = new Value<>("sdEpsilon", sdEpsilon); + //normalEpsilon = new Normal(this.meanEpsilon, this.sdEpsilon); + //epsilon = normalEpsilon.sample(); + this.delta = new Value<>("delta", delta); + this.epsilon = new Value<>("epsilon", epsilon); + this.meanT = new Value<>("meanT", meanT); + this.sdT = new Value<>("sdT", sdT); + logNormalT = new LogNormal(this.meanT, this.sdT, null); + t = logNormalT.sample(); + this.meanV = new Value<>("meanV", meanV); + this.sdV = new Value<>("sdV", sdV); + logNormalV = new LogNormal(this.meanV, this.sdV, null); + v = logNormalV.sample(); + this.meanS = new Value<>("meanS", meanS); + this.sdS = new Value<>("sdS", sdS); + logNormalS = new LogNormal(this.meanS, this.sdS, null); + Double[] proAlpha = {this.delta.value(), 1-this.delta.value()}; + Value proA = new Value<>("proA", proAlpha); + Value numberA = new Value<>("numberA", 1); + multinomialAlpha = new Multinomial(numberA, proA); + + } + + + + public void simulateReadCount() { + simulateData(); + simulateAlpha(); + simulateCoverage(); + Integer[][][] readC = new Integer[n.value()][l.value()][4]; + + + for (int i = 0; i < n.value(); i++) { + for (int j = 0; j < l.value(); j++) { + int stateIndex = data.value().getState(i, j); + PhasedGenotypeState genotypeState = PhasedGenotype.getCanonicalState(stateIndex); + String genotype = genotypeState.getFullName();//get genotype + Integer cov = coverage.value()[i][j]; + Double[] proMatrix; + Value cover = new Value<>("cover", cov); + multinomial.setParam("n", cover); + + if (genotype.equals("AA")){ + Double eps = epsilon.value(); + Double pA = 1-eps; + Double pC = eps/3; + Double pG = eps/3; + Double pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("AC") || genotype.equals("CA")){ + Double eps = epsilon.value(); + Double pA = 0.5-eps/3; + Double pC = 0.5-eps/3; + Double pG = eps/3; + Double pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("AG") || genotype.equals("GA")){ + Double eps = epsilon.value(); + Double pA = 0.5-eps/3; + Double pC = eps/3; + Double pG = 0.5-eps/3; + Double pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("AT") || genotype.equals("TA")){ + Double eps = epsilon.value(); + Double pA = 0.5-eps/3; + Double pC = eps/3; + Double pG = eps/3; + Double pT = 0.5-eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("CC")){ + Double eps = epsilon.value(); + Double pA = eps/3; + Double pC = 1-eps; + Double pG = eps/3; + Double pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("CG") || genotype.equals("GC")){ + Double eps = epsilon.value(); + Double pA = eps/3; + Double pC = 0.5-eps/3; + Double pG = 0.5-eps/3; + Double pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("CT") || genotype.equals("TC")){ + Double eps = epsilon.value(); + Double pA = eps/3; + Double pC = 0.5-eps/3; + Double pG = eps/3; + Double pT = 0.5-eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("GG")){ + Double eps = epsilon.value(); + Double pA = eps/3; + Double pC = eps/3; + Double pG = 1-eps; + Double pT = eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("GT") || genotype.equals("TG")){ + Double eps = epsilon.value(); + Double pA = eps/3; + Double pC = eps/3; + Double pG = 0.5-eps/3; + Double pT = 0.5-eps/3; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } else if (genotype.equals("TT")){ + Double eps = epsilon.value(); + Double pA = eps/3; + Double pC = eps/3; + Double pG = eps/3; + Double pT = 1-eps; + proMatrix = new Double[]{pA, pC, pG, pT}; + Value proMat = new Value<>("proMatrix", proMatrix); + multinomial.setParam("p", proMat); + readC[i][j] = multinomial.sample().value(); + } + + } + + } + readCount = new Value<>("readCount", readC); + + } + + + private void simulateData() { + + Double equalRate = Double.valueOf(1.0/6); + Double[] ratesValue = new Double[]{ + equalRate, equalRate, equalRate, + equalRate, equalRate, equalRate}; + + // Dirichlet rates for GT16 substitution model + // rates ~ Dirichlet(conc=[1.0, 2.0, 1.0, 1.0, 2.0, 1.0]); + Value gt16Rates = new Value<>("gt16Rates", ratesValue); + Double equalFreqRate = Double.valueOf(1.0/16); + Double[] gt16FreqsValue = new Double[]{ + equalFreqRate, equalFreqRate, equalFreqRate, equalFreqRate, + equalFreqRate, equalFreqRate, equalFreqRate, equalFreqRate, + equalFreqRate, equalFreqRate, equalFreqRate, equalFreqRate, + equalFreqRate, equalFreqRate, equalFreqRate, equalFreqRate}; + + // Dirichlet frequencies + // pi ~ Dirichlet(conc=[3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0]); + Value gt16Freqs = new Value<>("gt16Freqs", gt16FreqsValue); + Value meanRate = new Value<>("meanRate", Double.valueOf(1.0)); + Value Q = new GT16(gt16Rates, gt16Freqs, meanRate).apply(); // GT16 + + LogNormal logNormal = new LogNormal(this.M, this.S, null); + theta = logNormal.sample(); + Coalescent coalescent = new Coalescent(theta, this.n, null); + psi = coalescent.sample(); + PhyloCTMC phyloCTMC = new PhyloCTMC(psi, null, null, Q, + null, null, this.l, null, null); + data = phyloCTMC.sample(); // alignment genotypes + + } + + + private void simulateCoverage() { + Integer[][] cov = new Integer[this.n.value()][this.l.value()]; + +// Integer[] kValues = new Integer[this.n.value() * this.l.value()]; +// for (int i = 0; i < kValues.length; i++) { +// // alpha = 1 (dropout) or 2 (no dropout) +// // r and p in terms of mu_ij and sigma_ij +// NegativeBinomial negativeBinomial = new NegativeBinomial(this.r, this.p); +// kValues[i] = negativeBinomial.sample().value(); +// } +// int index = 0; + for (int i = 0; i < n.value(); i++) { + this.s = logNormalS.sample(); + for (int j = 0; j < l.value(); j++) { + Double mean = this.alpha.value()[i][j] * this.t.value() * this.s.value(); + Double variance = mean + (Math.pow((double) this.alpha.value()[i][j], 2.0) * v.value() * Math.pow((this.s.value()), 2.0)); + double p = mean / variance; + float rFloat = (float) (Math.pow(mean, 2) / (variance - mean)); + int r = Math.round(rFloat); + this.r = new Value<>("r", r); + this.p = new Value<>("p", p); + NegativeBinomial negativeBinomial = new NegativeBinomial(this.r, this.p); + cov[i][j] = negativeBinomial.sample().value(); + } + } + + coverage = new Value<>("coverage", cov); + + } + + + private void simulateAlpha() { + Integer[][] alp = new Integer[this.n.value()][this.l.value()]; + for (int i = 0; i < n.value(); i++) { + for (int j = 0; j < l.value(); j++) { + Value alpha1 = multinomialAlpha.sample(); + if (alpha1.value()[0] == 1){ + alp[i][j] = 1; + }else alp[i][j] = 2; + } + } + alpha = new Value<>("alpha", alp); + } + + + + public void printData() { + Alignment alignment = data.value(); + int numCells = alignment.ntaxa(); + int numSites = alignment.nchar(); + System.out.println("Alignment: "); + for (int i = 0; i < numCells; i++) { + for (int j = 0; j < numSites; j++) { + int stateIndex = alignment.getState(i, j); + PhasedGenotypeState genotypeState = PhasedGenotype.getCanonicalState(stateIndex); + String genotype = genotypeState.getFullName(); + System.out.print(genotype + "\t"); + } + System.out.println(); + } + + } + + + public void printCoverage() { + System.out.println("Coverage: "); + for (int i = 0; i < coverage.value().length; i++) { + for (int j = 0; j < coverage.value()[i].length; j++) { + System.out.print(coverage.value()[i][j] + "\t"); + } + System.out.println(); + } + } + + public void printAlpha() { + System.out.println("Alpha: "); + for (int i = 0; i < alpha.value().length; i++) { + for (int j = 0; j < alpha.value()[i].length; j++) { + System.out.print(alpha.value()[i][j] + "\t"); + } + System.out.println(); + } + } + + public Value getCoverage() { + return this.coverage; + } + + public Value getData() { + return this.data; + } + + public Value getAlpha() { + return this.alpha; + } + + public Value getReadCount() { + return this.readCount; + } + + +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/TestReadcountModel.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/TestReadcountModel.java new file mode 100644 index 0000000..653b5d9 --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/TestReadcountModel.java @@ -0,0 +1,5 @@ +package phylonco.lphy.evolution.readcountmodel; + +public class TestReadcountModel { + +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/testReadCount.java b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/testReadCount.java new file mode 100644 index 0000000..604f09d --- /dev/null +++ b/phylonco-lphy/src/main/java/phylonco/lphy/evolution/readcountmodel/testReadCount.java @@ -0,0 +1,45 @@ +package phylonco.lphy.evolution.readcountmodel; + +import lphy.base.evolution.alignment.Alignment; +import lphy.core.model.Value; +import phylonco.lphy.evolution.datatype.PhasedGenotype; +import phylonco.lphy.evolution.datatype.PhasedGenotypeState; + +public class testReadCount { + public static void main(String[] args) { + +// double mean = 5.0; +// double variance = 5.0; +// double p = mean / (variance * variance); +// float rFloat = (float) (Math.pow(mean, 2) / (Math.pow(variance, 2) - mean)); +// int r = Math.round(rFloat); + + ReadCountSimulator readCountSimulator = new ReadCountSimulator(3,20,1.0,1.0, 0.163, 0.002,20, 10,2,1,2.5,1); + readCountSimulator.simulateReadCount(); + Value readCount = readCountSimulator.getReadCount(); + //readCountSimulator.printData(); + //readCountSimulator.printCoverage(); + Value coverage = readCountSimulator.getCoverage(); + Value alpha = readCountSimulator.getAlpha(); + Value data = readCountSimulator.getData(); + + readCountSimulator.printData(); + readCountSimulator.printCoverage(); + readCountSimulator.printAlpha(); + + for (int i = 1; i < coverage.value().length; i++) { + for (int j = 0; j < coverage.value()[i].length; j++) { + int stateIndex = data.value().getState(i, j); + PhasedGenotypeState genotypeState = PhasedGenotype.getCanonicalState(stateIndex); + String genotype = genotypeState.getFullName(); + System.out.println("Cell: " + (i+1) + ", Site: " + (j+1)); + System.out.println("Genotype: " + genotype + ", Alpha: " + alpha.value()[i][j] + ", Coverage: " + coverage.value()[i][j]); + System.out.println("A: " + readCount.value()[i][j][0] + ", C: " + readCount.value()[i][j][1] + ", G: " + readCount.value()[i][j][2] + ", T: " + readCount.value()[i][j][3]); + System.out.println(); + } + } + + + + } +} diff --git a/phylonco-lphy/src/main/java/phylonco/lphy/spi/PhyloncoImpl.java b/phylonco-lphy/src/main/java/phylonco/lphy/spi/PhyloncoImpl.java index f8675b1..1a7d085 100644 --- a/phylonco-lphy/src/main/java/phylonco/lphy/spi/PhyloncoImpl.java +++ b/phylonco-lphy/src/main/java/phylonco/lphy/spi/PhyloncoImpl.java @@ -8,6 +8,8 @@ import phylonco.lphy.evolution.alignment.HomozygousAlignmentDistribution; import phylonco.lphy.evolution.alignment.UnphaseGenotypeAlignment; import phylonco.lphy.evolution.datatype.PhasedGenotypeFunction; +import phylonco.lphy.evolution.readcountmodel.Multinomial; +import phylonco.lphy.evolution.readcountmodel.ReadCountModel; import phylonco.lphy.evolution.substitutionmodel.GT16; import java.util.Arrays; @@ -30,7 +32,9 @@ public PhyloncoImpl() { @Override public List> declareDistributions() { - return Arrays.asList( GT16ErrorModel.class, HomozygousAlignmentDistribution.class); + return Arrays.asList( GT16ErrorModel.class, HomozygousAlignmentDistribution.class, + Multinomial.class, + ReadCountModel.class); } @Override