Skip to content

Commit

Permalink
Implement Read count model #56
Browse files Browse the repository at this point in the history
  • Loading branch information
zjzxiaohei committed May 13, 2024
1 parent ea52dd8 commit 282070a
Show file tree
Hide file tree
Showing 14 changed files with 1,210 additions and 1 deletion.
28 changes: 28 additions & 0 deletions phylonco-lphy/examples/gt16ReadCountModel.lphy
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
n = 4;
l = 5;

Θ ~ LogNormal(meanlog=-2.0, sdlog=1.0);
ψ ~ Coalescent(n=n, theta=Θ);
π ~ Dirichlet(conc=[3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0]);
rates ~ Dirichlet(conc=[1.0, 2.0, 1.0, 1.0, 2.0, 1.0]);
Q = gt16(freq=π, rates=rates); // construct the GT16 instantaneous rate matrix
A ~ PhyloCTMC(L=l, Q=Q, tree=ψ, dataType=phasedGenotype());
epsilon ~ Beta(alpha=2, beta=18);
delta ~ Beta(alpha=1.5, beta=4.5);
//epsilon = 0.01;
//delta = 0.24;
// cov = [rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5)];
meanT = 2.3; // average coverage = lognormal(meanT, sdT)
sdT = 0.1;
meanV = 0.1; // variance coverage = lognormal(meanV, sdV)
sdV = 0.05;
meanS = 0.04; // cell-specific scaling
sdS = 0.001;
t ~ LogNormal(meanlog= meanT, sdlog= sdT);
v ~ LogNormal(meanlog= meanV, sdlog= sdV);
s ~ LogNormal(meanlog= meanS, sdlog= sdS,repe);
alpha ~ AlphaSimulator(l= l, n=n, delta= delta);
coverage ~ CoverageSimulator(alpha= alpha, t= t, v= v, s= s);


r ~ ReadCountModel(D=A, epsilon=epsilon, alpha=alpha, coverage=coverage);
1 change: 1 addition & 0 deletions phylonco-lphy/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
exports phylonco.lphy.evolution.alignment;
exports phylonco.lphy.evolution.datatype;
exports phylonco.lphy.evolution.substitutionmodel;
exports phylonco.lphy.evolution.readcountmodel;


// declare what service interface the provider intends to use
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@ public String toString() {
return "GT16"; // trimmed in studio if too long
}

/**
*
* @param index index of state
* @return canonical state at index
*/
public static PhasedGenotypeState getCanonicalState(int index) {
return CANONICAL_STATES[index];
}

/**
*
* @param stateIndex the phased genotype state index
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package phylonco.lphy.evolution.readcountmodel;

import lphy.core.model.GenerativeDistribution;
import lphy.core.model.RandomVariable;
import lphy.core.model.Value;
import lphy.core.model.annotation.ParameterInfo;

import java.util.Map;

public class AlphaSimulator implements GenerativeDistribution<Integer[][]> {
private Value<Integer> l;
private Value<Integer> n;
private Value<Double> delta;

private RandomVariable<Integer[][]> alpha;



public static final String lParamName = "l";
public static final String nParamName = "n";
public static final String deltaParamName = "delta";




private AlphaSimulator(
@ParameterInfo(name = lParamName, description = "the length of sequencing") Value<Integer> l,
@ParameterInfo(name = nParamName, description = "the number of cells.") Value<Integer> n,
@ParameterInfo(name = deltaParamName, description = "allelic dropout error probability.") Value<Double> delta

){
super();
this.l = l;
this.n = n;
this.delta = delta;



}

@Override
public RandomVariable<Integer[][]> sample() {
Multinomial multinomialAlpha;
Double[] proAlpha = {this.delta.value(), 1-this.delta.value()};
Value<Double[]> proA = new Value<>("proA", proAlpha);
Value<Integer> numberA = new Value<>("numberA", 1);
multinomialAlpha = new Multinomial(numberA, proA);

Integer[][] alp = new Integer[n.value()][l.value()];
for (int i = 0; i < n.value(); i++) {
for (int j = 0; j < l.value(); j++) {
Value<Integer[]> alpha1 = multinomialAlpha.sample();
if (alpha1.value()[0] == 1) {
alp[i][j] = 1;
} else alp[i][j] = 2;
}
}

alpha = new RandomVariable<>("alpha", alp, this);
return alpha;
}


@Override
public Map<String, Value> getParams() {
return Map.of(
lParamName,l,
nParamName,n,
deltaParamName,delta
);
}



}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package phylonco.lphy.evolution.readcountmodel;

import lphy.base.distribution.NegativeBinomial;
import lphy.base.evolution.alignment.Alignment;
import lphy.core.model.GenerativeDistribution;
import lphy.core.model.RandomVariable;
import lphy.core.model.Value;
import lphy.core.model.annotation.ParameterInfo;

import java.util.Map;

public class CoverageSimulator implements GenerativeDistribution<Integer[][]> {
private NegativeBinomial negativeBinomial;
private Value<Integer[][]> alpha;
private Value<Integer> t;
private Value<Integer> v;
private Value<Integer[]> s;
private RandomVariable<Integer[][]> coverage;




public static final String alphaParamName = "alpha";
public static final String tParamName = "t";
public static final String vParamName = "v";
public static final String sParamName = "s";


public CoverageSimulator(
@ParameterInfo(name = alphaParamName, description = "alpha, allelic dropout events for each cell at each site.") Value<Integer[][]> alpha,
@ParameterInfo(name = tParamName, description = "t") Value<Integer> t,
@ParameterInfo(name = vParamName, description = "v") Value<Integer> v,
@ParameterInfo(name = sParamName, description = "s") Value<Integer[]> s

) {
super();
this.alpha = alpha;
this.t = t;
this.v = v;
this.s = s;


}

@Override
public RandomVariable<Integer[][]> sample(){
int n = alpha.value().length;
int l = alpha.value()[0].length;
Value<Integer> r;
Value<Double> p;
Integer[][] cov = new Integer[n][l];

for (int i = 0; i < n; i++) {
for (int j = 0; j < l; j++) {
Double mean = (double) (this.alpha.value()[i][j] * this.t.value() * this.s.value()[i]);
Double variance = mean + (Math.pow((double) alpha.value()[i][j], 2.0) * v.value() * Math.pow(this.s.value()[i], 2.0));
double pValue = mean / variance;
float rFloat = (float) (Math.pow(mean, 2) / (variance - mean));
int rValue = Math.round(rFloat);
r = new Value<>("r", rValue);
p = new Value<>("p", pValue);
NegativeBinomial negativeBinomial = new NegativeBinomial(r, p);
cov[i][j] = negativeBinomial.sample().value();
}
}
coverage = new RandomVariable<>("coverage", cov, this);
return coverage;
}

@Override
public Map<String, Value> getParams() {
return Map.of(
alphaParamName, alpha,
tParamName, t,
vParamName, v,
sParamName, s
);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package phylonco.lphy.evolution.readcountmodel;

import lphy.base.distribution.Binomial;
import lphy.base.distribution.DistributionConstants;
import lphy.core.model.GenerativeDistribution;
import lphy.core.model.RandomVariable;
import lphy.core.model.Value;
import lphy.core.model.annotation.ParameterInfo;

import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;

public class Multinomial implements GenerativeDistribution<Integer[]> {

private Value<Integer> n;
private Value<Double[]> p;
private Value<Double[]> q;

public Multinomial(
@ParameterInfo(name = DistributionConstants.nParamName, description = "number of trials.") Value<Integer> n,
@ParameterInfo(name = DistributionConstants.pParamName, description = "event probabilities.") Value<Double[]> p) {
super();
this.n = n;
this.p = p;


}

public Multinomial(){}


@Override
public RandomVariable<Integer[]> sample() {
//org.apache.mahout.math.random.Multinomial multinomial = new org.apache.mahout.math.random.Multinomial();
Double[] q1 = new Double[this.p.value().length];
double cum_prob = 1.0;
for (int i = 0; i < this.p.value().length; i++) {
if (p.value()[i] == 0.0)
q1[i] = 0.0;
else {
q1[i] = this.p.value()[i] / cum_prob;
cum_prob = cum_prob - this.p.value()[i];
}
}
q = new Value<>("q", q1);
int sampleSize = n.value();
Integer[] result = new Integer[p.value().length];
for (int i = 0; i < p.value().length-1; i++) {
Value<Double> pro = new Value<Double>("pro", this.q.value()[i]);
Value<Integer> sampleS = new Value<>("sample", sampleSize);
Binomial binomial = new Binomial(pro, sampleS);
int rand = binomial.sample().value();
result[i] = rand;
sampleSize -= rand;

if (sampleSize == 0) {
Arrays.fill(result, i + 1, result.length, 0);
break;
}
}

result[p.value().length-1] = sampleSize;
return new RandomVariable<>(null, result, this);
}


@Override
public Map<String, Value> getParams() {
return new TreeMap<>() {{
put(DistributionConstants.nParamName, n);
put(DistributionConstants.pParamName, p);
}};
}

@Override
public void setParam(String paramName, Value value) {
switch (paramName) {
case DistributionConstants.nParamName:
n = value;
break;
case DistributionConstants.pParamName:
p = value;
break;
default:
throw new RuntimeException("Unrecognised parameter name: " + paramName);
}
}





public String toString() {
return getName();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package phylonco.lphy.evolution.readcountmodel;

public class ReadCount {

public final static int NUM_NUCLEOTIDES = 4;

private final String nucleotides = "ACGT";

private int[] readCounts;

public ReadCount(int[] readCounts) {
this.readCounts = readCounts;
}

public int[] getReadCounts() {
return readCounts;
}

public int getCount(String nucleotide) {
String capitalizedNucleotide = nucleotide.toUpperCase();
int index = nucleotides.indexOf(capitalizedNucleotide);
if (index != -1) {
return readCounts[index];
} else {
return 0;
}
}

public int getNumStates() {
return NUM_NUCLEOTIDES;
}


}
Loading

0 comments on commit 282070a

Please sign in to comment.