Skip to content

Commit

Permalink
PR changes
Browse files Browse the repository at this point in the history
  • Loading branch information
jethror1 committed Sep 27, 2020
1 parent 8f15c75 commit de1242b
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 75 deletions.
122 changes: 61 additions & 61 deletions bin/coverage_report_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def load_files(self, threshold, exon_stats,
template_dir = os.path.join(bin_dir, "../data/templates/")
single_template = os.path.join(template_dir, "single_template.html")

with open(single_template, 'r') as temp:
html_template = temp.read()
with open(single_template, 'r') as template:
html_template = template.read()

# read in exon stats file
with open(exon_stats.name) as exon_file:
Expand All @@ -84,11 +84,11 @@ def load_files(self, threshold, exon_stats,
key = ln.split(":")[0].strip("#")
val = ln.split(":")[1]
flagstat[key] = val

if "build" not in locals():
# build no. not included in gene_stats file
build = "Unknown"

column = [
"chrom", "exon_start", "exon_end",
"gene", "tx", "exon", "cov_start",
Expand Down Expand Up @@ -124,17 +124,17 @@ def load_files(self, threshold, exon_stats,


def build_report(self, html_template, total_stats, gene_stats,
sub_thrshld_stats, snps_low_cov, snps_high_cov, fig,
sub_threshold_stats, snps_low_cov, snps_high_cov, fig,
all_plots, summary_plot, report_vals
):
"""
Build report from template and variables to write to file
Args:
- html_template (str): string of HTML template
- html_template (str): string of HTML template file
- total_stats (df): total stats table of all genes & exons
- gene_stats (df): stats table of whole gene
- sub_thrshld_stats (df): table of exons with < threshold
- sub_threshold_stats (df): table of exons with < threshold
- snps_low_cov (df): table of snps with cov < threshold
- snps_high_cov (df): table of snps with cov > threshold
- fig (figure): grid of low coverage exon plots (plotly)
Expand All @@ -154,9 +154,9 @@ def build_report(self, html_template, total_stats, gene_stats,
threshold=report_vals["threshold"],
exon_issues=report_vals["exon_issues"],
gene_issues=report_vals["gene_issues"],
covered_genes=report_vals["covered_genes"],
fully_covered_genes=report_vals["fully_covered_genes"],
name=report_vals["name"],
sub_thrshld_stats=sub_thrshld_stats,
sub_threshold_stats=sub_threshold_stats,
low_cov_plots=fig,
all_plots=all_plots,
summary_plot=summary_plot,
Expand Down Expand Up @@ -272,7 +272,7 @@ def low_coverage_regions(self, cov_stats, raw_coverage, threshold):

# pandas forces these columns to float, so cast them back to the intended dtypes
dtypes = {
'chrom': int,
'chrom': str,
'exon': int,
'exon_start': int,
'exon_end': int,
Expand Down Expand Up @@ -612,18 +612,18 @@ def summary_gene_plot(self, cov_summary, threshold):

print("Generating summary plot")

thrshld = str(threshold) + "x"
threshold = str(threshold) + "x"

# define colours based on values
cov_summary["colours"] = 'green'
cov_summary.loc[cov_summary[thrshld] < 100, 'colours'] = 'orange'
cov_summary.loc[cov_summary[thrshld] < 90, 'colours'] = 'red'
cov_summary.loc[cov_summary[threshold] < 100, 'colours'] = 'orange'
cov_summary.loc[cov_summary[threshold] < 90, 'colours'] = 'red'

cov_summary = cov_summary.sort_values(by=[thrshld], ascending=False)
cov_summary = cov_summary.sort_values(by=[threshold], ascending=False)

summary_plot, axs = plt.subplots(figsize=(18, 10))
plt.bar(
cov_summary["gene"], [int(x) for x in cov_summary[thrshld]],
cov_summary["gene"], [int(x) for x in cov_summary[threshold]],
color=cov_summary.colours
)

Expand All @@ -643,7 +643,7 @@ def summary_gene_plot(self, cov_summary, threshold):
axs.tick_params(axis='both', which='major', labelsize=10)

plt.xlabel("")
plt.ylabel("% coverage >= {}".format(thrshld))
plt.ylabel("% coverage >= {}".format(threshold))

axs.yaxis.grid(linewidth=0.5, color="grey", linestyle="-.")
plt.box(False)
Expand Down Expand Up @@ -697,7 +697,7 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,
print("Generating report")

# str of threshold for selecting df columns etc.
thrshld = str(args.threshold) + "x"
threshold = str(args.threshold) + "x"

# get threshold columns and add to column names
threshold_cols = list(cov_stats.filter(regex='[0-9]+x', axis=1))
Expand All @@ -709,16 +709,16 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,

column.extend(threshold_cols)

sub_thrshld = pd.DataFrame(columns=column)
sub_threshold = pd.DataFrame(columns=column)

# get all exons with <100% coverage at threshold
for i, row in cov_stats.iterrows():
if int(row[thrshld]) < 100:
sub_thrshld = sub_thrshld.append(row, ignore_index=True)
if int(row[threshold]) < 100:
sub_threshold = sub_threshold.append(row, ignore_index=True)

# pandas forces these columns to float, so cast them back to the intended dtypes
dtypes = {
'chrom': int,
'chrom': str,
'exon': int,
'exon_len': int,
'exon_start': int,
Expand All @@ -727,7 +727,7 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,
'max': int
}

sub_thrshld = sub_thrshld.astype(dtypes)
sub_threshold = sub_threshold.astype(dtypes)

vals = ["min", "mean", "max"]
vals.extend(threshold_cols)
Expand All @@ -740,21 +740,21 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,
values=vals
)

sub_thrshld_stats = pd.pivot_table(
sub_thrshld,
sub_threshold_stats = pd.pivot_table(
sub_threshold,
index=["gene", "tx", "chrom", "exon", "exon_len",
"exon_start", "exon_end"],
values=vals
)

# reset index to fix formatting
total_stats = total_stats.reindex(vals, axis=1)
sub_thrshld_stats = sub_thrshld_stats.reindex(vals, axis=1)
sub_threshold_stats = sub_threshold_stats.reindex(vals, axis=1)
total_stats.reset_index(inplace=True)
sub_thrshld_stats.reset_index(inplace=True)
sub_threshold_stats.reset_index(inplace=True)

# rename columns to display properly
sub_thrshld_stats = sub_thrshld_stats.rename(columns={
sub_threshold_stats = sub_threshold_stats.rename(columns={
"gene": "Gene",
"tx": "Transcript",
"chrom": "Chromosome",
Expand Down Expand Up @@ -791,60 +791,60 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,

# get values to display in report
total_genes = len(cov_summary["Gene"])
gene_issues = len(list(set(sub_thrshld_stats["Gene"].tolist())))
exon_issues = len(sub_thrshld_stats["Exon"])
covered_genes = total_genes - gene_issues
gene_issues = len(list(set(sub_threshold_stats["Gene"].tolist())))
exon_issues = len(sub_threshold_stats["Exon"])
fully_covered_genes = total_genes - gene_issues

# empty dict to add values for displaying in report text
report_vals = {}

report_vals["name"] = str(args.sample_name)
report_vals["total_genes"] = str(total_genes)
report_vals["covered_genes"] = str(covered_genes)
report_vals["fully_covered_genes"] = str(fully_covered_genes)
report_vals["gene_issues"] = str(gene_issues)
report_vals["threshold"] = thrshld
report_vals["threshold"] = threshold
report_vals["exon_issues"] = str(exon_issues)
report_vals["build"] = build

# set ranges for colouring cells
x0 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] < 10
x0 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] < 10
) & (
sub_thrshld_stats[thrshld] > 0)].index, thrshld]
x10 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] < 30
sub_threshold_stats[threshold] > 0)].index, threshold]
x10 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] < 30
) & (
sub_thrshld_stats[thrshld] >= 10)].index, thrshld]
x30 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] < 50
sub_threshold_stats[threshold] >= 10)].index, threshold]
x30 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] < 50
) & (
sub_thrshld_stats[thrshld] >= 30)].index, thrshld]
x50 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] < 70
sub_threshold_stats[threshold] >= 30)].index, threshold]
x50 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] < 70
) & (
sub_thrshld_stats[thrshld] >= 50)].index, thrshld]
x70 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] < 90
sub_threshold_stats[threshold] >= 50)].index, threshold]
x70 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] < 90
) & (
sub_thrshld_stats[thrshld] >= 70)].index, thrshld]
x90 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] < 95
sub_threshold_stats[threshold] >= 70)].index, threshold]
x90 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] < 95
) & (
sub_thrshld_stats[thrshld] >= 90)].index, thrshld]
x95 = pd.IndexSlice[sub_thrshld_stats.loc[(
sub_thrshld_stats[thrshld] >= 95)].index, thrshld]
sub_threshold_stats[threshold] >= 90)].index, threshold]
x95 = pd.IndexSlice[sub_threshold_stats.loc[(
sub_threshold_stats[threshold] >= 95)].index, threshold]

# df column index of threshold
col_idx = sub_thrshld_stats.columns.get_loc(thrshld)
col_idx = sub_threshold_stats.columns.get_loc(threshold)

# make dict for rounding coverage columns to 2dp
rnd = {}
for col in list(sub_thrshld_stats.columns[10:15]):
for col in list(sub_threshold_stats.columns[10:15]):
rnd[col] = '{0:.2f}%'

# apply colours to coverage cell based on value, 0 is given solid red
s = sub_thrshld_stats.style.apply(lambda x: [
"background-color: #d70000" if x[thrshld] == 0 and idx == col_idx
s = sub_threshold_stats.style.apply(lambda x: [
"background-color: #d70000" if x[threshold] == 0 and idx == col_idx
else "" for idx, v in enumerate(x)
], axis=1)\
.bar(subset=x0, color='red', vmin=0, vmax=100)\
Expand All @@ -858,7 +858,7 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,
.set_table_attributes('table border="1"\
class="dataframe table table-hover table-bordered"')

sub_thrshld_stats["Mean"] = sub_thrshld_stats["Mean"].apply(
sub_threshold_stats["Mean"] = sub_threshold_stats["Mean"].apply(
lambda x: int(x)
)

Expand All @@ -875,20 +875,20 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,
total_stats = total_stats.to_html(justify='left').replace(
style[0], style[1]
)
sub_thrshld_stats = s.render()
sub_threshold_stats = s.render()

if snps_low_cov is not None:
snps_not_covered = len(snps_low_cov.index)
snps_low_cov = snps_low_cov.to_html().replace(style[0], style[1])
else:
snps_low_cov = "<b>No SNPs present</b>"
snps_low_cov = "<b>No low covered SNPs</b>"
snps_not_covered = 0

if snps_high_cov is not None:
snps_covered = len(snps_high_cov.index)
snps_high_cov = snps_high_cov.to_html().replace(style[0], style[1])
else:
snps_high_cov = "<b>No SNPs present</b>"
snps_high_cov = "<b>No covered SNPs</b>"
snps_covered = 0

total_snps = str(snps_covered + snps_not_covered)
Expand All @@ -904,7 +904,7 @@ def generate_report(self, cov_stats, cov_summary, snps_low_cov,

# add tables & plots to template
html_string = self.build_report(
html_template, total_stats, gene_stats, sub_thrshld_stats,
html_template, total_stats, gene_stats, sub_threshold_stats,
snps_low_cov, snps_high_cov, fig, all_plots, summary_plot,
report_vals
)
Expand Down
16 changes: 10 additions & 6 deletions bin/coverage_stats_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,14 @@ def cov_stats(self, data, thresholds):
exon_cov["cov"] > int(thrshld)
]["cov_bin_len"].sum()

# calculate % bases at each threshold from raw to 2 dp.
# calculate % bases at each threshold from raw to 2 dp.
pct_bases = {}
for key, value in raw_bases.items():
pct_bases[key] = round(value / tx_len * 100, 2)
# truncate to 2dp with math.floor rather than round(): rounding can
# push a value just below 100 (e.g. 99.996) up to 100.00
raw_value = value / tx_len * 100
rounded_value = math.floor(raw_value * 100) / 100
pct_bases[key] = rounded_value

stats = {
"chrom": row["chrom"], "exon_start": row["exon_start"],
Expand Down Expand Up @@ -268,16 +272,16 @@ def summary_stats(self, cov_stats, thresholds):

cov_summary = cov_summary.append(stats, ignore_index=True)

# round calculated vals to 2 dp
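# truncate (floor) calculated vals to 2 dp rather than rounding them
# NOTE(review): math.floor only accepts a scalar; confirm this works on a
# DataFrame slice (an element-wise floor such as numpy.floor may be needed)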
round_cols = ['mean'] + threshold_header
cov_summary[round_cols] = cov_summary[round_cols].round(2)
cov_summary[round_cols] = math.floor(
cov_summary[round_cols] * 100) / 100

return cov_summary


def write_outfiles(self, cov_stats, cov_summary, outfile, flagstat, build):
"""
If --outfile arg given, writes coverage stats to file.
Writes both exon and gene level coverage stats to file.
Args:
- cov_stats (df): df of generated coverage stats
Expand Down
9 changes: 1 addition & 8 deletions data/templates/single_template.html
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
text-align:left;
}


td {
word-wrap: break-word;
word-break:break-all
Expand All @@ -46,19 +45,16 @@
tr:hover {background-color:#ecebfc !important}

/* styling for collapsible sections */

.active, .collapsible:hover {
background-color: rgb(255, 255, 255);
}

.collapsible:after {
content: '+';
color: rgb(255, 255, 255);
font-weight: bold;
float: right;
margin-left: 5px;
}

.active:after {
content: "-";
}
Expand Down Expand Up @@ -100,7 +96,7 @@ <h2>Summary</h2>
</ul>
</p>
<br></br>
Of the <b>$total_genes</b> genes in the panel, <b>$covered_genes</b> genes had 100% coverage at $threshold,
Of the <b>$total_genes</b> genes in the panel, <b>$fully_covered_genes</b> genes had 100% coverage at $threshold,
with <b>$gene_issues</b> genes having less than 100% coverage at $threshold .<br>

$summary_plot
Expand Down Expand Up @@ -209,7 +205,6 @@ <h2> Coverage of SNPs </h2>
}
</script>


<!-- function to make sections collapsible -->
<script>
var coll = document.getElementsByClassName("collapsible");
Expand All @@ -227,9 +222,7 @@ <h2> Coverage of SNPs </h2>
});
}
</script>

</div>
</div>

</body>
</html>
Empty file removed output/single_report.html
Empty file.

0 comments on commit de1242b

Please sign in to comment.