Merge pull request #396 from ARTbio/further_patching

Further patching
ARTbio · Jun 24, 2019 · ab0c5a2 · ab0c5a2
2 parents da71a86 + 09dcd74
commit ab0c5a2
Show file tree

Hide file tree

Showing 10 changed files with 17,038 additions and 17,029 deletions.
diff --git a/tools/gsc_cpm_tpm_rpk/cpm_tpm_rpk.xml b/tools/gsc_cpm_tpm_rpk/cpm_tpm_rpk.xml
@@ -30,7 +30,7 @@
             #end if
 ]]></command>
     <inputs>
-        <param name="input" type="data" format="txt" label="Raw counts of expression data"/>
+        <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/>
         <param name="input_sep" type="select" label="Input column separator">
             <option value="tab" selected="true">Tabulation</option>
             <option value=",">Comma</option>
@@ -46,7 +46,7 @@
                     <option value="rpk">RPK</option>
                 </param>
                 <when value="tpm">
-                    <param name="gene_file" type="data" format="txt" label="Gene length file"/> 
+                    <param name="gene_file" type="data" format="txt,tabular" label="Gene length file"/> 
                     <param name="gene_sep" type="select" label="Gene length column separator">
                         <option value="tab" selected="true">Tabulation</option>
                         <option value=",">Commas</option>
@@ -57,7 +57,7 @@
                     </param>
                 </when>
                 <when value="rpk">
-                    <param name="gene_file" type="data" format="txt" label="Gene length file"/>
+                    <param name="gene_file" type="data" format="txt,tabular" label="Gene length file"/>
                     <param name="gene_sep" type="select" label="Gene length column separator">
                         <option value="tab" selected="true">Tabs</option>
                         <option value=",">Commas</option>

diff --git a/tools/gsc_filter_cells/filter_cells.xml b/tools/gsc_filter_cells/filter_cells.xml
@@ -31,7 +31,7 @@
             --output_metada $output_metada
 ]]></command>
     <inputs>
-        <param name="input" type="data" format="txt" label="Raw counts of expression data"/>
+        <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/>
         <param name="sep" type="select" label="Indicate column separator">
             <option value="tab" selected="true">Tabs</option>
             <option value="comma">Comma</option>
@@ -45,7 +45,7 @@
         <param name="absolute_counts" value="0" type="integer" label="Absolute number of aligned read Threshold [integer]"
                help="Cells with number of aligned reads below this absolute threshold will be filtered out. Leave at 0 for no filtering" />
         <param name="manage_cutoffs" type="select" label=" filter out intersection or union of cutoffs"
-               help="If you use two cutoffs on number of detected genes and number of aligned reads, respectively, their is two options
+               help="If you use two cutoffs on number of detected genes and number of aligned reads, respectively, there are two options
                for using these cutoffs in filtering: either excluding items that are below one or the other threshold (union) or
                excluding items that are below both thresholds (intersection)" >
             <option value="union" selected="true">Union of cutoffs</option>

diff --git a/tools/gsc_filter_genes/filter_genes.xml b/tools/gsc_filter_genes/filter_genes.xml
@@ -21,7 +21,7 @@
             --output $output
 ]]></command>
     <inputs>
-        <param name="input" type="data" format="txt" label="Expression data" help="a csv or tsv table file" />
+        <param name="input" type="data" format="txt,tabular" label="Expression data" help="a csv or tsv table file" />
         <param name="sep" type="select" label="Indicate column separator">
             <option value="tab" selected="true">Tabs</option>
             <option value="comma">Comma</option>

diff --git a/tools/gsc_gene_expression_correlations/correlation_with_signature.xml b/tools/gsc_gene_expression_correlations/correlation_with_signature.xml
@@ -23,9 +23,9 @@
             --gene_corr_pval '$gene_corr_pval'
 ]]></command>
     <inputs>
-        <param name="expression_file" type="data" format="txt" label="Expression data"
+        <param name="expression_file" type="data" format="txt,tabular" label="Expression data"
                help="a csv or tsv file that contains log2(CPM +1) expression values" />
-        <param name="signatures_file" type="data" format="txt" label="signature values"
+        <param name="signatures_file" type="data" format="txt,tabular" label="signature values"
                help="a csv or tsv file that contains cell signatures" />
         <param name="sep" type="select" label="Indicate column separator"
                help="Note that all input files must have the same format">

diff --git a/tools/gsc_high_dimensions_visualisation/high_dim_visu.xml b/tools/gsc_high_dimensions_visualisation/high_dim_visu.xml
@@ -64,7 +64,7 @@
             
 ]]></command>
     <inputs>
-        <param name="input" type="data" format="txt" label="expression data"/>
+        <param name="input" type="data" format="txt,tabular" label="expression data"/>
         <param name="input_sep" type="select" label="Input column separator">
             <option value="tab" selected="true">Tabs</option>
             <option value=",">Comma</option>

diff --git a/tools/gsc_mannwhitney_de/MannWhitney_DE.R b/tools/gsc_mannwhitney_de/MannWhitney_DE.R
@@ -45,12 +45,12 @@ option_list = list(
   make_option(
     "--factor1",
     type = 'character',
-    help = "First factor of rate category in comparison factor file"
+    help = "level associated to the control condition in the factor file"
   ), 
   make_option(
     "--factor2",
     type = 'character',
-    help = "Second factor of rate category in comparison factor file"
+    help = "level associated to the test condition in the factor file"
   ),
   make_option(
     "--fdr",
@@ -122,13 +122,13 @@ descriptive_stats <- function(InputData) {
     Percentage_Detection = apply(InputData, 1, function(x, y = InputData) {
       (sum(x != 0) / ncol(y)) * 100
     }),
-    mean_factor2 = rowMeans(InputData[,factor2_cells]),
-    mean_factor1 = rowMeans(InputData[, factor1_cells])
+    mean_condition2 = rowMeans(InputData[,factor2_cells]),
+    mean_condition1 = rowMeans(InputData[, factor1_cells])
   )
   if(opt$log) {
-  SummaryData$fold_change <- SummaryData$mean_factor1 - SummaryData$mean_factor2
+  SummaryData$log2FC <- SummaryData$mean_condition2 - SummaryData$mean_condition1
   } else {
-  SummaryData$fold_change <- SummaryData$mean_factor1 / SummaryData$mean_factor2
+  SummaryData$log2FC <- log2(SummaryData$mean_condition2 / SummaryData$mean_condition1)
   }
   return(SummaryData)
 }

diff --git a/tools/gsc_mannwhitney_de/mannwhitney_de.xml b/tools/gsc_mannwhitney_de/mannwhitney_de.xml
@@ -24,7 +24,7 @@
             --output '$output'
 ]]></command>
     <inputs>
-        <param name="input" type="data" format="txt" label="Expression data" help="a csv or tsv table file" />
+        <param name="input" type="data" format="txt,tabular" label="Expression data" help="a csv or tsv table file" />
         <param name="sep" type="select" label="Indicate column separator">
             <option value="tab" selected="true">Tabs</option>
             <option value="comma">Comma</option>
@@ -35,8 +35,8 @@
         </param>
         <param name="comparison_factor_file" type="data" format="tabular" label="Comparison factor table"
                help="A tsv table file with two columns : cell identifiers and a column that split cells into two categories." />
-        <param name="factor1" type="text" label="The first value that the comparison factor can take." help="typical values could be 'HIGH', 'mutant', 't1', etc."/>
-        <param name="factor2" type="text" label="The second value that the comparison factor can take." help="typical values could be 'LOW', 'wt', 't2', etc."/>
+        <param name="factor1" type="text" label="Condition-1. The first level (value) that the comparison factor can take." help="typical values could be 'LOW', 'wt', 't1', etc."/>
+        <param name="factor2" type="text" label="Condition-2. The second level (value) that the comparison factor can take." help="typical values could be 'HIGH', 'mutant', 't2', etc."/>
         <param name="fdr" type="float" value="0.01" label="FDR threshold"
                help="Reject H0 of no differential expression if the adjusted p-value (Benjamini-Hochberg correction) is lower than the FDR cut-off."/>
         <param name="log" type="boolean" checked="false" label="Expression data are log-transformed" truevalue="--log" falsevalue=""/>
@@ -51,8 +51,8 @@
             <param name="sep" value="tab" />
             <param name="colnames" value="TRUE"/>
             <param name="comparison_factor_file" value="signature_2columns.tsv" ftype="tabular"/>
-            <param name="factor1" value="HIGH"/>
-            <param name="factor2" value="LOW"/>
+            <param name="factor1" value="LOW"/>
+            <param name="factor2" value="HIGH"/>
             <param name="fdr" value="0.01"/>
             <param name="log" value="true"/>
             <output name="output" file="geneMetadata_log.tsv" ftype="tabular"/>
@@ -62,8 +62,8 @@
             <param name="sep" value="tab" />
             <param name="colnames" value="TRUE"/>
             <param name="comparison_factor_file" value="signature_2columns.tsv" ftype="tabular"/>
-            <param name="factor1" value="HIGH"/>
-            <param name="factor2" value="LOW"/>
+            <param name="factor1" value="LOW"/>
+            <param name="factor2" value="HIGH"/>
             <param name="fdr" value="0.01"/>
             <param name="log" value="false"/>
             <output name="output" file="geneMetadata_nolog.tsv" ftype="tabular"/>
@@ -73,14 +73,23 @@
 
 **What it does**
 
-The tools takes a table of gene expression values (e.E. log2(CPM+1), etc...) from single cell RNAseq sequencing libraries (columns) and a metadata file that contains at least two columns :
+The tool takes a table of gene expression values (e.g. log2(CPM+1), etc.) from single cell RNAseq sequencing libraries (columns)
+
+and a metadata file that contains at least two columns :
     * Cell identifiers
-    * Column that differentiate cell in two groups. It must be a column with only two categories (factor levels).
+    * A column that splits the cells into two groups (the two levels of a comparison factor). It must be a column with only two distinct values (the factor levels).
 
-For each gene (rows in expression data file), this script perform a Mann-Whitney test between 
+For each gene (row in the expression data file), this script performs a two-sided Mann-Whitney test between 
 the two groups of cells (high/low, mutant/wild type) and then adjusts the returned p-values by using the
-Benjamini-Hochberg (BH) correction. A False Discovery Rate (FDR) threshold is used to determine if gene
-can be considerated as significant (p-adjust below FDR cut-off) or not (p-adjust above the FDR cut-off). 
+Benjamini-Hochberg (BH) correction. A False Discovery Rate (FDR) threshold is used to determine whether the expression of a gene
+deviates significantly from the null hypothesis (H0) of no differential expression (p-adjust below the FDR cut-off) or not (p-adjust above the FDR cut-off). 
+
+.. class:: warningmark
+
+**Comparison plan**
+
+Note that the log2 fold changes (log2FC) computed by the tool are based on the comparison of condition-2 (level-2) versus condition-1 (level-1), i.e.
+the tool returns the log2FC of condition-2 **relative** to condition-1.
 
 **Output**