Skip to content

Commit

Permalink
Merge pull request #76 from icgc-argo/[email protected]
Browse files Browse the repository at this point in the history
[release]
  • Loading branch information
edsu7 authored Jun 27, 2023
2 parents a910ea9 + 970cf89 commit 2206aa0
Show file tree
Hide file tree
Showing 52 changed files with 144 additions and 76 deletions.
36 changes: 21 additions & 15 deletions argo-data-submission-wf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
*/

nextflow.enable.dsl = 2
version = '1.0.1'
version = '1.0.2'

// universal params go here, change default value as needed
params.container = ""
params.container_registry = ""
params.container_version = ""
params.cpus = 1
params.mem = 1 // GB
params.force = false
params.publish_dir = "" // set to empty string will disable publishDir

// tool specific params go here, add / change as needed
Expand All @@ -40,10 +41,13 @@ params.cleanup = true
// ArgoDataSubmissionWf
params.study_id=""
params.download_mode="local"
params.song_container = ""
params.song_container_version = ""
params.score_container = ""
params.score_container_version = ""
params.song_container = "ghcr.io/overture-stack/song-client"
params.song_container_version = "5.0.2"
params.score_container = "ghcr.io/overture-stack/score"
params.score_container_version = "5.9.0"
params.score_mem = 20
params.score_cpus = 8
params.score_force = false

// sanityChecks
params.song_url=""
Expand Down Expand Up @@ -129,8 +133,12 @@ egaDownload_params = [
upload_params = [
'max_retries': params.max_retries,
'first_retry_wait_time': params.first_retry_wait_time,
'cpus': params.cpus,
'mem': params.mem,
'score_force' : params.score_force,
'score_cpus' : params.score_cpus,
'score_mem' : params.score_mem,
'score_transport_mem' : params.score_mem,
'song_cpus' : params.cpus,
'song_mem' : params.mem,
'song_url': params.song_url,
'song_container': params.song_container,
'song_container_version': params.song_container_version,
Expand All @@ -156,15 +164,15 @@ submissionReceipt_params = [
'mem': params.mem,
]

include { SongScoreUpload as uploadWf } from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].2/main.nf' params(upload_params)
include { validateSeqtools as valSeq} from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected].7/main.nf' params(validateSeq_params)
include { SongScoreUpload as uploadWf } from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)
include { validateSeqtools as valSeq} from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected].8/main.nf' params(validateSeq_params)
include { EgaDownloadWf as egaWf } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected]/main.nf' params(egaDownload_params)
include { payloadGenSeqExperiment as pGenExp} from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected].2/main.nf' params(payloadGen_params)
include { payloadGenSeqExperiment as pGenExp} from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected].3/main.nf' params(payloadGen_params)
include { cleanupWorkdir as cleanup } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected]/main.nf'
include { cram2bam } from './wfpr_modules/github.com/icgc-argo-workflows/dna-seq-processing-tools/[email protected]/main.nf' params(cram2bam_params)
include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected]/main.nf' params([*:params, 'cleanup': false])
include { sanityCheck } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected].1/main.nf' params(sanityCheck_params)
include { payloadJsonToTsvs } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected].1/main.nf' params(payloadJsonToTsvs_params)
include { sanityCheck } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected].3/main.nf' params(sanityCheck_params)
include { payloadJsonToTsvs } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected].2/main.nf' params(payloadJsonToTsvs_params)
include { submissionReceipt } from './wfpr_modules/github.com/icgc-argo/argo-data-submission/[email protected]/main.nf' params(submissionReceipt_params)
// please update workflow code as needed

Expand Down Expand Up @@ -436,8 +444,6 @@ workflow ArgoDataSubmissionWf {
uploadWf.out.analysis_id,
submissionReceipt.out.receipt
)
//Channel.from(pGenExp.out.payload).view()
//Channel.of(pGenExp.out.payload).view()

emit:
json_file=pGenExp.out.payload
Expand All @@ -461,4 +467,4 @@ workflow {
params.song_url,
params.clinical_url
)
}
}
14 changes: 7 additions & 7 deletions argo-data-submission-wf/pkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "argo-data-submission-wf",
"version": "1.0.1",
"version": "1.0.2",
"description": "ARGO data submission workflow",
"main": "main.nf",
"deprecated": false,
Expand All @@ -12,15 +12,15 @@
"url": "https://github.com/icgc-argo/argo-data-submission.git"
},
"dependencies": [
"github.com/icgc-argo/argo-data-submission/[email protected].7",
"github.com/icgc-argo/argo-data-submission/[email protected].8",
"github.com/icgc-argo/argo-data-submission/[email protected]",
"github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected].2",
"github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected].3",
"github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected]",
"github.com/icgc-argo-workflows/dna-seq-processing-tools/[email protected]",
"github.com/icgc-argo/argo-data-submission/[email protected].1",
"github.com/icgc-argo/argo-data-submission/[email protected].1",
"github.com/icgc-argo/argo-data-submission/[email protected].2",
"github.com/icgc-argo/argo-data-submission/[email protected].3",
"github.com/icgc-argo/argo-data-submission/[email protected]",
"github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].2"
"github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3"
],
"devDependencies": [],
"contributors": [
Expand All @@ -36,4 +36,4 @@
"license": "GNU Affero General Public License v3",
"bugReport": "https://github.com/icgc-argo/argo-data-submission/issues",
"homepage": "https://github.com/icgc-argo/argo-data-submission#readme"
}
}
2 changes: 1 addition & 1 deletion argo-data-submission-wf/tests/checker.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
*/

nextflow.enable.dsl = 2
version = '1.0.1'
version = '1.0.2'

// universal params
params.publish_dir = ""
Expand Down
7 changes: 0 additions & 7 deletions argo-data-submission-wf/tests/file.v2.tsv

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
type submitter_sequencing_experiment_id program_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count
sequencing_experiment SubWf_exp_02_FASTQ_input TEST-QA TEST_SUBMITTER_DONOR_ID_aaupbblgaa TEST_SPECIMEN_DONOR_ID_aaupbblgaa TEST_SAMPLE_DONOR_ID_aaupbblgaa EXT ILLUMINA HiSeq 2000 WGS 2014-12-12 3
sequencing_experiment SubWf_exp_02_FASTQ_input TEST-QA SubWf_test_01 SubWf_test_01_SP_T SubWf_test_01_SA_T SubWf_test_01_SA_N EXT ILLUMINA HiSeq 2000 WXS 2014-12-11 3
2 changes: 2 additions & 0 deletions argo-data-submission-wf/tests/input/experiment-fq.badWXS2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
type submitter_sequencing_experiment_id program_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count target_capture_kit primary_target_regions capture_target_regions
sequencing_experiment SubWf_exp_02_FASTQ_input TEST-QA SubWf_test_01 SubWf_test_01_SP_T SubWf_test_01_SA_T SubWf_tes EXT ILLUMINA HiSeq 2000 WXS 2014-12-12 3 dummy_val dummy_val dummy_val
2 changes: 2 additions & 0 deletions argo-data-submission-wf/tests/input/experiment-fq.goodWXS.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
type submitter_sequencing_experiment_id program_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count target_capture_kit primary_target_regions capture_target_regions
sequencing_experiment SubWf_exp_02_FASTQ_input TEST-QA SubWf_test_01 SubWf_test_01_SP_T SubWf_test_01_SA_T SubWf_test_01_SA_N EXT ILLUMINA HiSeq 2000 WXS 2014-12-12 3 dummy_val dummy_val dummy_val
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"study_id": "TEST-QA",
"experiment_info_tsv": "input/experiment-fq.badWXS.tsv",
"read_group_info_tsv": "input/read_group-fq.v2.tsv",
"file_info_tsv": "input/file-fq.v2.tsv",
"download_mode": "local",
"publish_dir": "outdir"
}
8 changes: 8 additions & 0 deletions argo-data-submission-wf/tests/local-test-job-15-WXS.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"study_id": "TEST-QA",
"experiment_info_tsv": "input/experiment-fq.goodWXS.tsv",
"read_group_info_tsv": "input/read_group-fq.v2.tsv",
"file_info_tsv": "input/file-fq.v2.tsv",
"download_mode": "local",
"publish_dir": "outdir"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"study_id": "TEST-QA",
"experiment_info_tsv": "input/experiment-fq.badWXS2.tsv",
"read_group_info_tsv": "input/read_group-fq.v2.tsv",
"file_info_tsv": "input/file-fq.v2.tsv",
"download_mode": "local",
"publish_dir": "outdir"
}
4 changes: 0 additions & 4 deletions argo-data-submission-wf/tests/read_group.v2.tsv

This file was deleted.

6 changes: 1 addition & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ process {
cpus = 2
memory = 2.GB
}
withName: scoreUpload {
cpus = 4
memory = 10.GB
}
withName: songPublish {
cpus = 2
memory = 2.GB
Expand Down Expand Up @@ -89,4 +85,4 @@ profiles {
params.clinical_url = 'https://clinical.dev.argo.cancercollaboratory.org'
params.schema_url = 'https://submission-song.rdpc-dev.cancercollaboratory.org/schemas/sequencing_experiment'
}
}
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.8.2'
version = '0.8.3'

container = [
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-seq-experiment'
Expand Down
38 changes: 28 additions & 10 deletions .../[email protected]/main.py → .../[email protected]/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@
'submitter_sample_id','sample_type', 'submitter_matched_normal_sample_id', 'sequencing_center',
'platform', 'platform_model','experimental_strategy', 'sequencing_date', 'read_group_count']
TSV_FIELDS['experiment']["conditional"]=[
"library_isolation_protocol","library_preparation_kit",
"library_strandedness","rin","dv200","spike_ins_included",
"spike_ins_fasta","spike_ins_concentration",
"target_capture_kit"]
"library_preparation_kit",
"library_strandedness",
"rin","dv200",
"target_capture_kit","number_of_genes","gene_padding","coverage",
"primary_target_regions","capture_target_regions"
]

TSV_FIELDS['read_group']= {}
TSV_FIELDS['read_group']["core"]=[
Expand Down Expand Up @@ -77,10 +79,10 @@

def empty_str_to_null(metadata):
    """Replace placeholder "empty" strings in ``metadata`` with ``None``, in place.

    Every top-level string value equal to one of the recognised placeholders
    ("", "_NULL_" and the common spellings of null/none) is set to ``None``.
    The values stored under 'read_groups', 'files' and 'experiment' are
    additionally walked recursively so the same normalisation is applied to
    the records nested inside them.

    Args:
        metadata: dict of metadata fields; modified in place.

    Returns:
        None. The result is the mutated ``metadata`` itself.
    """
    null_tokens = ("", "_NULL_", "null", "NULL", "Null", "None", "NONE", "none")
    nested_keys = ('read_groups', 'files', 'experiment')

    for k in metadata:
        value = metadata[k]
        if k in nested_keys:
            if isinstance(value, dict):
                # A single nested record (integer indexing would raise
                # KeyError here) -- recurse into it directly.
                empty_str_to_null(value)
            elif isinstance(value, (list, tuple)):
                # A list of records: normalise each dict element.
                for item in value:
                    if isinstance(item, dict):
                        empty_str_to_null(item)
            # Any other value (None, str, number) has nothing to recurse
            # into; a placeholder string is still handled just below.
        # Only existing keys are reassigned, so the dict's size is unchanged
        # and mutating while iterating is safe.
        if isinstance(value, str) and value in null_tokens:
            metadata[k] = None


Expand Down Expand Up @@ -146,10 +148,20 @@ def load_all_tsvs(exp_tsv, rg_tsv, file_tsv):
rg['is_paired_end'] = None

for field in ('read_length_r1', 'read_length_r2', 'insert_size'):
if rg[field]:
if isinstance(rg[field],str):
if re.match("^[0-9]+$", rg[field]):
rg[field] = int(rg[field])
continue
for empty_string in ["", "_NULL_",'null',"NULL","Null","None","NONE","none"]:
if rg[field]==empty_string:
rg[field] = None
break
elif isinstance(rg[field],int):
rg[field] = int(rg[field])
else:
elif rg[field] is None:
rg[field] = None
else:
sys.exit("Unrecognnized value '%s' in field %s for '%s'" % (str(rg[field]),field,rg['submitter_read_group_id']))

metadata_dict['read_groups'].append(rg)

Expand Down Expand Up @@ -262,7 +274,7 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and
optional_experimental_fields.remove("rin")

for optional_experimental_field in optional_experimental_fields:
if metadata.get(optional_experimental_field):
if optional_experimental_field in metadata.keys():
payload['experiment'][optional_experimental_field]=metadata.get(optional_experimental_field)
# Int
optional_experimental_fields=["rin"]
Expand All @@ -274,6 +286,12 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and
if metadata.get('experimental_strategy')=='RNA-Seq' and not metadata.get("library_strandedness"):
sys.exit(f"'experimental_strategy' 'RNA-Seq' specified but 'library_strandedness' is missing. Resubmit with both values 'experimental_strategy' and 'library_strandedness'")

# Targeted sequencing:
if metadata.get('experimental_strategy')=="Targeted-Seq" or metadata.get('experimental_strategy')=="WXS":
for field in ['target_capture_kit','primary_target_regions','capture_target_regions']:
if field not in metadata.keys():
sys.exit(f"'experimental_strategy' '%s' specified but '%s' is missing. Resubmit with both values 'experimental_strategy' and '%s'" % (metadata.get('experimental_strategy'),field,field))

# get sample of the payload
sample = {
'submitterSampleId': metadata.get('submitter_sample_id'),
Expand Down Expand Up @@ -449,4 +467,4 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and
extra_info[row_type][row_id][row_field]=row_val


main(metadata,url,args.bam_from_cram,args.bam_from_cram_reference,args.recalculate_size_and_md5_files,extra_info)
main(metadata,url,args.bam_from_cram,args.bam_from_cram_reference,args.recalculate_size_and_md5_files,extra_info)
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "payload-gen-seq-experiment",
"version": "0.8.2",
"version": "0.8.3",
"description": "SONG payload generation for sequencing experiment",
"main": "main.nf",
"deprecated": false,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ params.first_retry_wait_time = 1 // in seconds

// required params w/ default
params.container = "ghcr.io/overture-stack/score"
params.container_version = "5.8.1"
params.container_version = "latest"
params.transport_mem = 2 // Transport memory is in number of GBs

// optional if secret mounted from pod else required
params.api_token = "" // song/score API token for download process

params.score_force = false
// required params, no default
// --song_url song url for download process
// --score_url score url for download process
Expand Down Expand Up @@ -52,13 +52,14 @@ process scoreUpload {

script:
accessToken = params.api_token ? params.api_token : "`cat /tmp/rdpc_secret/secret`"
forceFlag = params.score_force ? "--force" : ""
"""
export METADATA_URL=${params.song_url}
export STORAGE_URL=${params.score_url}
export TRANSPORT_PARALLEL=${params.cpus}
export TRANSPORT_MEMORY=${params.transport_mem}
export ACCESSTOKEN=${accessToken}
score-client upload --manifest ${manifest}
score-client upload --manifest ${manifest} ${forceFlag}
"""
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ params.first_retry_wait_time = 1 // in seconds

// required params w/ default
params.container = "ghcr.io/overture-stack/song-client"
params.container_version = "5.0.2"
params.container_version = "latest"

// optional if secret mounted from pod else required
params.api_token = "" // song/score API token for download process
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ params.first_retry_wait_time = 1 // in seconds

// required params w/ default
params.container = "ghcr.io/overture-stack/song-client"
params.container_version = "5.0.2"
params.container_version = "latest"

// optional if secret mounted from pod else required
params.api_token = "" // song/score API token for download process
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ params.first_retry_wait_time = 1 // in seconds

// required params w/ default
params.container = "ghcr.io/overture-stack/song-client"
params.container_version = "5.0.2"
params.container_version = "latest"

// optional if secret mounted from pod else required
params.api_token = "" // song/score API token for download process
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
*/

nextflow.enable.dsl = 2
version = '2.9.2'
version = '2.9.3'

// universal params go here, change default value as needed
params.publish_dir = "" // set to empty string will disable publishDir
Expand All @@ -42,15 +42,16 @@ params.song_mem = 1 // GB
params.song_url = "https://song.rdpc-qa.cancercollaboratory.org"
params.song_api_token = ""
params.song_container = "ghcr.io/overture-stack/song-client"
params.song_container_version = "5.0.2"
params.song_container_version = "latest"

params.score_cpus = 1
params.score_mem = 1 // GB
params.score_transport_mem = 1 // GB
params.score_url = "https://score.rdpc-qa.cancercollaboratory.org"
params.score_api_token = ""
params.score_container = "ghcr.io/overture-stack/score"
params.score_container_version = "5.8.1"
params.score_container_version = "latest"
params.score_force = false

song_params = [
*:params,
Expand All @@ -71,7 +72,8 @@ score_params = [
'score_url': params.score_url,
'score_container': params.score_container,
'score_container_version': params.score_container_version,
'api_token': params.score_api_token ?: params.api_token
'api_token': params.score_api_token ?: params.api_token,
'score_force': params.score_force
]

include { songSubmit as songSub } from './local_modules/song-submit' params(song_params)
Expand Down
File renamed without changes.
Loading

0 comments on commit 2206aa0

Please sign in to comment.