diff --git a/.github/workflows/build-test-release.yml b/.github/workflows/build-test-release.yml index e5a5458..b3a7249 100644 --- a/.github/workflows/build-test-release.yml +++ b/.github/workflows/build-test-release.yml @@ -258,4 +258,4 @@ jobs: upload_url: ${{ steps.create_release.outputs.upload_url }} asset_path: ./pkg-release.json asset_name: pkg-release.json - asset_content_type: application/json + asset_content_type: application/json \ No newline at end of file diff --git a/payload-json-to-tsvs/main.nf b/payload-json-to-tsvs/main.nf index 84f1935..a34b31b 100755 --- a/payload-json-to-tsvs/main.nf +++ b/payload-json-to-tsvs/main.nf @@ -24,7 +24,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' +version = '0.1.2' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.payload-json-to-tsvs' diff --git a/payload-json-to-tsvs/main.py b/payload-json-to-tsvs/main.py index 2ae9c1d..27005d4 100755 --- a/payload-json-to-tsvs/main.py +++ b/payload-json-to-tsvs/main.py @@ -84,9 +84,9 @@ def make_experiment_tsv(metadata): for field in [ 'submitter_sequencing_experiment_id','sequencing_center', 'platform', 'platform_model','experimental_strategy', 'sequencing_date', - "library_isolation_protocol","library_preparation_kit", - "library_strandedness","rin","dv200","spike_ins_included","spike_ins_fasta", - "spike_ins_concentration","target_capture_kit"]: + "library_preparation_kit","library_strandedness","rin","dv200", + "primary_target_regions","capture_target_regions","number_of_genes", + "gene_padding","coverage","target_capture_kit"]: return_metadata[field]=metadata['experiment'].get(field) if metadata['experiment'].get(field) else None return_metadata['program_id']=metadata.get("studyId") diff --git a/payload-json-to-tsvs/pkg.json b/payload-json-to-tsvs/pkg.json index 1dfebc6..f6e2306 100644 --- a/payload-json-to-tsvs/pkg.json +++ b/payload-json-to-tsvs/pkg.json @@ -1,6 +1,6 @@ { "name": "payload-json-to-tsvs", - "version": "0.1.1", + "version": "0.1.2", "description": "Converts JSON payload to 3 TSVs : Experiment,Files,Read groups", "main": "main.nf", "deprecated": false, diff --git a/payload-json-to-tsvs/tests/checker.nf b/payload-json-to-tsvs/tests/checker.nf index 7f1db1f..36900ec 100755 --- a/payload-json-to-tsvs/tests/checker.nf +++ b/payload-json-to-tsvs/tests/checker.nf @@ -29,7 +29,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' +version = '0.1.2' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.payload-json-to-tsvs' diff --git a/payload-json-to-tsvs/tests/expected/expected_experiment.tsv b/payload-json-to-tsvs/tests/expected/expected_experiment.tsv index 2c7f25d..1ed698f 100644 --- a/payload-json-to-tsvs/tests/expected/expected_experiment.tsv +++ b/payload-json-to-tsvs/tests/expected/expected_experiment.tsv @@ -1,2 +1,2 @@ -type submitter_donor_id gender submitter_specimen_id tumour_normal_designation specimen_type specimen_tissue_source submitter_sample_id sample_type submitter_matched_normal_sample_id submitter_sequencing_experiment_id sequencing_center platform platform_model experimental_strategy sequencing_date library_isolation_protocol library_preparation_kit library_strandedness rin dv200 spike_ins_included spike_ins_fasta spike_ins_concentration target_capture_kit program_id read_group_count -sequencing_experiment TEST_SUBMITTER_DONOR_ID_jpupbblgfa Male TEST_SUBMITTER_SPECIMEN_ID_jpupbblgfa Tumour Primary tumour - adjacent to normal Buccal cell TEST_SUBMITTER_SAMPLE_ID_jpupbblgfaRNA Total RNA EXP-9 QCMG ILLUMINA Illumina HiSeq 2000 RNA-Seq FIRST_READ_ANTISENSE_STRAND TEST-QA 4 +type submitter_donor_id gender submitter_specimen_id tumour_normal_designation specimen_type specimen_tissue_source submitter_sample_id sample_type submitter_matched_normal_sample_id submitter_sequencing_experiment_id sequencing_center platform platform_model experimental_strategy sequencing_date library_preparation_kit library_strandedness rin dv200 primary_target_regions capture_target_regions number_of_genes gene_padding coverage target_capture_kit program_id read_group_count +sequencing_experiment TEST_SUBMITTER_DONOR_ID_jpupbblgfa Male TEST_SUBMITTER_SPECIMEN_ID_jpupbblgfa Tumour Primary tumour - adjacent to normal Buccal cell TEST_SUBMITTER_SAMPLE_ID_jpupbblgfaRNA Total RNA EXP-9 QCMG ILLUMINA Illumina HiSeq 2000 RNA-Seq FIRST_READ_ANTISENSE_STRAND TEST-QA 4 diff --git a/payload-json-to-tsvs/tests/expected/expected_experiment2.tsv b/payload-json-to-tsvs/tests/expected/expected_experiment2.tsv index cb5ee1c..bea8427 100644 --- a/payload-json-to-tsvs/tests/expected/expected_experiment2.tsv +++ b/payload-json-to-tsvs/tests/expected/expected_experiment2.tsv @@ -1,2 +1,2 @@ -type submitter_donor_id gender submitter_specimen_id tumour_normal_designation specimen_type specimen_tissue_source submitter_sample_id sample_type submitter_matched_normal_sample_id submitter_sequencing_experiment_id sequencing_center platform platform_model experimental_strategy sequencing_date library_isolation_protocol library_preparation_kit library_strandedness rin dv200 spike_ins_included spike_ins_fasta spike_ins_concentration target_capture_kit program_id read_group_count -sequencing_experiment TEST_SUBMITTER_DONOR_ID_jpupbblgfa Male TEST_SUBMITTER_SPECIMEN_ID_jpupbblgfa Tumour Primary tumour Other TEST_SUBMITTER_SAMPLE_ID_jpupbblgfa Total RNA EXP-9 QCMG ILLUMINA Illumina HiSeq 2000 RNA-Seq FIRST_READ_ANTISENSE_STRAND TEST-PR 5 +type submitter_donor_id gender submitter_specimen_id tumour_normal_designation specimen_type specimen_tissue_source submitter_sample_id sample_type submitter_matched_normal_sample_id submitter_sequencing_experiment_id sequencing_center platform platform_model experimental_strategy sequencing_date library_preparation_kit library_strandedness rin dv200 primary_target_regions capture_target_regions number_of_genes gene_padding coverage target_capture_kit program_id read_group_count +sequencing_experiment TEST_SUBMITTER_DONOR_ID_jpupbblgfa Male TEST_SUBMITTER_SPECIMEN_ID_jpupbblgfa Tumour Primary tumour Other TEST_SUBMITTER_SAMPLE_ID_jpupbblgfa Total RNA EXP-9 QCMG ILLUMINA Illumina HiSeq 2000 RNA-Seq FIRST_READ_ANTISENSE_STRAND TEST-PR 5 diff --git a/payload-json-to-tsvs/tests/expected/expected_experiment3.tsv b/payload-json-to-tsvs/tests/expected/expected_experiment3.tsv new file mode 100644 index 0000000..0714e27 --- /dev/null +++ b/payload-json-to-tsvs/tests/expected/expected_experiment3.tsv @@ -0,0 +1,2 @@ +type submitter_donor_id gender submitter_specimen_id tumour_normal_designation specimen_type specimen_tissue_source submitter_sample_id sample_type submitter_matched_normal_sample_id submitter_sequencing_experiment_id sequencing_center platform platform_model experimental_strategy sequencing_date library_preparation_kit library_strandedness rin dv200 primary_target_regions capture_target_regions number_of_genes gene_padding coverage target_capture_kit program_id read_group_count +sequencing_experiment HCC1143 Female HCC1143_BAM_INPUT Normal Cell line - derived from normal Blood derived HCC1143_BAM_INPUT Total DNA TEST_EXP EXT ILLUMINA HiSeq 2000 WXS 2014-12-12 DUMMY_VAL DUMMY_VAL DUMMY_VAL TEST-PRO 3 diff --git a/payload-json-to-tsvs/tests/expected/expected_files3.tsv b/payload-json-to-tsvs/tests/expected/expected_files3.tsv new file mode 100644 index 0000000..bd5d38a --- /dev/null +++ b/payload-json-to-tsvs/tests/expected/expected_files3.tsv @@ -0,0 +1,2 @@ +name size md5sum ega_file_id ega_dataset_id ega_experiment_id ega_sample_id ega_study_id ega_run_id ega_policy_id ega_analysis_id ega_submission_id ega_dac_id path type format +test_rg_3.bam 14911 178f97f7b1ca8bfc28fd5586bdd56799 input/test_rg_3.bam file BAM diff --git a/payload-json-to-tsvs/tests/expected/expected_read_groups3.tsv b/payload-json-to-tsvs/tests/expected/expected_read_groups3.tsv new file mode 100644 index 0000000..1eced02 --- /dev/null +++ b/payload-json-to-tsvs/tests/expected/expected_read_groups3.tsv @@ -0,0 +1,4 @@ +submitter_read_group_id read_group_id_in_bam submitter_sequencing_experiment_id platform_unit is_paired_end file_r1 file_r2 read_length_r1 read_length_r2 insert_size sample_barcode library_name type +C0HVY.2 74_8a True test_rg_3.bam test_rg_3.bam 298 Pond-147580 read_group +D0RE2.1 74_8b True test_rg_3.bam test_rg_3.bam 298 Pond-147580 read_group +D0RH0.2 74_8c True test_rg_3.bam test_rg_3.bam 298 Pond-147580 read_group diff --git a/payload-json-to-tsvs/tests/input/b9167a75-83ea-4c43-be30-e87faf3557dd.sequencing_experiment.payload.json b/payload-json-to-tsvs/tests/input/b9167a75-83ea-4c43-be30-e87faf3557dd.sequencing_experiment.payload.json new file mode 100644 index 0000000..9f71d45 --- /dev/null +++ b/payload-json-to-tsvs/tests/input/b9167a75-83ea-4c43-be30-e87faf3557dd.sequencing_experiment.payload.json @@ -0,0 +1,95 @@ +{ + "analysisType": { + "name": "sequencing_experiment" + }, + "studyId": "TEST-PRO", + "experiment": { + "submitter_sequencing_experiment_id": "TEST_EXP", + "sequencing_center": "EXT", + "platform": "ILLUMINA", + "platform_model": "HiSeq 2000", + "experimental_strategy": "WXS", + "sequencing_date": "2014-12-12", + "library_preparation_kit": null, + "library_strandedness": null, + "dv200": null, + "target_capture_kit": "DUMMY_VAL", + "number_of_genes": null, + "gene_padding": null, + "coverage": null, + "primary_target_regions": "DUMMY_VAL", + "capture_target_regions": "DUMMY_VAL" + }, + "read_group_count": 3, + "read_groups": [ + { + "submitter_read_group_id": "C0HVY.2", + "read_group_id_in_bam": null, + "platform_unit": "74_8a", + "is_paired_end": true, + "file_r1": "test_rg_3.bam", + "file_r2": "test_rg_3.bam", + "read_length_r1": null, + "read_length_r2": null, + "insert_size": 298, + "sample_barcode": null, + "library_name": "Pond-147580" + }, + { + "submitter_read_group_id": "D0RE2.1", + "read_group_id_in_bam": null, + "platform_unit": "74_8b", + "is_paired_end": true, + "file_r1": "test_rg_3.bam", + "file_r2": "test_rg_3.bam", + "read_length_r1": null, + "read_length_r2": null, + "insert_size": 298, + "sample_barcode": null, + "library_name": "Pond-147580" + }, + { + "submitter_read_group_id": "D0RH0.2", + "read_group_id_in_bam": null, + "platform_unit": "74_8c", + "is_paired_end": true, + "file_r1": "test_rg_3.bam", + "file_r2": "test_rg_3.bam", + "read_length_r1": null, + "read_length_r2": null, + "insert_size": 298, + "sample_barcode": null, + "library_name": "Pond-147580" + } + ], + "samples": [ + { + "submitterSampleId": "HCC1143_BAM_INPUT", + "matchedNormalSubmitterSampleId": null, + "sampleType": "Total DNA", + "specimen": { + "submitterSpecimenId": "HCC1143_BAM_INPUT", + "tumourNormalDesignation": "Normal", + "specimenTissueSource": "Blood derived", + "specimenType": "Cell line - derived from normal" + }, + "donor": { + "submitterDonorId": "HCC1143", + "gender": "Female" + } + } + ], + "files": [ + { + "fileName": "test_rg_3.bam", + "fileSize": 14911, + "fileMd5sum": "178f97f7b1ca8bfc28fd5586bdd56799", + "fileType": "BAM", + "fileAccess": "controlled", + "dataType": "Submitted Reads", + "info": { + "data_category": "Sequencing Reads" + } + } + ] +} \ No newline at end of file diff --git a/payload-json-to-tsvs/tests/test-job3.json b/payload-json-to-tsvs/tests/test-job3.json new file mode 100644 index 0000000..5245021 --- /dev/null +++ b/payload-json-to-tsvs/tests/test-job3.json @@ -0,0 +1,7 @@ +{ + "json_file":"input/b9167a75-83ea-4c43-be30-e87faf3557dd.sequencing_experiment.payload.json", + "data_directory":"input", + "expected_experiment_tsv":"expected/expected_experiment3.tsv", + "expected_file_tsv":"expected/expected_files3.tsv", + "expected_read_group_tsv":"expected/expected_read_groups3.tsv" +}