diff --git a/.gitignore b/.gitignore index e4d3694a2c..72da041b24 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ !.idea/runConfigurations *.iml .vscode/* +!.vscode/extensions.json .venv # build output diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000000..647466f3e5 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,3 @@ +{ + "recommendations": ["bluebrown.yamlfmt"] +} diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml new file mode 100644 index 0000000000..54cc132267 --- /dev/null +++ b/.yamlfmt.yaml @@ -0,0 +1,4 @@ +# See https://github.com/google/yamlfmt +formatter: + type: basic + drop_merge_tag: true diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index 901d14f303..5d5c243acf 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -108,10 +108,8 @@ defaultOrganismConfig: &defaultOrganismConfig header: Sample details - name: geo_loc_site ontology_id: GENEPIO:0100436 - definition: The name of a specific geographical location e.g. Credit River (rather - than river). - guidance: Provide the name of the specific geographical site using a specific noun - (a word that names a certain place, thing). + definition: The name of a specific geographical location e.g. Credit River (rather than river). + guidance: Provide the name of the specific geographical site using a specific noun (a word that names a certain place, thing). example: Credit River displayName: Collection site header: Sample details @@ -171,7 +169,7 @@ defaultOrganismConfig: &defaultOrganismConfig customDisplay: type: link url: "https://www.ncbi.nlm.nih.gov/biosample/__value__" - header: "INSDC" + header: "INSDC" noInput: true per_segment: true - name: culture_id @@ -180,13 +178,7 @@ defaultOrganismConfig: &defaultOrganismConfig - name: sample_received_date ontology_id: GENEPIO:0001177 definition: The date on which the sample was received by the laboratory. 
- guidance: Alternative if "sample_collection_date" is not available. Record the date the sample was received by the laboratory. - Required granularity includes year, month and day. Before sharing this data, ensure this date is not - considered identifiable information. If this date is considered identifiable, - it is acceptable to add "jitter" to the received date by adding or subtracting - calendar days. Do not change the received date in your original records. Alternatively, - collection_date may be used as a substitute in the data you share. - The date should be provided in ISO 8601 standard format "YYYY-MM-DD". + guidance: Alternative if "sample_collection_date" is not available. Record the date the sample was received by the laboratory. Required granularity includes year, month and day. Before sharing this data, ensure this date is not considered identifiable information. If this date is considered identifiable, it is acceptable to add "jitter" to the received date by adding or subtracting calendar days. Do not change the received date in your original records. Alternatively, collection_date may be used as a substitute in the data you share. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". example: '2020-03-20' displayName: Sample Received Date type: date @@ -206,139 +198,92 @@ defaultOrganismConfig: &defaultOrganismConfig header: Sampling - name: presampling_activity ontology_id: GENEPIO:0100433 - definition: The activities or variables introduced upstream of sample collection - that may affect the sample collected. - guidance: If there was presampling activity that would affect the sample prior to - collection (this is different than sample processing which happens after the sample - is collected), provide the experimental activities by selecting one or more values - from the template pick list. If the information is unknown or cannot be provided, - leave blank or provide a null value. 
+ definition: The activities or variables introduced upstream of sample collection that may affect the sample collected. + guidance: If there was presampling activity that would affect the sample prior to collection (this is different than sample processing which happens after the sample is collected), provide the experimental activities by selecting one or more values from the template pick list. If the information is unknown or cannot be provided, leave blank or provide a null value. example: Antimicrobial pre-treatment [GENEPIO:0100537] displayName: Presampling activity header: Sampling - name: anatomical_material ontology_id: GENEPIO:0001211 - definition: A substance obtained from an anatomical part of an organism e.g. tissue, - blood. - guidance: 'Provide a descriptor if an anatomical material was sampled. Use the pick - list provided in the template. If a desired term is missing from the pick list, - use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. - If not applicable, leave blank.' + definition: A substance obtained from an anatomical part of an organism e.g. tissue, blood. + guidance: 'Provide a descriptor if an anatomical material was sampled. Use the pick list provided in the template. If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. If not applicable, leave blank.' example: Blood [UBERON:0000178] displayName: Anatomical material header: Sampling - name: anatomical_part ontology_id: GENEPIO:0001214 definition: An anatomical part of an organism e.g. oropharynx. - guidance: 'Provide a descriptor if an anatomical part was sampled. Use the pick - list provided in the template. If a desired term is missing from the pick list, - use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. - If not applicable, leave blank.' 
+ guidance: 'Provide a descriptor if an anatomical part was sampled. Use the pick list provided in the template. If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. If not applicable, leave blank.' example: Nasopharynx (NP) [UBERON:0001728] displayName: Anatomical part header: Sampling - name: body_product ontology_id: GENEPIO:0001216 definition: A substance excreted/secreted from an organism e.g. feces, urine, sweat. - guidance: 'Provide a descriptor if a body product was sampled. Use the pick list - provided in the template. If a desired term is missing from the pick list, use - this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. - If not applicable, leave blank.' + guidance: 'Provide a descriptor if a body product was sampled. Use the pick list provided in the template. If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/uberon. If not applicable, leave blank.' example: Feces [UBERON:0001988] displayName: Body product header: Sampling - name: environmental_material ontology_id: GENEPIO:0001223 - definition: A substance obtained from the natural or man-made environment e.g. soil, - water, sewage, door handle, bed handrail, face mask. - guidance: 'Provide a descriptor if an environmental material was sampled. Use the - pick list provided in the template. If a desired term is missing from the pick - list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/envo. - If not applicable, leave blank.' + definition: A substance obtained from the natural or man-made environment e.g. soil, water, sewage, door handle, bed handrail, face mask. + guidance: 'Provide a descriptor if an environmental material was sampled. Use the pick list provided in the template. 
If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/envo. If not applicable, leave blank.' example: Face mask [OBI:0002787] displayName: Environmental material header: Sampling - name: environmental_site ontology_id: GENEPIO:0001232 - definition: An environmental location may describe a site in the natural or built - environment e.g. hospital, wet market, bat cave. - guidance: 'Provide a descriptor if an environmental site was sampled. Use the pick - list provided in the template. If a desired term is missing from the pick list, - use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/envo. - If not applicable, leave blank.' + definition: An environmental location may describe a site in the natural or built environment e.g. hospital, wet market, bat cave. + guidance: 'Provide a descriptor if an environmental site was sampled. Use the pick list provided in the template. If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/envo. If not applicable, leave blank.' example: Hospital [ENVO:00002173] displayName: Environmental site header: Sampling - name: collection_device ontology_id: GENEPIO:0001234 definition: The instrument or container used to collect the sample e.g. swab. - guidance: 'Provide a descriptor if a collection device was used for sampling. Use - the pick list provided in the template. If a desired term is missing from the - pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/obi. - If not applicable, leave blank.' + guidance: 'Provide a descriptor if a collection device was used for sampling. Use the pick list provided in the template. If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/obi. 
If not applicable, leave blank.' example: Swab [GENEPIO:0100027] displayName: Collection device header: Sampling - name: collection_method ontology_id: GENEPIO:0001241 definition: The process used to collect the sample e.g. phlebotomy, necropsy. - guidance: 'Provide a descriptor if a collection method was used for sampling. Use - the pick list provided in the template. If a desired term is missing from the - pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/obi. - If not applicable, leave blank.' + guidance: 'Provide a descriptor if a collection method was used for sampling. Use the pick list provided in the template. If a desired term is missing from the pick list, use this look-up service to identify a standardized term: https://www.ebi.ac.uk/ols/ontologies/obi. If not applicable, leave blank.' example: Bronchoalveolar lavage (BAL) [GENEPIO:0100032] displayName: Collection method header: Sampling - name: food_product ontology_id: GENEPIO:0100444 definition: A material consumed and digested for nutritional value or enjoyment. - guidance: This field includes animal feed. If applicable, select the standardized - term and ontology ID for the anatomical material from the picklist provided. Multiple - values can be provided, separated by a semi-colon. - example: Feather meal [FOODON:00003927]; Bone meal [ENVO:02000054]; Chicken breast - [FOODON:00002703] + guidance: This field includes animal feed. If applicable, select the standardized term and ontology ID for the anatomical material from the picklist provided. Multiple values can be provided, separated by a semi-colon. + example: Feather meal [FOODON:00003927]; Bone meal [ENVO:02000054]; Chicken breast [FOODON:00002703] displayName: Food Product header: Sampling - name: food_product_properties ontology_id: GENEPIO:0100445 - definition: Any characteristic of the food product pertaining to its state, processing, - a label claim, or implications for consumers. 
- guidance: Provide any characteristics of the food product including whether it has - been cooked, processed, preserved, any known information about its state (e.g. - raw, ready-to-eat), any known information about its containment (e.g. canned), - and any information about a label claim (e.g. organic, fat-free). + definition: Any characteristic of the food product pertaining to its state, processing, a label claim, or implications for consumers. + guidance: Provide any characteristics of the food product including whether it has been cooked, processed, preserved, any known information about its state (e.g. raw, ready-to-eat), any known information about its containment (e.g. canned), and any information about a label claim (e.g. organic, fat-free). example: Food (chopped) [FOODON:00002777]; Ready-to-eat (RTE) [FOODON:03316636] displayName: Food Product Properties header: Sampling - name: specimen_processing ontology_id: GENEPIO:0100435 - definition: The processing applied to samples post-collection, prior to further - testing, characterization, or isolation procedures. - guidance: Provide the sample processing information by selecting a value from the - template pick list. If the information is unknown or cannot be provided, leave - blank or provide a null value. + definition: The processing applied to samples post-collection, prior to further testing, characterization, or isolation procedures. + guidance: Provide the sample processing information by selecting a value from the template pick list. If the information is unknown or cannot be provided, leave blank or provide a null value. example: Samples pooled [OBI:0600016] displayName: Specimen Processing header: Specimen processing - name: specimen_processing_details ontology_id: GENEPIO:0100311 - definition: Detailed information regarding the processing applied to a sample during - or after receiving the sample. - guidance: Provide a free text description of any processing details applied to a - sample. 
- example: 25 swabs were pooled and further prepared as a single sample during library - prep. + definition: Detailed information regarding the processing applied to a sample during or after receiving the sample. + guidance: Provide a free text description of any processing details applied to a sample. + example: 25 swabs were pooled and further prepared as a single sample during library prep. displayName: Specimen Processing Details header: Specimen processing - name: experimental_specimen_role_type ontology_id: GENEPIO:0100921 definition: The type of role that the sample represents in the experiment. - guidance: Samples can play different types of roles in experiments. A sample under - study in one experiment may act as a control or be a replicate of another sample - in another experiment. This field is used to distinguish samples under study from - controls, replicates, etc. If the sample acted as an experimental control or - a replicate, select a role type from the picklist. If the sample was not a control, - leave blank or select "Not Applicable". + guidance: Samples can play different types of roles in experiments. A sample under study in one experiment may act as a control or be a replicate of another sample in another experiment. This field is used to distinguish samples under study from controls, replicates, etc. If the sample acted as an experimental control or a replicate, select a role type from the picklist. If the sample was not a control, leave blank or select "Not Applicable". example: Positive experimental control [GENEPIO:0101018] displayName: Experimental Specimen Role Type header: Specimen processing @@ -354,8 +299,7 @@ defaultOrganismConfig: &defaultOrganismConfig - name: host_age_bin ontology_id: GENEPIO:0001394 definition: The age category of the host at the time of sampling. - guidance: Age bins in 10 year intervals have been provided. If a host's age cannot - be specified due to provacy concerns, an age bin can be used as an alternative. 
+ guidance: Age bins in 10 year intervals have been provided. If a host's age cannot be specified due to privacy concerns, an age bin can be used as an alternative. example: 50 - 59 [GENEPIO:0100054] displayName: Host Age Bin header: Host @@ -369,27 +313,20 @@ defaultOrganismConfig: &defaultOrganismConfig - name: host_origin_country ontology_id: GENEPIO:0100438 definition: The country of origin of the host. - guidance: If a sample is from a human or animal host that originated from outside - of Canada, provide the the name of the country where the host originated by selecting - a value from the template pick list. If the information is unknown or cannot be - provided, leave blank or provide a null value. + guidance: If a sample is from a human or animal host that originated from outside of Canada, provide the name of the country where the host originated by selecting a value from the template pick list. If the information is unknown or cannot be provided, leave blank or provide a null value. example: South Africa [GAZ:00001094] displayName: Host Origin Country header: Host - name: host_disease ontology_id: GENEPIO:0001391 definition: The name of the disease experienced by the host. - guidance: "This field is only required if the Pathogen.cl package was selected. - If the host was sick, provide the name of the disease.The standardized term - can be sourced from this look-up service: https://www.ebi.ac.uk/ols/ontologies/doid - If the disease is not known, put \u201Cmissing\u201D." + guidance: "This field is only required if the Pathogen.cl package was selected. If the host was sick, provide the name of the disease. The standardized term can be sourced from this look-up service: https://www.ebi.ac.uk/ols/ontologies/doid If the disease is not known, put “missing”."
example: mastitis, gastroenteritis displayName: Host disease header: Host - name: signs_and_symptoms ontology_id: GENEPIO:0001400 - definition: A perceived change in function or sensation, (loss, disturbance or appearance) - indicative of a disease, reported by a patient. + definition: A perceived change in function or sensation, (loss, disturbance or appearance) indicative of a disease, reported by a patient. guidance: Select all of the symptoms experienced by the host from the pick list. example: Cough [HP:0012735], Fever [HP:0001945], Rigors (fever shakes) [HP:0025145] displayName: Signs and symptoms @@ -411,9 +348,7 @@ defaultOrganismConfig: &defaultOrganismConfig - name: travel_history ontology_id: GENEPIO:0001416 definition: Travel history in last six months. - guidance: Specify the countries (and more granular locations if known) travelled - in the last six months; can include multiple travels. Separate multiple travel - events with a semicolon. Provide as free text. + guidance: Specify the countries (and more granular locations if known) travelled in the last six months; can include multiple travels. Separate multiple travel events with a semicolon. Provide as free text. example: Canada, Vancouver; USA, Seattle; Italy, Milan displayName: Travel History header: Host @@ -427,16 +362,14 @@ defaultOrganismConfig: &defaultOrganismConfig - name: host_role ontology_id: GENEPIO:0001419 definition: The role of the host in relation to the exposure setting. - guidance: Select the host's personal role(s) from the pick list provided in the - template. If the desired term is missing, contact the curation team. + guidance: Select the host's personal role(s) from the pick list provided in the template. If the desired term is missing, contact the curation team. example: Patient [OMRSE:00000030] displayName: Host role header: Host - name: exposure_setting ontology_id: GENEPIO:0001428 definition: The setting leading to exposure. 
- guidance: Select the host exposure setting(s) from the pick list provided in the - template. If a desired term is missing, contact the curation team. + guidance: Select the host exposure setting(s) from the pick list provided in the template. If a desired term is missing, contact the curation team. example: Healthcare Setting [GENEPIO:0100201] displayName: Exposure setting header: Host @@ -449,23 +382,19 @@ defaultOrganismConfig: &defaultOrganismConfig header: Host - name: previous_infection_disease definition: The name of the disease previously experienced by the host. - guidance: Provide the name(s) of the previous of ongoing disease(s). Multiple diseases - can be separated by a semi-colon. + guidance: Provide the name(s) of the previous or ongoing disease(s). Multiple diseases can be separated by a semi-colon. example: COVID-19 displayName: Previous infection (disease) header: Host - name: previous_infection_organism - definition: The name of the pathogen causing the disease previously experienced - by the host. - guidance: Provide the name(s) of the pathogen(s) causing the previous or ongoing - infections. Multiple pathogen names can be separated using a semi-colon. + definition: The name of the pathogen causing the disease previously experienced by the host. + guidance: Provide the name(s) of the pathogen(s) causing the previous or ongoing infections. Multiple pathogen names can be separated using a semi-colon. example: Sudden Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2) displayName: Previous infection (organism) header: Host - name: host_vaccination_status ontology_id: GENEPIO:0001404 - definition: The vaccination status of the host (fully vaccinated, partially vaccinated, - or not vaccinated). + definition: The vaccination status of the host (fully vaccinated, partially vaccinated, or not vaccinated). guidance: Select the vaccination status of the host from the pick list.
example: Fully Vaccinated [GENEPIO:0100100] displayName: Host Vaccination Status @@ -473,11 +402,7 @@ defaultOrganismConfig: &defaultOrganismConfig - name: purpose_of_sequencing ontology_id: GENEPIO:0001445 definition: The reason that the sample was sequenced. - guidance: The reason why a sample was originally collected may differ from the reason - why it was selected for sequencing. The reason a sample was sequenced may provide - information about potential biases in sequencing strategy. Provide the purpose - of sequencing from the picklist in the template. The reason for sample collection - should be indicated in the "purpose of sampling" field. + guidance: The reason why a sample was originally collected may differ from the reason why it was selected for sequencing. The reason a sample was sequenced may provide information about potential biases in sequencing strategy. Provide the purpose of sequencing from the picklist in the template. The reason for sample collection should be indicated in the "purpose of sampling" field. example: Baseline surveillance (random sampling) [GENEPIO:0100005] displayName: Purpose Of Sequencing header: Sequencing @@ -495,10 +420,8 @@ defaultOrganismConfig: &defaultOrganismConfig header: Sequencing - name: amplicon_pcr_primer_scheme ontology_id: GENEPIO:0001456 - definition: The specifications of the primers (primer sequences, binding positions, - fragment size generated etc) used to generate the amplicons to be sequenced. - guidance: Provide the name and version of the primer scheme used to generate the - amplicons for sequencing. + definition: The specifications of the primers (primer sequences, binding positions, fragment size generated etc) used to generate the amplicons to be sequenced. + guidance: Provide the name and version of the primer scheme used to generate the amplicons for sequencing. 
example: https://github.com/joshquick/artic-ncov2019/blob/master/primer_schemes/nCoV-2019/V3/nCoV-2019.tsv displayName: Amplicon pcr primer scheme header: Sequencing @@ -519,73 +442,49 @@ defaultOrganismConfig: &defaultOrganismConfig - name: sequencing_protocol ontology_id: GENEPIO:0001454 definition: The protocol used to generate the sequence. - guidance: 'Provide a free text description of the methods and materials used to - generate the sequence. Suggested text, fill in information where indicated.: "Viral - sequencing was performed following a tiling amplicon strategy using the primer scheme. Sequencing was performed using a sequencing instrument. - Libraries were prepared using library kit. "' - example: Genomes were generated through amplicon sequencing of 1200 bp amplicons - with Freed schema primers. Libraries were created using Illumina DNA Prep kits, - and sequence data was produced using Miseq Micro v2 (500 cycles) sequencing kits. + guidance: 'Provide a free text description of the methods and materials used to generate the sequence. Suggested text, fill in information where indicated.: "Viral sequencing was performed following a tiling amplicon strategy using the primer scheme. Sequencing was performed using a sequencing instrument. Libraries were prepared using library kit. "' + example: Genomes were generated through amplicon sequencing of 1200 bp amplicons with Freed schema primers. Libraries were created using Illumina DNA Prep kits, and sequence data was produced using Miseq Micro v2 (500 cycles) sequencing kits. displayName: Sequencing protocol header: Sequencing - name: sequencing_assay_type ontology_id: GENEPIO:0100997 - definition: The overarching sequencing methodology that was used to determine the - sequence of a biomaterial. - guidance: 'Example Guidance: Provide the name of the DNA or RNA sequencing technology - used in your study. If unsure refer to the protocol documentation, or provide - a null value.' 
+ definition: The overarching sequencing methodology that was used to determine the sequence of a biomaterial. + guidance: 'Example Guidance: Provide the name of the DNA or RNA sequencing technology used in your study. If unsure refer to the protocol documentation, or provide a null value.' example: whole genome sequencing assay [OBI:0002117] displayName: Sequencing Assay Type header: Sequencing - name: sequenced_by_organization ontology_id: GENEPIO:0100416 - definition: The name of the agency, organization or institution responsible for - sequencing the isolate's genome. - guidance: Provide the name of the agency, organization or institution that performed - the sequencing in full (avoid abbreviations). If the information is unknown or - cannot be provided, leave blank or provide a null value. + definition: The name of the agency, organization or institution responsible for sequencing the isolate's genome. + guidance: Provide the name of the agency, organization or institution that performed the sequencing in full (avoid abbreviations). If the information is unknown or cannot be provided, leave blank or provide a null value. example: Public Health Agency of Canada (PHAC) [GENEPIO:0100551] displayName: Sequenced By header: Sequencing - name: sequenced_by_contact_name ontology_id: GENEPIO:0100471 - definition: The name or title of the contact responsible for follow-up regarding - the sequence. - guidance: Provide the name of an individual or their job title. As personnel turnover - may render the contact's name obsolete, it is more prefereable to provide a job - title for ensuring accuracy of information and institutional memory. If the information - is unknown or cannot be provided, leave blank or provide a null value. + definition: The name or title of the contact responsible for follow-up regarding the sequence. + guidance: Provide the name of an individual or their job title. 
As personnel turnover may render the contact's name obsolete, it is more preferable to provide a job title for ensuring accuracy of information and institutional memory. If the information is unknown or cannot be provided, leave blank or provide a null value. example: Enterics Lab Manager displayName: Sequenced By Contact Name header: Sequencing - name: sequenced_by_contact_email ontology_id: GENEPIO:0100422 - definition: The email address of the contact responsible for follow-up regarding - the sequence. - guidance: Provide the email associated with the listed contact. As personnel turnover - may render an individual's email obsolete, it is more prefereable to provide an - address for a position or lab, to ensure accuracy of information and institutional - memory. If the information is unknown or cannot be provided, leave blank or provide - a null value. + definition: The email address of the contact responsible for follow-up regarding the sequence. + guidance: Provide the email associated with the listed contact. As personnel turnover may render an individual's email obsolete, it is more preferable to provide an address for a position or lab, to ensure accuracy of information and institutional memory. If the information is unknown or cannot be provided, leave blank or provide a null value. example: enterics@lab.ca displayName: Sequenced By Contact Email header: Sequencing - name: raw_sequence_data_processing_method ontology_id: GENEPIO:0001458 - definition: The method used for raw data processing such as removing barcodes, adapter - trimming, filtering etc. - guidance: Provide the name and version numbers of the software used to process the - raw data. + definition: The method used for raw data processing such as removing barcodes, adapter trimming, filtering etc. + guidance: Provide the name and version numbers of the software used to process the raw data.
example: Porechop 0.2.3 displayName: Raw sequence data processing method header: Sequencing - name: dehosting_method ontology_id: GENEPIO:0001459 definition: The method used to remove host reads from the pathogen sequence. - guidance: Provide the name and version number of the software used to remove host - reads. + guidance: Provide the name and version number of the software used to remove host reads. example: Nanostripper 1.2.3 displayName: Dehosting method header: Sequencing @@ -612,8 +511,7 @@ defaultOrganismConfig: &defaultOrganismConfig header: Sequencing - name: depth_of_coverage ontology_id: GENEPIO:0001474 - definition: The average number of reads representing a given nucleotide in the reconstructed - sequence. + definition: The average number of reads representing a given nucleotide in the reconstructed sequence. guidance: Provide value as a fold of coverage (as a number). example: 400 displayName: Depth of coverage @@ -629,51 +527,35 @@ defaultOrganismConfig: &defaultOrganismConfig header: Sequencing - name: quality_control_method_name ontology_id: GENEPIO:0100557 - definition: The name of the method used to assess whether a sequence passed a predetermined - quality control threshold. - guidance: Providing the name of the method used for quality control is very important - for interpreting the rest of the QC information. Method names can be provided - as the name of a pipeline or a link to a GitHub repository. Multiple methods should - be listed and separated by a semi-colon. Do not include QC tags in other fields - if no method name is provided. + definition: The name of the method used to assess whether a sequence passed a predetermined quality control threshold. + guidance: Providing the name of the method used for quality control is very important for interpreting the rest of the QC information. Method names can be provided as the name of a pipeline or a link to a GitHub repository. Multiple methods should be listed and separated by a semi-colon. 
Do not include QC tags in other fields if no method name is provided. example: ncov-tools displayName: Quality control method name header: Sequencing - name: quality_control_method_version ontology_id: GENEPIO:0100558 - definition: The version number of the method used to assess whether a sequence passed - a predetermined quality control threshold. - guidance: Methods updates can make big differences to their outputs. Provide the - version of the method used for quality control. The version can be expressed using - whatever convention the developer implements (e.g. date, semantic versioning). - If multiple methods were used, record the version numbers in the same order as - the method names. Separate the version numbers using a semi-colon. + definition: The version number of the method used to assess whether a sequence passed a predetermined quality control threshold. + guidance: Methods updates can make big differences to their outputs. Provide the version of the method used for quality control. The version can be expressed using whatever convention the developer implements (e.g. date, semantic versioning). If multiple methods were used, record the version numbers in the same order as the method names. Separate the version numbers using a semi-colon. example: "1.2.3" displayName: Quality control method version header: Sequencing - name: quality_control_determination ontology_id: GENEPIO:0100559 definition: The determination of a quality control assessment. - guidance: Select a value from the pick list provided. If a desired value is missing, - submit a new term request to the PHA4GE QC Tag GitHub issuetracker using the New - Term Request form. + guidance: Select a value from the pick list provided. If a desired value is missing, submit a new term request to the PHA4GE QC Tag GitHub issuetracker using the New Term Request form. 
example: sequence failed quality control displayName: Quality control determination header: Sequencing - name: quality_control_issues ontology_id: GENEPIO:0100560 - definition: The reason contributing to, or causing, a low quality determination - in a quality control assessment. - guidance: Select a value from the pick list provided. If a desired value is missing, - submit a new term request to the PHA4GE QC Tag GitHub issuetracker using the New - Term Request form. + definition: The reason contributing to, or causing, a low quality determination in a quality control assessment. + guidance: Select a value from the pick list provided. If a desired value is missing, submit a new term request to the PHA4GE QC Tag GitHub issuetracker using the New Term Request form. example: low average genome coverage displayName: Quality control issues header: Sequencing - name: quality_control_details ontology_id: GENEPIO:0100561 - definition: The details surrounding a low quality determination in a quality control - assessment. + definition: The details surrounding a low quality determination in a quality control assessment. guidance: Provide notes or details regarding QC results using free text. example: CT value of 39. Low viral load. Low DNA concentration after amplification. 
displayName: Quality control details @@ -777,60 +659,60 @@ defaultOrganismConfig: &defaultOrganismConfig noInput: true rangeSearch: true preprocessing: - args: { type: int } - inputs: { input: nextclade.totalSubstitutions } + args: {type: int} + inputs: {input: nextclade.totalSubstitutions} - name: total_inserted_nucs type: int header: "Alignment states and QC metrics" noInput: true rangeSearch: true preprocessing: - args: { type: int } - inputs: { input: nextclade.totalInsertions } + args: {type: int} + inputs: {input: nextclade.totalInsertions} - name: total_deleted_nucs type: int header: "Alignment states and QC metrics" noInput: true rangeSearch: true preprocessing: - args: { type: int } - inputs: { input: nextclade.totalDeletions } + args: {type: int} + inputs: {input: nextclade.totalDeletions} - name: total_ambiguous_nucs type: int header: "Alignment states and QC metrics" noInput: true rangeSearch: true preprocessing: - args: { type: int } - inputs: { input: "nextclade.totalNonACGTNs" } + args: {type: int} + inputs: {input: "nextclade.totalNonACGTNs"} - name: total_unknown_nucs type: int header: "Alignment states and QC metrics" noInput: true rangeSearch: true preprocessing: - args: { type: int } - inputs: { input: nextclade.totalMissing } + args: {type: int} + inputs: {input: nextclade.totalMissing} - name: total_frame_shifts type: int rangeSearch: true header: "Alignment states and QC metrics" noInput: true preprocessing: - args: { type: int } - inputs: { input: nextclade.totalFrameShifts } + args: {type: int} + inputs: {input: nextclade.totalFrameShifts} - name: frame_shifts header: "Alignment states and QC metrics" noInput: true preprocessing: - inputs: { input: nextclade.frameShifts } + inputs: {input: nextclade.frameShifts} - name: completeness type: float header: "Alignment states and QC metrics" noInput: true preprocessing: - args: { type: float } - inputs: { input: nextclade.coverage } + args: {type: float} + inputs: {input: nextclade.coverage} 
website: &website tableColumns: - sample_collection_date @@ -860,7 +742,7 @@ defaultOrganismConfig: &defaultOrganismConfig configFile: &preprocessingConfigFile log_level: DEBUG nextclade_dataset_name: nextstrain/ebola/zaire - genes: [ NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L ] + genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L] batch_size: 100 ingest: &ingest image: ghcr.io/loculus-project/ingest @@ -893,11 +775,11 @@ defaultOrganisms: ebola-zaire: <<: *defaultOrganismConfig preprocessing: - - <<: *preprocessing - configFile: - <<: *preprocessingConfigFile - nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output - nextclade_dataset_name: nextstrain/ebola/zaire + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output + nextclade_dataset_name: nextstrain/ebola/zaire ebola-sudan: <<: *defaultOrganismConfig schema: @@ -910,19 +792,19 @@ defaultOrganisms: header: "Alignment states and QC metrics" noInput: true preprocessing: - args: { type: int } - inputs: { input: nextclade.qc.stopCodons.totalStopCodons } + args: {type: int} + inputs: {input: nextclade.qc.stopCodons.totalStopCodons} - name: stop_codons header: "Alignment states and QC metrics" noInput: true preprocessing: - inputs: { input: nextclade.qc.stopCodons.stopCodons } + inputs: {input: nextclade.qc.stopCodons.stopCodons} preprocessing: - - <<: *preprocessing - configFile: - <<: *preprocessingConfigFile - nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output - nextclade_dataset_name: nextstrain/ebola/sudan + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output + nextclade_dataset_name: nextstrain/ebola/sudan ingest: <<: *ingest configFile: @@ -964,14 +846,14 @@ 
defaultOrganisms: autocomplete: true initiallyVisible: true preprocessing: - inputs: { input: nextclade.clade } + inputs: {input: nextclade.clade} - name: outbreak header: "Clade & Lineage" noInput: true generateIndex: true autocomplete: true preprocessing: - inputs: { input: nextclade.customNodeAttributes.outbreak } + inputs: {input: nextclade.customNodeAttributes.outbreak} - name: lineage header: "Clade & Lineage" noInput: true @@ -979,19 +861,19 @@ defaultOrganisms: autocomplete: true initiallyVisible: true preprocessing: - inputs: { input: nextclade.customNodeAttributes.lineage } + inputs: {input: nextclade.customNodeAttributes.lineage} - name: total_stop_codons type: int header: "Alignment states and QC metrics" noInput: true preprocessing: - args: { type: int } - inputs: { input: nextclade.qc.stopCodons.totalStopCodons } + args: {type: int} + inputs: {input: nextclade.qc.stopCodons.totalStopCodons} - name: stop_codons header: "Alignment states and QC metrics" noInput: true preprocessing: - inputs: { input: nextclade.qc.stopCodons.stopCodons } + inputs: {input: nextclade.qc.stopCodons.stopCodons} website: <<: *website tableColumns: @@ -1000,19 +882,19 @@ defaultOrganisms: - authors - author_affiliations - geo_loc_country - - length + - length - clade - lineage defaultOrderBy: sample_collection_date defaultOrder: descending preprocessing: - - <<: *preprocessing - configFile: - <<: *preprocessingConfigFile - nextclade_dataset_name: nextstrain/mpox/all-clades - batch_size: 5 - genes: - - OPG001 + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + nextclade_dataset_name: nextstrain/mpox/all-clades + batch_size: 5 + genes: + - OPG001 ingest: <<: *ingest configFile: @@ -1040,19 +922,19 @@ defaultOrganisms: autocomplete: true initiallyVisible: true preprocessing: - inputs: { input: nextclade.clade } + inputs: {input: nextclade.clade} - name: total_stop_codons type: int header: "Alignment states and QC metrics" noInput: true preprocessing: - args: { 
type: int } - inputs: { input: nextclade.qc.stopCodons.totalStopCodons } + args: {type: int} + inputs: {input: nextclade.qc.stopCodons.totalStopCodons} - name: stop_codons header: "Alignment states and QC metrics" noInput: true preprocessing: - inputs: { input: nextclade.qc.stopCodons.stopCodons } + inputs: {input: nextclade.qc.stopCodons.stopCodons} website: <<: *website tableColumns: @@ -1067,16 +949,16 @@ defaultOrganisms: defaultOrderBy: sample_collection_date defaultOrder: descending preprocessing: - - <<: *preprocessing - configFile: - <<: *preprocessingConfigFile - nextclade_dataset_name: nextstrain/wnv/all-lineages - nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output - genes: [ capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5 ] + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + nextclade_dataset_name: nextstrain/wnv/all-lineages + nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output + genes: [capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5] ingest: <<: *ingest configFile: - taxon_id: 3048448 + taxon_id: 3048448 referenceGenomes: nucleotideSequences: - name: main @@ -1194,7 +1076,7 @@ defaultOrganisms: schema: <<: *schema organismName: "Crimean-Congo Hemorrhagic Fever Virus" - nucleotideSequences: [ L, M, S ] + nucleotideSequences: [L, M, S] image: "https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Crimean-Congo_Hemorrhagic_Fever_%28CCHF%29_Virus_%2840689899455%29.jpg/1920px-Crimean-Congo_Hemorrhagic_Fever_%28CCHF%29_Virus_%2840689899455%29.jpg" website: <<: *website @@ -1212,14 +1094,14 @@ defaultOrganisms: defaultOrderBy: sample_collection_date defaultOrder: descending preprocessing: - - <<: *preprocessing - configFile: - <<: *preprocessingConfigFile - log_level: DEBUG - nextclade_dataset_name: nextstrain/cchfv/linked - nextclade_dataset_server: 
https://raw.githubusercontent.com/nextstrain/nextclade_data/cornelius-cchfv/data_output - nucleotideSequences: [ L, M, S ] - genes: [ RdRp, GPC, NP ] + - <<: *preprocessing + configFile: + <<: *preprocessingConfigFile + log_level: DEBUG + nextclade_dataset_name: nextstrain/cchfv/linked + nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/cornelius-cchfv/data_output + nucleotideSequences: [L, M, S] + genes: [RdRp, GPC, NP] ingest: <<: *ingest configFile: