-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'dev' into single-read-raw-clean
- Loading branch information
Showing
72 changed files
with
675 additions
and
162 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# CI workflow: runs the nf-test suites for the index, run and validation
# workflows on every pull request.
name: End-to-end MGS workflow test

on: [pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          # 'adopt' (AdoptOpenJDK) is discontinued and no longer receives
          # updates; Eclipse Temurin is its maintained successor.
          distribution: 'temurin'

      - name: Setup Nextflow latest-edge
        uses: nf-core/setup-nextflow@v1
        with:
          version: "latest-edge"

      - name: Install nf-test
        run: |
          wget -qO- https://get.nf-test.com | bash
          sudo mv nf-test /usr/local/bin/

      - name: Run index workflow
        run: nf-test test --tag index --verbose

      # Free disk space before the next test suite; every docker command
      # except the final prune is best-effort (`|| true`).
      - name: Clean docker for more space
        run: |
          docker kill $(docker ps -q) 2>/dev/null || true
          docker rm $(docker ps -a -q) 2>/dev/null || true
          docker rmi $(docker images -q) -f 2>/dev/null || true
          docker system prune -af --volumes

      - name: Clean up nf-test dir
        run: sudo rm -rf .nf-test

      - name: Run run workflow
        run: nf-test test --tag run --verbose

      - name: Run run_validation workflow
        run: nf-test test --tag validation --verbose
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,7 @@ test/work | |
test/output | ||
test/.nextflow* | ||
*.Rhistory | ||
pipeline_report.txt | ||
pipeline_report.txt | ||
|
||
.nf-test/ | ||
.nf-test.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import json | ||
import boto3 | ||
import sys | ||
from botocore.exceptions import ClientError | ||
|
||
def load_lifecycle_config(config_path):
    """Load a lifecycle configuration from a JSON file.

    Args:
        config_path: Path to the JSON file to read.

    Returns:
        The parsed JSON document.

    Exits:
        Prints an error to stderr and terminates the process with status 1
        (via ``sys.exit``) when the file is missing, cannot be read, or does
        not contain valid UTF-8 encoded JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error: {config_path} contains invalid JSON ({e})", file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print(f"Error: Could not find file {config_path}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Permission problems, a directory passed as the path, etc.
        print(f"Error: Could not read file {config_path}: {e}", file=sys.stderr)
        sys.exit(1)
|
||
def _describe_expiration(expiration):
    """Return a short description of a lifecycle rule's Expiration mapping."""
    if 'Days' in expiration:
        return f"{expiration['Days']} days"
    if 'Date' in expiration:
        return f"on {expiration['Date']}"
    if expiration.get('ExpiredObjectDeleteMarker'):
        return "expired object delete markers"
    return "N/A"


def print_lifecycle_rules(rules):
    """Print a human-readable summary of lifecycle rules to stdout.

    Args:
        rules: Sequence of rule mappings. Each rule's ``ID`` and ``Status``
            are printed, plus its ``Expiration`` when that key is present.
            An empty or ``None`` value prints a "no rules" message instead.

    A rule without an ``ID`` or ``Status`` is shown with a placeholder rather
    than raising ``KeyError``, and an expiration that is not expressed in days
    (a fixed date or expired-delete-marker cleanup) is described as such
    instead of as "N/A days".
    """
    if not rules:
        print("No lifecycle rules configured")
        return

    for rule in rules:
        print(f"- {rule.get('ID', '<no ID>')}")
        print(f" Status: {rule.get('Status', 'Unknown')}")
        if 'Expiration' in rule:
            print(f" Expiration: {_describe_expiration(rule['Expiration'])}")
        # Blank line between rules.
        print()
|
||
def get_current_rules(s3, bucket_name):
    """Fetch the lifecycle rules currently configured on a bucket.

    Args:
        s3: Client object exposing ``get_bucket_lifecycle_configuration``.
        bucket_name: Name of the bucket to query.

    Returns:
        The ``Rules`` list from the response, or an empty list when the
        response has no ``Rules`` key or the service reports
        ``NoSuchLifecycleConfiguration``.

    Raises:
        ClientError: For any other error code returned by the call.
    """
    try:
        config = s3.get_bucket_lifecycle_configuration(Bucket=bucket_name)
    except ClientError as err:
        # A bucket with no lifecycle configuration is not an error here.
        if err.response['Error']['Code'] != 'NoSuchLifecycleConfiguration':
            raise
        return []
    return config.get('Rules', [])
|
||
def apply_lifecycle_rules(bucket_name, lifecycle_config):
    """Replace a bucket's lifecycle configuration and report before/after rules.

    Args:
        bucket_name: Name of the S3 bucket to update.
        lifecycle_config: Mapping passed as ``LifecycleConfiguration`` to
            ``put_bucket_lifecycle_configuration``.

    The function prints the bucket's current rules, applies the new
    configuration, then prints the rules read back from the bucket.

    Exits:
        Prints an error to stderr and terminates the process with status 1
        (via ``sys.exit``) on any service error (``ClientError``) or
        client-side botocore error (``BotoCoreError``), e.g. a configuration
        that fails parameter validation or missing credentials.
    """
    # Local import: botocore is already a dependency of this script, and this
    # keeps the change self-contained within the function.
    from botocore.exceptions import BotoCoreError

    try:
        s3 = boto3.client('s3')

        # First verify the bucket exists and we have access
        s3.head_bucket(Bucket=bucket_name)

        # Show current configuration
        print(f"\nCurrent lifecycle rules for bucket {bucket_name}:")
        current_rules = get_current_rules(s3, bucket_name)
        print_lifecycle_rules(current_rules)

        # Apply the new configuration
        s3.put_bucket_lifecycle_configuration(
            Bucket=bucket_name,
            LifecycleConfiguration=lifecycle_config
        )
        print(f"\nSuccessfully applied new lifecycle rules to bucket: {bucket_name}")

        # Show the updated configuration
        print("\nUpdated lifecycle rules:")
        new_rules = get_current_rules(s3, bucket_name)
        print_lifecycle_rules(new_rules)

    except ClientError as e:
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        # head_bucket reports failures as bare HTTP status codes.
        if error_code == '404':
            print(f"Error: Bucket {bucket_name} does not exist", file=sys.stderr)
        elif error_code == '403':
            print(f"Error: Permission denied for bucket {bucket_name}", file=sys.stderr)
        else:
            print(f"Error applying lifecycle rules: {str(e)}", file=sys.stderr)
        sys.exit(1)
    except BotoCoreError as e:
        # Client-side failures never reach the service and so are not
        # ClientErrors; report them cleanly instead of with a traceback.
        print(f"Error applying lifecycle rules: {str(e)}", file=sys.stderr)
        sys.exit(1)
|
||
def main():
    """Command-line entry point.

    Parses two positional arguments (the lifecycle configuration JSON path and
    the bucket name), loads the configuration, and applies it to the bucket.
    """
    cli = argparse.ArgumentParser(
        description='Apply S3 lifecycle rules to a bucket'
    )
    positionals = (
        ('config_file', 'Path to lifecycle configuration JSON file'),
        ('bucket_name', 'Name of the S3 bucket'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)
    options = cli.parse_args()

    # Load the configuration first so a bad file aborts before any S3 call.
    config = load_lifecycle_config(options.config_file)
    apply_lifecycle_rules(options.bucket_name, config)


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/*********************************************************************** | ||
| CONFIGURATION FILE FOR NAO VIRAL MGS WORKFLOW - REFERENCES & INDEXES | | ||
***********************************************************************/ | ||
|
||
params { | ||
mode = "index" | ||
|
||
// Directories | ||
base_dir = "s3://nao-testing/index-test" // Parent for working and output directories (can be S3) | ||
|
||
// URLs for downloading reference genomes etc | ||
taxonomy_url = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump_archive/taxdmp_2024-06-01.zip" | ||
virus_host_db_url = "https://www.genome.jp/ftp/db/virushostdb/virushostdb.tsv" | ||
|
||
// Human chromosome 21 only (NC_000021.9) | ||
human_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000021.9&rettype=fasta" | ||
|
||
// Look up genome assemblies on NCBI | ||
genome_urls = [ | ||
cow_ch28: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_037355.1&rettype=fasta", | ||
ecoli: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_002695.2&rettype=fasta" | ||
] | ||
|
||
ssu_url = "https://www.arb-silva.de/fileadmin/silva_databases/release_138.1/Exports/SILVA_138.1_SSURef_NR99_tax_silva.fasta.gz" | ||
lsu_url = "https://www.arb-silva.de/fileadmin/silva_databases/release_138.1/Exports/SILVA_138.1_LSURef_NR99_tax_silva.fasta.gz" | ||
|
||
// Other reference files | ||
host_taxon_db = "${projectDir}/ref/host-taxa.tsv" | ||
contaminants = "${projectDir}/ref/contaminants.fasta.gz" | ||
genome_patterns_exclude = "${projectDir}/ref/hv_patterns_exclude.txt" | ||
|
||
// Kraken viral DB | ||
kraken_db = "https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20240904.tar.gz" | ||
// Smallest possible BLAST DB | ||
blast_db_name = "nt_others" | ||
|
||
// Pull information from GenBank or RefSeq | ||
ncbi_viral_params = "--section refseq --assembly-level complete" | ||
|
||
// Other input values | ||
virus_taxid = "10239" | ||
viral_taxids_exclude = "2731619 2732413 2732411" // Exclude Caudoviricetes, Malgrantaviricetes, Faserviricetes | ||
host_taxa_screen = "vertebrate human" // Host taxa to screen for when building reference virus DB | ||
|
||
// Initializing run params to avoid warnings | ||
kraken_memory = "" | ||
classify_dedup_subset = "" | ||
} | ||
|
||
includeConfig "${projectDir}/configs/logging.config" | ||
includeConfig "${projectDir}/configs/containers.config" | ||
includeConfig "${projectDir}/configs/resources.config" | ||
includeConfig "${projectDir}/configs/profiles.config" | ||
includeConfig "${projectDir}/configs/output.config" | ||
process.queue = "harmon-queue" // AWS Batch job queue |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.