Skip to content

Commit

Permalink
[add] nextstrain metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
ibigen committed Oct 25, 2022
1 parent 1b23b8f commit ae7652a
Show file tree
Hide file tree
Showing 25 changed files with 881 additions and 83 deletions.
7 changes: 4 additions & 3 deletions constants/software_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,23 +347,24 @@ class SoftwareNames(object):
SOFTWARE_NEXTSTRAIN = os.path.join(settings.DIR_SOFTWARE, "nextstrain/nextstrain.sh")
SOFTWARE_NEXTSTRAIN_MPX = os.path.join(settings.DIR_SOFTWARE, "nextstrain/nextstrain_mpx.sh")
SOFTWARE_NEXTSTRAIN_BUILDS_BASE = os.path.join(settings.DIR_SOFTWARE, "nextstrain/")

### Several types of builds for NEXTSTRAIN
SOFTWARE_NEXTSTRAIN_BUILDS_generic = "generic"
SOFTWARE_NEXTSTRAIN_BUILDS_ncov = "ncov"
SOFTWARE_NEXTSTRAIN_BUILDS_mpx = "mpx"

SOFTWARE_NEXTSTRAIN_BUILDS_flu_h3n2_12y = "flu_h3n2_12y"
SOFTWARE_NEXTSTRAIN_BUILDS_flu_h1n1pdm_12y = "flu_h1n1pdm_12y"
SOFTWARE_NEXTSTRAIN_BUILDS_flu_vic_12y = "flu_vic_12y"
SOFTWARE_NEXTSTRAIN_BUILDS_flu_yam_12y = "flu_yam_12y"

SOFTWARE_NEXTSTRAIN_BUILDS_flu = [
SOFTWARE_NEXTSTRAIN_BUILDS_flu = [
SOFTWARE_NEXTSTRAIN_BUILDS_flu_h3n2_12y,
SOFTWARE_NEXTSTRAIN_BUILDS_flu_h1n1pdm_12y,
SOFTWARE_NEXTSTRAIN_BUILDS_flu_vic_12y,
SOFTWARE_NEXTSTRAIN_BUILDS_flu_yam_12y
]

SOFTWARE_NEXTSTRAIN_BUILDS_mpx = "mpx"

SOFTWARE_NEXTSTRAIN_BUILDS = [
SOFTWARE_NEXTSTRAIN_BUILDS_generic,
SOFTWARE_NEXTSTRAIN_BUILDS_ncov,
Expand Down
22 changes: 12 additions & 10 deletions datasets/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from constants.constants import Constants
from utils.software import Software
from Bio import SeqIO
from utils.parse_in_files import ParseInFiles
from utils.parse_in_files_nextstrain import ParseNextStrainFiles

class AddReferencesDatasetForm(forms.ModelForm):
"""
Expand Down Expand Up @@ -286,15 +286,15 @@ def __init__(self, *args, **kwargs):
self.helper = FormHelper()
self.helper.form_method = 'POST'
self.helper.layout = Layout(
HTML('<p> </p>'),
HTML('<div class="alert alert-dark"> <span><i class="fa fa-download"></i></span> ' + \
dataset_name.get_global_file_by_dataset_web(Dataset.DATASET_FILE_NAME_RESULT_NEXTSTRAIN_CSV) + \
" Last metadata file 'csv' for '{}' </div>".format(
dataset_name.name)),
# HTML('<p> </p>'),
# HTML('<div class="alert alert-dark"> <span><i class="fa fa-download"></i></span> ' + \
# dataset_name.get_global_file_by_dataset_web(Dataset.DATASET_FILE_NAME_RESULT_NEXTSTRAIN_CSV) + \
# " Last metadata file 'csv' for '{}' dataset</div>".format(
# dataset_name.name)),
HTML('<p> </p>'),
HTML('<div class="alert alert-dark"> <span><i class="fa fa-download"></i></span> ' + \
dataset_name.get_global_file_by_dataset_web(Dataset.DATASET_FILE_NAME_RESULT_NEXTSTRAIN_TSV) + \
" Last metadata file 'tsv' for '{}' </div>".format(
" Last metadata file 'tsv' for '{}' dataset.</div>".format(
dataset_name.name)),
HTML('<p> </p>'),
Div('path_name', css_class="col-lm-3"),
Expand All @@ -310,6 +310,7 @@ def clean(self):
"""
Clean all
"""
software = Software()
cleaned_data = super(DatastesUploadDescriptionMetadataForm, self).clean()

### get path name
Expand All @@ -320,11 +321,12 @@ def clean(self):
temp_file_name.write(path_name.file.read())
temp_file_name.flush()
temp_file_name.close()
software.dos_2_unix(temp_file_name.name)

parse_in_files = ParseInFiles()
parse_in_files = ParseNextStrainFiles()
b_test_char_encoding = True
parse_in_files.parse_sample_files(temp_file_name.name, self.request.user, b_test_char_encoding,\
ParseInFiles.STATE_READ_metadata_only_detect_errors_and_chech_samples)
parse_in_files.parse_nextstrain_files(temp_file_name.name, self.request.user, b_test_char_encoding,\
ParseNextStrainFiles.STATE_READ_metadata_only_detect_errors_and_chech_nexttrain)

os.unlink(temp_file_name.name)
if (parse_in_files.get_errors().has_errors()):
Expand Down
21 changes: 21 additions & 0 deletions datasets/migrations/0012_auto_20221025_1626.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.18 on 2022-10-25 16:26
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Alter the ``type_file`` foreign key of the ``uploadfiles`` model."""

    # Has to be applied after this earlier migration of the datasets app.
    dependencies = [
        ('datasets', '0011_dataset_number_passed_sequences'),
    ]

    operations = [
        migrations.AlterField(
            model_name='uploadfiles',
            name='type_file',
            # Optional (blank/null allowed) reference to datasets.MetaKey.
            field=models.ForeignKey(
                to='datasets.MetaKey',
                related_name='upload_metadata_files',
                on_delete=django.db.models.deletion.CASCADE,
                blank=True,
                null=True,
            ),
        ),
    ]
29 changes: 16 additions & 13 deletions datasets/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.conf import settings
from django.utils.safestring import mark_safe
from django.contrib.auth.models import User
from managing_files.models import ProjectSample, Reference, MetaKey, user_directory_path
from managing_files.models import ProjectSample, Reference, user_directory_path
from fluwebvirus.formatChecker import ContentTypeRestrictedFileField
from constants.constants import Constants, TypePath, FileExtensions
# Create your models here.
Expand Down Expand Up @@ -295,6 +295,17 @@ def get_project_name(self):
if not self.project_sample is None: return self.project_sample.project.name
return ""

class MetaKey(models.Model):
    """
    Has meta tags to put values on, for example, quality in the files, or samples
    """
    # Tag name; indexed, and declared optional (may be stored as NULL or "").
    name = models.CharField(max_length=200, db_index=True, blank=True, null=True)

    def __str__(self):
        # `name` is nullable and __str__ must return a string: returning None
        # raises "TypeError: __str__ returned non-string", so fall back to "".
        return self.name or ""

    class Meta:
        # Default queryset order: alphabetical by tag name.
        ordering = ['name', ]


class UploadFiles(models.Model):
"""
Expand Down Expand Up @@ -328,6 +339,9 @@ class UploadFiles(models.Model):
dataset = models.ForeignKey(Dataset, related_name='dataset_upload_metadata_file', blank=True, null=True, on_delete=models.CASCADE)
description = models.TextField(default="") ## has a json result.ProcessResults instance with errors or successes

## constants
constants = Constants()

class Meta:
ordering = ['-creation_date']

Expand All @@ -353,20 +367,9 @@ def get_metadata_fasta_web(self):
if (os.path.exists(out_file)):
return mark_safe('<a href="{}" download="{}"> {}</a>'.format(self.get_path_to_file(\
TypePath.MEDIA_URL), os.path.basename(self.get_path_to_file(TypePath.MEDIA_ROOT)),
self.constants.short_name(self.name, Constants.SHORT_NAME_LENGTH)))
self.constants.short_name(self.file_name, Constants.SHORT_NAME_LENGTH)))
return 'File not available.'

class MetaKey(models.Model):
    """
    Has meta tags to put values on, for example, quality in the files, or samples
    """
    # Tag name; indexed, and declared optional (may be stored as NULL or "").
    name = models.CharField(max_length=200, db_index=True, blank=True, null=True)

    def __str__(self):
        # `name` is nullable and __str__ must return a string: returning None
        # raises "TypeError: __str__ returned non-string", so fall back to "".
        return self.name or ""

    class Meta:
        # Default queryset order: alphabetical by tag name.
        ordering = ['name', ]

class MetaKeyDataset(models.Model):
"""
Relation ManyToMany in
Expand Down
2 changes: 1 addition & 1 deletion datasets/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
url(r'(?P<pk>\d+)/show_dataset_settings$', views.DatasetsSettingsView.as_view(), name='dataset-settings'),

url(r'(?P<pk>\d+)/dataset_update_metadata$', views.UpdateMetadataDataset.as_view(), name='dataset-update-metadata'), ## upload new matadata to replace the exist
url(r'(?P<pk>\d+)/dataset_update_metadata_file$', views.AddSingleMetadataDatasetFile.as_view(), name='dataset-add-single-csv-file-metadata'), ## upload new matadata to replace the exist
url(r'(?P<pk>\d+)/dataset_update_metadata_file$', views.AddSingleMetadataDatasetFile.as_view(), name='dataset-add-single-file-metadata'), ## upload new matadata to replace the exist

url(r'^ajax/remove_dataset$', ajax_views.remove_dataset, name='remove_dataset'), ## remove a dataset
url(r'^ajax/add_dataset_name$', ajax_views.add_dataset_name, name='add_dataset_name'), ## add a dataset
Expand Down
26 changes: 14 additions & 12 deletions datasets/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from braces.views import LoginRequiredMixin, FormValidMessageMixin
from django.conf import settings
from constants.software_names import SoftwareNames
from managing_files.models import Reference, Project, ProjectSample, MetaKey
from managing_files.models import Reference, Project, ProjectSample
from datasets.forms import ConsensusForm
from django.views.generic import ListView
from utils.utils import Utils
from datasets.models import Dataset, DatasetConsensus, Consensus, UploadFiles
from datasets.models import Dataset, DatasetConsensus, Consensus, UploadFiles, MetaKey
from datasets.tables import DatasetTable, ReferenceTable, ConsensusTable, ProjectTable
from datasets.tables import DatasetConsensusTable, AddDatasetFromCvsFileTableMetadata
from datasets.forms import AddReferencesDatasetForm, AddConsensusDatasetForm, AddProjectsDatasetForm
Expand All @@ -30,7 +30,6 @@
from django.db import transaction
from Bio import SeqIO
from utils.support_django_template import get_link_for_dropdown_item
from utils.process_SGE import ProcessSGE
from settings.models import Software as SoftwareSettings
from settings.constants_settings import ConstantsSettings
from settings.tables import SoftwaresTable
Expand Down Expand Up @@ -789,6 +788,9 @@ def get_context_data(self, **kwargs):
dataset.get_clean_dataset_name(), datetime.datetime.now().strftime(settings.DATE_FORMAT_FOR_SHOW)))

context['different_references'] = dataset.get_number_different_references()
context['number_of_consensus'] = dataset.number_of_sequences_from_consensus
context['number_of_references'] = dataset.number_of_sequences_from_references
context['n_consensus_from_projects'] = dataset.number_of_sequences_from_projects
context['spinner_url'] = os.path.join("/" + Constants.DIR_STATIC, Constants.DIR_ICONS, Constants.AJAX_LOADING_GIF)
context['show_info_main_page'] = ShowInfoMainPage() ## show main information about the institute
return context
Expand Down Expand Up @@ -829,7 +831,8 @@ def get_context_data(self, **kwargs):

tag_search = 'search_datasets'
query_set = UploadFiles.objects.filter(owner__id=self.request.user.id, is_deleted=False,\
type_file__name=TypeFile.TYPE_FILE_dataset_file_metadata).order_by('-creation_date')
type_file__name=TypeFile.TYPE_FILE_dataset_file_metadata, is_valid=True,
dataset=dataset).order_by('-creation_date')
if (self.request.GET.get(tag_search) != None and self.request.GET.get(tag_search)):
query_set = query_set.filter(Q(file_name__icontains=self.request.GET.get(tag_search)) |\
Q(owner__username__icontains=self.request.GET.get(tag_search)))
Expand All @@ -841,14 +844,11 @@ def get_context_data(self, **kwargs):
context['nav_dataset'] = True
context['disable_upload_files'] = disable_upload_files

### test if exists files to process to match with (csv/tsv) file
context['does_not_exists_fastq_files_to_process'] = UploadFiles.objects.filter(owner__id=self.request.user.id, is_deleted=False,\
type_file__name=TypeFile.TYPE_FILE_sample_file_metadata).order_by('-creation_date').count() == 0

### test if can add other csv file
count_not_complete = UploadFiles.objects.filter(owner__id=self.request.user.id, is_deleted=False,\
type_file__name=TypeFile.TYPE_FILE_sample_file_metadata, is_processed=False).count()
if (count_not_complete > 0):
type_file__name=TypeFile.TYPE_FILE_sample_file_metadata, is_processed=False, is_valid=True,
dataset=dataset).count()
if (count_not_complete > 0):
context['can_add_other_file'] = "You cannot add other file because there is a file in pipeline."
context['disable_upload_files'] = True

Expand Down Expand Up @@ -916,8 +916,8 @@ def get_context_data(self, **kwargs):

context['nav_dataset'] = True
context['disable_upload_files'] = disable_upload_files
context['nav_modal'] = True ## short the size of modal window
context['dataset'] = dataset ## dataset in analysis
context['nav_modal'] = True ## short the size of modal window
context['dataset'] = dataset ## dataset in analysis
context['show_info_main_page'] = ShowInfoMainPage() ## show main information about the institute
return context

Expand All @@ -933,6 +933,7 @@ def form_valid(self, form):
pass

utils = Utils()
software = Software()
path_name = form.cleaned_data['path_name']

## create a genbank file
Expand Down Expand Up @@ -968,6 +969,7 @@ def form_valid(self, form):
TypeFile.TYPE_FILE_dataset_file_metadata), upload_files.file_name)
sz_file_to = utils.get_unique_file(sz_file_to) ## get unique file name, user can upload files with same name...
utils.move_file(os.path.join(getattr(settings, "MEDIA_ROOT", None), upload_files.path_name.name), sz_file_to)
software.dos_2_unix(sz_file_to)
upload_files.path_name.name = os.path.join(utils.get_path_upload_file(self.request.user.id,\
TypeFile.TYPE_FILE_dataset_file_metadata), ntpath.basename(sz_file_to))
upload_files.save()
Expand Down
4 changes: 4 additions & 0 deletions managing_files/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ def clean(self):
"""
Clean all
"""
software = Software()
cleaned_data = super(SamplesUploadDescriptionForm, self).clean()

### get path name
Expand All @@ -622,6 +623,7 @@ def clean(self):
temp_file_name.write(path_name.file.read())
temp_file_name.flush()
temp_file_name.close()
software.dos_2_unix(temp_file_name.name)

parse_in_files = ParseInFiles()
b_test_char_encoding = True
Expand Down Expand Up @@ -687,6 +689,7 @@ def clean(self):
"""
Clean all
"""
software = Software()
cleaned_data = super(SamplesUploadDescriptionMetadataForm, self).clean()

### get path name
Expand All @@ -697,6 +700,7 @@ def clean(self):
temp_file_name.write(path_name.file.read())
temp_file_name.flush()
temp_file_name.close()
software.dos_2_unix(temp_file_name.name)

parse_in_files = ParseInFiles()
b_test_char_encoding = True
Expand Down
4 changes: 4 additions & 0 deletions managing_files/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ def form_valid(self, form):
pass

utils = Utils()
software = Software()
path_name = form.cleaned_data['path_name']

## create a genbank file
Expand Down Expand Up @@ -620,6 +621,7 @@ def form_valid(self, form):
TypeFile.TYPE_FILE_sample_file), upload_files.file_name)
sz_file_to = utils.get_unique_file(sz_file_to) ## get unique file name, user can upload files with same name...
utils.move_file(os.path.join(getattr(settings, "MEDIA_ROOT", None), upload_files.path_name.name), sz_file_to)
software.dos_2_unix(sz_file_to)
upload_files.path_name.name = os.path.join(utils.get_path_upload_file(self.request.user.id,\
TypeFile.TYPE_FILE_sample_file), ntpath.basename(sz_file_to))
upload_files.save()
Expand Down Expand Up @@ -675,6 +677,7 @@ def form_valid(self, form):
pass

utils = Utils()
software = Software()
path_name = form.cleaned_data['path_name']

## create a genbank file
Expand Down Expand Up @@ -706,6 +709,7 @@ def form_valid(self, form):
TypeFile.TYPE_FILE_sample_file_metadata), upload_files.file_name)
sz_file_to = utils.get_unique_file(sz_file_to) ## get unique file name, user can upload files with same name...
utils.move_file(os.path.join(getattr(settings, "MEDIA_ROOT", None), upload_files.path_name.name), sz_file_to)
software.dos_2_unix(sz_file_to)
upload_files.path_name.name = os.path.join(utils.get_path_upload_file(self.request.user.id,\
TypeFile.TYPE_FILE_sample_file_metadata), ntpath.basename(sz_file_to))
upload_files.save()
Expand Down
10 changes: 10 additions & 0 deletions static/tests/dataset_files/Nextstrain_metadata.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
strain date virus region gisaid_epi_isl genbank_accession length segment sex age host vaccine status week latitude longitude type-subtype putative mixed-infection Lineage (Pangolin) classification Lineage Pangolin Scorpio Pangolin
SARS_CoV_2_Wuhan_Hu_1_MN908947 2019-12-26 ncov Asia EPI_ISL_402125 MN908947 29903 genome ? ? ? ? ? ? ? ? ? ? ? ? ?
to_4 2019-12-26 ncov Asia EPI_ISL_406798 LR757998 29866 genome ? ? ? ? ? ? ? ? ? ? ? ? ?
ERR4082026_covid_minion 2022-06-20 ncov Europe ? ? 29903 genome ? ? ? ? ? ? ? Not assigned No B.1.1 ? ? ?
ERR4082025_covid_minion 2022-06-20 ncov Europe ? ? 29905 genome ? ? ? ? ? ? ? Not assigned No B.1 ? ? ?
ERR4569681 2022-06-20 ncov Europe ? ? 29910 genome ? ? ? ? ? ? ? Not assigned No B.1.1.301 ? ? ?
test_covid_4082026 2022-06-20 ncov Europe ? ? 29903 genome ? ? ? ? ? ? ? ? NA (not applicable) ? NA (not applicable) B.1.1 ?
ERR4082027_covid_minion 2022-02-20 ncov Europe ? ? 2d903 genome ? ? ? ? ? ? ? ? NA (not applicable) ? Not assigned B.1.1 ?
ERR4082026_covid_minion_1 2022-06-20 ncov Europe ? ? 29903 genome ? s ? ? ? ? ? ? NA (not applicable) ? Not assigned B.1.1 ?
ERR4082025_covid_minion_1 2022-06-20 ncov Europe ? ? 29903 genome ? 4 ? ? ? ? ? ? NA (not applicable) ? Not assigned B.1.93 ?
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ xpto,2019-12-26,ncov,Europe,?,?,?,genome,?,?,?,a,b,?
xpto3,2019-12-21,ncov,Europe,?,?,?,genome,?,?,?,a3,b3,?
xpto2,2018-12-21,ncov,Europe,?,?,?,genome,?,?,?,?,a1,b1
xptow2,2015-12-21,ncov,Europe,?,?,?,genome,?,?,?,?,a1,b1
xptow3,2022-10-19,ncov,Europe,?,?,?,genome,?,?,?,?,a1,b1
xptow3,DATE_TO_REPLACE,ncov,Europe,?,?,?,genome,?,?,?,?,a1,b1
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ xpto,2019-12-26,?,Europe,?,a,b,?
xpto3,2019-12-21,?,Europe,?,a3,b3,?
xpto2,2018-12-21,?,Europe,?,?,a1,b1
xptow2,2015-12-21,?,Europe,?,?,a1,b1
xptow3,2022-10-19,?,Europe,?,?,a1,b1
xptow3,DATE_TO_REPLACE,?,Europe,?,?,a1,b1
6 changes: 6 additions & 0 deletions static/tests/dataset_files/nextstrain_metadata_input.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
strain date genbank_accession region host vaccine status week latitude longitude type-subtype putative mixed-infection xpto new_columns
second_stage_2__test_create_tree 2022-10-24 ? Europe ? ? ? ? ? ? ? ? f
EVA001_S66 2022-10-19 ? Europe ? ? ? ? ? xpto, zpto ? xpto xpto g
EVA002_S52 2022-10-19 ? Europe ? ? ? ? ? xpto, zpto ? xpto xpto h
EVA003_S91 2022-10-24 ? Europe ? ? ? ? ? xpto, zpto ? xpto xpto j
EVA011_S54 2022-10-21 ? Europe ? ? ? ? ? xpto, zpto ? xpto xpto
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"strain" "date" "genbank_accession" "region" "host" "vaccine status" "week" "latitude" "longitude" "type-subtype" "putative mixed-infection" "xpto"
"second_stage_2__test_create_tree" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "?" "?" "?"
"EVA001_S66" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"EVA002_S52" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"EVA003_S91" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"EVA011_S54" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"strain" "date" "genbank_accession" "region" "host" "vaccine status" "week" "latitude" "longitude" "type-subtype" "putative mixed-infection" "xpto"
"second_stage_2__test_create_tree" "2022-10-24" "?" "Europe" "?" "?" "?" "?" "?" "?" "?" "?"
"EVA001_S66" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"EVA002_S52" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"EVA003_S91" "2022-10-24" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
"EVA011_S54" "2022-10-21" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"strain" "date" "genbank_accession" "region" "host" "vaccine status" "week" "latitude" "longitude" "type-subtype" "putative mixed-infection" "xpto" "new_columns"
"second_stage_2__test_create_tree" "2022-10-24" "?" "Europe" "?" "?" "?" "?" "?" "?" "?" "?" "f"
"EVA001_S66" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto" "g"
"EVA002_S52" "2022-10-19" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto" "h"
"EVA003_S91" "2022-10-24" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto" "j"
"EVA011_S54" "2022-10-21" "?" "Europe" "?" "?" "?" "?" "?" "xpto, zpto" "?" "xpto xpto" "?"
Loading

0 comments on commit ae7652a

Please sign in to comment.