Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ClinVar script to import somatic classifications + patch db schema 115 #1117

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 148 additions & 8 deletions modules/Bio/EnsEMBL/Variation/DBSQL/PhenotypeFeatureAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,41 @@ sub _is_significant_constraint {
}


=head2 _filter_dna_type

  Arg [1]    : string $constraint (optional)
               Existing SQL constraint to extend; may be undef or empty
  Arg [2]    : string $dna_type
               DNA type to select, either 'Germline' or 'Somatic'
  Example    : $constraint = $self->_filter_dna_type($constraint, 'Somatic');
  Description: Internal method to add a constraint on the column "DNA_type".
               Phenotype features from ClinVar have DNA_type 'Germline' or 'Somatic'.
               For non-human or other human sources DNA_type is NULL.
               Asking for 'Germline' also matches rows whose DNA_type is NULL;
               'Somatic' is matched exactly.
  Returntype : string
  Exceptions : throw on a missing or unsupported DNA type
  Caller     : internal
  Status     : stable

=cut

sub _filter_dna_type {
  my $self       = shift;
  my $constraint = shift;
  my $dna_type   = shift;

  # The value ends up inside the SQL text, so accept only the known types
  # instead of trusting whatever the caller passed in.
  unless (defined($dna_type) && ($dna_type eq 'Germline' || $dna_type eq 'Somatic')) {
    my $shown = defined($dna_type) ? $dna_type : 'undef';
    throw("$shown is not a valid DNA type, valid types are: Germline, Somatic");
  }

  my $dna_constraint = ($dna_type eq 'Germline')
    ? qq{ (pf.DNA_type='$dna_type' OR pf.DNA_type is NULL) }
    : qq{ pf.DNA_type='$dna_type' };

  # Only join with AND when there is something to join to; an undefined or
  # blank constraint must not produce SQL that starts with " AND".
  if (defined($constraint) && $constraint =~ /\S/) {
    return $constraint . " AND" . $dna_constraint;
  }

  return $dna_constraint;
}

=head2 _is_class_constraint

Arg [1] : string $constraint
Expand Down Expand Up @@ -153,7 +188,8 @@ sub _fetch_all_by_object {
my $self = shift;
my $object = shift;
my $type = shift;

my $dna_type = shift; # dna_type to include

$type ||= (split '::', ref($object))[-1];
throw("$type is not a valid object type, valid types are: ".(join ", ", sort keys %TYPES)) unless defined $type and defined($TYPES{$type});

Expand All @@ -163,7 +199,12 @@ sub _fetch_all_by_object {
$constraint = $self->_is_significant_constraint($constraint);
# Add the constraint for phenotype class
$constraint = $self->_is_class_constraint($constraint);


# Filter phenotype features by their DNA_type (when searching for germline also includes null DNA_type)
if(defined $dna_type) {
$constraint = $self->_filter_dna_type($constraint, $dna_type);
}

return $self->generic_fetch($constraint);
}

Expand Down Expand Up @@ -400,7 +441,7 @@ sub fetch_all_by_Slice_with_ontology_accession {

Arg [1] : Bio::EnsEMBL::Variation::Variation $var
Example : my @pfs = @{$pfa->fetch_all_by_Variation($var)};
Description: Retrieves all PhenotypeFeatures for a given variation.
Description: Retrieves all PhenotypeFeatures with 'Germline' DNA type for a given variation.
Returntype : reference to list Bio::EnsEMBL::Variation::PhenotypeFeature
Exceptions : throw on bad argument
Caller : general
Expand All @@ -416,7 +457,32 @@ sub fetch_all_by_Variation {
throw('Bio::EnsEMBL::Variation::Variation arg expected');
}

return $self->_fetch_all_by_object($var, 'Variation');
# Only include phenotype features with DNA_type 'Germline' or null
return $self->_fetch_all_by_object($var, 'Variation', 'Germline');
}

=head2 fetch_all_somatic_by_Variation

  Arg [1]    : Bio::EnsEMBL::Variation::Variation $var
  Example    : my @pfs = @{$pfa->fetch_all_somatic_by_Variation($var)};
  Description: Fetches the PhenotypeFeatures attached to the given variation
               whose DNA type is 'Somatic'.
  Returntype : reference to list Bio::EnsEMBL::Variation::PhenotypeFeature
  Exceptions : throw on bad argument
  Caller     : general
  Status     : stable

=cut

sub fetch_all_somatic_by_Variation {
  my ($self, $var) = @_;

  # Anything that is not a Variation object is rejected up front
  my $is_variation = ref($var) && $var->isa('Bio::EnsEMBL::Variation::Variation');
  throw('Bio::EnsEMBL::Variation::Variation arg expected') unless $is_variation;

  # The third argument restricts the lookup to DNA_type 'Somatic'
  return $self->_fetch_all_by_object($var, 'Variation', 'Somatic');
}


Expand Down Expand Up @@ -1183,7 +1249,8 @@ sub get_clinsig_alleles_by_location {
AND pf.seq_region_id = ?
AND pf.seq_region_start >= ?
AND pf.seq_region_end <= ?
AND pf.source_id = ?
AND pf.source_id = ?
AND (DNA_type = 'Germline' OR DNA_type is NULL)
AND EXISTS(select value from phenotype_feature_attrib where phenotype_feature_id = pf.phenotype_feature_id && attrib_type_id = 483)

GROUP BY pf.phenotype_feature_id
Expand Down Expand Up @@ -1215,6 +1282,76 @@ sub get_clinsig_alleles_by_location {
return $hash;
}

=head2 get_somatic_clin_impact_by_location

  Arg [1]    : int $seq_region_id
  Arg [2]    : int $seq_region_start
  Arg [3]    : int $seq_region_end
  Arg [4]    : int $source_id
  Example    : my $impact = $pfa->get_somatic_clin_impact_by_location($sr_id, $start, $end, $source_id);
  Description: Collects, for phenotype features with DNA_type 'Somatic' that lie
               inside the given region and belong to the given source, the
               object id, the phenotype feature id, the somatic_clin_sig /
               oncogenic_clin_sig attribute values and the phenotype description.
  Returntype : hashref keyed by "seq_region_id:start-end"; each value is a
               listref of hashrefs (keys such as id, pf_id, somatic_clin_sig,
               oncogenic_clin_sig, phenotype). undef when no row matches.
  Exceptions : throw when the seq region id, start or end is missing
  Caller     : general
  Status     : at risk

=cut

sub get_somatic_clin_impact_by_location {
  my $self             = shift;
  my $seq_region_id    = shift;
  my $seq_region_start = shift;
  my $seq_region_end   = shift;
  my $source_id        = shift;
  throw("Cannot fetch attributes without seq region information") unless defined($seq_region_id) && defined($seq_region_start) && defined($seq_region_end);

  # NOTE(review): this constraint is built but never added to the query below.
  # The calls are kept as they were; confirm whether the filter should apply here.
  my $extra_sql = $self->_is_significant_constraint();
  # Add the constraint for phenotype class
  $extra_sql = $self->_is_class_constraint($extra_sql);

  my $sth = $self->dbc->prepare(qq{
    SELECT
      CONCAT(pf.seq_region_id, ':', pf.seq_region_start, '-', pf.seq_region_end),
      CONCAT_WS('; ',
        CONCAT('id=', pf.object_id), CONCAT('pf_id=', pf.phenotype_feature_id),
        GROUP_CONCAT(IF(at.code in ('somatic_clin_sig', 'oncogenic_clin_sig'), at.code, NULL), "=", concat('', pfa.value, '') SEPARATOR '; '),
        CONCAT('phenotype=', p.description)
      ) AS attribute

    FROM
      phenotype p,
      phenotype_feature pf

    LEFT JOIN phenotype_feature_attrib pfa
      ON pf.phenotype_feature_id = pfa.phenotype_feature_id
    LEFT JOIN attrib_type `at`
      ON pfa.attrib_type_id = at.attrib_type_id

    WHERE pf.phenotype_id = p.phenotype_id
      AND pf.seq_region_id = ?
      AND pf.seq_region_start >= ?
      AND pf.seq_region_end <= ?
      AND pf.source_id = ?
      AND pf.DNA_type = 'Somatic'

    GROUP BY pf.phenotype_feature_id
    ORDER BY pf.seq_region_id, pf.seq_region_start, pf.seq_region_end
  });

  $sth->bind_param(1, $seq_region_id, SQL_VARCHAR);
  $sth->bind_param(2, $seq_region_start, SQL_VARCHAR);
  $sth->bind_param(3, $seq_region_end, SQL_VARCHAR);
  $sth->bind_param(4, $source_id, SQL_VARCHAR);
  $sth->execute();

  # First selected column is the "seq_region_id:start-end" string, which is
  # what the result is keyed on (one entry per phenotype feature at that location).
  my $location;
  my $output_string;
  my $hash;
  $sth->bind_columns(\$location, \$output_string);

  while ($sth->fetch) {
    my %attribute_for;

    # Example: $output_string => "id=rs1555760738; pf_id=266451087; somatic_clin_sig=Tier III - Unknown"
    foreach my $pair (split /;/, $output_string) {
      # Limit of 2 keeps any '=' that is part of the value itself
      my ($key, $value) = split /=/, $pair, 2;
      next unless defined $key;

      $key =~ s/^\s+|\s+$//g;
      next if $key eq '';

      $attribute_for{$key} = $value;
    }

    push @{ $hash->{$location} }, \%attribute_for;
  }

  $sth->finish();

  return $hash;
}


# stub method used by web
Expand Down Expand Up @@ -1530,6 +1667,7 @@ sub _obj_from_row {
'_source_id' => $row->{source_id},
'_source_name' => $row->{name},
'is_significant' => $row->{is_significant},
'dna_type' => $row->{dna_type},
}
);

Expand Down Expand Up @@ -1591,8 +1729,9 @@ sub store{
seq_region_id,
seq_region_start,
seq_region_end,
seq_region_strand
) VALUES (?,?,?,?,?,?,?,?,?,?)
seq_region_strand,
dna_type
) VALUES (?,?,?,?,?,?,?,?,?,?,?)
});

$sth->execute(
Expand All @@ -1605,7 +1744,8 @@ sub store{
defined($pf->{slice}) ? $pf->slice()->get_seq_region_id() : undef,
defined($pf->{start}) ? $pf->{start} :undef,
defined($pf->{end}) ? $pf->{end} : undef,
defined($pf->{strand})? $pf->{strand} : undef
defined($pf->{strand})? $pf->{strand} : undef,
defined($pf->{dna_type})? $pf->{dna_type} : undef
);

$sth->finish;
Expand Down
78 changes: 75 additions & 3 deletions modules/Bio/EnsEMBL/Variation/PhenotypeFeature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ use Exporter;
use vars qw(@EXPORT_OK @ISA);

our @ISA = ('Bio::EnsEMBL::Feature', 'Exporter');
@EXPORT_OK = qw(%TYPES);
@EXPORT_OK = qw(%TYPES %DNA_TYPES);

# define valid object types
# this must correspond to the types defined in the type column of
Expand All @@ -96,6 +96,11 @@ our %TYPES = (
'RegulatoryFeature' => 1,
);

# Lookup set of the DNA types a phenotype feature may carry
our %DNA_TYPES = map { $_ => 1 } qw(Germline Somatic);

=head2 new

Arg [-dbID] :
Expand Down Expand Up @@ -157,12 +162,12 @@ sub new {
my $class = ref($caller) || $caller;
my $self = $class->SUPER::new(@_);

my ($dbID,$adaptor,$phenotype_id,$phenotype,$type,$object,$object_id,$source_name,$source_id,$source,$study,$study_id,$is_significant,$attribs, $ontology_accessions) =
my ($dbID,$adaptor,$phenotype_id,$phenotype,$type,$object,$object_id,$source_name,$source_id,$source,$study,$study_id,$is_significant,$dna_type,$attribs, $ontology_accessions) =
rearrange([qw(
dbID ADAPTOR _PHENOTYPE_ID PHENOTYPE
TYPE OBJECT _OBJECT_ID
SOURCE_NAME _SOURCE_ID SOURCE STUDY _STUDY_ID
IS_SIGNIFICANT
IS_SIGNIFICANT DNA_TYPE
ATTRIBS ONTOLOGY_ACCESSIONS
)], @_);

Expand Down Expand Up @@ -207,6 +212,7 @@ sub new {

$self->{type} = $type;
$self->{is_significant} = $is_significant;
$self->{dna_type} = $dna_type || undef;
$self->{attribs} = $attribs || {};
$self->{ontology_accessions} = $ontology_accessions || undef;

Expand Down Expand Up @@ -510,6 +516,30 @@ sub type {
return $self->{'type'};
}

=head2 dna_type

  Arg [1]    : string $type (optional)
               The new value to set the DNA type attribute to; must be one of
               the keys of %DNA_TYPES
  Example    : $type = $obj->dna_type()
  Description: Getter/Setter for the DNA type of the PhenotypeFeature.
  Returntype : string
  Exceptions : throw when the new value is not a valid DNA type
  Caller     : general
  Status     : Stable

=cut

sub dna_type {
  my $self = shift;
  my $type = shift;

  if (defined($type)) {
    # List only the hash keys: sorting the hash itself would mix the flag
    # values into the message.
    throw("$type is not a valid DNA type, valid types are: " . join(", ", sort keys %DNA_TYPES)) unless defined($DNA_TYPES{$type});
    $self->{'dna_type'} = $type;
  }

  return $self->{'dna_type'};
}

=head2 is_significant

Expand Down Expand Up @@ -938,6 +968,48 @@ sub clinical_significance {
return defined($self->get_all_attributes->{'clinvar_clin_sig'}) ? $self->get_all_attributes->{'clinvar_clin_sig'} : undef;
}

=head2 somatic_classification

  Example    : $somatic_classification = $obj->somatic_classification()
  Description: Getter for the somatic_clin_sig attribute; returns undef when
               the attribute is not set.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub somatic_classification {
  my ($self) = @_;

  return defined($self->get_all_attributes->{'somatic_clin_sig'})
    ? $self->get_all_attributes->{'somatic_clin_sig'}
    : undef;
}

=head2 oncogenicity_classification

  Arg [1]    : string $new (optional)
               New value for the oncogenic_clin_sig attribute
  Example    : $oncogenicity = $obj->oncogenicity_classification()
  Description: Getter/setter for the oncogenic_clin_sig attribute; returns
               undef when the attribute is not set.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub oncogenicity_classification {
  my ($self, $new) = @_;

  # Store the new value first so the read below reflects it
  if (defined $new) {
    $self->_set_attribute('oncogenic_clin_sig', $new);
  }

  my $oncogenicity = $self->get_all_attributes->{'oncogenic_clin_sig'};

  return $oncogenicity;
}

=head2 external_id

Expand Down
23 changes: 23 additions & 0 deletions modules/Bio/EnsEMBL/Variation/Utils/Config.pm
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ our @clinvar_clinical_significance_types = (
'established risk allele'
);

# ClinVar somatic classification terms: the four clinical impact tiers
# followed by the oncogenicity terms.
# NOTE(review): confirm each label matches the exact term ClinVar emits
# (in particular the Tier III wording) before loading attribs.
our @clinvar_clinical_significance_somatic = (
  'Tier I - Strong',
  'Tier II - Potential',
  'Tier III - Uncertain significance',
  'Tier IV - Benign/likely benign',
  'Oncogenic',
  'Likely oncogenic',
  'Uncertain significance',
  'Likely benign',
  'Benign'
);

our @dgva_clinical_significance_types = (
'Not tested',
'Benign',
Expand Down Expand Up @@ -1243,6 +1255,16 @@ our @ATTRIB_TYPES = (
name => 'ClinVar clinical significance',
description => 'The clinical significance of a variant as reported by ClinVar',
},
{
code => 'somatic_clin_sig',
name => 'ClinVar somatic significance',
description => 'The somatic classification of a variant as reported by ClinVar',
},
{
code => 'oncogenic_clin_sig',
name => 'ClinVar somatic classification of oncogenicity',
description => 'The somatic classification of oncogenicity of a variant as reported by ClinVar',
},
{
code => 'prot_func_analysis',
name => 'Protein function analysis ',
Expand Down Expand Up @@ -1363,6 +1385,7 @@ our %ATTRIBS = (
'dbsnp_clin_sig' => \@dbsnp_clinical_significance_types,
'dgva_clin_sig' => \@dgva_clinical_significance_types,
'clinvar_clin_sig' => \@clinvar_clinical_significance_types,
'clinvar_somatic' => \@clinvar_clinical_significance_somatic,
'polyphen_prediction' => ['probably damaging', 'possibly damaging', 'benign', 'unknown'],
'sift_prediction' => ['tolerated', 'deleterious', 'tolerated - low confidence', 'deleterious - low confidence'],
'prot_func_analysis' => [qw(sift polyphen_humvar polyphen_humdiv)],
Expand Down
Loading