From ac66263b645cf17d720433940eec0aa041d59e04 Mon Sep 17 00:00:00 2001
From: Paul-Davis <pad@ebi.ac.uk>
Date: Tue, 3 Oct 2017 17:27:15 +0100
Subject: [PATCH 01/11] Added quick check for invalid isoformer annotations
 that slip through sometimes.

---
 scripts/check_predicted_genes.pl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/scripts/check_predicted_genes.pl b/scripts/check_predicted_genes.pl
index 3ed6d5343..bfe45def7 100755
--- a/scripts/check_predicted_genes.pl
+++ b/scripts/check_predicted_genes.pl
@@ -262,6 +262,20 @@
 	$seen{$gg}=1;
       }
 
+      my @isoformer_genes = $db->fetch (-query => "FIND $qclass where \"*iso*\"");
+      foreach my $g (@isoformer_genes) {
+my $gg=$g->name; 
+	if ($ignore{$gg}) {next}
+	if (exists $seen{$gg}) {
+	  $s=' (seen already)';
+	} else {
+	  $s='';
+	}
+	$log->write_to("Error: $qclass $gg should be removed $s\n"); 
+	$seen{$gg}=1;
+      }
+
+
       my @no_Sparent_genes = $db->fetch (-query => "FIND $qclass where !S_parent");
       foreach my $g (@no_Sparent_genes) {
 	my $gg=$g->name; 

From 44209f665a25941e8a792d78ee0910f602fb1d6b Mon Sep 17 00:00:00 2001
From: Kevin Howe <klh@ebi.ac.uk>
Date: Wed, 11 Oct 2017 13:50:19 +0100
Subject: [PATCH 02/11] Ensembl now supports antisense_RNA biotype, so use that

---
 scripts/ENSEMBL/lib/WormBase2Ensembl.pm | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/ENSEMBL/lib/WormBase2Ensembl.pm b/scripts/ENSEMBL/lib/WormBase2Ensembl.pm
index 0a35d0733..9f50d05e3 100644
--- a/scripts/ENSEMBL/lib/WormBase2Ensembl.pm
+++ b/scripts/ENSEMBL/lib/WormBase2Ensembl.pm
@@ -457,12 +457,6 @@ sub parse_genes_gff3_fh {
           my $bt = "ncRNA";
           $transcript->biotype($bt);
           $gene_biotypes{$bt}++;
-        }  elsif ( $gff_type eq 'antisense_RNA') {
-          # not acknowledged as a biotype by Ensembl; change to default ncRNA
-          $transcript->analysis($nc_ana);
-          my $bt = "antisense";
-          $transcript->biotype($bt);
-          $gene_biotypes{$bt}++;
         } else {
           $transcript->analysis($nc_ana);
           my $bt = ($gff_type =~ /RNA/) ? $gff_type : 'ncRNA';

From 56829a78ab86b624594feeda6956688a66baa3ac Mon Sep 17 00:00:00 2001
From: Paul-Davis <pad@ebi.ac.uk>
Date: Wed, 11 Oct 2017 16:58:01 +0100
Subject: [PATCH 03/11] Now ignores ensembl gene stubs from reporting.

---
 scripts/NAMEDB/geneace_nameDB_comm.pl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/NAMEDB/geneace_nameDB_comm.pl b/scripts/NAMEDB/geneace_nameDB_comm.pl
index 0e39c2e8c..ddd9d0b81 100644
--- a/scripts/NAMEDB/geneace_nameDB_comm.pl
+++ b/scripts/NAMEDB/geneace_nameDB_comm.pl
@@ -100,8 +100,13 @@
 
 # any genes left in the acedb list are absent from the nameserver
 foreach (keys %ace_genes ){
+    if ($_ =~ /ENS/) {
+	print "Skipping ENSEMBL ID:$_\n" if $debug;
+    }
+    else {
 	$log->error("ERROR: $_ absent from nameserver\n");
 	$errorcount++;
+    }
 }
 $log->write_to("INFO: $errorcount errors found\n") if ($log->report_errors > 0);
 $log->write_to("No errors found\n") if ($log->report_errors == 0);

From b0d3cacd5c6fa9a37ae73f24e36dbdbefe5ac1d9 Mon Sep 17 00:00:00 2001
From: Gary Williams <gw3@ebi.ac.uk>
Date: Thu, 12 Oct 2017 09:41:47 +0100
Subject: [PATCH 04/11] Added caltech_Analysis

---
 autoace_config/elegans.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/autoace_config/elegans.config b/autoace_config/elegans.config
index 1f0930f12..f352487a2 100644
--- a/autoace_config/elegans.config
+++ b/autoace_config/elegans.config
@@ -70,6 +70,7 @@ db=citace	file=caltech_CDS.ace				class=CDS	format="Interaction WBInteraction\d{
 db=citace	file=caltech_Cell.ace			class=Cell	format="Reference WBPaper\d{8}"	format="Anatomy_term WBbt:\d{7}"
 db=citace	file=caltech_Cell_group.ace		class=Cell_group	format="Reference WBPaper\d{8}"	format="Anatomy_term WBbt:\d{7}"
 db=citace	file=caltech_Condition.ace				class=Condition
+db=citace	file=caltech_Analysis.ace		class=Analysis	format="Reference WBPaper\d{8}"	format="Conducted_by WBPerson\d{1,5}"
 db=citace	file=caltech_DO_defs.ace		class=DO_term	format="Reference WBPaper\d{8}"
 db=citace	file=caltech_Database.ace		class=Database
 db=citace	file=caltech_Expr_pattern.ace	class=Expr_pattern	format="Gene WBGene\d{8}"	format="Reference WBPaper\d{8}"

From 09178a0b773995ce3b36e4cdd2f449c4cc5cbdcc Mon Sep 17 00:00:00 2001
From: Gary Williams <gw3@ebi.ac.uk>
Date: Thu, 12 Oct 2017 10:07:23 +0100
Subject: [PATCH 05/11] ensure the caltech_Analysis is linked to a Sample

---
 autoace_config/elegans.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autoace_config/elegans.config b/autoace_config/elegans.config
index f352487a2..155002dfa 100644
--- a/autoace_config/elegans.config
+++ b/autoace_config/elegans.config
@@ -70,7 +70,7 @@ db=citace	file=caltech_CDS.ace				class=CDS	format="Interaction WBInteraction\d{
 db=citace	file=caltech_Cell.ace			class=Cell	format="Reference WBPaper\d{8}"	format="Anatomy_term WBbt:\d{7}"
 db=citace	file=caltech_Cell_group.ace		class=Cell_group	format="Reference WBPaper\d{8}"	format="Anatomy_term WBbt:\d{7}"
 db=citace	file=caltech_Condition.ace				class=Condition
-db=citace	file=caltech_Analysis.ace		class=Analysis	format="Reference WBPaper\d{8}"	format="Conducted_by WBPerson\d{1,5}"
+db=citace	file=caltech_Analysis.ace		class=Analysis	query=(Sample)	format="Reference WBPaper\d{8}"	format="Conducted_by WBPerson\d{1,5}"
 db=citace	file=caltech_DO_defs.ace		class=DO_term	format="Reference WBPaper\d{8}"
 db=citace	file=caltech_Database.ace		class=Database
 db=citace	file=caltech_Expr_pattern.ace	class=Expr_pattern	format="Gene WBGene\d{8}"	format="Reference WBPaper\d{8}"

From f266ae2f453e64b621c69a76208b1ef8ba0c7152 Mon Sep 17 00:00:00 2001
From: Gary Williams <gw3@ebi.ac.uk>
Date: Fri, 13 Oct 2017 11:41:52 +0100
Subject: [PATCH 06/11] New utility to produce a Feature_data ACE file from a
 GFF file.

---
 scripts/gff2feature_data.pl | 465 ++++++++++++++++++++++++++++++++++++
 1 file changed, 465 insertions(+)
 create mode 100755 scripts/gff2feature_data.pl

diff --git a/scripts/gff2feature_data.pl b/scripts/gff2feature_data.pl
new file mode 100755
index 000000000..fad0c1ea2
--- /dev/null
+++ b/scripts/gff2feature_data.pl
@@ -0,0 +1,465 @@
+#!/software/bin/perl -w
+#
+# Small script to convert GFF regions to Feature_data objects
+#
+# Last updated by: $Author: klh $     
+# Last updated on: $Date: 2012-06-22 08:56:52 $      
+
+use strict;                                      
+use lib $ENV{'CVS_DIR'};
+use Wormbase;
+use Getopt::Long;
+use Carp;
+use Log_files;
+use Storable;
+#use Ace;
+#use Sequence_extract;
+use Coords_converter;
+use Modules::Remap_Sequence_Change;
+
+######################################
+# variables and command-line options # 
+######################################
+
+my ($help, $debug, $test, $verbose, $store, $wormbase);
+my ($input, $output, $species);
+my %features; # GFF column 3 feature type => Method name, filled from repeated -features key=value options
+
+GetOptions ("help"       => \$help,
+            "debug=s"    => \$debug,
+	    "test"       => \$test,
+	    "verbose"    => \$verbose,
+	    "store:s"    => \$store, # file from which a stored Wormbase object is retrieved
+	    "input:s"    => \$input, # the GFF file
+	    "output:s"   => \$output, # the ACE file to write
+	    "features:s"  => \%features, # feature(s) to select column 3 of the GFF, takes value of method
+	    "species:s"  => \$species,
+	    );
+# use multiple features definitions e.g. -features Poly-A=RNASeq_polyA -features SL1=RNASeq_SL1 -features SL2=RNASeq_SL2
+
+if ( $store ) { # reuse a stored Wormbase object when -store is given
+  $wormbase = retrieve( $store ) or croak("Can't restore wormbase from $store\n");
+} else { # otherwise build one from the command-line options
+  $wormbase = Wormbase->new( -debug   => $debug,
+                             -test    => $test,
+			     -organism => $species,
+			     );
+}
+
+# Display help if required
+&usage("Help") if ($help);
+
+# in test mode?
+if ($test) {
+  print "In test mode\n" if ($verbose);
+
+}
+
+# establish log file.
+my $log = Log_files->make_build_log($wormbase);
+
+
+#################################
+# check input arguments
+#################################
+# NOTE(review): -input, -output and -features are not validated in this section
+
+$species = $wormbase->full_name; # replaces any -species value with the name reported by the Wormbase object
+
+
+#################################
+
+my $database = $wormbase->autoace;
+my $coords = Coords_converter->invoke($database, 0, $wormbase); # passed to initialise_tiles() to look up sequence lengths
+my $virtual; # declared here but never assigned at file scope
+
+# open the input GFF for reading and the output ACE file for writing
+open (IN, "<$input") || die "Can't open $input\n";
+open (ACE, ">$output") || die "Can't open $output\n";
+
+my (@tiles, @whole_chromosome); # features of the current sequence: per tile, and those that fit in no single tile
+
+my $sequence = ''; # name of the sequence currently being collected ('' before the first one)
+my $sequence_len = 0;
+my %scores; # a list of the scores for each feature, so that we can find the median scores
+
+while (my $line = <IN>) {
+  # skip GFF comment and header lines
+  if ($line =~ /^#/) {next;}
+
+  my @cols = split /\s+/, $line;
+  my $chrom = $cols[0];
+  my $ft    = $cols[2];
+  my $start = $cols[3];
+  my $end   = $cols[4];
+  my $reads = $cols[5]; # column 6 of the line; kept as a score when it is a plain integer
+  my $sense = $cols[6];
+  
+  # ignore lines that are not from the feature we want
+  if (!exists $features{$ft}) {next;}
+  my $method = $features{$ft};
+      
+  # assume we are dealing with strand-sensitive data, in which case we
+  # need to be able to distinguish strands by making the smallest
+  # region 2 bases long so that reverse strand is different to forward
+  # strand.
+  if ($start == $end) {
+    if ($sense eq '+') {$end++}
+    if ($sense eq '-') {$start--}
+  }
+  if ($sense eq '-') { # reverse-strand features are held with start > end
+    ($end, $start) = ($start, $end);
+  }
+    
+  if ($chrom ne $sequence) { # new sequence
+    # NOTE(review): assumes the GFF lines are grouped by sequence; a sequence that reappears later is written out again
+    write_tiles(\@tiles, \@whole_chromosome, $sequence, $sequence_len); #  write the old data
+
+    $sequence = $chrom;
+    $sequence_len = initialise_tiles($chrom, \@tiles, $coords);
+
+  }
+
+  store_feature_in_tile(\@tiles, \@whole_chromosome, $method, $start, $end, $reads, $ft);
+
+
+}
+
+# write the last sequence; the arguments must match write_tiles(): tiles, whole-chromosome features, sequence name, sequence length
+write_tiles(\@tiles, \@whole_chromosome, $sequence, $sequence_len); #  flush the data collected for the final sequence
+
+# work out the median score and write the Method object
+write_method();
+
+
+close(ACE);
+close(IN);
+
+
+
+$log->mail();
+print "Finished.\n" if ($verbose);
+exit(0);
+
+
+
+
+
+
+##############################################################
+#
+# Subroutines
+#
+##############################################################
+
+
+
+##########################################
+
+sub usage {
+  my ($error) = @_;
+
+  # only a request for "Help" is acted on; any other value is ignored
+  return unless ($error eq "Help");
+
+  system ('perldoc',$0); # show this script's POD
+  exit (0);
+}
+
+##########################################
+# Store one feature in the tile that wholly contains it, or on the top-level Sequence if no tile does.
+sub store_feature_in_tile {
+  my ($tiles_aref, $whole_chromosome_aref, $method, $start, $end, $reads, $text) = @_;
+  # $tiles_aref: tile hashes (start, end, segs); $whole_chromosome_aref: features kept on the top-level Sequence
+  # $start > $end marks a reverse-strand feature; $text is the feature type from GFF column 3
+
+  # remember integer scores per method so that the median score can be worked out later
+  if ($reads =~ /^\d+$/) {
+    push @{$scores{$method}}, $reads;
+  }
+
+  # order the coordinates so that a single containment test serves both strands
+  my ($low, $high) = ($start < $end) ? ($start, $end) : ($end, $start);
+
+  my $found = 0;
+  foreach my $tile (@{$tiles_aref}) {
+    # tile bounds are inclusive, so a feature beginning on the first base of a tile belongs to that tile
+    # (a strict '>' test here would push such features up to the top-level Sequence)
+    next unless ($low >= $tile->{start} && $high <= $tile->{end});
+
+    # coordinates are stored 1-based relative to the tile start, keeping the strand orientation
+    push @{$tile->{segs}}, [$method, $start - $tile->{start} + 1, $end - $tile->{start} + 1, $reads, $text];
+    $found = 1;
+    last; # the tiles built by initialise_tiles() do not overlap, so at most one can match
+  }
+  if (!$found) { # it spans a tile boundary or lies outside every tile, so place it on the top-level Sequence
+    push @{$whole_chromosome_aref}, [$method, $start, $end, $reads, $text];
+  }
+}
+
+##########################################
+sub write_tiles {
+  my ($tiles_aref, $whole_chromosome_aref, $sequence, $sequence_len) = @_;
+  # Print the features collected for $sequence to the ACE filehandle, one pass per Method, then empty both arrays.
+  # $tiles_aref: tile hashes (start, end, segs); $whole_chromosome_aref: features held on the top-level Sequence.
+  # output the new Sequence lines
+
+  foreach my $method (values %features) {
+    if (!defined $sequence || $sequence eq '') {last} # no sequence name, so nothing is printed (the arrays are still emptied below)
+    my $virtual = "${sequence}:${method}"; # base name of the Feature_data objects for this sequence and Method
+    
+    my @sequence_out; # lines for the Sequence object (its S_Child links)
+    my @feature_out; # lines for the Feature_data objects
+
+    if (scalar @{$tiles_aref}) {
+      push @sequence_out, "\nSequence : \"${sequence}\"\n";
+
+      for(my $tile_idx = 1; $tile_idx <= @{$tiles_aref}; $tile_idx++) {
+	my $tile = $tiles_aref->[$tile_idx-1];
+	
+	my $vseq = "${virtual}:$tile_idx"; # tiles are numbered from 1
+	
+	if (@{$tile->{segs}}) { # true when the tile holds features of any Method; only those of $method are printed
+	  push @sequence_out, "S_Child Feature_data ". $vseq ." ". $tile->{start} ." ". $tile->{end} ."\n";
+	  
+	  push @feature_out, "\nFeature_data : \"$vseq\"\n";
+	  foreach my $seg (@{$tile->{segs}}) {
+	    if ($seg->[0] eq $method) {
+	      push @feature_out, "Feature @$seg\n"; # the seg fields (method, start, end, score, text) joined by spaces
+	    }
+	  }
+	}
+      }
+    }
+    
+    # features that fitted in no single tile hang off a Feature_data object spanning the whole sequence
+    if (scalar @{$whole_chromosome_aref}) {
+      push @sequence_out, "\nSequence : \"${sequence}\"\n";
+      push @sequence_out, "S_Child Feature_data ${virtual} 1 $sequence_len\n";
+      push @feature_out, "\nFeature_data : ${virtual}\n"; # NOTE(review): name is unquoted here but quoted for the tile objects above
+      foreach my $seg (@{$whole_chromosome_aref}) {
+	if ($seg->[0] eq $method) {
+	  push @feature_out,  "Feature @$seg\n";
+	}
+      }
+    }
+    
+    print ACE @sequence_out;
+    print ACE "\n"; # acezip.pl concatenates another line to the last line if this is not blank
+    
+    print ACE @feature_out;
+    print ACE "\n";
+
+  }
+
+  @{$tiles_aref} = (); # empty the caller's arrays ready for the next sequence
+  @{$whole_chromosome_aref} = ();
+}
+##########################################
+sub initialise_tiles {
+  my ($sequence, $tiles_aref, $coords) = @_;
+  # Appends one {start, end, segs} hash per 300000-base tile of $sequence to
+  # @$tiles_aref and returns the sequence length obtained from $coords.
+
+  my $tile_size = 300000;
+  my $length = $coords->Superlink_length($sequence);
+  $log->log_and_die("Can't find the length of the Sequence $sequence\n") unless defined $length;
+
+  my $tile_start = 1;
+  while ($tile_start <= $length) {
+    # the final tile is cut short at the end of the sequence
+    my $tile_end = ($tile_start + $tile_size - 1 > $length) ? $length : $tile_start + $tile_size - 1;
+    push @{$tiles_aref}, { start => $tile_start, end => $tile_end, segs => [] };
+    $tile_start += $tile_size;
+  }
+  return $length;
+}
+##########################################
+# Work out the median score of each Method and write its Method object to the ACE file.
+# Reads the file-level %features (feature type => Method) and %scores (Method => list of scores).
+sub write_method {
+
+  foreach my $feature (keys %features) {
+    my $method = $features{$feature};
+    my $median_score = median(@{$scores{$method}});
+    my $score_max = 10 * $median_score; # the upper score bound is ten times the median
+    print "$method\t median score: $median_score\n";
+    my $colour = get_colour($method); # convert the method name into a colour value
+    # the Remark records who ran this script, on which input file and for which feature type
+    print ACE "\n";
+    print ACE "Method : $method\n";
+    print ACE "Remark \"This data was produced by $ENV{USER} with the script gff2feature_data.pl from the data file $input using the feature column '$feature'.\"\n";
+    print ACE "Show_up_strand\n";
+    print ACE "Score_by_width\n";
+    print ACE "Score_bounds 1 $score_max\n";
+    print ACE "Overlap\n";
+    print ACE "Right_priority 1.5\n";
+    print ACE "Colour $colour\n";
+    print ACE "\n";
+
+  }
+
+
+}
+##########################################
+# Return the median of a list of numbers, or 0 for an empty list.
+sub median {
+    my @vals = sort {$a <=> $b} @_;
+
+    # an empty list would otherwise evaluate (undef + undef)/2: still 0, but with
+    # 'uninitialized value' warnings under -w, so answer explicitly instead
+    return 0 unless @vals;
+
+    my $mid = int(@vals / 2);
+    # odd count: the single middle element
+    return $vals[$mid] if @vals % 2;
+    # even count: the mean of the two middle elements
+    return ($vals[$mid - 1] + $vals[$mid]) / 2;
+}
+##########################################
+# Return a colour name chosen deterministically from $text: its character codes are summed and taken modulo the number of colours.
+sub get_colour {
+  my ($text) = @_;
+
+  my @colours = qw(
+		    WHITE
+		    BLACK
+		    LIGHTGRAY
+		    DARKGRAY
+		    RED
+		    GREEN
+		    BLUE
+		    YELLOW
+		    CYAN
+		    MAGENTA
+		    LIGHTRED
+		    LIGHTGREEN
+		    LIGHTBLUE
+		    DARKRED
+		    DARKGREEN
+		    DARKBLUE
+		    PALERED
+		    PALEGREEN
+		    PALEBLUE
+		    PALEYELLOW
+		    PALECYAN
+		    PALEMAGENTA
+		    BROWN
+		    ORANGE
+		    PALEORANGE
+		    PURPLE
+		    VIOLET
+		    PALEVIOLET
+		    GRAY
+		    PALEGRAY
+		    CERISE
+		    MIDBLUE
+		);
+
+  my $value = 0;
+  foreach my $letter (split //, $text) {
+    $value += ord $letter;
+  }
+  # index by the actual size of the list rather than a hard-coded 32
+  return $colours[$value % @colours];
+}
+##########################################
+
+# Add perl documentation in POD format
+# This should expand on your brief description above and 
+# add details of any options that can be used with the program.  
+# Such documentation can be viewed using the perldoc command.
+
+
+__END__
+
+=pod
+
+=head2 NAME - gff2feature_data.pl
+
+=head1 USAGE
+
+=over 4
+
+=item gff2feature_data.pl  [-options]
+
+=back
+
+This script reads in a GFF file of locations and writes out a Feature_data ACE file.
+
+
+
+
+gff2feature_data.pl MANDATORY arguments:
+
+=over 4
+
+=item -input input GFF file containing the features to convert
+
+=back
+
+=over 4
+
+=item -output output ACE file of Feature_data objects
+
+=back
+
+=over 4
+
+=item -features the GFF feature type (column 3) to find and the Method to use for it, given as feature=method. Specify as many times as you wish e.g. -features Poly-A=RNASeq_polyA -features SL1=RNASeq_SL1 -features SL2=RNASeq_SL2
+
+=back
+
+
+
+
+gff2feature_data.pl  OPTIONAL arguments:
+
+=over 4
+
+=item -species species_name. By default, this script works with the species 'elegans'. This specifies a different species, which is passed as the organism when the Wormbase object is created.
+
+=back
+
+=over 4
+
+=item -h, Help
+
+=back
+
+=over 4
+ 
+=item -debug, Debug mode, set this to the username who should receive the emailed log messages. The default is that everyone in the group receives them.
+ 
+=back
+
+=over 4
+
+=item -test, Test mode, run the script, but don't change anything.
+
+=back
+
+=over 4
+    
+=item -verbose, output lots of chatty test messages
+
+=back
+
+
+=head1 REQUIREMENTS
+
+=over 4
+
+=item None at present.
+
+=back
+
+=head1 AUTHOR
+
+=over 4
+
+=item Gary Williams
+
+=back
+
+=cut

From fdbd991597e3c7ffc1753d98350ae23bba26929d Mon Sep 17 00:00:00 2001
From: Paul-Davis <pad@ebi.ac.uk>
Date: Mon, 16 Oct 2017 15:53:03 +0100
Subject: [PATCH 07/11] fixed the previous commit code to actually test for the
 presence of the real file including location.

---
 scripts/confirm_genes.pl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/confirm_genes.pl b/scripts/confirm_genes.pl
index f9ea6848e..034fd4f9a 100755
--- a/scripts/confirm_genes.pl
+++ b/scripts/confirm_genes.pl
@@ -1,3 +1,6 @@
+
+
+
 #!/usr/local/bin/perl5.8.0 -w
 #
 # confirm_genes.pl
@@ -620,7 +623,7 @@ sub create_transcript_file {
 	}else {
 		$prefix = $wormbase->chromosome_prefix."${chrom}_";
 	}
-        my $trinity = -e "${prefix}BLAT_Trinity_BEST.gff" ? "${prefix}BLAT_Trinity_BEST.gff" : '';
+        my $trinity = -e "${gffdir}/${prefix}BLAT_Trinity_BEST.gff" ? "${prefix}BLAT_Trinity_BEST.gff" : '';
 	$wormbase->run_command("cd $gffdir; cat ${prefix}BLAT_EST_BEST.gff ${prefix}BLAT_mRNA_BEST.gff $trinity >  ${prefix}BLAT_TRANSCRIPT_BEST.gff", $log) unless (-e "$gffdir/${prefix}BLAT_TRANSCRIPT_BEST.gff");
 
 	# only elegans has OSTs and RSTs

From 8508eacd1c2bf3edd68cebb0e1e98bc551572568 Mon Sep 17 00:00:00 2001
From: Gary Williams <gw3@ebi.ac.uk>
Date: Mon, 16 Oct 2017 16:16:59 +0100
Subject: [PATCH 08/11] check for yeast and worm in Homol_data, not
 Feature_data

---
 scripts/next_builder_checks.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/next_builder_checks.pl b/scripts/next_builder_checks.pl
index 108bc76f8..5c6183b1b 100755
--- a/scripts/next_builder_checks.pl
+++ b/scripts/next_builder_checks.pl
@@ -159,7 +159,7 @@
     # human, japonica, pristionchus, remanei, slimSwissProt,
     # worm, yeast)
     my @expected = qw(fly brenneri briggsae human japonica pristionchus remanei slimSwissProt worm yeast);
-    &check_for_missing_data2(\@hd, \@expected, 'Feature_data', 'what is expected');
+    &check_for_missing_data2(\@hd, \@expected, 'Homol_data', 'what is expected');
 
 #    if($count < 11) {
 #      $log->error("\tERROR: $clone has wublastx Homol_data objects missing\n");

From e90108d772f7c07dcd3314568a6488bf04e5e4d3 Mon Sep 17 00:00:00 2001
From: Paul-Davis <pad@ebi.ac.uk>
Date: Mon, 16 Oct 2017 16:36:42 +0100
Subject: [PATCH 09/11] Removed some of the sub_classes that dont exist for non
 elegans/briggsae species

---
 scripts/check_class.pl | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/check_class.pl b/scripts/check_class.pl
index 2508a3a3b..730705784 100755
--- a/scripts/check_class.pl
+++ b/scripts/check_class.pl
@@ -601,14 +601,10 @@ sub set_classes {
     @classes = (
 		"Sequence",
 		"CDS", 
-		"Transposon",
 		"Transcript",
 		"Pseudogene",
 		"Transposon_CDS",
 		"cDNA_sequence",
-		"${species}_CDS",
-		"${species}_pseudogenes",
-		"${species}_RNA_genes",
 		"Class",
 		"Model",
 		"Method",
@@ -628,8 +624,6 @@ sub set_classes {
 		"Peptide",
 		"Protein",
 		"Species",
-		"Transposon_family",
-		"Comment",
 		"Database",
 		"Display",
 		"DNA",

From 3dbff487ec186af125467e6bc4d31bd1ee95bff1 Mon Sep 17 00:00:00 2001
From: Paul-Davis <pad@ebi.ac.uk>
Date: Tue, 17 Oct 2017 12:41:21 +0100
Subject: [PATCH 10/11] Updated the Allele/Engineered_allele xace query.

---
 scripts/data_checks.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/data_checks.pl b/scripts/data_checks.pl
index bffc13b53..975c4eb6a 100644
--- a/scripts/data_checks.pl
+++ b/scripts/data_checks.pl
@@ -258,7 +258,7 @@ sub read_GFF_queries {
     $i++;
     $queries[$i]{'DESC'}  = "Allele";
     $queries[$i]{'GFF'}   = "\tAllele\tsequence_alteration";
-    $queries[$i]{'QUERY'} = 'find Variation flanking_sequences AND method = "Allele"';
+    $queries[$i]{'QUERY'} = 'find Variation Flanking_sequences AND (method = "Allele" OR method = "Engineered_allele")';
 
     $i++;
     $queries[$i]{'DESC'}  = "Transposon_insertion";

From a79a2afadc758d905c85c94f01b4b7edafdced70 Mon Sep 17 00:00:00 2001
From: Paul-Davis <pad@ebi.ac.uk>
Date: Fri, 20 Oct 2017 14:18:21 +0100
Subject: [PATCH 11/11] moved the tablemaker query inside the script as the
 previous path failed to resolve for all users/envs.

---
 scripts/NAMEDB/variation_server_compare.pl | 51 +++++++++++++++++++++-
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/scripts/NAMEDB/variation_server_compare.pl b/scripts/NAMEDB/variation_server_compare.pl
index 9196d053d..64278f2f0 100644
--- a/scripts/NAMEDB/variation_server_compare.pl
+++ b/scripts/NAMEDB/variation_server_compare.pl
@@ -36,9 +36,13 @@
 my $acedb = ($database or $wormbase->database('geneace'));
 $log->write_to("Checking $acedb for errors\n");
 
-my $def = "$ENV{CVS_DIR}/../wquery/geneace/variation_nameserver_comm.def";
+#my $def = "$ENV{CVS_DIR}/../wquery/geneace/variation_nameserver_comm.def";
+my $tmdef = &get_table_maker_def();
+my $command = "Table-maker -p $tmdef\nquit\n";
+$log->write_to("\nRetrieving Variation data, using Table-maker and query ${tmdef}...\n");
 
-my $TABLE = $wormbase->table_maker_query($acedb, $def);
+
+my $TABLE = $wormbase->table_maker_query($acedb, $tmdef);
 my %ace_ids;
 while(<$TABLE>) {
   next unless /WBVar/;
@@ -127,3 +131,46 @@
 $log->write_to("Work Done!\n");
 $log->mail();
 exit(0);
+
+
+sub get_table_maker_def {
+  # Write the Table-maker definition (Variation, Status, Public_name columns) to a file and return its path.
+  my $def = '/tmp/nsvar.def'; # NOTE(review): fixed name in shared /tmp - presumably runs by different users can collide; confirm
+  open(my $tmp_fh, '>', $def) or $log->log_and_die("cant write $def: $!\n");
+  my $txt = <<'END';
+
+Sortcolumn 1
+
+Colonne 1 
+Width 12 
+Optional 
+Visible 
+Class 
+Class Variation 
+From 1 
+ 
+Colonne 2 
+Width 12 
+Mandatory
+Visible 
+Next_Tag 
+From 1 
+Tag Status
+ 
+Colonne 3 
+Width 12 
+Optional
+Visible 
+Class 
+Class Variation_name 
+From 1 
+Tag Public_name  
+
+// End of these definitions
+END
+  # buffered write errors may only surface at close, so check both the print and the close
+  print {$tmp_fh} $txt or $log->log_and_die("cant write $def: $!\n");
+  close($tmp_fh) or $log->log_and_die("cant write $def: $!\n");
+
+  return $def;
+}