Skip to content

Commit

Permalink
Merge changes from 'develop' reaching version 0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
iimog committed Jul 16, 2015
2 parents ad64439 + f27d566 commit e9e70f9
Show file tree
Hide file tree
Showing 85 changed files with 26,226 additions and 867 deletions.
11 changes: 11 additions & 0 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,14 @@ hesitate to contact us. Either report [[https://github.com/BioInf-Wuerzburg/AliT
#+LaTeX_HEADER: \setlength{\parindent}{0pt}
#+LaTeX_HEADER: \setlength{\parskip}{1.5ex}
#+LATEX_HEADER: \renewcommand{\familydefault}{\sfdefault}
** Changelog
*** 0.2.0
- allow for input as tsv and bed files as alternative to fasta files
- demo data added - seven chloroplast genomes
- documentation added
- test cases added
- added interactive JavaScript output
- renamed to Alignment Toolbox and Visualization (AliTV)
*** 0.1.0
- First release of the wgaPipeline code.
- Automated whole genome alignment and circos visualization from two fasta files.
75 changes: 55 additions & 20 deletions bin/generateJSONfiles.pl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use Getopt::Long;
use Pod::Usage;
use Log::Log4perl qw(:no_extra_logdie_message);
use POSIX; # for rounding
use Math::Round;
use JSON;
use File::Copy qw(cp);
use File::Path qw(make_path);
Expand Down Expand Up @@ -125,9 +125,18 @@ =head1 CODE

my $L = Log::Log4perl::get_logger();

my %filters = ('karyo' => {'chromosomes' => {}, 'order' => [], 'genome_order' => []},
'links' => {'minLinkIdentity' => 70, 'maxLinkIdentity' => 100, 'minLinkLength' => 0, 'maxLinkLength' => 1000000},
'onlyShowAdjacentLinks' => JSON::true,
'showAllChromosomes' => JSON::false,
'skipChromosomesWithoutVisibleLinks' => JSON::false,
);

my %conf = ('graphicalParameters' => {'tickDistance' => 100, 'karyoDistance' => 10});

create_dir_structure();

my ($karyo, $karyo_filters) = parse_karyo($opt_karyo);
my ($karyo) = parse_karyo($opt_karyo);

my $features = {};
my $links = [];
Expand All @@ -141,7 +150,10 @@ =head1 CODE
print OUT encode_json {'data' => {'karyo' => $karyo, 'features' => {'link' => $features}, 'links' => $links}};
close OUT or die "$!";
open(OUT, '>', "$opt_prefix.d3/data/filters.json") or $L->logdie("Can not open file $opt_prefix.d3/data/filters.json\n$!");
print OUT encode_json {'filters' => $karyo_filters};
print OUT encode_json {'filters' => \%filters};
close OUT or die "$!";
open(OUT, '>', "$opt_prefix.d3/data/conf.json") or $L->logdie("Can not open file $opt_prefix.d3/data/conf.json\n$!");
print OUT encode_json {'conf' => \%conf};
close OUT or die "$!";

=head2 create_dir_structure
Expand Down Expand Up @@ -225,27 +237,22 @@ =head2 parse_karyo
sub parse_karyo{
my $file = $_[0];
my %karyo = ('chromosomes' => {});
my %filters = ('karyo' => {'chromosomes' => {}, 'order' => [], 'genome_order' => []},
'links' => {'minLinkIdentity' => 70, 'maxLinkIdentity' => 100, 'minLinkLength' => 0, 'maxLinkLength' => 1000000},
'onlyShowAdjacentLinks' => JSON::true,
'showAllChromosomes' => JSON::false,
'skipChromosomesWithoutLinks' => JSON::false,
'skipChromosomesWithoutVisibleLinks' => JSON::false,
);
my %genome_ids = ();
my %genome_info = ();
open(IN, '<', $file) or $L->logdie("Can not open file $file\n$!");
while(<IN>){
chomp;
my($id, $gid, $len, $seq) = split(/\t/);
$karyo{'chromosomes'}{$id} = {"genome_id" => $gid, "length" => $len+0, "seq" => $seq};
$filters{'karyo'}{'chromosomes'}{$id} = {'reverse' => JSON::false, 'visible' => JSON::true};
push(@{$filters{'karyo'}{'order'}}, $id);
$genome_ids{$gid} = 1;
$genome_info{$gid} = {'length' => 0, 'elements' => 0} unless(exists $genome_info{$gid});
$genome_info{$gid}{'length'} += $len;
$genome_info{$gid}{'elements'}++;
}
$filters{'karyo'}{'genome_order'} = [sort map {$_} keys %genome_ids];
$filters{'karyo'}{'genome_order'} = [sort map {$_} keys %genome_info];
optimize_filters_and_conf(\%genome_info);
close IN or $L->logdie("Can not close file $file\n$!");
return (\%karyo, \%filters);
print Dumper(\%karyo);
return (\%karyo);
}

=head2 parse_bed
Expand All @@ -270,6 +277,27 @@ sub parse_bed{
return \%features;
}

=head2 optimize_filters_and_conf
Sub to optimize values in filters and conf objects. Input: hash in the form {genome_id1 => {length => 1000, elements => 5}}
Directly manipulates %conf (tickDistance, karyoDistance) and %filters (minLinkLength, maxLinkLength)
=cut

sub optimize_filters_and_conf{
my %genome_info = %{$_[0]};
my $maxTotalSize = $genome_info{(sort {$genome_info{$b}{'length'} <=> $genome_info{$a}{'length'}} keys %genome_info)[0]}{'length'};
my $maxNumElements = $genome_info{(sort {$genome_info{$b}{'elements'} <=> $genome_info{$a}{'elements'}} keys %genome_info)[0]}{'elements'};
# Set total karyoDistance to 5% of maximum genomeLength
$conf{'graphicalParameters'}{'karyoDistance'} = round(($maxTotalSize * 0.05) / ($maxNumElements - 1));
# Set tickDistance to nearest multiple of a power of 10 of 1% of maximum genomeLength
$conf{'graphicalParameters'}{'tickDistance'} = nearest(10**(int(log($maxTotalSize*0.01)/log(10))), $maxTotalSize * 0.01);
# Set minLinkLength to 0.05% of maximum genomeLength
$filters{'links'}{'minLinkLength'} = round($maxTotalSize * 0.0005);
# Set maxLinkLength to maximum genomeLength + 1 (so even the largest possible links are drawn)
$filters{'links'}{'maxLinkLength'} = round($maxTotalSize + 1);
}


=head2 parse_link
Expand All @@ -281,7 +309,8 @@ =head2 parse_link
sub parse_links{
my $file = $_[0];
my $features = $_[1];
my @links = ();
my %links = ();
my $linkcounter=0;
open(IN, '<', $file) or $L->logdie("Can not open file $file\n$!");
my $line = <IN>;
my @header = ('fida', 'type', 'fidb');
Expand All @@ -298,13 +327,17 @@ sub parse_links{
$L->logdie("Link header is present but does not contain fida column (fida and fidb column are mandatory)") unless($fidapresent);
$L->logdie("Link header is present but does not contain fidb column (fida and fidb column are mandatory)") unless($fidbpresent);
} else {
push(@links, parse_link_line($line, \@header, $features));
my($genome0, $genome1, $linkobject) = parse_link_line($line, \@header, $features);
$links{$genome0}{$genome1}{$linkcounter} = $linkobject;
$linkcounter++;
}
while(<IN>){
push(@links, parse_link_line($_, \@header, $features));
my($genome0, $genome1, $linkobject) = parse_link_line($_, \@header, $features);
$links{$genome0}{$genome1}{$linkcounter} = $linkobject;
$linkcounter++;
}
close IN or $L->logdie("Can not close file $file\n$!");
return \@links;
return \%links;
}

sub parse_link_line{
Expand All @@ -329,7 +362,9 @@ sub parse_link_line{
$properties{'source'} = $fida;
$L->warn("There is no feature id $fidb in the bed file (but used in link file)") unless(exists $features->{$fidb});
$properties{'target'} = $fidb;
return \%properties;
my $genome0 = $karyo->{chromosomes}{$features->{$fida}{karyo}}{genome_id};
my $genome1 = $karyo->{chromosomes}{$features->{$fidb}{karyo}}{genome_id};
return ($genome0, $genome1, \%properties);
}

=head1 LIMITATIONS
Expand Down
Loading

0 comments on commit e9e70f9

Please sign in to comment.