Skip to content

Commit

Permalink
Play with Berkeley database
Browse files Browse the repository at this point in the history
  • Loading branch information
nigelhorne committed Dec 8, 2023
1 parent 00450ad commit 7d19a50
Showing 1 changed file with 52 additions and 48 deletions.
100 changes: 52 additions & 48 deletions createdatabase.PL
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ use autodie; # Don't want system() to die, we catch failures
use Cwd;
use Data::Dumper;
# use BerkeleyDB;
# use DB_File;
use DB_File;

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.30 on ubuntu-20.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3 /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.30 on ubuntu-20.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.34 on ubuntu-20.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.34.3/x64/lib/site_perl/5.34.3/x86_64-linux /opt/hostedtoolcache/perl/5.34.3/x64/lib/site_perl/5.34.3 /opt/hostedtoolcache/perl/5.34.3/x64/lib/5.34.3/x86_64-linux /opt/hostedtoolcache/perl/5.34.3/x64/lib/5.34.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.34 on ubuntu-20.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.36 on ubuntu-20.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.36.3/x64/lib/site_perl/5.36.3/x86_64-linux /opt/hostedtoolcache/perl/5.36.3/x64/lib/site_perl/5.36.3 /opt/hostedtoolcache/perl/5.36.3/x64/lib/5.36.3/x86_64-linux /opt/hostedtoolcache/perl/5.36.3/x64/lib/5.36.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.36 on ubuntu-20.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.30 on ubuntu-22.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/site_perl/5.30.3 /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3/x86_64-linux /opt/hostedtoolcache/perl/5.30.3/x64/lib/5.30.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.30 on ubuntu-22.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.32 on ubuntu-20.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.32.1/x64/lib/site_perl/5.32.1/x86_64-linux /opt/hostedtoolcache/perl/5.32.1/x64/lib/site_perl/5.32.1 /opt/hostedtoolcache/perl/5.32.1/x64/lib/5.32.1/x86_64-linux /opt/hostedtoolcache/perl/5.32.1/x64/lib/5.32.1)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.32 on ubuntu-20.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.28 on ubuntu-20.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.28.3/x64/lib/site_perl/5.28.3/x86_64-linux /opt/hostedtoolcache/perl/5.28.3/x64/lib/site_perl/5.28.3 /opt/hostedtoolcache/perl/5.28.3/x64/lib/5.28.3/x86_64-linux /opt/hostedtoolcache/perl/5.28.3/x64/lib/5.28.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.28 on ubuntu-20.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.28 on ubuntu-22.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.28.3/x64/lib/site_perl/5.28.3/x86_64-linux /opt/hostedtoolcache/perl/5.28.3/x64/lib/site_perl/5.28.3 /opt/hostedtoolcache/perl/5.28.3/x64/lib/5.28.3/x86_64-linux /opt/hostedtoolcache/perl/5.28.3/x64/lib/5.28.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.28 on ubuntu-22.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.34 on ubuntu-22.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.34.3/x64/lib/site_perl/5.34.3/x86_64-linux /opt/hostedtoolcache/perl/5.34.3/x64/lib/site_perl/5.34.3 /opt/hostedtoolcache/perl/5.34.3/x64/lib/5.34.3/x86_64-linux /opt/hostedtoolcache/perl/5.34.3/x64/lib/5.34.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.34 on ubuntu-22.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.32 on ubuntu-22.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.32.1/x64/lib/site_perl/5.32.1/x86_64-linux /opt/hostedtoolcache/perl/5.32.1/x64/lib/site_perl/5.32.1 /opt/hostedtoolcache/perl/5.32.1/x64/lib/5.32.1/x86_64-linux /opt/hostedtoolcache/perl/5.32.1/x64/lib/5.32.1)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.32 on ubuntu-22.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.22 on ubuntu-22.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.22.4/x64/lib/site_perl/5.22.4/x86_64-linux /opt/hostedtoolcache/perl/5.22.4/x64/lib/site_perl/5.22.4 /opt/hostedtoolcache/perl/5.22.4/x64/lib/5.22.4/x86_64-linux /opt/hostedtoolcache/perl/5.22.4/x64/lib/5.22.4 .)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.22 on ubuntu-22.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.36 on ubuntu-22.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.36.3/x64/lib/site_perl/5.36.3/x86_64-linux /opt/hostedtoolcache/perl/5.36.3/x64/lib/site_perl/5.36.3 /opt/hostedtoolcache/perl/5.36.3/x64/lib/5.36.3/x86_64-linux /opt/hostedtoolcache/perl/5.36.3/x64/lib/5.36.3)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.36 on ubuntu-22.04

BEGIN failed--compilation aborted

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.22 on ubuntu-20.04

Can't locate DB_File.pm in @INC (you may need to install the DB_File module) (@INC contains: blib/arch blib/lib /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5 /home/runner/work/Geo-Coder-Free/Geo-Coder-Free/local/lib/perl5/x86_64-linux /home/runner/work/_actions/shogo82148/actions-setup-perl/v1/scripts/lib /opt/hostedtoolcache/perl/5.22.4/x64/lib/site_perl/5.22.4/x86_64-linux /opt/hostedtoolcache/perl/5.22.4/x64/lib/site_perl/5.22.4 /opt/hostedtoolcache/perl/5.22.4/x64/lib/5.22.4/x86_64-linux /opt/hostedtoolcache/perl/5.22.4/x64/lib/5.22.4 .)

Check failure on line 61 in createdatabase.PL

View workflow job for this annotation

GitHub Actions / Perl 5.22 on ubuntu-20.04

BEGIN failed--compilation aborted
use DBD::SQLite::Constants qw/:file_open/; # For SQLITE_OPEN_READONLY
use DBI;
use File::Copy;
Expand Down Expand Up @@ -1062,19 +1062,47 @@ my $oa = $ENV{'OPENADDR_HOME'};

exit(0) if((!defined($oa)) || (length($oa) == 0));

if(!-d $oa) {
mkdir $oa;
}

$Data::Dumper::Sortkeys = 1;

# Import openaddresses.io data into an SQLite database
# TODO: download and unzip the files from results.openaddresses.io
# TODO: only Australian, US and Canadian data is supported at the moment
# TODO Make the database choice configurable

my $sqlite_file = "$oa/openaddresses.sql";
my $db_file = "$oa/openaddresses.db";
exit(0) if((-r $sqlite_file) || (-r $db_file));

# TODO Make this configurable
my $dbh;
if(my $e = $ENV{'MARIADB_SERVER'}) {
my $redis;
my $mongodb;

if($ENV{'REDIS_SERVER'}) {
# Warning: Redis stores the database in RAM which makes it
# slow and unrealistic for most scenarios as it will
# use all the memory on your machine
require Redis;
Redis->import();

$redis = Redis->new(reconnect => 1200, every => 5_000_000);
$redis->select(1);
$redis->flushdb();
} elsif(my $e = $ENV{'MONGODB_SERVER'}) {
# TODO
# I'm hoping this will require less RAM than SQLite which just chews
# it up
require MongoDB;
MongoDB->import();

my $db_name = 'Geo::Coder::Free';
my ($server, $port) = split (/:/, $e);

$mongodb = MongoDB::MongoClient->new(host => $server, port => $port)->get_database($db_name)->get_collection('data');
} elsif(my $e = $ENV{'MARIADB_SERVER'}) {
# MariaDB/MySQL
require DBD::MariaDB;
DBD::MariaDB->import();
Expand All @@ -1087,7 +1115,7 @@ if(my $e = $ENV{'MARIADB_SERVER'}) {
{ RaiseError => 1, AutoCommit => 0 }
);
$| = 1;
print "removing old data\r";
print "Removing old data\r";
print "\n" if(DEBUG);
$| = 0;
$dbh->do('DROP DATABASE IF EXISTS geo_coder_free');
Expand All @@ -1098,24 +1126,28 @@ if(my $e = $ENV{'MARIADB_SERVER'}) {
open(my $fout, '>', $sqlite_file);
close($fout);
truncate $sqlite_file, 0;
} elsif($dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef, { RaiseError => 1, AutoCommit => 0, synchronous => 0, locking_mode => 'EXCLUSIVE' })) {
# SQLite
$dbh->do('PRAGMA cache_size = -65536'); # 64MB
$dbh->do('PRAGMA journal_mode = OFF');
$dbh->do('PRAGMA soft_heap_limit = 16777216'); # 16 MB
$dbh->do('PRAGMA hard_heap_limit = 33554432'); # 32 MB
# } elsif($dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef, { RaiseError => 1, AutoCommit => 0, synchronous => 0, locking_mode => 'EXCLUSIVE' })) {
# # SQLite
# $dbh->do('PRAGMA cache_size = -65536'); # 64MB
# $dbh->do('PRAGMA journal_mode = OFF');
# $dbh->do('PRAGMA soft_heap_limit = 16777216'); # 16 MB
# $dbh->do('PRAGMA hard_heap_limit = 33554432'); # 32 MB
}

if(!-d $oa) {
mkdir $oa;
}

my $berkeley_db;
my %dbtie;
my %db_tie;
my $db_file = "$oa/openaddresses.db";
unlink $db_file if(-r $db_file);
# if($berkeley_db = DB_File::HASHINFO->new()) {
# $berkeley_db->{'cachesize'} = 65536;
# tie %dbtie, 'DB_File', "$oa/openaddresses.db", 0, 0644, $berkeley_db;
# tie %db_tie, 'DB_File', "$oa/openaddresses.db", 0, 0644, $berkeley_db;
# }
# my $berkeley_db = BerkeleyDB::Hash->new(
# -Filename => $db_file,
# -Flags => DB_CREATE)
# or die "Cannot open file $db_file: $! $BerkeleyDB::Error";
$DB_HASH->{'cachesize'} = 65536;
my $berkeley_db = tie %db_tie, 'DB_File', $db_file, O_RDWR|O_CREAT, 0644, $DB_HASH
or die "Cannot open file $db_file: $!";

# $sqlite->do('CREATE TABLE cities(sequence INTEGER, city VARCHAR, county VARCHAR, state VARCHAR NOT NULL, country CHAR(2) NOT NULL)');
# $sqlite->do('CREATE TABLE openaddresses(md5 CHAR(16), lat DECIMAL, lon DECIMAL, name VARCHAR, number VARCHAR, street VARCHAR, city INTEGER, FOREIGN KEY (city) REFERENCES cities(sequence))');
Expand All @@ -1137,34 +1169,6 @@ if($dbh) {

print "This will take some time.\nBest to do it last thing at night and go to sleep, it should be ready in the morning.\n";

my $redis;

if($ENV{'REDIS_SERVER'}) {
# Warning: Redis stores the database in RAM which makes it
# slow and unrealistic for most scenarios as it will
# use all the memory on your machine
require Redis;
Redis->import();

$redis = Redis->new(reconnect => 1200, every => 5_000_000);
$redis->select(1);
$redis->flushdb();
}

my $mongodb;
if(my $e = $ENV{'MONGODB_SERVER'}) {
# TODO
# I'm hoping this will require less RAM than SQLite which just chews
# it up
require MongoDB;
MongoDB->import();

my $db_name = 'Geo::Coder::Free';
my ($server, $port) = split (/:/, $e);

$mongodb = MongoDB::MongoClient->new(host => $server, port => $port)->get_database($db_name)->get_collection('data');
}

my $au = Locale::AU->new();
my $ca = Locale::CA->new();
my $us = Locale::US->new();
Expand Down Expand Up @@ -2158,7 +2162,7 @@ if($dbh) {
$dbh->disconnect();
}
if($berkeley_db) {
untie %dbtie;
untie %db_tie;
}
if($redis) {
$redis->bgsave();
Expand Down Expand Up @@ -2944,7 +2948,7 @@ sub flush_queue
if($berkeley_db) {
while(my($k, $v) = each(%queued_commits)) {
# $berkeley_db->put($k, $v);
$dbtie{$k} = $v;
$db_tie{$k} = $v;
}
}
}
Expand Down Expand Up @@ -2989,7 +2993,7 @@ sub create_md5
if(DEBUG&DEBUG_MD5) {
print "$digest => $rc\n";
$global_md5s{$rc} = $digest;
} else {
} elsif(defined($dbh)) {
$global_md5s{$rc} = 1;
}
return $rc;
Expand Down

0 comments on commit 7d19a50

Please sign in to comment.