diff --git a/.vim.custom b/.vim.custom
deleted file mode 100644
index 4355dce1..00000000
--- a/.vim.custom
+++ /dev/null
@@ -1,28 +0,0 @@
-function! Layout()
-  " Rough num columns to decide between laptop and big monitor screens
-  let numcol = 2
-  if winwidth(0) >= 180
-    let numcol = 3
-  endif
-
-  if numcol == 3
-    e term://bash
-    vnew
-  endif
-
-  set colorcolumn=81
-  vsp term://docker-compose\ up
-  sp term://bash
-  resize 8
-  wincmd l
-  vertical resize 81
-  edit README.md
-  tabe README.md
-  vsp term://bash
-  wincmd l
-  vertical resize 81
-  tabfirst
-endfunction
-
-command! -register Layout call Layout()
-
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0265988d..4b79ccb0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## Unreleased
 
+## [v1.0.0-RC4] - 2023-01-10 Tue
+
+- Add [#62]: dump odds vs verification stats.
 - Add [#61]: shortened and filtered dump for BHL-related data.
 - Add [#60]: normalize odds according to verification results.
 
diff --git a/cmd/dump.go b/cmd/dump.go
index 9f9daa07..013a17f7 100644
--- a/cmd/dump.go
+++ b/cmd/dump.go
@@ -86,6 +86,15 @@ default format is CSV`,
 			err = fmt.Errorf("DumpOccurrences %w", err)
 			log.Fatal().Err(err).Msg("Dump of occurrences failed")
 		}
+
+		// Export the mapping of odds values to verification percentage.
+		err = bhli.DumpOddsVerification(dmp)
+		if err != nil {
+			err = fmt.Errorf("DumpOddsVerification %w", err)
+			log.Fatal().Err(err).Msg("Dump of odds verification failed")
+		}
+
+		log.Info().Msg("All files are created successfully")
 	},
 }
 
diff --git a/internal/bhlindex.go b/internal/bhlindex.go
index 1406dd14..778a2b4b 100644
--- a/internal/bhlindex.go
+++ b/internal/bhlindex.go
@@ -197,6 +197,50 @@ func (bi *bhlindex) DumpOccurrences(dmp output.Dumper) error {
 	return nil
 }
 
+// DumpOddsVerification writes the mapping between Odds values and the
+// percentage of successful verifications to a file in the output directory.
+// The file is named after the first record plus the format extension; for
+// formats other than compact JSON a header line precedes the data. No file
+// is created when the dumper returns no records.
+func (bi *bhlindex) DumpOddsVerification(dmp output.Dumper) (err error) {
+	outs, err := dmp.DumpOddsVerification()
+	if err != nil {
+		return fmt.Errorf("-> DumpOddsVerification: %w", err)
+	}
+	if len(outs) == 0 {
+		log.Info().Msgf("Dumped %d odds/verification records", len(outs))
+		return nil
+	}
+
+	path := filepath.Join(bi.OutputDir, outs[0].Name()+bi.extension())
+	w, err := os.Create(path)
+	if err != nil {
+		return fmt.Errorf("-> Create: %w", err)
+	}
+	// Close the file on every return path; a close error is reported only
+	// when nothing failed earlier, so the first error is never masked.
+	defer func() {
+		if cerr := w.Close(); cerr != nil && err == nil {
+			err = fmt.Errorf("-> Close: %w", cerr)
+		}
+	}()
+
+	if bi.OutputFormat != gnfmt.CompactJSON {
+		_, err = w.WriteString(output.CSVHeader(outs[0], bi.OutputFormat) + "\n")
+		if err != nil {
+			return fmt.Errorf("-> WriteString: %w", err)
+		}
+	}
+	for i := range outs {
+		_, err = w.WriteString(output.Format(outs[i], bi.OutputFormat) + "\n")
+		if err != nil {
+			return fmt.Errorf("-> WriteString: %w", err)
+		}
+	}
+	log.Info().Msgf("Dumped %d odds/verification records", len(outs))
+	return nil
+}
+
 // GetVersion outputs the version of BHLindex.
 func (bi *bhlindex) GetVersion() gnvers.Version {
 	return gnvers.Version{Version: Version, Build: Build}
diff --git a/internal/ent/output/interface.go b/internal/ent/output/interface.go
index 8fdfaa80..dca5b1ac 100644
--- a/internal/ent/output/interface.go
+++ b/internal/ent/output/interface.go
@@ -12,6 +12,10 @@ type Dumper interface {
 
 	// DumpOccurrences traverses database and outputs names occurrences in JSON, TSV or CSV format.
 	DumpOccurrences(context.Context, chan<- []Output, []int) error
+
+	// DumpOddsVerification returns the mapping between Odds values and the
+	// percentage of successful verifications.
+	DumpOddsVerification() ([]Output, error)
 }
 
 // Output interface provides generic functions outputs of verified names, and
diff --git a/internal/ent/output/odds-verif-format.go b/internal/ent/output/odds-verif-format.go
index dd5b1bfb..d6ba1665 100644
--- a/internal/ent/output/odds-verif-format.go
+++ b/internal/ent/output/odds-verif-format.go
@@ -7,7 +7,7 @@ import (
 )
 
 func (o OutputOddsVerification) Name() string {
-	return "odds-verification"
+	return "odds_verification"
 }
 
 func (o OutputOddsVerification) header() []string {
diff --git a/internal/interface.go b/internal/interface.go
index 6131ad67..ac29dce3 100644
--- a/internal/interface.go
+++ b/internal/interface.go
@@ -31,6 +31,10 @@ type BHLindex interface {
 	// TSV, or JSON formats.
 	DumpNames(output.Dumper) error
 
+	// DumpOddsVerification exports the mapping of Odds values to the
+	// verification percentage.
+	DumpOddsVerification(output.Dumper) error
+
 	// GetVersion outputs the version of BHLindex.
 	GetVersion() gnvers.Version
 
diff --git a/internal/io/dumpio/db-dump.go b/internal/io/dumpio/db-dump.go
index 26d4c7ae..9290d27f 100644
--- a/internal/io/dumpio/db-dump.go
+++ b/internal/io/dumpio/db-dump.go
@@ -140,6 +140,38 @@ func (d *dumpio) outputOccurs(id, limit int, ds []int) ([]output.Output, error)
 	return res, nil
 }
 
+// getOccurVerif reads every row of the odds_verifications table, ordered by
+// odds_log10, and returns the rows as output records.
+func (d *dumpio) getOccurVerif() ([]output.Output, error) {
+	q := `
+  SELECT
+    odds_log10, names_num, verif_percent
+  FROM odds_verifications
+  ORDER BY odds_log10`
+	rows, err := d.db.Query(q)
+	if err != nil {
+		return nil, fmt.Errorf("-> Query %w", err)
+	}
+	defer rows.Close()
+
+	var res []output.Output
+	for rows.Next() {
+		o := output.OutputOddsVerification{}
+		err = rows.Scan(&o.OddsLog10, &o.NamesNum, &o.VerifPercent)
+		if err != nil {
+			return nil, fmt.Errorf("-> Scan %w", err)
+		}
+		res = append(res, o)
+	}
+	// Next returns false on failure as well as on exhaustion; checking Err
+	// keeps a broken iteration from being reported as a complete result.
+	if err = rows.Err(); err != nil {
+		return nil, fmt.Errorf("-> Rows %w", err)
+	}
+
+	return res, nil
+}
+
 func getDataSources(ds []int) string {
 	var dataSources string
 	if len(ds) > 0 {
diff --git a/internal/io/dumpio/dumpio.go b/internal/io/dumpio/dumpio.go
index 1ced431d..c76ab372 100644
--- a/internal/io/dumpio/dumpio.go
+++ b/internal/io/dumpio/dumpio.go
@@ -122,9 +122,16 @@ func (d *dumpio) DumpOccurrences(ctx context.Context, ch chan<- []output.Output,
 			)
 		}
 	}
-	fmt.Fprintf(os.Stderr, "\r%s", strings.Repeat(" ", 80))
+	fmt.Fprintf(os.Stderr, "\r%s\r", strings.Repeat(" ", 80))
 	log.Info().Msgf("Dumped %s occurrences",
 		humanize.Comma(int64(count)),
 	)
 	return nil
 }
+
+// DumpOddsVerification logs the start of the dump and returns the records
+// produced by getOccurVerif.
+func (d *dumpio) DumpOddsVerification() ([]output.Output, error) {
+	log.Info().Msg("Dumping odds vs verification percentage")
+	return d.getOccurVerif()
+}
diff --git a/scripts/filter.rb b/scripts/filter.rb
index 5fe0d1ac..168938ec 100644
--- a/scripts/filter.rb
+++ b/scripts/filter.rb
@@ -32,7 +32,7 @@
 r.close
 w.close
 
-puts "\n\nFiltering occurrences.csv to occurrences_filtered.csv\n\n"
+puts "\nFiltering occurrences.csv to occurrences_filtered.csv\n\n"
 
 r = File.open('occurrences.csv')
 w = File.open('occurrences_filtered.csv', 'w:utf-8')