Skip to content

Commit

Permalink
dump relation odds/verification (close #62)
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed Jan 11, 2023
1 parent d73aee7 commit 0f6ad41
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 31 deletions.
28 changes: 0 additions & 28 deletions .vim.custom

This file was deleted.

3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

## [v1.0.0-RC4] - 2023-01-10 Tue

- Add [#62]: dump odds vs verification stats.
- Add [#61]: shortened and filtered dump for BHL-related data.
- Add [#60]: normalize odds according to verification results.

Expand Down
8 changes: 8 additions & 0 deletions cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ default format is CSV`,
err = fmt.Errorf("DumpOccurrences %w", err)
log.Fatal().Err(err).Msg("Dump of occurrences failed")
}

err = bhli.DumpOddsVerification(dmp)
if err != nil {
err = fmt.Errorf("DumpOccurrences %w", err)
log.Fatal().Err(err).Msg("Dump of occurrences failed")
}

log.Info().Msg("All files are created successfully")
},
}

Expand Down
31 changes: 31 additions & 0 deletions internal/bhlindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,37 @@ func (bi *bhlindex) DumpOccurrences(dmp output.Dumper) error {
return nil
}

// DumpOddsVerification fetches the odds/verification records from dmp and
// writes them, one per line, to a single file inside bi.OutputDir. The file
// name is the Name() of the first record followed by bi.extension(). Unless
// the output format is gnfmt.CompactJSON, the result of output.CSVHeader is
// written as the first line. When dmp returns no records, no file is created.
//
// It returns an error if the records cannot be fetched, or if the file cannot
// be created, written, or closed.
func (bi *bhlindex) DumpOddsVerification(dmp output.Dumper) error {
	outs, err := dmp.DumpOddsVerification()
	if err != nil {
		return fmt.Errorf("-> DumpOddsVerification: %w", err)
	}
	if len(outs) == 0 {
		log.Info().Msgf("Dumped %d odds/verification records", len(outs))
		return nil
	}

	path := filepath.Join(bi.OutputDir, outs[0].Name()+bi.extension())
	w, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("-> Create: %w", err)
	}
	// Releases the file handle on the early error returns below. On the
	// success path the file is closed explicitly so that a close error is
	// reported; the second Close performed by this defer is then a no-op
	// whose error is intentionally ignored.
	defer w.Close()

	if bi.OutputFormat != gnfmt.CompactJSON {
		_, err = w.WriteString(output.CSVHeader(outs[0], bi.OutputFormat) + "\n")
		if err != nil {
			return fmt.Errorf("-> WriteString: %w", err)
		}
	}

	for i := range outs {
		_, err = w.WriteString(output.Format(outs[i], bi.OutputFormat) + "\n")
		if err != nil {
			return fmt.Errorf("-> WriteString: %w", err)
		}
	}

	if err = w.Close(); err != nil {
		return fmt.Errorf("-> Close: %w", err)
	}

	log.Info().Msgf("Dumped %d odds/verification records", len(outs))
	return nil
}

// GetVersion outputs the version of BHLindex.
func (bi *bhlindex) GetVersion() gnvers.Version {
return gnvers.Version{Version: Version, Build: Build}
Expand Down
4 changes: 4 additions & 0 deletions internal/ent/output/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ type Dumper interface {

// DumpOccurrences traverses database and outputs names occurrences in JSON, TSV or CSV format.
DumpOccurrences(context.Context, chan<- []Output, []int) error

// DumpOddsVerification gets result of mapping between Odds values and the percentage of
// successful verifications.
DumpOddsVerification() ([]Output, error)
}

// Output interface provides generic functions outputs of verified names, and
Expand Down
2 changes: 1 addition & 1 deletion internal/ent/output/odds-verif-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

// Name returns the label that identifies odds/verification output.
func (o OutputOddsVerification) Name() string {
	return "odds_verification"
}

func (o OutputOddsVerification) header() []string {
Expand Down
4 changes: 4 additions & 0 deletions internal/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ type BHLindex interface {
// TSV, or JSON formats.
DumpNames(output.Dumper) error

// DumpOddsVerification exports the mapping of Odds values to the
// verification percentage.
DumpOddsVerification(output.Dumper) error

// GetVersion outputs the version of BHLindex.
GetVersion() gnvers.Version

Expand Down
32 changes: 32 additions & 0 deletions internal/io/dumpio/db-dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,38 @@ func (d *dumpio) outputOccurs(id, limit int, ds []int) ([]output.Output, error)
return res, nil
}

// getOccurVerif reads every row of the odds_verifications table, ordered by
// odds_log10, and returns one output.OutputOddsVerification per row.
//
// It returns an error if the query fails, if a row cannot be scanned, or if
// iteration over the result set stops because of an error.
func (d *dumpio) getOccurVerif() ([]output.Output, error) {
	q := `
SELECT
odds_log10, names_num, verif_percent
FROM odds_verifications
ORDER BY odds_log10`
	rows, err := d.db.Query(q)
	if err != nil {
		return nil, fmt.Errorf("-> Query %w", err)
	}
	defer rows.Close()

	var res []output.Output
	for rows.Next() {
		o := output.OutputOddsVerification{}
		err = rows.Scan(
			&o.OddsLog10, &o.NamesNum, &o.VerifPercent,
		)
		if err != nil {
			return nil, fmt.Errorf("-> Scan %w", err)
		}
		res = append(res, o)
	}
	// rows.Next returns false both at the end of the result set and on
	// failure; rows.Err distinguishes the two so that a partial result is
	// not returned as if it were complete.
	if err = rows.Err(); err != nil {
		return nil, fmt.Errorf("-> Rows %w", err)
	}

	return res, nil
}

func getDataSources(ds []int) string {
var dataSources string
if len(ds) > 0 {
Expand Down
7 changes: 6 additions & 1 deletion internal/io/dumpio/dumpio.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,14 @@ func (d *dumpio) DumpOccurrences(ctx context.Context, ch chan<- []output.Output,
)
}
}
fmt.Fprintf(os.Stderr, "\r%s", strings.Repeat(" ", 80))
fmt.Fprintf(os.Stderr, "\r%s\r", strings.Repeat(" ", 80))
log.Info().Msgf("Dumped %s occurrences",
humanize.Comma(int64(count)),
)
return nil
}

// DumpOddsVerification logs the start of the dump and returns the records
// produced by getOccurVerif, together with any error it reports.
func (d *dumpio) DumpOddsVerification() ([]output.Output, error) {
	log.Info().Msg("Dumping odds vs verification percentage")
	return d.getOccurVerif()
}
2 changes: 1 addition & 1 deletion scripts/filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
r.close
w.close

puts "\n\nFiltering occurrences.csv to occurrences_filtered.csv\n\n"
puts "\nFiltering occurrences.csv to occurrences_filtered.csv\n\n"

r = File.open('occurrences.csv')
w = File.open('occurrences_filtered.csv', 'w:utf-8')
Expand Down

0 comments on commit 0f6ad41

Please sign in to comment.