Skip to content

Commit

Permalink
add ranks and ids to classification dump (close #53)
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed Sep 12, 2022
1 parent ff29566 commit 48d819e
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

## [v0.13.2] - 2022-09-12 Mon

- Add [#53]: classification ranks and IDs in dump files.

## [v0.13.1] - 2022-09-08 Thu

- Add [#52]: dump pages information
Expand Down
10 changes: 6 additions & 4 deletions ent/output/name-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ func (on OutputName) header() []string {
"NameID", "DetectedName", "Cardinality", "OccurrencesNumber", "OddsLog10",
"MatchType", "EditDistance", "StemEditDistance", "MatchedCanonical",
"MatchedFullName", "MatchedCardinality", "CurrentCanonical",
"CurrentFullName", "CurrentCardinality", "Classification", "RecordID",
"CurrentFullName", "CurrentCardinality", "Classification",
"ClassificationRanks", "ClassificationIDs", "RecordID",
"DataSourceID", "DataSource", "DataSourcesNumber", "Curation", "Error",
}
}
Expand All @@ -37,10 +38,11 @@ func (on OutputName) csvOutput(sep rune) string {
dsNum := strconv.Itoa(on.DataSourcesNumber)

s := []string{
on.NameID, on.DetectedName, card, occNum, odds, on.MatchType,
eDist, stEDist, on.MatchedCanonical, on.MatchedFullName, matchCard,
on.NameID, on.DetectedName, card, occNum, odds, on.MatchType, eDist,
stEDist, on.MatchedCanonical, on.MatchedFullName, matchCard,
on.CurrentCanonical, on.CurrentFullName, currCard, on.Classification,
on.RecordID, dsID, on.DataSource, dsNum, on.Curation, on.VerifError,
on.ClassificationRanks, on.ClassificationIDs, on.RecordID, dsID,
on.DataSource, dsNum, on.Curation, on.VerifError,
}

return gnfmt.ToCSV(s, sep)
Expand Down
9 changes: 9 additions & 0 deletions ent/output/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ type OutputName struct {
// DataSource.
Classification string `json:"classification"`

// ClassificationRanks provides data about ranks used in the classification.
ClassificationRanks string `json:"classificationRanks"`

// ClassificationIDs provides data about IDs a DataSource assigns to
// taxa in the classification.
ClassificationIDs string `json:"classificationIDs"`

// RecordID is the ID assigned by the DataSource to the name.
RecordID string `json:"recordID"`

Expand Down Expand Up @@ -150,6 +157,8 @@ type OutputOccurrence struct {
Annotation string `json:"annotNomen"`
}

// CSVHeader takes any object that implements the Output interface
// and generates a TSV or CSV header.
func CSVHeader[O Output](o O, f gnfmt.Format) string {
sep := rune(',')
if f == gnfmt.TSV {
Expand Down
12 changes: 7 additions & 5 deletions io/dumpio/db-dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,10 @@ func (d *dumpio) outputNames(id, limit int, ds []int) ([]output.OutputName, erro
q := fmt.Sprintf(`
SELECT
name, cardinality, occurrences, odds_log10, match_type, edit_distance,
stem_edit_distance, matched_canonical, matched_name, matched_cardinality,
current_canonical, current_name, current_cardinality, classification,
record_id, data_source_id, data_source_title, data_sources_number,
stem_edit_distance, matched_canonical, matched_name, matched_cardinality,
current_canonical, current_name, current_cardinality, classification,
classification_ranks, classification_ids, record_id, data_source_id,
data_source_title, data_sources_number,
curation, error
FROM verified_names
WHERE name_id >= $1 and name_id < $2
Expand All @@ -125,8 +126,9 @@ ORDER by name_id
&o.MatchType, &o.EditDistance, &o.StemEditDistance, &o.MatchedCanonical,
&o.MatchedFullName, &o.MatchedCardinality, &o.CurrentCanonical,
&o.CurrentFullName, &o.CurrentCardinality, &o.Classification,
&o.RecordID, &o.DataSourceID, &o.DataSource, &o.DataSourcesNumber,
&o.Curation, &o.VerifError,
&o.ClassificationRanks, &o.ClassificationIDs, &o.RecordID,
&o.DataSourceID, &o.DataSource, &o.DataSourcesNumber, &o.Curation,
&o.VerifError,
)
if err != nil {
return nil, fmt.Errorf("outputNames: %w", err)
Expand Down
1 change: 1 addition & 0 deletions io/dumpio/dumpio.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ func (d *dumpio) DumpPages(ctx context.Context, ch chan<- []output.OutputPage) e
return nil
}

// DumpNames outputs data about verified names.
func (d *dumpio) DumpNames(ctx context.Context, ch chan<- []output.OutputName, ds []int) error {
err := d.checkForVerifiedNames()
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion version.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package bhlindex

var (
Version = "v0.13.1+"
Version = "v0.13.2+"
Build string
)

0 comments on commit 48d819e

Please sign in to comment.