Skip to content

Commit

Permalink
remove duplicate occurrences from short version of dump (fix #65)
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed Jan 31, 2023
1 parent b51b9fb commit 4e5894b
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 0 deletions.
20 changes: 20 additions & 0 deletions internal/bhlindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"reflect"
"strings"
"sync"

Expand Down Expand Up @@ -257,9 +258,12 @@ func processOutput[O output.Output](
ch <-chan []O,
) error {
var o O
var shortOccur bool
var err error
var w *os.File
var count int
var pageID string
pageName := make(map[string]bool)
for rows := range ch {
select {
case <-ctx.Done():
Expand All @@ -268,6 +272,10 @@ func processOutput[O output.Output](
for i := range rows {
o = rows[i]
if count == 0 {
t := reflect.TypeOf(o).String()
if t == "output.OutputOccurrenceShort" {
shortOccur = true
}
path := filepath.Join(bi.OutputDir, o.Name()+bi.extension())
w, err = os.Create(path)
if err != nil {
Expand All @@ -285,6 +293,18 @@ func processOutput[O output.Output](
fmt.Fprintf(os.Stderr, "\r%s\r", strings.Repeat(" ", 80))
log.Info().Msgf("Processed %d %s", count, o.Name())
}
if shortOccur {
pg, nm := o.PageNameIDs()
if pageID != pg {
pageName = make(map[string]bool)
pageID = pg
}
key := pg + "|" + nm
if pageName[key] {
continue
}
pageName[key] = true
}
_, err = w.WriteString(output.Format(rows[i], bi.OutputFormat) + "\n")
if err != nil {
return err
Expand Down
4 changes: 4 additions & 0 deletions internal/ent/output/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,8 @@ type Output interface {

// jsonOutput provides a method to generate a JSON output.
jsonOutput(bool) string

// PageNameIDs returns the page ID and the name ID as strings, if available,
// and two empty strings otherwise.
PageNameIDs() (string, string)
}
4 changes: 4 additions & 0 deletions internal/ent/output/name-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,7 @@ func (on OutputName) jsonOutput(pretty bool) string {
res, _ := enc.Encode(on)
return string(res)
}

// PageNameIDs implements the PageNameIDs method for OutputName. It always
// returns two empty strings.
func (on OutputName) PageNameIDs() (string, string) {
	var pageID, nameID string
	return pageID, nameID
}
4 changes: 4 additions & 0 deletions internal/ent/output/name-short-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ func (on OutputNameShort) jsonOutput(pretty bool) string {
res, _ := enc.Encode(out)
return string(res)
}

// PageNameIDs implements the PageNameIDs method for OutputNameShort. It
// always returns two empty strings.
func (on OutputNameShort) PageNameIDs() (string, string) {
	var pageID, nameID string
	return pageID, nameID
}
4 changes: 4 additions & 0 deletions internal/ent/output/occur-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,7 @@ func (o OutputOccurrence) jsonOutput(pretty bool) string {
res, _ := enc.Encode(o)
return string(res)
}

// PageNameIDs returns the occurrence's PageID formatted as a decimal string,
// followed by its NameID.
func (o OutputOccurrence) PageNameIDs() (string, string) {
	pageID := strconv.Itoa(o.PageID)
	nameID := o.NameID
	return pageID, nameID
}
4 changes: 4 additions & 0 deletions internal/ent/output/occur-short-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,7 @@ func (o OutputOccurrenceShort) jsonOutput(pretty bool) string {
res, _ := enc.Encode(oShort)
return string(res)
}

// PageNameIDs returns the short occurrence's PageID formatted as a decimal
// string, followed by its NameID.
func (o OutputOccurrenceShort) PageNameIDs() (string, string) {
	pageID := strconv.Itoa(o.PageID)
	nameID := o.NameID
	return pageID, nameID
}
4 changes: 4 additions & 0 deletions internal/ent/output/odds-verif-format.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,7 @@ func (o OutputOddsVerification) jsonOutput(pretty bool) string {
res, _ := enc.Encode(o)
return string(res)
}

// PageNameIDs implements the PageNameIDs method for OutputOddsVerification.
// It always returns two empty strings.
func (o OutputOddsVerification) PageNameIDs() (string, string) {
	var pageID, nameID string
	return pageID, nameID
}

0 comments on commit 4e5894b

Please sign in to comment.