Skip to content

Commit

Permalink
Add dynamic-programming tree matching strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
msuozzo committed May 31, 2024
1 parent f321d75 commit 896e999
Showing 1 changed file with 83 additions and 1 deletion.
84 changes: 83 additions & 1 deletion tools/indexscan/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ var (
version = flag.String("version", "", "package version")
repo = flag.String("repo", "", "package repo")
repoPath = flag.String("repo-path", "", "local path from which to load the package repo")
strategy = flag.String("strategy", "dynamic", "strategy to use to search and rank commits. {dynamic, commits-near-publish}")
)

func getRepo(ctx context.Context, uri, path string) (*git.Repository, error) {
Expand Down Expand Up @@ -109,6 +110,81 @@ type searchStrategy interface {
Search(ctx context.Context, r *git.Repository, hashes []string) (closest []string, matched, total int, err error)
}

// DynamicTreeSearchStrategy ranks commits by how many of the input files are
// found in each commit's tree, after scanning every tree object in the
// repository. It carries no configuration; the zero value is ready to use.
type DynamicTreeSearchStrategy struct{}

// Search finds the commit(s) whose root tree contains the most input files.
//
// hashes are hex-encoded git blob hashes. Entries with no corresponding blob
// object in r are ignored. Search returns the hashes of every commit tied for
// the highest match count (closest), that count (matched), and the number of
// distinct input hashes that exist as blobs in the repository (total). If no
// commit's tree contains any of the files, matched is 0 and every commit is
// returned in closest.
//
// An error is returned if none of the hashes are present in the repository,
// if the repository's objects cannot be read, or if ctx is done before the
// search completes. On error the other results are zero values.
func (DynamicTreeSearchStrategy) Search(ctx context.Context, r *git.Repository, hashes []string) (closest []string, matched, total int, err error) {
	// Collect the subset of input hashes that exist as blobs in the repo.
	files := make(map[plumbing.Hash]bool, len(hashes))
	for _, h := range hashes {
		hash := plumbing.NewHash(h)
		switch _, blobErr := r.BlobObject(hash); blobErr {
		case nil:
			files[hash] = true
		case plumbing.ErrObjectNotFound:
			// Skip files not present in repo.
		default:
			return nil, 0, 0, blobErr
		}
	}
	total = len(files)
	if total == 0 {
		return nil, 0, 0, errors.New("repo contains no matching files")
	}

	// Construct cache of all trees and their match count.
	cache := make(map[plumbing.Hash]int)
	ti, err := r.TreeObjects()
	if err != nil {
		return nil, 0, 0, errors.Wrap(err, "listing tree objects")
	}
	err = ti.ForEach(func(t *object.Tree) error {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		countTree(t, files, cache)
		return nil
	})
	if err != nil {
		// A partially populated cache would silently under-rank commits, so
		// fail rather than continue.
		return nil, 0, 0, errors.Wrap(err, "counting tree matches")
	}

	// Search through all commits for tree with closest match.
	ci, err := r.CommitObjects()
	if err != nil {
		return nil, 0, 0, errors.Wrap(err, "listing commit objects")
	}
	err = ci.ForEach(func(c *object.Commit) error {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		count := cache[c.TreeHash]
		if matched < count {
			// New best: discard the commits that matched the previous best.
			matched = count
			closest = closest[:0]
		}
		if matched == count {
			closest = append(closest, c.Hash.String())
		}
		return nil
	})
	if err != nil {
		return nil, 0, 0, errors.Wrap(err, "ranking commits")
	}
	return closest, matched, total, nil
}

// countTree returns the number of entries in t, including entries of nested
// subtrees, whose hash is a key of toMatch. Submodule and symlink entries are
// never counted.
//
// cache memoizes results by tree hash: it is consulted before any work is
// done and is updated with the count of t and of every subtree visited, so
// trees shared between many parents are only walked once.
//
// A nil t counts as zero. A subtree that cannot be loaded from the object
// store contributes zero to its parent's count instead of aborting the walk;
// the parent's (possibly undercounted) total is still cached.
func countTree(t *object.Tree, toMatch map[plumbing.Hash]bool, cache map[plumbing.Hash]int) (count int) {
	if t == nil {
		return 0
	}
	if val, ok := cache[t.Hash]; ok {
		return val
	}
	for _, e := range t.Entries {
		switch e.Mode {
		case filemode.Dir:
			if val, ok := cache[e.Hash]; ok {
				count += val
				continue
			}
			sub, err := t.Tree(e.Name)
			if err != nil {
				// The subtree is unreadable; skip it rather than recursing
				// into a nil tree. It is not cached, so a later lookup can
				// retry.
				continue
			}
			subcount := countTree(sub, toMatch, cache)
			cache[e.Hash] = subcount
			count += subcount
		case filemode.Submodule, filemode.Symlink:
			continue
		default:
			if _, ok := toMatch[e.Hash]; ok {
				count++
			}
		}
	}
	cache[t.Hash] = count
	return count
}

// CommitsNearPublishStrategy searches across repository for the input files
// provided for the nearest matching commit(s).
type CommitsNearPublishStrategy struct {
Expand Down Expand Up @@ -234,7 +310,13 @@ func main() {
if err != nil {
log.Fatal(errors.Wrap(err, "hash calculation"))
}
s := CommitsNearPublishStrategy{Published: published, Window: 7 * 24 * time.Hour}
var s searchStrategy
switch *strategy {
case "dynamic":
s = &DynamicTreeSearchStrategy{}
case "commits-near-publish":
s = &CommitsNearPublishStrategy{Published: published, Window: 7 * 24 * time.Hour}
}
closest, matched, total, err := s.Search(ctx, r, hashes)
if err != nil {
log.Fatal(errors.Wrap(err, "identity search"))
Expand Down

0 comments on commit 896e999

Please sign in to comment.