Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add go grep package to replace OS-specific grep commands for builtin provider #737

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/antchfx/jsonquery v1.3.0
github.com/antchfx/xmlquery v1.3.12
github.com/bombsimon/logrusr/v3 v3.0.0
github.com/dlclark/regexp2 v1.11.4
github.com/go-logr/logr v1.2.3
github.com/golang-jwt/jwt/v5 v5.2.1
github.com/jhump/protoreflect v1.16.0
Expand All @@ -18,6 +19,7 @@ require (
go.lsp.dev/uri v0.3.0
go.opentelemetry.io/otel/trace v1.11.2
golang.org/x/oauth2 v0.16.0
golang.org/x/sync v0.6.0
google.golang.org/grpc v1.62.1
google.golang.org/protobuf v1.33.1-0.20240408130810-98873a205002
gopkg.in/yaml.v2 v2.4.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
Expand Down
201 changes: 93 additions & 108 deletions provider/internal/builtin/service_client.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
package builtin

import (
"bufio"
"context"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"

"github.com/antchfx/jsonquery"
"github.com/antchfx/xmlquery"
"github.com/antchfx/xpath"
"github.com/dlclark/regexp2"
"github.com/go-logr/logr"
"github.com/konveyor/analyzer-lsp/lsp/protocol"
"github.com/konveyor/analyzer-lsp/provider"
"github.com/konveyor/analyzer-lsp/tracing"
"go.lsp.dev/uri"
"golang.org/x/sync/errgroup"
"gopkg.in/yaml.v2"
)

Expand Down Expand Up @@ -107,52 +108,31 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
return response, fmt.Errorf("could not parse provided regex pattern as string: %v", conditionInfo)
}

var outputBytes []byte
// Runs on Windows using PowerShell.exe and on Unix-based systems using grep.
outputBytes, err := runOSSpecificGrepCommand(c.Pattern, p.config.Location, cond.ProviderContext)
patternRegex, err := regexp2.Compile(c.Pattern, regexp2.None)
if err != nil {
return response, err
return response, fmt.Errorf("could not compile provided regex pattern '%s': %v", c.Pattern, err)
}
matches := []string{}
outputString := strings.TrimSpace(string(outputBytes))
if outputString != "" {
matches = append(matches, strings.Split(outputString, "\n")...)

matches, err := parallelWalk(p.config.Location, patternRegex)
if err != nil {
return response, err
}

for _, match := range matches {
var pieces []string
pieces, err := parseGrepOutputForFileContent(match)
if err != nil {
return response, fmt.Errorf("could not parse grep output '%s' for the Pattern '%v': %v ", match, c.Pattern, err)
}

containsFile, err := provider.FilterFilePattern(c.FilePattern, pieces[0])
containsFile, err := provider.FilterFilePattern(c.FilePattern, match.positionParams.TextDocument.URI)
if err != nil {
return response, err
}
if !containsFile {
continue
}

absPath, err := filepath.Abs(pieces[0])
if err != nil {
absPath = pieces[0]
}

if !p.isFileIncluded(absPath) {
continue
}

lineNumber, err := strconv.Atoi(pieces[1])
if err != nil {
return response, fmt.Errorf("cannot convert line number string to integer")
}
lineNumber := int(match.positionParams.Position.Line)

response.Incidents = append(response.Incidents, provider.IncidentContext{
FileURI: uri.File(absPath),
FileURI: uri.URI(match.positionParams.TextDocument.URI),
LineNumber: &lineNumber,
Variables: map[string]interface{}{
"matchingText": pieces[2],
"matchingText": match.match,
},
CodeLocation: &provider.Location{
StartPosition: provider.Position{Line: float64(lineNumber)},
Expand Down Expand Up @@ -571,89 +551,94 @@ func (b *builtinServiceClient) isFileIncluded(absolutePath string) bool {
return false
}

func parseGrepOutputForFileContent(match string) ([]string, error) {
// This parses the PowerShell/grep output, which has the form
// "Filepath:Linenumber:Matchingtext", and returns a string slice of path, line number and matching text.
// It handles both Windows and Unix-based file paths, e.g. "C:\path\to\file" and "/path/to/file".
re, err := regexp.Compile(`^(.*?):(\d+):(.*)$`)
// walkResult describes a single regex match found while scanning a file.
type walkResult struct {
// positionParams carries the URI of the file containing the match together
// with the line and character position at which the match was found.
positionParams protocol.TextDocumentPositionParams
// match is the text that the pattern matched.
match string
}

func parallelWalk(location string, regex *regexp2.Regexp) ([]walkResult, error) {
var positions []walkResult
var positionsMu sync.Mutex
var eg errgroup.Group

// Set a parallelism limit to avoid hitting limits related to opening too many files.
// On Windows, this can show up as a runtime failure due to a thread limit.
eg.SetLimit(256)

err := filepath.Walk(location, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}

if f.Mode().IsRegular() {
eg.Go(func() error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to put a limit on this across the engine, since this could spawn a large number of goroutines. I think just setting the limit on the errgroup.Group should be enough.

pos, err := processFile(path, regex)
if err != nil {
return err
}

positionsMu.Lock()
defer positionsMu.Unlock()
positions = append(positions, pos...)
return nil
})
}

return nil
})
if err != nil {
return nil, fmt.Errorf("failed to compile regular expression: %v", err)
return nil, err
}
submatches := re.FindStringSubmatch(match)
if len(submatches) != 4 {
return nil, fmt.Errorf(
"malformed response from file search, cannot parse result '%s' with pattern %#q", match, re)

if err := eg.Wait(); err != nil {
return nil, err
}
return submatches[1:], nil

return positions, nil
}

func runOSSpecificGrepCommand(pattern string, location string, providerContext provider.ProviderContext) ([]byte, error) {
var outputBytes []byte
var err error
var utilName string

if runtime.GOOS == "windows" {
utilName = "powershell.exe"
// Windows does not have grep, so we use PowerShell.exe's Select-String instead
// This is a workaround until we can find a better solution
psScript := `
$pattern = $env:PATTERN
$location = $env:FILEPATH
Get-ChildItem -Path $location -Recurse -File | ForEach-Object {
$file = $_
# Search for the pattern in the file
Select-String -Path $file.FullName -Pattern $pattern -AllMatches | ForEach-Object {
foreach ($match in $_.Matches) {
"{0}:{1}:{2}" -f $file.FullName, $_.LineNumber, $match.Value
}
}
}`
findstr := exec.Command(utilName, "-Command", psScript)
findstr.Env = append(os.Environ(), "PATTERN="+pattern, "FILEPATH="+location)
outputBytes, err = findstr.Output()

// TODO eventually replace with platform agnostic solution
} else if runtime.GOOS == "darwin" {
isEscaped := isSlashEscaped(pattern)
escapedPattern := pattern
// some rules already escape '/' while others do not
if !isEscaped {
escapedPattern = strings.ReplaceAll(escapedPattern, "/", "\\/")
}
// escape other chars used in perl pattern
escapedPattern = strings.ReplaceAll(escapedPattern, "'", "'\\''")
escapedPattern = strings.ReplaceAll(escapedPattern, "$", "\\$")
cmd := fmt.Sprintf(
`find %v -type f -exec perl -ne 'print "$ARGV:$.:$1\n" if /%v/; close ARGV if eof;' {} +`,
location, escapedPattern,
)
findstr := exec.Command("/bin/sh", "-c", cmd)
outputBytes, err = findstr.Output()

} else {
grep := exec.Command("grep", "-o", "-n", "-R", "-P", pattern)
if ok, paths := providerContext.GetScopedFilepaths(); ok {
grep.Args = append(grep.Args, paths...)
} else {
grep.Args = append(grep.Args, location)
}
outputBytes, err = grep.Output()
}
func processFile(path string, regex *regexp2.Regexp) ([]walkResult, error) {
f, err := os.Open(path)
if err != nil {
if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() == 1 {
return nil, nil
}
return nil, fmt.Errorf("could not run '%s' with provided pattern %+v", utilName, err)
return nil, err
}
defer f.Close()

return outputBytes, nil
}
var r []walkResult

func isSlashEscaped(str string) bool {
for i := 0; i < len(str); i++ {
if str[i] == '/' && i > 0 && str[i-1] == '\\' {
return true
scanner := bufio.NewScanner(f)
lineNumber := 1
for scanner.Scan() {
line := scanner.Text()
match, err := regex.FindStringMatch(line)
if err != nil {
return nil, err
}
for match != nil {
absPath, err := filepath.Abs(path)
if err != nil {
return nil, err
}

r = append(r, walkResult{
positionParams: protocol.TextDocumentPositionParams{
TextDocument: protocol.TextDocumentIdentifier{
URI: fmt.Sprintf("file:///%s", filepath.ToSlash(absPath)),
},
Position: protocol.Position{
Line: uint32(lineNumber),
Character: uint32(match.Index),
},
},
match: match.String(),
})
match, err = regex.FindNextMatch(match)
if err != nil {
return nil, err
}
}
lineNumber++
}
return false

return r, nil
}
14 changes: 0 additions & 14 deletions provider/internal/builtin/service_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ package builtin

import (
"context"
"path/filepath"
"reflect"
"sync"
"testing"

"github.com/go-logr/logr/testr"
"github.com/konveyor/analyzer-lsp/engine"
"github.com/konveyor/analyzer-lsp/provider"
)

Expand Down Expand Up @@ -124,15 +122,3 @@ func Test_builtinServiceClient_filterByIncludedPaths(t *testing.T) {
})
}
}

func BenchmarkRunOSSpecificGrepCommand(b *testing.B) {
for i := 0; i < b.N; i++ {
path, err := filepath.Abs("../../../external-providers/java-external-provider/examples/customers-tomcat-legacy/")
if err != nil {
return
}
runOSSpecificGrepCommand("Apache License 1.1",
path,
provider.ProviderContext{Template: map[string]engine.ChainTemplate{}})
}
}
Loading