diff --git a/GO-LICENSE b/GO-LICENSE new file mode 100644 index 0000000..7448756 --- /dev/null +++ b/GO-LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index c6c2d99..0d6ec4d 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,12 @@ go-pathspec =========== A package for pattern matching of file paths. Currently the gitignore and chefignore patterns are included in the package. + +## Author + +Sander van Harmelen () + +## License + +The file 'chefignore.go' is a customized version of which is licensed under a BSD-style license. See the file header and/or the GO-LICENSE file for more info. +The rest of the package is licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at diff --git a/chefignore.go b/chefignore.go new file mode 100644 index 0000000..0f49fa0 --- /dev/null +++ b/chefignore.go @@ -0,0 +1,265 @@ +// +// Copyright (c) 2010, 2012 The Go Authors. All rights reserved. +// +// Copyright (c) 2014 Sander van Harmelen. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Orginal file [1] customized to fit our needs for the pathspec package. +// [1] https://code.google.com/p/go/source/browse/src/pkg/path/match.go +// + +package pathspec + +import ( + "bufio" + "errors" + "io" + "runtime" + "strings" + "unicode/utf8" +) + +// ErrBadPattern indicates a globbing pattern was malformed. +var ErrBadPattern = errors.New("syntax error in pattern") + +// ChefIgnore returns true if name matches the shell file name pattern. +// The pattern syntax is: +// +// pattern: +// { term } +// term: +// '*' matches any sequence characters +// '?' matches any single character +// '[' [ '^' ] { character-range } ']' +// character class (must be non-empty) +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range: +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// ChefIgnore requires pattern to match all of name, not just a +// substring. The only possible returned error is ErrBadPattern, +// when pattern is malformed. +// +// On Windows, escaping is disabled. Instead, '\\' is treated as +// path separator. + +func ChefIgnore(content io.Reader, name string) (ignore bool, err error) { + scanner := bufio.NewScanner(content) + for scanner.Scan() { + pattern := strings.TrimSpace(scanner.Text()) + if len(pattern) == 0 || pattern[0] == '#' { + continue + } + if ignore, err = match(pattern, name); ignore || err != nil { + return ignore, err + } + } + return false, scanner.Err() +} + +func match(pattern, name string) (ignore bool, err error) { +Pattern: + for len(pattern) > 0 { + var star bool + var chunk string + star, chunk, pattern = scanChunk(pattern) + if star && chunk == "" { + // Trailing * matches rest of string + return true, nil + } + // Look for match at current position. + t, ok, err := matchChunk(chunk, name) + // if we're the last chunk, make sure we've exhausted the name + // otherwise we'll give a false result even if we could still match + // using the star + if ok && (len(t) == 0 || len(pattern) > 0) { + name = t + continue + } + if err != nil { + return false, err + } + if star { + // Look for match skipping i+1 bytes. + for i := 0; i < len(name); i++ { + t, ok, err := matchChunk(chunk, name[i+1:]) + if ok { + // if we're the last chunk, make sure we exhausted the name + if len(pattern) == 0 && len(t) > 0 { + continue + } + name = t + continue Pattern + } + if err != nil { + return false, err + } + } + } + return false, nil + } + return len(name) == 0, nil +} + +// scanChunk gets the next segment of pattern, which is a non-star string +// possibly preceded by a star. +func scanChunk(pattern string) (star bool, chunk, rest string) { + for len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:] + star = true + } + inrange := false + var i int +Scan: + for i = 0; i < len(pattern); i++ { + switch pattern[i] { + case '\\': + if runtime.GOOS != "windows" { + // error check handled in matchChunk: bad pattern. + if i+1 < len(pattern) { + i++ + } + } + case '[': + inrange = true + case ']': + inrange = false + case '*': + if !inrange { + break Scan + } + } + } + return star, pattern[0:i], pattern[i:] +} + +// matchChunk checks whether chunk matches the beginning of s. +// If so, it returns the remainder of s (after the match). +// Chunk is all single-character operators: literals, char classes, and ?. +func matchChunk(chunk, s string) (rest string, ok bool, err error) { + for len(chunk) > 0 { + if len(s) == 0 { + return + } + switch chunk[0] { + case '[': + // character class + r, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + // We can't end right after '[', we're expecting at least + // a closing bracket and possibly a caret. + if len(chunk) == 0 { + err = ErrBadPattern + return + } + // possibly negated + negated := chunk[0] == '^' + if negated { + chunk = chunk[1:] + } + // parse all ranges + match := false + nrange := 0 + for { + if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { + chunk = chunk[1:] + break + } + var lo, hi rune + if lo, chunk, err = getEsc(chunk); err != nil { + return + } + hi = lo + if chunk[0] == '-' { + if hi, chunk, err = getEsc(chunk[1:]); err != nil { + return + } + } + if lo <= r && r <= hi { + match = true + } + nrange++ + } + if match == negated { + return + } + + case '?': + _, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + + case '\\': + if runtime.GOOS != "windows" { + chunk = chunk[1:] + if len(chunk) == 0 { + err = ErrBadPattern + return + } + } + fallthrough + + default: + if chunk[0] != s[0] { + return + } + s = s[1:] + chunk = chunk[1:] + } + } + return s, true, nil +} + +// getEsc gets a possibly-escaped character from chunk, for a character class. +func getEsc(chunk string) (r rune, nchunk string, err error) { + if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { + err = ErrBadPattern + return + } + if chunk[0] == '\\' && runtime.GOOS != "windows" { + chunk = chunk[1:] + if len(chunk) == 0 { + err = ErrBadPattern + return + } + } + r, n := utf8.DecodeRuneInString(chunk) + if r == utf8.RuneError && n == 1 { + err = ErrBadPattern + } + nchunk = chunk[n:] + if len(nchunk) == 0 { + err = ErrBadPattern + } + return +} diff --git a/gitignore.go b/gitignore.go new file mode 100644 index 0000000..67ba297 --- /dev/null +++ b/gitignore.go @@ -0,0 +1,267 @@ +// +// Copyright 2014, Sander van Harmelen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package pathspec + +import ( + "bufio" + "bytes" + "io" + "regexp" + "strings" +) + +type GitIgnorePattern struct { + Regex string + Include bool +} + +// A blank line matches no files, so it can serve as a separator for readability. +// +// A line starting with # serves as a comment. Put a backslash ("\") in front of +// the first hash for patterns that begin with a hash. +// +// An optional prefix "!" which negates the pattern; any matching file excluded +// by a previous pattern will become included again. If a negated pattern matches, +// this will override lower precedence patterns sources. Put a backslash ("\") in +// front of the first "!" for patterns that begin with a literal "!", for example, +// "\!important!.txt". +// +// If the pattern ends with a slash, it is removed for the purpose of the following +// description, but it would only find a match with a directory. In other words, +// foo/ will match a directory foo and paths underneath it, but will not match a +// regular file or a symbolic link foo (this is consistent with the way how pathspec +// works in general in Git). +// +// If the pattern does not contain a slash /, Git treats it as a shell glob pattern +// and checks for a match against the pathname relative to the location of the +// .gitignore file (relative to the toplevel of the work tree if not from a +// .gitignore file). +// +// Otherwise, Git treats the pattern as a shell glob suitable for consumption by +// fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will not match +// a / in the pathname. For example, "Documentation/*.html" matches +// "Documentation/git.html" but not "Documentation/ppc/ppc.html" or/ +// "tools/perf/Documentation/perf.html". +// +// A leading slash matches the beginning of the pathname. For example, "/*.c" +// matches "cat-file.c" but not "mozilla-sha1/sha1.c". +// +// Two consecutive asterisks ("**") in patterns matched against full pathname +// may have special meaning: +// +// A leading "**" followed by a slash means match in all directories. For example, +// "**/foo" matches file or directory "foo" anywhere, the same as pattern "foo". +// "**/foo/bar" matches file or directory "bar" anywhere that is directly under +// directory "foo". +// +// A trailing "/" matches everything inside. For example, "abc/" matches all files +// inside directory "abc", relative to the location of the .gitignore file, with +// infinite depth. +// +// A slash followed by two consecutive asterisks then a slash matches zero or more +// directories. For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on. +// +// Other consecutive asterisks are considered invalid. + +func GitIgnore(content io.Reader, name string) (ignore bool, err error) { + scanner := bufio.NewScanner(content) + + for scanner.Scan() { + pattern := strings.TrimSpace(scanner.Text()) + if len(pattern) == 0 || pattern[0] == '#' { + continue + } + p := parsePattern(pattern) + match, err := regexp.MatchString(p.Regex, name) + if err != nil { + return ignore, err + } + if match { + if p.Include { + return false, scanner.Err() + } + ignore = true + } + } + return ignore, scanner.Err() +} + +func parsePattern(pattern string) *GitIgnorePattern { + p := &GitIgnorePattern{} + + // An optional prefix "!" which negates the pattern; any matching file + // excluded by a previous pattern will become included again. + if strings.HasPrefix(pattern, "!") { + pattern = pattern[1:] + p.Include = true + } else { + p.Include = false + } + + // Remove leading back-slash escape for escaped hash ('#') or + // exclamation mark ('!'). + if strings.HasPrefix(pattern, "\\") { + pattern = pattern[1:] + } + + // Split pattern into segments. + pattern_segs := strings.Split(pattern, "/") + + // A pattern beginning with a slash ('/') will only match paths + // directly on the root directory instead of any descendant paths. + // So remove empty first segment to make pattern absoluut to root. + // A pattern without a beginning slash ('/') will match any + // descendant path. This is equivilent to "**/{pattern}". So + // prepend with double-asterisks to make pattern relative to + // root. + if pattern_segs[0] == "" { + pattern_segs = pattern_segs[1:] + } else if pattern_segs[0] != "**" { + pattern_segs = append([]string{"**"}, pattern_segs...) + } + + // A pattern ending with a slash ('/') will match all descendant + // paths of if it is a directory but not if it is a regular file. + // This is equivilent to "{pattern}/**". So, set last segment to + // double asterisks to include all descendants. + if pattern_segs[len(pattern_segs)-1] == "" { + pattern_segs[len(pattern_segs)-1] = "**" + } + + // Build regular expression from pattern. + var expr bytes.Buffer + expr.WriteString("^") + need_slash := false + + for i, seg := range pattern_segs { + switch seg { + case "**": + switch { + case i == 0 && i == len(pattern_segs)-1: + // A pattern consisting solely of double-asterisks ('**') + // will match every path. + expr.WriteString(".+") + case i == 0: + // A normalized pattern beginning with double-asterisks + // ('**') will match any leading path segments. + expr.WriteString("(?:.+/)?") + need_slash = false + case i == len(pattern_segs)-1: + // A normalized pattern ending with double-asterisks ('**') + // will match any trailing path segments. + expr.WriteString("/.+") + default: + // A pattern with inner double-asterisks ('**') will match + // multiple (or zero) inner path segments. + expr.WriteString("(?:/.+)?") + need_slash = true + } + case "*": + // Match single path segment. + if need_slash { + expr.WriteString("/") + } + expr.WriteString("[^/]+") + need_slash = true + default: + // Match segment glob pattern. + if need_slash { + expr.WriteString("/") + } + expr.WriteString(translateGlob(seg)) + need_slash = true + } + } + expr.WriteString("$") + p.Regex = expr.String() + return p +} + +// NOTE: This is derived from `fnmatch.translate()` and is similar to +// the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set. +func translateGlob(glob string) string { + var regex bytes.Buffer + escape := false + + for i := 0; i < len(glob); i++ { + char := glob[i] + // Escape the character. + switch { + case escape: + escape = false + regex.WriteString(regexp.QuoteMeta(string(char))) + case char == '\\': + // Escape character, escape next character. + escape = true + case char == '*': + // Multi-character wildcard. Match any string (except slashes), + // including an empty string. + regex.WriteString("[^/]*") + case char == '?': + // Single-character wildcard. Match any single character (except + // a slash). + regex.WriteString("[^/]") + case char == '[': + regex.WriteString(translateBraketExpression(&i, glob)) + default: + // Regular character, escape it for regex. + regex.WriteString(regexp.QuoteMeta(string(char))) + } + } + return regex.String() +} + +// Braket expression wildcard. Except for the beginning +// exclamation mark, the whole braket expression can be used +// directly as regex but we have to find where the expression +// ends. +// - "[][!]" matchs ']', '[' and '!'. +// - "[]-]" matchs ']' and '-'. +// - "[!]a-]" matchs any character except ']', 'a' and '-'. +func translateBraketExpression(i *int, glob string) string { + regex := string(glob[*i]) + *i++ + j := *i + + // Pass brack expression negation. + if j < len(glob) && glob[j] == '!' { + j++ + } + // Pass first closing braket if it is at the beginning of the + // expression. + if j < len(glob) && glob[j] == ']' { + j++ + } + // Find closing braket. Stop once we reach the end or find it. + for j < len(glob) && glob[j] != ']' { + j++ + } + + if j < len(glob) { + if glob[*i] == '!' { + regex = regex + "^" + *i++ + } + regex = regexp.QuoteMeta(glob[*i:j]) + *i = j + } else { + // Failed to find closing braket, treat opening braket as a + // braket literal instead of as an expression. + regex = regexp.QuoteMeta(string(glob[*i])) + } + return regex +}