-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Follow git logic when parsing patch identities (#44)
When GitHub creates patches for Dependabot PRs, it generates a "From:" line that is not valid according to RFC 5322: the address spec contains unquoted special characters (the "[bot]" in "dependabot[bot]"). While the 'net/mail' parser makes some exceptions to the spec, this is not one of them, so parsing these patch headers fails. Git's 'mailinfo' command avoids this by only implementing the unquoting part of RFC 5322 and then applying a heuristic to separate the string into name and email values that seem reasonable. This commit does two things: 1. Reimplements ParsePatchIdentity to follow Git's logic, so that it can accept a wider range of inputs, including quoted strings. Strings accepted by the previous implementation parse in the same way with one exception: inputs that contain whitespace inside the angle brackets for an email address now use the email address as the name and drop any separate name component. 2. When parsing mail-formatted patches, use ParsePatchIdentity to parse the "From:" line instead of the 'net/mail' function.
- Loading branch information
Showing
4 changed files
with
321 additions
and
142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,83 +5,6 @@ import ( | |
"time" | ||
) | ||
|
||
func TestParsePatchIdentity(t *testing.T) { | ||
tests := map[string]struct { | ||
Input string | ||
Output PatchIdentity | ||
Err interface{} | ||
}{ | ||
"simple": { | ||
Input: "Morton Haypenny <[email protected]>", | ||
Output: PatchIdentity{ | ||
Name: "Morton Haypenny", | ||
Email: "[email protected]", | ||
}, | ||
}, | ||
"extraWhitespace": { | ||
Input: " Morton Haypenny <[email protected] > ", | ||
Output: PatchIdentity{ | ||
Name: "Morton Haypenny", | ||
Email: "[email protected]", | ||
}, | ||
}, | ||
"trailingCharacters": { | ||
Input: "Morton Haypenny <[email protected]> unrelated garbage", | ||
Output: PatchIdentity{ | ||
Name: "Morton Haypenny", | ||
Email: "[email protected]", | ||
}, | ||
}, | ||
"onlyEmail": { | ||
Input: "<[email protected]>", | ||
Output: PatchIdentity{ | ||
Name: "[email protected]", | ||
Email: "[email protected]", | ||
}, | ||
}, | ||
"emptyEmail": { | ||
Input: "Morton Haypenny <>", | ||
Output: PatchIdentity{ | ||
Name: "Morton Haypenny", | ||
Email: "", | ||
}, | ||
}, | ||
"missingEmail": { | ||
Input: "Morton Haypenny", | ||
Err: "invalid identity", | ||
}, | ||
"missingNameAndEmptyEmail": { | ||
Input: "<>", | ||
Err: "invalid identity", | ||
}, | ||
"empty": { | ||
Input: "", | ||
Err: "invalid identity", | ||
}, | ||
"unclosedEmail": { | ||
Input: "Morton Haypenny <[email protected]", | ||
Err: "unclosed email", | ||
}, | ||
} | ||
|
||
for name, test := range tests { | ||
t.Run(name, func(t *testing.T) { | ||
id, err := ParsePatchIdentity(test.Input) | ||
if test.Err != nil { | ||
assertError(t, test.Err, err, "parsing identity") | ||
return | ||
} | ||
if err != nil { | ||
t.Fatalf("unexpected error parsing identity: %v", err) | ||
} | ||
|
||
if test.Output != id { | ||
t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func TestParsePatchDate(t *testing.T) { | ||
expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC) | ||
|
||
|
@@ -349,6 +272,28 @@ Another body line. | |
Body: expectedBody, | ||
}, | ||
}, | ||
"mailboxRFC5322SpecialCharacters": { | ||
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 | ||
From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> | ||
Date: Sat, 11 Apr 2020 15:21:23 -0700 | ||
Subject: [PATCH] A sample commit to test header parsing | ||
The medium format shows the body, which | ||
may wrap on to multiple lines. | ||
Another body line. | ||
`, | ||
Header: PatchHeader{ | ||
SHA: expectedSHA, | ||
Author: &PatchIdentity{ | ||
Name: "dependabot[bot]", | ||
Email: "12345+dependabot[bot]@users.noreply.github.com", | ||
}, | ||
AuthorDate: expectedDate, | ||
Title: expectedTitle, | ||
Body: expectedBody, | ||
}, | ||
}, | ||
"mailboxAppendix": { | ||
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 | ||
From: Morton Haypenny <[email protected]> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
package gitdiff | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
) | ||
|
||
// PatchIdentity describes the author or committer of a patch.
type PatchIdentity struct {
	// Name is the display name of the person.
	Name string
	// Email is the person's email address, without angle brackets.
	Email string
}

// String formats the identity as "Name <Email>". An empty name is rendered
// as a pair of double quotes so the output always has a name component.
func (i PatchIdentity) String() string {
	displayName := `""`
	if i.Name != "" {
		displayName = i.Name
	}
	return fmt.Sprintf("%s <%s>", displayName, i.Email)
}
|
||
// ParsePatchIdentity parses a patch identity string. A patch identity contains | ||
// an email address and an optional name in [RFC 5322] format. This is either a | ||
// plain email adddress or a name followed by an address in angle brackets: | ||
// | ||
// [email protected] | ||
// Author Name <[email protected]> | ||
// | ||
// If the input is not one of these formats, ParsePatchIdentity applies a | ||
// heuristic to separate the name and email portions. If both the name and | ||
// email are missing or empty, ParsePatchIdentity returns an error. It | ||
// otherwise does not validate the result. | ||
// | ||
// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 | ||
func ParsePatchIdentity(s string) (PatchIdentity, error) { | ||
s = normalizeSpace(s) | ||
s = unquotePairs(s) | ||
|
||
var name, email string | ||
if at := strings.IndexByte(s, '@'); at >= 0 { | ||
start, end := at, at | ||
for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { | ||
start-- | ||
} | ||
for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { | ||
end++ | ||
} | ||
email = s[start+1 : end] | ||
|
||
// Adjust the boundaries so that we drop angle brackets, but keep | ||
// spaces when removing the email to form the name. | ||
if start < 0 || s[start] != '<' { | ||
start++ | ||
} | ||
if end >= len(s) || s[end] != '>' { | ||
end-- | ||
} | ||
name = s[:start] + s[end+1:] | ||
} else { | ||
start, end := 0, 0 | ||
for i := 0; i < len(s); i++ { | ||
if s[i] == '<' && start == 0 { | ||
start = i + 1 | ||
} | ||
if s[i] == '>' && start > 0 { | ||
end = i | ||
break | ||
} | ||
} | ||
if start > 0 && end >= start { | ||
email = strings.TrimSpace(s[start:end]) | ||
name = s[:start-1] | ||
} | ||
} | ||
|
||
// After extracting the email, the name might contain extra whitespace | ||
// again and may be surrounded by comment characters. The git source gives | ||
// these examples of when this can happen: | ||
// | ||
// "Name <email@domain>" | ||
// "email@domain (Name)" | ||
// "Name <email@domain> (Comment)" | ||
// | ||
name = normalizeSpace(name) | ||
if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { | ||
name = name[1 : len(name)-1] | ||
} | ||
name = strings.TrimSpace(name) | ||
|
||
// If the name is empty or contains email-like characters, use the email | ||
// instead (assuming one exists) | ||
if name == "" || strings.ContainsAny(name, "@<>") { | ||
name = email | ||
} | ||
|
||
if name == "" && email == "" { | ||
return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) | ||
} | ||
return PatchIdentity{Name: name, Email: email}, nil | ||
} | ||
|
||
// unquotePairs processes the RFC 5322 "quoted-string" and "comment" tokens in
// s, removing the backslash from every "quoted-pair" (backslash-escaped
// character) found inside them. The double quotes around quoted strings are
// dropped as well, while the parentheses that delimit comments are kept.
// Backslashes outside of quoted strings and comments are left untouched.
func unquotePairs(s string) string {
	var (
		b        strings.Builder
		inQuote  bool // inside a quoted-string
		depth    int  // nesting depth of comments
		takeNext bool // previous byte was an escaping backslash
	)

	for i := 0; i < len(s); i++ {
		c := s[i]

		// An escaped byte is copied verbatim and never changes state.
		if takeNext {
			takeNext = false
			b.WriteByte(c)
			continue
		}

		switch {
		case c == '\\' && (inQuote || depth > 0):
			// quoted-pair is only recognized in quoted-string/comment;
			// the backslash itself is discarded.
			takeNext = true
			continue
		case c == '"' && depth == 0:
			// Quote characters toggle the state and are discarded.
			inQuote = !inQuote
			continue
		case c == '(' && !inQuote:
			depth++
		case c == ')' && depth > 0:
			depth--
		}
		b.WriteByte(c)
	}
	return b.String()
}
|
||
// normalizeSpace trims leading and trailing whitespace from s and converts | ||
// inner sequences of one or more whitespace characters to single spaces. | ||
func normalizeSpace(s string) string { | ||
var sb strings.Builder | ||
for i := 0; i < len(s); i++ { | ||
c := s[i] | ||
if !isRFC5332Space(c) { | ||
if sb.Len() > 0 && isRFC5332Space(s[i-1]) { | ||
sb.WriteByte(' ') | ||
} | ||
sb.WriteByte(c) | ||
} | ||
} | ||
return sb.String() | ||
} | ||
|
||
// isRFC5332Space reports whether c is one of the whitespace bytes recognized
// by this parser: space, horizontal tab, carriage return, or line feed.
func isRFC5332Space(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
Oops, something went wrong.