-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update parser to support editions syntax #2000
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
syntax = "proto3"; | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
syntax = "proto3"; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
syntax = "proto3"; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,8 +24,7 @@ var base10Re = /^[1-9][0-9]*$/, | |
base8NegRe = /^-?0[0-7]+$/, | ||
numberRe = /^(?![eE])[0-9]*(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?$/, | ||
nameRe = /^[a-zA-Z_][a-zA-Z_0-9]*$/, | ||
typeRefRe = /^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*$/, | ||
fqTypeRefRe = /^(?:\.[a-zA-Z_][a-zA-Z_0-9]*)+$/; | ||
typeRefRe = /^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*$/; | ||
|
||
/** | ||
* Result object returned from {@link parse}. | ||
|
@@ -82,6 +81,7 @@ function parse(source, root, options) { | |
imports, | ||
weakImports, | ||
syntax, | ||
edition, | ||
isProto3 = false; | ||
|
||
var ptr = root; | ||
|
@@ -111,7 +111,59 @@ function parse(source, root, options) { | |
return values.join(""); | ||
} | ||
|
||
function readValue(acceptTypeRef) { | ||
function readIdentifier(optionalFirstToken) { | ||
var token = optionalFirstToken || next(); | ||
var identifier = token; | ||
|
||
if (token === ".") { // fully qualified name | ||
token = next(); | ||
identifier += token; | ||
} | ||
|
||
/* istanbul ignore if */ | ||
if (!nameRe.test(token)) | ||
throw illegal(identifier, "identifier"); | ||
|
||
while (skip(".", true)) { | ||
if (skip("(", true)) { | ||
push("."); | ||
push("("); | ||
break; | ||
} | ||
identifier += "."; | ||
token = next(); | ||
identifier += token; | ||
|
||
/* istanbul ignore if */ | ||
if (!nameRe.test(token)) | ||
throw illegal(identifier, "identifier"); | ||
} | ||
return identifier; | ||
} | ||
|
||
function readOptionIdentifier() { | ||
var identifier = ""; | ||
do { | ||
if (skip("(", true)) { | ||
identifier += "("; | ||
identifier += readIdentifier(); | ||
identifier += next(); | ||
|
||
/* istanbul ignore if */ | ||
if (!identifier.endsWith(")")) | ||
throw illegal(identifier, "identifier"); | ||
} else { | ||
identifier += readIdentifier(); | ||
} | ||
if (!skip(".", true)) { | ||
break; | ||
} | ||
identifier += "."; | ||
} while (true); // eslint-disable-line | ||
return identifier; | ||
} | ||
|
||
function readValue(acceptIdentifier) { | ||
var token = next(); | ||
switch (token) { | ||
case "'": | ||
|
@@ -128,8 +180,8 @@ function parse(source, root, options) { | |
} catch (e) { | ||
|
||
/* istanbul ignore else */ | ||
if (acceptTypeRef && typeRefRe.test(token)) | ||
return token; | ||
if (acceptIdentifier && nameRe.test(token)) | ||
return readIdentifier(token); // `ENUM_VALUE` | ||
|
||
/* istanbul ignore next */ | ||
throw illegal(token, "value"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think readRanges needs to be updated as well. In edition 2023 we changed the grammar of reserved field/enum value names to be identifiers instead of strings. This avoids accidental concatenation |
||
|
@@ -170,6 +222,9 @@ function parse(source, root, options) { | |
sign = -1; | ||
token = token.substring(1); | ||
} | ||
if (skip(".", true)) { | ||
token += "." + next(); | ||
} | ||
switch (token) { | ||
case "inf": case "INF": case "Inf": | ||
return sign * Infinity; | ||
|
@@ -224,12 +279,7 @@ function parse(source, root, options) { | |
if (pkg !== undefined) | ||
throw illegal("package"); | ||
|
||
pkg = next(); | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(pkg)) | ||
throw illegal(pkg, "name"); | ||
|
||
pkg = readIdentifier(); | ||
ptr = ptr.define(pkg); | ||
skip(";"); | ||
} | ||
|
@@ -260,12 +310,26 @@ function parse(source, root, options) { | |
isProto3 = syntax === "proto3"; | ||
|
||
/* istanbul ignore if */ | ||
if (!isProto3 && syntax !== "proto2") | ||
if (!isProto3 && syntax !== "proto2" || edition) | ||
throw illegal(syntax, "syntax"); | ||
|
||
skip(";"); | ||
} | ||
|
||
function parseEdition() { | ||
skip("="); | ||
edition = readString(); | ||
isProto3 = true; | ||
|
||
/* istanbul ignore if */ | ||
if (syntax) | ||
throw illegal(syntax, "edition"); | ||
|
||
syntax = "proto3"; | ||
isProto3 = true; | ||
Comment on lines
+328
to
+329
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems strange. Why treat editions files like proto3? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only added the fallback to produce valid schemas that can be tested, including during resolve. I guess another option is to test via the parser only, and for now produce an "editions are not supported" error during resolve? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see, this is just a temporary test hook? That makes sense, but I definitely like the failure approach better. That way nobody starts depending on this behavior |
||
skip(";"); | ||
} | ||
|
||
function parseCommon(parent, token) { | ||
switch (token) { | ||
|
||
|
@@ -360,7 +424,7 @@ function parse(source, root, options) { | |
|
||
default: | ||
/* istanbul ignore if */ | ||
if (!isProto3 || !typeRefRe.test(token)) | ||
if (!isProto3 || !nameRe.test(token)) | ||
throw illegal(token); | ||
|
||
push(token); | ||
|
@@ -372,26 +436,12 @@ function parse(source, root, options) { | |
} | ||
|
||
function parseField(parent, rule, extend) { | ||
var type = next(); | ||
if (type === "group") { | ||
if (skip("group", true)) { | ||
parseGroup(parent, rule); | ||
return; | ||
} | ||
// Type names can consume multiple tokens, in multiple variants: | ||
// package.subpackage field tokens: "package.subpackage" [TYPE NAME ENDS HERE] "field" | ||
// package . subpackage field tokens: "package" "." "subpackage" [TYPE NAME ENDS HERE] "field" | ||
// package. subpackage field tokens: "package." "subpackage" [TYPE NAME ENDS HERE] "field" | ||
// package .subpackage field tokens: "package" ".subpackage" [TYPE NAME ENDS HERE] "field" | ||
// Keep reading tokens until we get a type name with no period at the end, | ||
// and the next token does not start with a period. | ||
while (type.endsWith(".") || peek().startsWith(".")) { | ||
type += next(); | ||
} | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(type)) | ||
throw illegal(type, "type"); | ||
|
||
var type = readIdentifier(); | ||
var name = next(); | ||
|
||
/* istanbul ignore if */ | ||
|
@@ -489,19 +539,14 @@ function parse(source, root, options) { | |
|
||
function parseMapField(parent) { | ||
skip("<"); | ||
var keyType = next(); | ||
var keyType = readIdentifier(); | ||
|
||
/* istanbul ignore if */ | ||
if (types.mapKey[keyType] === undefined) | ||
throw illegal(keyType, "type"); | ||
|
||
skip(","); | ||
var valueType = next(); | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(valueType)) | ||
throw illegal(valueType, "type"); | ||
|
||
var valueType = readIdentifier(); | ||
skip(">"); | ||
var name = next(); | ||
|
||
|
@@ -602,30 +647,26 @@ function parse(source, root, options) { | |
} | ||
|
||
function parseOption(parent, token) { | ||
var isCustom = skip("(", true); | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(token = next())) | ||
throw illegal(token, "name"); | ||
|
||
var name = token; | ||
var option = name; | ||
var identifier = readOptionIdentifier(); | ||
|
||
// Historically, `(some.option).prop` has been interpreted as a property | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sounds like exactly what it is today. What's the change in behavior here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is mostly a comment I found useful to explain why the somewhat strange code exists in the first place. Say, someone stumbles over this part, wonders why it's there, and decides to make a PR to remove it because it doesn't seem useful to them. Other than that, there's indeed no change in behavior other than also applying the fallback to syntax previously not supported, e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I think I misunderstood what was happening here. FWIW it looks like you still don't support multiple extensions (e.g. |
||
// assignment on `some.option`. While the parser understands additional | ||
// option syntax nowadays, there's still no structural knowledge of the | ||
// respective extension. Backwards compatibility can be retained, though: | ||
var optionName = identifier; | ||
var propStart = identifier.lastIndexOf(")."); | ||
var propName; | ||
|
||
if (isCustom) { | ||
skip(")"); | ||
name = "(" + name + ")"; | ||
option = name; | ||
token = peek(); | ||
if (fqTypeRefRe.test(token)) { | ||
propName = token.slice(1); //remove '.' before property name | ||
name += token; | ||
next(); | ||
if (~propStart) { | ||
token = identifier.substring(propStart + 2); | ||
if (typeRefRe.test(token)) { | ||
propName = token; | ||
optionName = identifier.substring(0, propStart + 1); | ||
} | ||
} | ||
|
||
skip("="); | ||
var optionValue = parseOptionValue(parent, name); | ||
setParsedOption(parent, option, optionValue, propName); | ||
var optionValue = parseOptionValue(parent, identifier); | ||
setParsedOption(parent, optionName, optionValue, propName); | ||
} | ||
|
||
function parseOptionValue(parent, name) { | ||
|
@@ -638,6 +679,7 @@ function parse(source, root, options) { | |
if (!nameRe.test(token = next())) { | ||
throw illegal(token, "name"); | ||
} | ||
/* istanbul ignore if */ | ||
if (token === null) { | ||
throw illegal(token, "end of input"); | ||
} | ||
|
@@ -750,20 +792,12 @@ function parse(source, root, options) { | |
if (skip("stream", true)) | ||
requestStream = true; | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(token = next())) | ||
throw illegal(token); | ||
|
||
requestType = token; | ||
requestType = readIdentifier(); | ||
skip(")"); skip("returns"); skip("("); | ||
if (skip("stream", true)) | ||
responseStream = true; | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(token = next())) | ||
throw illegal(token); | ||
|
||
responseType = token; | ||
responseType = readIdentifier(); | ||
skip(")"); | ||
|
||
var method = new Method(name, type, requestType, responseType, requestStream, responseStream); | ||
|
@@ -782,11 +816,7 @@ function parse(source, root, options) { | |
} | ||
|
||
function parseExtension(parent, token) { | ||
|
||
/* istanbul ignore if */ | ||
if (!typeRefRe.test(token = next())) | ||
throw illegal(token, "reference"); | ||
|
||
token = readIdentifier(); | ||
var reference = token; | ||
ifBlock(null, function parseExtension_block(token) { | ||
switch (token) { | ||
|
@@ -807,7 +837,7 @@ function parse(source, root, options) { | |
|
||
default: | ||
/* istanbul ignore if */ | ||
if (!isProto3 || !typeRefRe.test(token)) | ||
if (!isProto3 || !nameRe.test(token)) | ||
throw illegal(token); | ||
push(token); | ||
parseField(parent, "optional", reference); | ||
|
@@ -847,6 +877,15 @@ function parse(source, root, options) { | |
parseSyntax(); | ||
break; | ||
|
||
case "edition": | ||
|
||
/* istanbul ignore if */ | ||
if (!head) | ||
throw illegal(token); | ||
|
||
parseEdition(); | ||
break; | ||
|
||
case "option": | ||
|
||
parseOption(ptr, token); | ||
|
@@ -872,6 +911,7 @@ function parse(source, root, options) { | |
"imports" : imports, | ||
weakImports : weakImports, | ||
syntax : syntax, | ||
edition : edition, | ||
root : root | ||
}; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the purpose of these dummies? It seems like this could conflict with the actual versions. Also, it doesn't actually define any custom features, so anyone referencing those would have problems
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The purpose I had in mind was that an import of these files may be present in a schema originating in another language or toolchain, so having dummies prevents "file not found" errors in pbjs. Left them blank because the definitions within are not of relevance to protobuf.js anyway. But now that you say it, given that the dummies might leak out to other toolchains, it seems like a good idea to include their full contents.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yea, also usually people would import them and reference the features defined in them. So I think you'd still hit errors from missing options definitions?
OTOH copying them verbatim here seems like a good way to get version skew issues. How do you deal with descriptor.proto? We've been modifying that pretty rapidly in the last few years
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The missing definitions aren't an issue for protobuf.js itself. It currently parses any options literally without looking up schemas. Hence the historic comment in the parser on how properties are assumed. Similarly, descriptor.proto is optional and only updated sporadically, as it is not needed internally for protobuf.js to function. Perhaps a data point that descriptor.proto hasn't leaked out so far.