From 483317668341e0317e2d9cf43adaa4c501b789e6 Mon Sep 17 00:00:00 2001 From: Edwin Hoogerbeets Date: Wed, 5 Mar 2025 18:14:05 -0800 Subject: [PATCH 1/5] Use the escaper factory and classes from ilib-tools-common - Now unescapes strings that it extracts according to the escape style set into each mapping. This allows you to do different escaping for different types of files. --- packages/ilib-loctool-regex/README.md | 8 ++ packages/ilib-loctool-regex/RegexFile.js | 87 +++++++------------ packages/ilib-loctool-regex/package.json | 1 + .../ilib-loctool-regex/test/RegexFile.test.js | 7 +- pnpm-lock.yaml | 3 + 5 files changed, 48 insertions(+), 58 deletions(-) diff --git a/packages/ilib-loctool-regex/README.md b/packages/ilib-loctool-regex/README.md index 4c23f0bcd..cebed905f 100644 --- a/packages/ilib-loctool-regex/README.md +++ b/packages/ilib-loctool-regex/README.md @@ -48,6 +48,14 @@ used within the `regex` property: how to use path name templates. - sourceLocale - the locale of the source strings. This is the locale in which the strings are written in the source files. + - escapeStyle - the style of unescaping when collecting strings + for translations and the type of escaping to use when writing the + localized strings to the resource file. The valid styles incude + "csharp" and "js" (the default), as well as many others. The + full list of styles available is given in the documentation + for the [ilib-tools-common library](https://github.com/iLib-js/ilib-mono/blob/main/packages/ilib-tools-common/docs/ilibToolsCommon.md#escaperFactory). + In addition to the styles listed there, the escapeStyle setting + can also be set to "none" to disable escaping altogether. - expressions - an array of objects that document the regular expressions to use to extract strings from the source file and some additional information. 
The strings diff --git a/packages/ilib-loctool-regex/RegexFile.js b/packages/ilib-loctool-regex/RegexFile.js index e02bfed30..23af7a7a4 100644 --- a/packages/ilib-loctool-regex/RegexFile.js +++ b/packages/ilib-loctool-regex/RegexFile.js @@ -21,6 +21,11 @@ var fs = require("fs"); var path = require("path"); var Locale = require("ilib-locale"); var IString = require("ilib-istring"); +var escaperFactory = require("ilib-tools-common").escaperFactory; + +function identity(str) { + return str; +} /** * Create a new Regex file with the given path name and within @@ -56,44 +61,16 @@ var RegexFile = function(props) { }); } this.resourceIndex = 0; + + // set up the unescaper to use after we have found the strings. The same unescaper + // is used for all strings that match the same mapping. If this.escaper is undefined, + // then the strings are not unescaped. + var escapeStyle = (this.mapping && this.mapping.escapeStyle) || "js"; + this.escaper = escapeStyle !== "none" ? escaperFactory(escapeStyle) : identity; }; var reUnicodeChar = /\\u([a-fA-F0-9]{1,4})/g; -/** - * Unescape the string to make the same string that would be - * in memory in the target programming language. - * - * @static - * @param {String} string the string to unescape - * @returns {String} the unescaped string - */ -function unescapeString(string) { - if (!string) return string; - var unescaped = string; - - // first, unescape unicode characters - while ((match = reUnicodeChar.exec(unescaped))) { - if (match && match.length > 1) { - var value = parseInt(match[1], 16); - unescaped = unescaped.replace(match[0], IString.fromCodePoint(value)); - reUnicodeChar.lastIndex = 0; - } - } - - unescaped = unescaped. - replace(/\\\\n/g, ""). // line continuation - replace(/\\\n/g, ""). // line continuation - replace(/^\\\\/, "\\"). // unescape backslashes - replace(/([^\\])\\\\/g, "$1\\"). - replace(/^\\'/, "'"). // unescape quotes - replace(/([^\\])\\'/g, "$1'"). - replace(/^\\"/, '"'). 
- replace(/([^\\])\\"/g, '$1"'); - - return unescaped; -}; - /** * If the given string is surrounded by quotes, remove the quotes. * Otherwise, return the string unchanged. @@ -119,13 +96,12 @@ function stripQuotes(str) { * the string from what it looks like in the source * code but increases matching. * - * @static * @param {String} string the string to clean * @returns {String} the cleaned string */ -function cleanString(string) { +RegexFile.prototype.cleanString = function(string) { if (!string) return string; - var unescaped = unescapeString(string); + var unescaped = this.escaper.unescape(string); unescaped = unescaped. replace(/\\[btnfr]/g, " "). @@ -136,11 +112,10 @@ function cleanString(string) { }; /** - * Make a new key for the given string. This must correspond - * exactly with the code in htglob jar file so that the - * resources match up. See the class IResourceBundle in - * this project under the java directory for the corresponding - * code. + * Make a new key for the given source string. This key is a + * hash of the source string that has a high probability of being + * unique for this source string and can be used to identify the + * resource. 
* * @private * @param {String} source the source string to make a resource @@ -158,14 +133,14 @@ RegexFile.prototype.makeKey = function(source) { * @param {String} data the string to parse * @returns {Array.} the array of strings */ -function parseArray(data) { +RegexFile.prototype.parseArray = function(data) { var arr; if (data) { arr = data.split(","); arr = arr.map(function(item) { - return cleanString(item); - }); + return stripQuotes(this.escaper.unescape(item).trim()); + }.bind(this)); } return arr; @@ -218,24 +193,26 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { if (result.groups) { if (result.groups.sourcePlural) { - sourcePlural = cleanString(result.groups.sourcePlural); + sourcePlural = this.escaper.unescape(result.groups.sourcePlural); } if (result.groups.comment) { - comment = cleanString(result.groups.comment); + comment = this.escaper.unescape(result.groups.comment); } if (result.groups.context) { - context = cleanString(result.groups.context); + context = this.escaper.unescape(result.groups.context); } if (result.groups.flavor) { - flavor = cleanString(result.groups.flavor); + flavor = this.escaper.unescape(result.groups.flavor); } if (result.groups.key) { - key = cleanString(result.groups.key); + // clean string unescapes the key, but also removes things + // that foster greater matching, like compressing white space + key = this.cleanString(result.groups.key); } } if (exp.resourceType === "array") { - array = parseArray(source); + array = this.parseArray(source); } if (!key) { @@ -244,10 +221,10 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { switch (exp.resourceType) { default: case "string": - src = cleanString(source); + src = this.escaper.unescape(source); break; case "plural": - src = sourcePlural; + src = this.escaper.unescape(sourcePlural); break; case "array": src = array.join(""); @@ -270,7 +247,7 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { switch (exp.resourceType) { case 
"string": - source = cleanString(source); + source = this.escaper.unescape(source); r = this.API.newResource({ resType: exp.resourceType, project: this.project.getProjectId(), @@ -295,7 +272,7 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { sourceLocale: this.project.sourceLocale, source: source, sourcePlurals: { - one: cleanString(source), + one: this.escaper.unescape(source), other: sourcePlural }, pathName: this.pathName, diff --git a/packages/ilib-loctool-regex/package.json b/packages/ilib-loctool-regex/package.json index b3b945263..4c7112bc0 100644 --- a/packages/ilib-loctool-regex/package.json +++ b/packages/ilib-loctool-regex/package.json @@ -68,6 +68,7 @@ "dependencies": { "ilib-istring": "workspace:^", "ilib-locale": "workspace:^", + "ilib-tools-common": "workspace:^", "micromatch": "^4.0.8" } } diff --git a/packages/ilib-loctool-regex/test/RegexFile.test.js b/packages/ilib-loctool-regex/test/RegexFile.test.js index 80c76a339..17895365d 100644 --- a/packages/ilib-loctool-regex/test/RegexFile.test.js +++ b/packages/ilib-loctool-regex/test/RegexFile.test.js @@ -94,11 +94,12 @@ var p = new CustomProject({ "resourceFileType": "javascript", "template": "resources/Translation[locale].json", "sourceLocale": "en-US", + "escapeStyle": "smarty", "expressions": [ { // example: // {* @L10N This comment is on the same line *} {'Your password change is cancelled.'|f:'login_password_change_cancelled'} - "expression": "\\{\\*.*@L10N\\s*(?[^*]*)\\*\\}.*\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", + "expression": "\\{\\*.*@L10N\\s*(?[^*]*?)\\s*\\*\\}.*\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", "flags": "g", "datatype": "template", "resourceType": "string" @@ -107,7 +108,7 @@ var p = new CustomProject({ // example: // {* @L10N The message shown to users whose passwords have just been changed *} // {'Your password was changed. 
Please log in again.'|f:'login_success_password_changed'} - "expression": "\\{\\*.*@L10N\\s*(?[^*]*)\\*\\}.*\\n.*\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", + "expression": "\\{\\*.*@L10N\\s*(?[^*]*?)\\s*\\*\\}.*\\n.*\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", "flags": "g", "datatype": "template", "resourceType": "string" @@ -329,7 +330,7 @@ describe("regex file tests", function() { var set = rf.getTranslationSet(); expect(set).toBeTruthy(); - +debugger; var resources = set.getBy({ reskey: "r523019971" }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fca72553f..3b3debcc4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1636,6 +1636,9 @@ importers: ilib-locale: specifier: workspace:^ version: link:../ilib-locale + ilib-tools-common: + specifier: workspace:^ + version: link:../ilib-tools-common micromatch: specifier: ^4.0.8 version: 4.0.8 From 922d71c5a0f58cdf4036ad0016a653b9bd769746 Mon Sep 17 00:00:00 2001 From: Edwin Hoogerbeets Date: Wed, 5 Mar 2025 21:45:08 -0800 Subject: [PATCH 2/5] Implement custom unescaping per mapping - Each mapping maps a particular set of glob expressions to settings for files of a particular type. These settings can now contain an escapeStyle parameter that does unescaping in the manner of the named programming language. 
--- .../ilib-loctool-regex/test/RegexFile.test.js | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/packages/ilib-loctool-regex/test/RegexFile.test.js b/packages/ilib-loctool-regex/test/RegexFile.test.js index 17895365d..c7ddee22d 100644 --- a/packages/ilib-loctool-regex/test/RegexFile.test.js +++ b/packages/ilib-loctool-regex/test/RegexFile.test.js @@ -330,7 +330,7 @@ describe("regex file tests", function() { var set = rf.getTranslationSet(); expect(set).toBeTruthy(); -debugger; + var resources = set.getBy({ reskey: "r523019971" }); @@ -869,4 +869,49 @@ debugger; new Error("No expressions found in project.json for ./testfiles/js/t1.js") ); }); + + test("RegexFile gets the right unescaped source string in a javascript file", function() { + expect.assertions(5); + + var rf = new RegexFile({ + project: p, + pathName: "./testfiles/js/t1.js", + type: rft + }); + expect(rf).toBeTruthy(); + + rf.parse("$t('foob`\\n\\r\\t\\\\a\\u317Dr\\u{1D11E}');"); + + var set = rf.getTranslationSet(); + expect(set).toBeTruthy(); + + // javascript escaping is the default, so it doesn't need to be + // specified in the mapping + var r = set.getBySource("foob`\n\r\t\\a\u317Dr\u{1D11E}"); + expect(r).toBeTruthy(); + expect(r.getSource()).toBe("foob`\n\r\t\\a\u317Dr\u{1D11E}"); + expect(r.getKey()).toBe("r157823627"); + }); + + test("RegexFile gets the right unescaped source string in a Smarty template file", function() { + expect.assertions(5); + + var rf = new RegexFile({ + project: p, + pathName: "./testfiles/templates/t1.tmpl", + type: rft + }); + expect(rf).toBeTruthy(); + + rf.parse("{\'abc \\\"e\\\" \\$\\n\\r\\t\\f\\vT \\u{317D}r\\u{1D11E}\'|f:\'key\'}"); + + var set = rf.getTranslationSet(); + expect(set).toBeTruthy(); + + // smarty escaping doesn't do Unicode characters + var r = set.getBySource("abc \"e\" $\n\r\t\f\vT \\u{317D}r\\u{1D11E}"); + expect(r).toBeTruthy(); + expect(r.getSource()).toBe("abc \"e\" $\n\r\t\f\vT \\u{317D}r\\u{1D11E}"); + 
expect(r.getKey()).toBe("key"); + }); }); From 353a317f919d213c109fa7744c2069c5e2ef578b Mon Sep 17 00:00:00 2001 From: Edwin Hoogerbeets Date: Wed, 5 Mar 2025 21:48:18 -0800 Subject: [PATCH 3/5] Fix the pre-push git hook - will deny a README file that mentions the debugger. Now only denies it if it finds "debugger;" with the semi-colon and only if the file path ends with ".js" --- git-hooks/pre-push | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-hooks/pre-push b/git-hooks/pre-push index c2d0bc20c..dd9d5f7fe 100755 --- a/git-hooks/pre-push +++ b/git-hooks/pre-push @@ -1,9 +1,9 @@ #!/bin/bash -files=$(git diff --cached --name-only --diff-filter=ACM main) +files=$(git diff --cached --name-only --diff-filter=ACM main | grep '\.js$') # Check for debugger statements in JavaScript files -lines=$(grep -n 'debugger' $files) +lines=$(grep -n 'debugger;' $files) if [ "$lines" != "" ] then echo "Debugger statement found in:" From 534251f39d7463855fb0148543b2788fb79abcdc Mon Sep 17 00:00:00 2001 From: Edwin Hoogerbeets Date: Thu, 6 Mar 2025 11:00:52 -0800 Subject: [PATCH 4/5] Added changeset for ilib-loctool-regex --- .changeset/mighty-foxes-breathe.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .changeset/mighty-foxes-breathe.md diff --git a/.changeset/mighty-foxes-breathe.md b/.changeset/mighty-foxes-breathe.md new file mode 100644 index 000000000..369ca9f26 --- /dev/null +++ b/.changeset/mighty-foxes-breathe.md @@ -0,0 +1,9 @@ +--- +"ilib-loctool-regex": minor +--- + +- Added the ability to specify the escaping style for + strings that are extracted by the regular expressions + - supports all escaping styles published by + ilib-tools-common + - supports extra "none" style to turn off unescaping From 49c1a9eb29befc1ea950f288e49815badc1312f0 Mon Sep 17 00:00:00 2001 From: Edwin Hoogerbeets Date: Thu, 6 Mar 2025 11:36:01 -0800 Subject: [PATCH 5/5] Switch to specifying the escape style in each expression - The reason is that in 
many programming languages, some types of strings are unescaped differently than others, even within the same programming language. e.g. double- vs. single-quoted strings in PHP --- packages/ilib-loctool-regex/README.md | 61 +++++++++++++------ packages/ilib-loctool-regex/RegexFile.js | 59 ++++++++++-------- .../ilib-loctool-regex/test/RegexFile.test.js | 43 +++++++++++-- 3 files changed, 112 insertions(+), 51 deletions(-) diff --git a/packages/ilib-loctool-regex/README.md b/packages/ilib-loctool-regex/README.md index cebed905f..ab3168580 100644 --- a/packages/ilib-loctool-regex/README.md +++ b/packages/ilib-loctool-regex/README.md @@ -48,14 +48,6 @@ used within the `regex` property: how to use path name templates. - sourceLocale - the locale of the source strings. This is the locale in which the strings are written in the source files. - - escapeStyle - the style of unescaping when collecting strings - for translations and the type of escaping to use when writing the - localized strings to the resource file. The valid styles incude - "csharp" and "js" (the default), as well as many others. The - full list of styles available is given in the documentation - for the [ilib-tools-common library](https://github.com/iLib-js/ilib-mono/blob/main/packages/ilib-tools-common/docs/ilibToolsCommon.md#escaperFactory). - In addition to the styles listed there, the escapeStyle setting - can also be set to "none" to disable escaping altogether. - expressions - an array of objects that document the regular expressions to use to extract strings from the source file and some additional information. The strings @@ -122,6 +114,14 @@ used within the `regex` property: quite long, but it is always unique. - "truncate" - use the first 32 characters of the source string as the key. This fixed-length key is usually unique. + - escapeStyle - the style of unescaping to use when the regular expression + matches. The valid styles include + "csharp" and "js" (the default), as well as many others. 
The + full list of styles available is given in the documentation + for the [ilib-tools-common library](https://github.com/iLib-js/ilib-mono/blob/main/packages/ilib-tools-common/docs/ilibToolsCommon.md#escaperFactory). + In addition to the styles listed there, the escapeStyle setting + can also be set to "none" to disable escaping altogether for strings + that are extracted using this regular expression. ### Example Configuration @@ -143,29 +143,41 @@ Example configuration for a web project with PHP and JavaScript files: "sourceLocale": "en-US", "expressions": [ { - "expression": "translate\\s*(\\s*['\"](?[^'\"]*)['\"]\\s*\\)", + "expression": "translate\\s*(\\s*\"(?[^\"]*)\"\\s*\\)", "flags": "g", "datatype": "php", "resourceType": "string", - "keyStrategy": "source" + "keyStrategy": "source", + "escapeStyle": "php-double" }, { - "expression": "translate\\s*\\(\\s*['\"](?[^'\"]*)['\"]\\s*,\\s*['\"](?[^'\"]*)['\"]\\s*\\)", + "expression": "translate\\s*(\\s*'(?[^']*)'\\s*\\)", "flags": "g", "datatype": "php", - "resourceType": "string" + "resourceType": "string", + "keyStrategy": "source", + "escapeStyle": "php-single" }, { - "expression": "translateArray\\s*\\(\\s*\\[\\s*(?['\"][^'\"]*['\"](\\s*,\\s*['\"][^'\"]*['\"])*)\\s*\\]\\s*\\)", + "expression": "translate\\s*\\(\\s*\"(?[^\"]*)\"\\s*,\\s*\"(?[^\"]*)\"\\s*\\)", "flags": "g", "datatype": "php", - "resourceType": "array" + "resourceType": "string", + "escapeStyle": "php-double" }, { - "expression": "translatePlural\\s*\\(\\s*['\"](?[^'\"]*)['\"]\\s*,\\s*['\"](?[^'\"]*)['\"]", + "expression": "translateArray\\s*\\(\\s*\\[\\s*(?\"[^\"]*\"(\\s*,\\s*\"[^\"]*\")*)\\s*\\]\\s*\\)", "flags": "g", "datatype": "php", - "resourceType": "plural" + "resourceType": "array", + "escapeStyle": "php-double" + }, + { + "expression": "translatePlural\\s*\\(\\s*\"(?[^\"]*)\"]\\s*,\\s*\"(?[^\"]*)\"", + "flags": "g", + "datatype": "php", + "resourceType": "plural", + "escapeStyle": "php-double" } ] }, @@ -196,15 +208,21 @@ given 
regular expressions. Explanation of the above regexes: that are passed as the first parameter to the `translate` function. It will match a string like `translate("string to translate")`. Since the string does not have a unique id, one is generated using the `source` strategy. That is, the source -string itself is re-used as its own unique id. -1. The second regular expression extracts strings that are passed as the first +string itself is re-used as its own unique id. Note that this regular expression +extracts strings with double quotes around them. The `escapeStyle` setting is +used to specify that the `php-double` style should be used to unescape the string. +1. The second regular expression is similar to the first, but extracts strings +that use single quotes instead of double quotes. The `escapeStyle` setting is +used to specify that the `php-single` style should be used to unescape the string. +(Unescaping is different between single and double quoted strings in PHP.) +1. The third regular expression extracts strings that are passed as the first parameter to the `translate` function and the second parameter is the key of the string. It will match a string like `translate("string to translate", "unique.id")`. -1. The third regular expression is an example of an array translation. The +1. The fourth regular expression is an example of an array translation. The `source` capturing group will have a value like `"a", "b", "c"` which this plugin will transform into an array of 3 strings. This will match a string like `translateArray(["a", "b", "c"])`. -1. The fourth regular expression is an example of a plural translation. The +1. The fifth regular expression is an example of a plural translation. The first parameter to the `translatePlural` function is the singular string and is assigned to the `source` capturing group. The second parameter is the plural string and is assigned to the `sourcePlural` capturing group. 
This creates a plural @@ -233,6 +251,9 @@ the `hash` strategy. That is, the hash of the source string is calculated and prepended with an "r" for "resource" (eg. "r34523234") and that is used as the unique id for that string. +Note that the default escape style is `js` which is used when the `escapeStyle` +setting is not given, which is why it is not specified in the last mapping example. + ### Resource Type Field Mapping The `resourceType` setting for each mapping specifies the type of the diff --git a/packages/ilib-loctool-regex/RegexFile.js b/packages/ilib-loctool-regex/RegexFile.js index 23af7a7a4..bf77bb91b 100644 --- a/packages/ilib-loctool-regex/RegexFile.js +++ b/packages/ilib-loctool-regex/RegexFile.js @@ -20,12 +20,17 @@ var fs = require("fs"); var path = require("path"); var Locale = require("ilib-locale"); -var IString = require("ilib-istring"); var escaperFactory = require("ilib-tools-common").escaperFactory; -function identity(str) { - return str; -} +// pass-through escaper used when the escapeStyle is "none" +var identity = { + escape: function(str) { + return str; + }, + unescape: function(str) { + return str; + } +}; /** * Create a new Regex file with the given path name and within @@ -58,19 +63,19 @@ var RegexFile = function(props) { exp.regex = new RegExp(exp.expression, exp.flags); } exp.regex.lastIndex = 0; + + // set up the unescaper to use after we have found the strings. The same unescaper + // is used for all strings that match this expression. Escapers vary by expression + // because different types of strings might have different escaping rules, even within + // the same programming language. + // (e.g. double quoted strings in PHP are escaped differently than single quoted strings) + var escapeStyle = exp.escapeStyle || "js"; + exp.escaper = escapeStyle !== "none" ? escaperFactory(escapeStyle) : identity; }); } this.resourceIndex = 0; - - // set up the unescaper to use after we have found the strings. 
The same unescaper - // is used for all strings that match the same mapping. If this.escaper is undefined, - // then the strings are not unescaped. - var escapeStyle = (this.mapping && this.mapping.escapeStyle) || "js"; - this.escaper = escapeStyle !== "none" ? escaperFactory(escapeStyle) : identity; }; -var reUnicodeChar = /\\u([a-fA-F0-9]{1,4})/g; - /** * If the given string is surrounded by quotes, remove the quotes. * Otherwise, return the string unchanged. @@ -97,11 +102,12 @@ function stripQuotes(str) { * code but increases matching. * * @param {String} string the string to clean + * @param {Escaper} escaper the escaper to use to unescape * @returns {String} the cleaned string */ -RegexFile.prototype.cleanString = function(string) { +RegexFile.prototype.cleanString = function(string, escaper) { if (!string) return string; - var unescaped = this.escaper.unescape(string); + var unescaped = escaper.unescape(string); unescaped = unescaped. replace(/\\[btnfr]/g, " "). @@ -131,15 +137,16 @@ RegexFile.prototype.makeKey = function(source) { * the array of strings as an actual array. 
* * @param {String} data the string to parse + * @param {Escaper} escaper the escaper to use to unescape the strings * @returns {Array.} the array of strings */ -RegexFile.prototype.parseArray = function(data) { +RegexFile.prototype.parseArray = function(data, escaper) { var arr; if (data) { arr = data.split(","); arr = arr.map(function(item) { - return stripQuotes(this.escaper.unescape(item).trim()); + return stripQuotes(escaper.unescape(item).trim()); }.bind(this)); } @@ -193,26 +200,26 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { if (result.groups) { if (result.groups.sourcePlural) { - sourcePlural = this.escaper.unescape(result.groups.sourcePlural); + sourcePlural = exp.escaper.unescape(result.groups.sourcePlural); } if (result.groups.comment) { - comment = this.escaper.unescape(result.groups.comment); + comment = exp.escaper.unescape(result.groups.comment); } if (result.groups.context) { - context = this.escaper.unescape(result.groups.context); + context = exp.escaper.unescape(result.groups.context); } if (result.groups.flavor) { - flavor = this.escaper.unescape(result.groups.flavor); + flavor = exp.escaper.unescape(result.groups.flavor); } if (result.groups.key) { // clean string unescapes the key, but also removes things // that foster greater matching, like compressing white space - key = this.cleanString(result.groups.key); + key = this.cleanString(result.groups.key, exp.escaper); } } if (exp.resourceType === "array") { - array = this.parseArray(source); + array = this.parseArray(source, exp.escaper); } if (!key) { @@ -221,10 +228,10 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { switch (exp.resourceType) { default: case "string": - src = this.escaper.unescape(source); + src = exp.escaper.unescape(source); break; case "plural": - src = this.escaper.unescape(sourcePlural); + src = exp.escaper.unescape(sourcePlural); break; case "array": src = array.join(""); @@ -247,7 +254,7 @@ RegexFile.prototype.matchExpression = 
function(data, exp, cb) { switch (exp.resourceType) { case "string": - source = this.escaper.unescape(source); + source = exp.escaper.unescape(source); r = this.API.newResource({ resType: exp.resourceType, project: this.project.getProjectId(), @@ -272,7 +279,7 @@ RegexFile.prototype.matchExpression = function(data, exp, cb) { sourceLocale: this.project.sourceLocale, source: source, sourcePlurals: { - one: this.escaper.unescape(source), + one: exp.escaper.unescape(source), other: sourcePlural }, pathName: this.pathName, diff --git a/packages/ilib-loctool-regex/test/RegexFile.test.js b/packages/ilib-loctool-regex/test/RegexFile.test.js index c7ddee22d..90901b33e 100644 --- a/packages/ilib-loctool-regex/test/RegexFile.test.js +++ b/packages/ilib-loctool-regex/test/RegexFile.test.js @@ -94,7 +94,6 @@ var p = new CustomProject({ "resourceFileType": "javascript", "template": "resources/Translation[locale].json", "sourceLocale": "en-US", - "escapeStyle": "smarty", "expressions": [ { // example: @@ -102,7 +101,8 @@ var p = new CustomProject({ "expression": "\\{\\*.*@L10N\\s*(?[^*]*?)\\s*\\*\\}.*\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", "flags": "g", "datatype": "template", - "resourceType": "string" + "resourceType": "string", + "escapeStyle": "smarty" }, { // example: @@ -111,15 +111,26 @@ var p = new CustomProject({ "expression": "\\{\\*.*@L10N\\s*(?[^*]*?)\\s*\\*\\}.*\\n.*\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", "flags": "g", "datatype": "template", - "resourceType": "string" + "resourceType": "string", + "escapeStyle": "smarty" }, { // example: // {'Your password was changed. Please log in again.'|f:'login_success_password_changed'} - "expression": "\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'.*\\}", + "expression": "\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'\\s*\\}", "flags": "g", "datatype": "template", - "resourceType": "string" + "resourceType": "string", + "escapeStyle": "smarty" + }, + { + // example: + // {'Your password was changed. 
Please log in again.'|f:'login_success_password_changed'|noescape} + "expression": "\\{.*'(?[^']*)'\\s*\\|\\s*f:\\s*'(?[^']*)'\\s*\\|\\s*noescape\\s*\\}", + "flags": "g", + "datatype": "template", + "resourceType": "string", + "escapeStyle": "none" } ] } @@ -914,4 +925,26 @@ describe("regex file tests", function() { expect(r.getSource()).toBe("abc \"e\" $\n\r\t\f\vT \\u{317D}r\\u{1D11E}"); expect(r.getKey()).toBe("key"); }); + + test("RegexFile does not unescape the string if the escapeStyle is set to none", function() { + expect.assertions(5); + + var rf = new RegexFile({ + project: p, + pathName: "./testfiles/templates/t1.tmpl", + type: rft + }); + expect(rf).toBeTruthy(); + + rf.parse("{\'abc \\\"e\\\" \\$\\n\\r\\t\\f\\vT \\u{317D}r\\u{1D11E}\'|f:\'key\'|noescape}"); + + var set = rf.getTranslationSet(); + expect(set).toBeTruthy(); + + // noescape means don't unescape anything + var r = set.getBySource("abc \\\"e\\\" \\$\\n\\r\\t\\f\\vT \\u{317D}r\\u{1D11E}"); + expect(r).toBeTruthy(); + expect(r.getSource()).toBe("abc \\\"e\\\" \\$\\n\\r\\t\\f\\vT \\u{317D}r\\u{1D11E}"); + expect(r.getKey()).toBe("key"); + }); });