diff --git a/CHANGELOG.md b/CHANGELOG.md index 81d744c..e0a8a38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change Log +## 1.0.3 + +* Special handling for em dashes: In smart spacing mode, em dashes become " - " instead of "--" when found between words. + ## 1.0.2 * German mode now works with combining umlaut (diaeresis). diff --git a/package.json b/package.json index 10db031..c84e9b9 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "transliteration", "emoji" ], - "version": "1.0.2", + "version": "1.0.3", "dependencies": {}, "repository": "https://github.com/kshetline/unidecode-plus.git", "author": "Kerry Shetline ", diff --git a/test/unidecode.mocha.js b/test/unidecode.mocha.js index 7af0e93..2c16163 100644 --- a/test/unidecode.mocha.js +++ b/test/unidecode.mocha.js @@ -81,6 +81,13 @@ describe('Smart spacing', function() { it('should replace an em-dash straddled by word characters with " - " instead of "--"', function() { assert.equal( unidecode("No—I mean yes!", { smartSpacing: true }), "No - I mean yes!"); + assert.equal( + unidecode("#—#", { smartSpacing: true }), "#--#"); + }); + + it('should leave ASCII double dashes unchanged', function() { + assert.equal( + unidecode("No--I mean yes!", { smartSpacing: true }), "No--I mean yes!"); }); it('should handle deferred smart spacing', function() { diff --git a/unidecode.js b/unidecode.js index 4a03bdb..29e819e 100644 --- a/unidecode.js +++ b/unidecode.js @@ -69,6 +69,7 @@ function unidecode_internal_replace(ch) { var high = cp >> 8; var row = high + (high === 0 && german ? 0.5 : 0); var low = cp & 0xFF; + var emDash = cp === 0x2014; // This doesn't cover all emoji, just those currently defined. var emoji = (high === 0x1F4 || high === 0x1F6 || high === 0x1F9); @@ -96,6 +97,8 @@ function unidecode_internal_replace(ch) { ch = tr[row][low]; + if (smartSpacing && emDash) + return '\x80--\x80'; if (!smartSpacing || ch === '[?]' || ch === '_' || /^\w+$/.test(ch)) return ch; else if (emoji) @@ -106,13 +109,13 @@ function unidecode_internal_replace(ch) { function resolveSpacing(str) { return str + .replace(/(\w)(\x80--\x80)(\w)/g, function(_, p1, _2, p3) { return p1 + ' - ' + p3; }) .replace(/\x80(?!\w)/g, "") .replace(/\x80\x80|(\w)\x80/g, "$1\x81") .replace(/\x80/g, "") .replace(/^\x81+|\x81+$/g, "") .replace(/\x81 \x81/g, " ") - .replace(/\s?\x81+/g, " ") - .replace(/(\w)(--)(\w)/g, (_, p1, _2, p3) => `${p1} - ${p3}`); + .replace(/\s?\x81+/g, " "); } module.exports.resolveSpacing = resolveSpacing;