From 93f3b2b114877af17db219e501ae4551df61738d Mon Sep 17 00:00:00 2001
From: "Alex Lam S.L"
Date: Mon, 11 Dec 2017 01:15:44 +0800
Subject: escape consecutive unpaired surrogates (#2576)

fixes #2569
---
 lib/output.js | 15 ++++++++++-----
 lib/parse.js  | 12 ++++++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/output.js b/lib/output.js
index 1aa63450..a4c41f11 100644
--- a/lib/output.js
+++ b/lib/output.js
@@ -121,11 +121,16 @@ function OutputStream(options) {
             }
         });
     } : function(str) {
-        return str.replace(/[\ud800-\udbff](?![\udc00-\udfff])/g, function(ch) {
-            return "\\u" + ch.charCodeAt(0).toString(16);
-        }).replace(/(^|[^\ud800-\udbff])([\udc00-\udfff])/g, function(match, prefix, ch) {
-            return prefix + "\\u" + ch.charCodeAt(0).toString(16);
-        });
+        var s = "";
+        for (var i = 0, len = str.length; i < len; i++) {
+            if (is_surrogate_pair_head(str[i]) && !is_surrogate_pair_tail(str[i + 1])
+                || is_surrogate_pair_tail(str[i]) && !is_surrogate_pair_head(str[i - 1])) {
+                s += "\\u" + str.charCodeAt(i).toString(16);
+            } else {
+                s += str[i];
+            }
+        }
+        return s;
     };
 
     function make_string(str, quote) {
diff --git a/lib/parse.js b/lib/parse.js
index 099fc49a..f0098c75 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -132,6 +132,18 @@ function is_letter(code) {
         || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
 };
 
+function is_surrogate_pair_head(code) {
+    if (typeof code == "string")
+        code = code.charCodeAt(0);
+    return code >= 0xd800 && code <= 0xdbff;
+}
+
+function is_surrogate_pair_tail(code) {
+    if (typeof code == "string")
+        code = code.charCodeAt(0);
+    return code >= 0xdc00 && code <= 0xdfff;
+}
+
 function is_digit(code) {
     return code >= 48 && code <= 57;
 };
-- 
cgit v1.2.3