aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: Anthony Van de Gejuchte <anthonyvdgent@gmail.com> 2016-06-13 12:36:47 +0200
committer: Anthony Van de Gejuchte <anthonyvdgent@gmail.com> 2016-06-30 22:12:50 +0200
commit: a97690fc724a7beba77d7fde449ea56676804933 (patch)
tree: 10a19039600293238e1b113c78d81ee9219a64d2 /lib
parent: 02c638209ee22816b1324ff0c0f47b27db1336af (diff)
download: tracifyjs-a97690fc724a7beba77d7fde449ea56676804933.tar.gz
download: tracifyjs-a97690fc724a7beba77d7fde449ea56676804933.zip
Various LineTerminator changes
* Escaped newlines should also produce SyntaxError
* Fix multiline comment parsing and add tests
* Adapt makePredicate to handle \u2028 and \u2029
* Move up nlb check in regex so it's checked before any escape handling
* Change error messages to conform to the ECMA standard
* Fix find_eol not recognizing \u2028 and \u2029 as line terminators
* Remove \u180e, as it was removed from category Zs in Unicode 6.3.0
Diffstat (limited to 'lib')
-rw-r--r-- lib/parse.js | 46
-rw-r--r-- lib/utils.js | 13
2 files changed, 31 insertions, 28 deletions
diff --git a/lib/parse.js b/lib/parse.js
index c7089b2d..bfbd14d5 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -107,7 +107,9 @@ var OPERATORS = makePredicate([
"||"
]);
-var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\uFEFF"));
+var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\uFEFF"));
+
+var NEWLINE_CHARS = makePredicate(characters("\n\r\u2028\u2029"));
var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
@@ -234,7 +236,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
var ch = S.text.charAt(S.pos++);
if (signal_eof && !ch)
throw EX_EOF;
- if ("\r\n\u2028\u2029".indexOf(ch) >= 0) {
+ if (NEWLINE_CHARS(ch)) {
S.newline_before = S.newline_before || !in_string;
++S.line;
S.col = 0;
@@ -261,7 +263,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
var text = S.text;
for (var i = S.pos, n = S.text.length; i < n; ++i) {
var ch = text[i];
- if (ch == '\n' || ch == '\r')
+ if (NEWLINE_CHARS(ch))
return i;
}
return -1;
@@ -313,8 +315,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
};
function skip_whitespace() {
- var ch;
- while (WHITESPACE_CHARS(ch = peek()) || ch == "\u2028" || ch == "\u2029")
+ while (WHITESPACE_CHARS(peek()))
next();
};
@@ -352,7 +353,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
if (!isNaN(valid)) {
return token("num", valid);
} else {
- parse_error("Invalid syntax: " + num);
+ parse_error("SyntaxError: Invalid syntax: " + num);
}
};
@@ -400,18 +401,18 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
for (; n > 0; --n) {
var digit = parseInt(next(true), 16);
if (isNaN(digit))
- parse_error("Invalid hex-character pattern in string");
+ parse_error("SyntaxError: Invalid hex-character pattern in string");
num = (num << 4) | digit;
}
return num;
};
- var read_string = with_eof_error("Unterminated string constant", function(quote_char){
+ var read_string = with_eof_error("SyntaxError: Unterminated string constant", function(quote_char){
var quote = next(), ret = "";
for (;;) {
var ch = next(true, true);
if (ch == "\\") ch = read_escaped_char(true);
- else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) parse_error("Unterminated string constant");
+ else if (NEWLINE_CHARS(ch)) parse_error("SyntaxError: Unterminated string constant");
else if (ch == quote) break;
ret += ch;
}
@@ -436,21 +437,14 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
return next_token;
};
- var skip_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
+ var skip_multiline_comment = with_eof_error("SyntaxError: Unterminated multiline comment", function(){
var regex_allowed = S.regex_allowed;
var i = find("*/", true);
- var text = S.text.substring(S.pos, i).replace(/\r\n|\r/g, '\n');
- var a = text.split("\n"), n = a.length;
+ var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n');
// update stream position
- S.pos = i + 2;
- S.line += n - 1;
- if (n > 1) S.col = a[n - 1].length;
- else S.col += a[n - 1].length;
- S.col += 2;
- var nlb = S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
+ forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2);
S.comments_before.push(token("comment2", text, true));
S.regex_allowed = regex_allowed;
- S.newline_before = nlb;
return next_token;
});
@@ -463,9 +457,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
else break;
}
else {
- if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
+ if (ch != "u") parse_error("SyntaxError: Expecting UnicodeEscapeSequence -- uXXXX");
ch = read_escaped_char();
- if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
+ if (!is_identifier_char(ch)) parse_error("SyntaxError: Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
name += ch;
backslash = false;
}
@@ -477,9 +471,11 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
return name;
};
- var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
+ var read_regexp = with_eof_error("SyntaxError: Unterminated regular expression", function(regexp){
var prev_backslash = false, ch, in_class = false;
- while ((ch = next(true))) if (prev_backslash) {
+ while ((ch = next(true))) if (NEWLINE_CHARS(ch)) {
+ parse_error("SyntaxError: Unexpected line terminator");
+ } else if (prev_backslash) {
regexp += "\\" + ch;
prev_backslash = false;
} else if (ch == "[") {
@@ -492,8 +488,6 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
break;
} else if (ch == "\\") {
prev_backslash = true;
- } else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) {
- parse_error("Unexpected line terminator");
} else {
regexp += ch;
}
@@ -602,7 +596,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
}
break;
}
- parse_error("Unexpected character '" + ch + "'");
+ parse_error("SyntaxError: Unexpected character '" + ch + "'");
};
next_token.context = function(nc) {
diff --git a/lib/utils.js b/lib/utils.js
index 78c6dbf7..8ef61936 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -227,10 +227,19 @@ function makePredicate(words) {
}
cats.push([words[i]]);
}
+ function quote(word) {
+ return JSON.stringify(word).replace(/[\u2028\u2029]/g, function(s) {
+ switch (s) {
+ case "\u2028": return "\\u2028";
+ case "\u2029": return "\\u2029";
+ }
+ return s;
+ });
+ }
function compareTo(arr) {
- if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";";
+ if (arr.length == 1) return f += "return str === " + quote(arr[0]) + ";";
f += "switch(str){";
- for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
+ for (var i = 0; i < arr.length; ++i) f += "case " + quote(arr[i]) + ":";
f += "return true}return false;";
}
// When there are more than three length categories, an outer