Skip to content

Commit 3fad2bc

Browse files
committed
lexing, revisited with regexes
1 parent 663aa2f commit 3fad2bc

2 files changed

Lines changed: 16 additions & 57 deletions

File tree

src/jsonselect.js

Lines changed: 15 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
/*! Copyright (c) 2011, Lloyd Hilaiel, ISC License */
22
(function() {
33
var w = window;
4-
var jsonParse = (w.JSON ? w.JSON.parse : w.eval);
4+
var jp = (w.JSON ? w.JSON.parse : w.eval);
5+
// Parse the JSON text `s` with the platform parser held in `jp`.
// Any exception raised by the parser is reported through te() using the
// "ijs" error code instead of being propagated as-is.
function jsonParse(s) {
  var parsed;
  try {
    parsed = jp(s);
  } catch (err) {
    te("ijs");
  }
  return parsed;
}
56

67
// emitted error codes. Strip this table for an, uh, "optimized build"
78
var _es = {}; // overshadow any globals when the table is stripped
@@ -30,60 +31,20 @@
3031
str: 4, // string
3132
};
3233

33-
var tPat = /^(?:string|boolean|null|array|object|number)/;
34-
var pcPat = /^(?:root|first-child|last-child|only-child)/;
35-
var jsPat = /^(?:\"(?:[^\\]|\\[^\"])*\")/;
36-
var iPat = /^(?:[_a-zA-Z]|[^\0-\0177]|\\[^\r\n\f0-9a-fA-F])(?:[_a-zA-Z0-9-]|[^\u0000-\u0177]|(?:\\[^\r\n\f0-9a-fA-F]))*/;
34+
var pat = /^(?:([\r\n\t\ ]+)|([*#,>])|(string|boolean|null|array|object|number)|(:(?:root|first-child|last-child|only-child))|(:\w+)|(\"(?:[^\\]|\\[^\"])*\")|(\")|((?:[_a-zA-Z]|[^\0-\0177]|\\[^\r\n\f0-9a-fA-F])(?:[_a-zA-Z0-9-]|[^\u0000-\u0177]|(?:\\[^\r\n\f0-9a-fA-F]))*))/;
3735
var lex = function (str, off) {
38-
if (off == undefined) off = 0;
39-
while (str.length > off) {
40-
switch(str.charCodeAt(off)) {
41-
// for simple 1 char tokens, we'll let them represent themselves.
42-
case 0x23: case 0x2a: case 0x2c:
43-
case 0x3e: case 0x7e:
44-
return [off+1, str.charAt(off)];
45-
// whitespace: space, nl, tab, cr, represented as the space char
46-
case 0x20: case 0x0a: case 0x0d: case 0x09:
47-
do { off++; } while (off < str.length && "\t\r\n ".indexOf(str.charAt(off)) !== -1);
48-
return [off, " "];
49-
// colon ':' indicates pseudo class
50-
case 0x3a:
51-
var m;
52-
var ss = str.substr(off+1);
53-
if (m = pcPat.exec(ss)) {
54-
return [off + 1 + m[0].length, toks.psc, ":" + m[0]];
55-
} else if (false) {
56-
te("pcny");
57-
}
58-
te("upc");
59-
// quote '"' indicates embedded JSON string
60-
case 0x22:
61-
var m;
62-
if (m = jsPat.exec(str.substr(off))) {
63-
try {
64-
// using JSON parsing directly here is bad, it kills our
65-
// portability. Can we safely use eval considering we know
66-
// this is a value enclosed in quotes?
67-
return [off + m[0].length, toks.str, jsonParse(m[0])];
68-
} catch(e) {
69-
te("ijs");
70-
}
71-
}
72-
te("ujs");
73-
default:
74-
var m;
75-
var ss = str.substr(off);
76-
// test for types
77-
if (m = tPat.exec(ss)) {
78-
return [off + m[0].length, toks.typ, m[0]];
79-
}
80-
// test for idents
81-
else if (m = iPat.exec(ss)) {
82-
return [off + m[0].length, toks.str, m[0].replace(/\\([^\r\n\f0-9a-fA-F])/g,"$1")];
83-
}
84-
te("uc");
85-
}
86-
}
36+
if (!off) off = 0;
37+
var m = pat.exec(str.substr(off));
38+
if (!m) return undefined;
39+
off+=m[0].length;
40+
if (m[1]) return [off, " "];
41+
if (m[2]) return [off, m[0]];
42+
else if (m[3]) return [off, toks.typ, m[0]];
43+
else if (m[4]) return [off, toks.psc, m[0]];
44+
else if (m[5]) te("upc");
45+
else if (m[6]) return [off, toks.str, jsonParse(m[0])];
46+
else if (m[7]) te("ujs");
47+
else if (m[8]) return [off, toks.str, m[0].replace(/\\([^\r\n\f0-9a-fA-F])/g,"$1")];
8748
};
8849

8950
// THE PARSER

src/test/lex_test.html

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,6 @@ <h2> Tests of the JSONSelect lexer </h2>
2020
<pre class="doctest">
2121
$ JSONSelect._lex(">");
2222
[1, ">"]
23-
$ JSONSelect._lex("~");
24-
[1, "~"]
2523
$ JSONSelect._lex("*");
2624
[1, "*"]
2725
$ JSONSelect._lex(",");
@@ -93,7 +91,7 @@ <h2> Tests of the JSONSelect lexer </h2>
9391
[13, 4, "foo bar baz"]
9492
$ JSONSelect._lex('"\\u0020"');
9593
[8, 4, " "]
96-
$ JSONSelect._lex('\"not terminated');E
94+
$ JSONSelect._lex('\"not terminated');
9795
Error: unclosed json string
9896
$ JSONSelect._lex('"invalid escape: \\y"');
9997
Error: invalid json string

0 commit comments

Comments
 (0)