|
|
//Tokenizer.js
|
|
|
/**
 * Tokenizer constructor.
 *
 * Initialises the state machine: it starts in the "TEXT" state with an empty
 * buffer, and both the read position and the start of the current section
 * at offset 0.
 *
 * @param {object} cbs - Callback object. The state handlers call methods on it
 *   (e.g. ontext, onopentagname, onopentagend, onattribend, onclosetag, onend)
 *   and write its `_attribname` / `_attribvalue` fields.
 */
function Tokenizer(cbs) {
    Object.assign(this, {
        // Name of the prototype method that handles the next character.
        _state: "TEXT",
        // Everything passed to parse() so far.
        _buffer: "",
        // Offset in _buffer where the token currently being collected begins.
        _sectionStart: 0,
        // Offset in _buffer of the character currently being processed.
        _index: 0,
        // Receiver of all token events.
        _cbs: cbs,
    });
}
|
|
|
/**
 * TEXT state handler.
 *
 * Instead of walking the text one character at a time, this jumps directly to
 * the next "<" in the buffer. The text collected before that "<" is reported
 * through `ontext`, and the machine switches to "BeforeTag" with the section
 * restarted at the "<".
 *
 * @param {string} c - Current character. Unused: the handler scans the buffer itself.
 */
Tokenizer.prototype.TEXT = function(c) {
    const tagStart = this._buffer.indexOf("<", this._index);

    if (tagStart === -1) {
        // No tag opener left: consume the remainder of the buffer. The pending
        // text stays in the current section and is flushed by parse().
        this._index = this._buffer.length;
        return;
    }

    this._index = tagStart;

    // Only report text that actually exists. Without this check a "<" sitting
    // right at the section start (e.g. "<a><b>") produced an ontext("") event.
    if (this._index > this._sectionStart) {
        this._cbs.ontext(this._getSection());
    }

    this._state = "BeforeTag";
    // Keep the "<" as section start so that BeforeTag can still turn it into text.
    this._sectionStart = this._index;
};
|
|
|
/**
 * BeforeTag state handler: decides what kind of construct follows a "<".
 *
 * - "/"  -> closing tag ("BeforeCloseTag")
 * - "!"  -> declaration / comment / CDATA ("BeforeDeclaration")
 * - "?"  -> processing instruction, skipped up to and including the next ">"
 * - ">"  -> back to "TEXT" without moving the section start, so "<>" stays text
 * - "<"  -> the previous "<" is reported as text and this one becomes the new candidate
 * - whitespace -> back to "TEXT" (the "<" stays part of the text section)
 * - anything else -> start of a tag name ("InTag")
 *
 * @param {string} c - Character following the "<".
 */
Tokenizer.prototype.BeforeTag = function(c) {
    switch (c) {
        case "/":
            this._state = "BeforeCloseTag";
            break;

        case "!":
            this._state = "BeforeDeclaration";
            break;

        // Braces give the lexical declaration its own scope instead of sharing
        // the whole switch body.
        case "?": {
            const close = this._buffer.indexOf(">", this._index);

            if (close !== -1) {
                this._index = close;
                this._sectionStart = close + 1;
            } else {
                // Unterminated instruction: drop the rest of the buffer. Stop on
                // the last character so the increment in parse() lands exactly on
                // the buffer end; previously _index overshot _sectionStart by one
                // and parse() flushed an empty text.
                this._sectionStart = this._buffer.length;
                this._index = this._buffer.length - 1;
            }

            this._state = "TEXT";
            break;
        }

        case ">":
            this._state = "TEXT";
            break;

        case "<":
            // The earlier "<" was plain text after all.
            this._cbs.ontext(this._getSection());
            this._sectionStart = this._index;
            break;

        default:
            if (/\s/.test(c)) {
                this._state = "TEXT";
            } else {
                this._state = "InTag";
                this._sectionStart = this._index;
            }
    }
};
|
|
|
/**
 * InTag state handler: collects an opening tag's name.
 *
 * The name ends at "/", ">" or whitespace. It is then reported through
 * `onopentagname` and the machine moves to "BeforeAttrsName".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InTag = function(c) {
    const nameEnded = c === "/" || c === ">" || /\s/.test(c);

    if (!nameEnded) {
        return;
    }

    this._cbs.onopentagname(this._getSection());
    this._state = "BeforeAttrsName";
    // Step back so the terminating character is handled again by the new state.
    this._index--;
};
|
|
|
/**
 * BeforeAttrsName state handler: runs between the tag name (or a finished
 * attribute) and whatever comes next inside an opening tag.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeAttrsName = function(c) {
    if (c === ">") {
        // End of the opening tag; text starts right after the ">".
        this._cbs.onopentagend();
        this._state = "TEXT";
        this._sectionStart = this._index + 1;
        return;
    }

    if (c === "/") {
        this._state = "InSelfCloseTag";
        return;
    }

    // Whitespace between attributes is skipped.
    if (/\s/.test(c)) {
        return;
    }

    // First character of an attribute name.
    this._state = "InAttrsName";
    this._sectionStart = this._index;
};
|
|
|
/**
 * InAttrsName state handler: collects an attribute name.
 *
 * The name ends at "=", "/", ">" or whitespace. Its lower-cased form is stored
 * in `_cbs._attribname` and the machine moves to "AfterAttrsName".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InAttrsName = function(c) {
    const nameEnded = c === "=" || c === "/" || c === ">" || /\s/.test(c);

    if (!nameEnded) {
        return;
    }

    this._cbs._attribname = this._getSection().toLowerCase();
    // No section is being collected until a value (or the next name) starts.
    this._sectionStart = -1;
    this._state = "AfterAttrsName";
    // Step back so the terminating character is handled again by the new state.
    this._index--;
};
|
|
|
/**
 * AfterAttrsName state handler: decides whether the attribute just named has
 * a value ("=") or is finished without one.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.AfterAttrsName = function(c) {
    if (c === "=") {
        this._state = "BeforeAttrsValue";
        return;
    }

    const tagIsEnding = c === "/" || c === ">";

    // Whitespace after the name neither ends the attribute nor starts a value.
    if (!tagIsEnding && /\s/.test(c)) {
        return;
    }

    // Any other character means the attribute has no value.
    this._cbs.onattribend();

    if (tagIsEnding) {
        this._state = "BeforeAttrsName";
        // Let BeforeAttrsName handle the "/" or ">" itself.
        this._index--;
    } else {
        // The character already belongs to the next attribute's name.
        this._state = "InAttrsName";
        this._sectionStart = this._index;
    }
};
|
|
|
/**
 * BeforeAttrsValue state handler: runs after the "=" of an attribute and picks
 * the quoting style of the value.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeAttrsValue = function(c) {
    switch (c) {
        case '"':
            this._state = "InAttrsValueDQ";
            // The value starts after the opening quote.
            this._sectionStart = this._index + 1;
            break;

        case "'":
            this._state = "InAttrsValueSQ";
            this._sectionStart = this._index + 1;
            break;

        default:
            // Whitespace between "=" and the value is skipped.
            if (!/\s/.test(c)) {
                this._state = "InAttrsValueNQ";
                this._sectionStart = this._index;
                // Re-run this character in the unquoted-value state (it may already be ">").
                this._index--;
            }
    }
};
|
|
|
/**
 * InAttrsValueDQ state handler: collects a double-quoted attribute value.
 *
 * On the closing `"` the collected section is appended to
 * `_cbs._attribvalue`, `onattribend` is called and the machine returns to
 * "BeforeAttrsName".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InAttrsValueDQ = function(c) {
    // Everything except the closing quote is part of the value.
    if (c !== '"') {
        return;
    }

    this._cbs._attribvalue += this._getSection();
    this._cbs.onattribend();
    this._state = "BeforeAttrsName";
};
|
|
|
/**
 * InAttrsValueSQ state handler: collects a single-quoted attribute value.
 *
 * On the closing `'` the collected section is appended to
 * `_cbs._attribvalue`, `onattribend` is called and the machine returns to
 * "BeforeAttrsName".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InAttrsValueSQ = function(c) {
    // Everything except the closing quote is part of the value.
    if (c !== "'") {
        return;
    }

    this._cbs._attribvalue += this._getSection();
    this._cbs.onattribend();
    this._state = "BeforeAttrsName";
};
|
|
|
/**
 * InAttrsValueNQ state handler: collects an unquoted attribute value.
 *
 * The value ends at whitespace or ">". It is then appended to
 * `_cbs._attribvalue`, `onattribend` is called and the machine returns to
 * "BeforeAttrsName".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InAttrsValueNQ = function(c) {
    const valueEnded = /\s/.test(c) || c === ">";

    if (!valueEnded) {
        return;
    }

    this._cbs._attribvalue += this._getSection();
    this._cbs.onattribend();
    this._state = "BeforeAttrsName";
    // Step back so the terminating character is handled again by the new state.
    this._index--;
};
|
|
|
/**
 * BeforeCloseTag state handler: runs right after "</".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeCloseTag = function(c) {
    // Whitespace between "</" and the name is skipped.
    if (/\s/.test(c)) {
        return;
    }

    if (c === ">") {
        // "</>" has no name: fall back to TEXT without moving the section start.
        this._state = "TEXT";
        return;
    }

    // First character of the closing tag's name.
    this._state = "InCloseTag";
    this._sectionStart = this._index;
};
|
|
|
/**
 * InCloseTag state handler: collects a closing tag's name.
 *
 * The name ends at ">" or whitespace. It is then reported through
 * `onclosetag` and the machine moves to "AfterCloseTag".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InCloseTag = function(c) {
    const nameEnded = c === ">" || /\s/.test(c);

    if (!nameEnded) {
        return;
    }

    this._cbs.onclosetag(this._getSection());
    this._state = "AfterCloseTag";
    // Step back so the terminating character is handled again by the new state.
    this._index--;
};
|
|
|
/**
 * InSelfCloseTag state handler: runs after a "/" inside an opening tag.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.InSelfCloseTag = function(c) {
    if (c === ">") {
        // "/>" ends the tag; text starts right after the ">".
        this._cbs.onopentagend();
        this._state = "TEXT";
        this._sectionStart = this._index + 1;
        return;
    }

    // Whitespace after the "/" is skipped.
    if (/\s/.test(c)) {
        return;
    }

    // The "/" was not the end of the tag: hand this character back to
    // BeforeAttrsName so it is processed there.
    this._state = "BeforeAttrsName";
    this._index--;
};
|
|
|
/**
 * AfterCloseTag state handler: ignores everything after a closing tag's name
 * until the ">" that ends the tag.
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.AfterCloseTag = function(c) {
    if (c !== ">") {
        return;
    }

    // Text starts right after the ">".
    this._sectionStart = this._index + 1;
    this._state = "TEXT";
};
|
|
|
/**
 * BeforeDeclaration state handler: runs on the character after "<!".
 *
 * - "-" -> "InComment"
 * - "[" -> "BeforeCDATA1" (possible "<![CDATA[" section)
 * - anything else -> "InDeclaration"
 *
 * @param {string} c - Character following "<!".
 */
Tokenizer.prototype.BeforeDeclaration = function(c) {
    switch (c) {
        case "-":
            this._state = "InComment";
            break;

        case "[":
            this._state = "BeforeCDATA1";
            break;

        default:
            this._state = "InDeclaration";
            // Step back so this same character is handed to InDeclaration, which
            // searches for ">" from the current index. Without this a ">" seen
            // here ("<!>") was skipped and the declaration ran on to a later ">".
            this._index--;
    }
};
|
|
|
/**
 * InDeclaration state handler: skips a "<!...>" declaration.
 *
 * Jumps to the next ">" in the buffer, restarts the section right after it
 * and returns to "TEXT". Nothing is reported to the callbacks.
 *
 * @param {string} c - Current character. Unused: the handler scans the buffer itself.
 */
Tokenizer.prototype.InDeclaration = function(c) {
    const bufferEnd = this._buffer.length;
    const close = this._buffer.indexOf(">", this._index);

    if (close !== -1) {
        this._index = close;
        this._sectionStart = close + 1;
    } else {
        // Unterminated declaration: drop the rest of the buffer. Stop on the last
        // character so the increment in parse() lands exactly on the buffer end;
        // previously _index overshot _sectionStart by one and parse() flushed an
        // empty text.
        this._sectionStart = bufferEnd;
        this._index = bufferEnd - 1;
    }

    this._state = "TEXT";
};
|
|
|
/**
 * InComment state handler: skips a comment-like construct that began with "<!-".
 *
 * If the current character is a second "-", the construct is a real comment
 * and ends at the next "-->". Otherwise it ends at the next ">". The section
 * is restarted right after the terminator and the machine returns to "TEXT".
 * Nothing is reported to the callbacks.
 *
 * @param {string} c - Character following "<!-".
 */
Tokenizer.prototype.InComment = function(c) {
    const terminator = c === "-" ? "-->" : ">";
    const found = this._buffer.indexOf(terminator, this._index);

    if (found !== -1) {
        // Land on the last character of the terminator.
        this._index = found + terminator.length - 1;
        this._sectionStart = this._index + 1;
    } else {
        // Unterminated comment: drop the rest of the buffer. Stop on the last
        // character so the increment in parse() lands exactly on the buffer end;
        // previously _index overshot _sectionStart by one and parse() flushed an
        // empty text.
        this._sectionStart = this._buffer.length;
        this._index = this._buffer.length - 1;
    }

    this._state = "TEXT";
};
|
|
|
/**
 * BeforeCDATA1 state handler: after "<![", expects the "C" of "CDATA".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeCDATA1 = function(c) {
    if (c === "C") {
        this._state = "BeforeCDATA2";
        return;
    }

    // Not a CDATA section: treat it as a declaration. Step back so this same
    // character is handed to InDeclaration, which searches for ">" from the
    // current index; otherwise a ">" seen here would be skipped.
    this._state = "InDeclaration";
    this._index--;
};
|
|
|
/**
 * BeforeCDATA2 state handler: after "<![C", expects the "D" of "CDATA".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeCDATA2 = function(c) {
    if (c === "D") {
        this._state = "BeforeCDATA3";
        return;
    }

    // Not a CDATA section: treat it as a declaration. Step back so this same
    // character is handed to InDeclaration, which searches for ">" from the
    // current index; otherwise a ">" seen here would be skipped.
    this._state = "InDeclaration";
    this._index--;
};
|
|
|
/**
 * BeforeCDATA3 state handler: after "<![CD", expects the first "A" of "CDATA".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeCDATA3 = function(c) {
    if (c === "A") {
        this._state = "BeforeCDATA4";
        return;
    }

    // Not a CDATA section: treat it as a declaration. Step back so this same
    // character is handed to InDeclaration, which searches for ">" from the
    // current index; otherwise a ">" seen here would be skipped.
    this._state = "InDeclaration";
    this._index--;
};
|
|
|
/**
 * BeforeCDATA4 state handler: after "<![CDA", expects the "T" of "CDATA".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeCDATA4 = function(c) {
    if (c === "T") {
        this._state = "BeforeCDATA5";
        return;
    }

    // Not a CDATA section: treat it as a declaration. Step back so this same
    // character is handed to InDeclaration, which searches for ">" from the
    // current index; otherwise a ">" seen here would be skipped.
    this._state = "InDeclaration";
    this._index--;
};
|
|
|
/**
 * BeforeCDATA5 state handler: after "<![CDAT", expects the final "A" of "CDATA".
 *
 * @param {string} c - Current character.
 */
Tokenizer.prototype.BeforeCDATA5 = function(c) {
    if (c === "A") {
        this._state = "InCDATA";
        return;
    }

    // Not a CDATA section: treat it as a declaration. Step back so this same
    // character is handed to InDeclaration, which searches for ">" from the
    // current index; otherwise a ">" seen here would be skipped.
    this._state = "InDeclaration";
    this._index--;
};
|
|
|
/**
 * InCDATA state handler: skips a construct that began with "<![CDATA".
 *
 * If the current character is "[", the construct is a CDATA section and ends
 * at the next "]]>". Otherwise it ends at the next ">". The section is
 * restarted right after the terminator and the machine returns to "TEXT".
 * Nothing is reported to the callbacks.
 *
 * @param {string} c - Character following "<![CDATA".
 */
Tokenizer.prototype.InCDATA = function(c) {
    const terminator = c === "[" ? "]]>" : ">";
    const found = this._buffer.indexOf(terminator, this._index);

    if (found !== -1) {
        // Land on the last character of the terminator.
        this._index = found + terminator.length - 1;
        this._sectionStart = this._index + 1;
    } else {
        // Unterminated section: drop the rest of the buffer. Stop on the last
        // character so the increment in parse() lands exactly on the buffer end;
        // previously _index overshot _sectionStart by one and parse() flushed an
        // empty text.
        this._sectionStart = this._buffer.length;
        this._index = this._buffer.length - 1;
    }

    this._state = "TEXT";
};
|
|
|
/**
 * Appends `chunk` to the buffer and runs the state machine over every
 * unprocessed character.
 *
 * Each character is dispatched to the prototype method named by `_state`;
 * handlers may move `_index` themselves (forward to skip, back by one to have
 * a character re-processed). After the loop, text still pending in the "TEXT"
 * state is flushed through `ontext`, and `onend` is called. Note that `onend`
 * fires on every call to this method.
 *
 * @param {string} chunk - Markup to tokenize.
 */
Tokenizer.prototype.parse = function(chunk) {
    this._buffer += chunk;

    for (; this._index < this._buffer.length; this._index++) {
        this[this._state](this._buffer[this._index]);
    }

    // Flush trailing text only when there is some. Handlers that skip to the end
    // of the buffer can leave _index one past _sectionStart with nothing left to
    // report; comparing the two positions then produced an ontext("") event.
    if (this._state === "TEXT" && this._sectionStart < this._buffer.length) {
        this._cbs.ontext(this._buffer.slice(this._sectionStart));
    }

    this._cbs.onend();
};
|
|
|
/**
 * Returns the part of the buffer collected for the current token.
 *
 * @returns {string} The buffer contents from `_sectionStart` up to, but not
 *   including, `_index`.
 */
Tokenizer.prototype._getSection = function() {
    const { _buffer: source, _sectionStart: from, _index: to } = this;

    return source.substring(from, to);
};
|
|
|
// Expose the Tokenizer constructor as this CommonJS module's export.
module.exports = Tokenizer;
|
|
\ No newline at end of file |
...
|
...
|
|