Token: Rewrite using classes, initial range implementation
This commit is contained in:
parent
fea3ba16a9
commit
cba7f2b58f
3 changed files with 383 additions and 134 deletions
|
@ -47,7 +47,7 @@ export class Highlighter {
|
||||||
}
|
}
|
||||||
case State.Plain: {
|
case State.Plain: {
|
||||||
switch (this.consumeNextTokenType()) {
|
switch (this.consumeNextTokenType()) {
|
||||||
case Type.Character: this.currentNode.content += this.currentTokenOfType(Type.Character).data; break;
|
case Type.Character: this.currentNode.content += this.currentTokenOfType(CharacterToken).data; break;
|
||||||
default:
|
default:
|
||||||
this.emitNode(this.currentNode);
|
this.emitNode(this.currentNode);
|
||||||
this.reconsumeIn(State.Undefined);
|
this.reconsumeIn(State.Undefined);
|
||||||
|
@ -56,15 +56,15 @@ export class Highlighter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State.StartTag: {
|
case State.StartTag: {
|
||||||
switch (this.consumeNextTokenOfType(Type.StartTag).name) {
|
switch (this.consumeNextTokenOfType(StartTagToken).name) {
|
||||||
case 'script': this.returnState = State.BeforeScript; break;
|
case 'script': this.returnState = State.BeforeScript; break;
|
||||||
default: this.returnState = State.Undefined; break;
|
default: this.returnState = State.Undefined; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: `<` });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: `<` });
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: this.currentTokenOfType(Type.StartTag).name });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: this.currentTokenOfType(StartTagToken).name });
|
||||||
|
|
||||||
if (this.currentTokenOfType(Type.StartTag).attributes.nonEmpty()) {
|
if (this.currentTokenOfType(StartTagToken).attributes.nonEmpty()) {
|
||||||
this.emitSpace({ line: 0, character: 0 });
|
this.emitSpace({ line: 0, character: 0 });
|
||||||
this.reconsumeIn(State.Attributes);
|
this.reconsumeIn(State.Attributes);
|
||||||
}
|
}
|
||||||
|
@ -77,7 +77,7 @@ export class Highlighter {
|
||||||
}
|
}
|
||||||
case State.EndTag: {
|
case State.EndTag: {
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '</' });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '</' });
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: this.consumeNextTokenOfType(Type.EndTag).name });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: this.consumeNextTokenOfType(EndTagToken).name });
|
||||||
|
|
||||||
this.reconsumeIn(State.AfterAttributes);
|
this.reconsumeIn(State.AfterAttributes);
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ export class Highlighter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State.Attributes: {
|
case State.Attributes: {
|
||||||
const attributes = this.consumeNextTokenOfEitherType(Type.StartTag, Type.EndTag).attributes.list;
|
const attributes = this.consumeNextTokenOfEitherType(StartTagToken, EndTagToken).attributes.list;
|
||||||
|
|
||||||
for (let i = 0; i < attributes.length; i++) {
|
for (let i = 0; i < attributes.length; i++) {
|
||||||
const attribute = attributes[i];
|
const attribute = attributes[i];
|
||||||
|
@ -103,12 +103,13 @@ export class Highlighter {
|
||||||
case State.AfterAttributes: {
|
case State.AfterAttributes: {
|
||||||
switch (this.consumeNextTokenType()) {
|
switch (this.consumeNextTokenType()) {
|
||||||
case Type.StartTag:
|
case Type.StartTag:
|
||||||
if (this.currentTokenOfType(Type.StartTag).selfClosing === undefined) {
|
// FIXME: StartTagToken does not support selfClosing as of now
|
||||||
|
// if (this.currentTokenOfType(StartTagToken).selfClosing === undefined) {
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
|
||||||
} else {
|
// } else {
|
||||||
this.emitSpace({ line: 0, character: 0 });
|
// this.emitSpace({ line: 0, character: 0 });
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '/>' });
|
// this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '/>' });
|
||||||
}
|
// }
|
||||||
break;
|
break;
|
||||||
case Type.EndTag:
|
case Type.EndTag:
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
|
||||||
|
@ -132,7 +133,7 @@ export class Highlighter {
|
||||||
}
|
}
|
||||||
case State.Script: {
|
case State.Script: {
|
||||||
switch (this.consumeNextTokenType()) {
|
switch (this.consumeNextTokenType()) {
|
||||||
case Type.Character: this.currentNode.content += this.currentTokenOfType(Type.Character).data; break;
|
case Type.Character: this.currentNode.content += this.currentTokenOfType(CharacterToken).data; break;
|
||||||
default:
|
default:
|
||||||
this.emitNode(this.currentNode);
|
this.emitNode(this.currentNode);
|
||||||
this.reconsumeIn(State.Undefined);
|
this.reconsumeIn(State.Undefined);
|
||||||
|
@ -141,7 +142,7 @@ export class Highlighter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State.DOCTYPE: {
|
case State.DOCTYPE: {
|
||||||
const doctype = this.consumeNextTokenOfType(Type.DOCTYPE);
|
const doctype = this.consumeNextTokenOfType(DOCTYPEToken);
|
||||||
|
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '<!' });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '<!' });
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: 'DOCTYPE' });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: 'DOCTYPE' });
|
||||||
|
@ -157,7 +158,7 @@ export class Highlighter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case State.Comment:
|
case State.Comment:
|
||||||
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Comment, content: `<!--${this.consumeNextTokenOfType(Type.Comment).data}-->` });
|
this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Comment, content: `<!--${this.consumeNextTokenOfType(CommentToken).data}-->` });
|
||||||
|
|
||||||
this.state = State.Undefined;
|
this.state = State.Undefined;
|
||||||
break;
|
break;
|
||||||
|
@ -177,24 +178,24 @@ export class Highlighter {
|
||||||
return this.currentNode = node;
|
return this.currentNode = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
private consumeNextTokenOfType<T extends Type>(type: T): Token & { type: T } {
|
private consumeNextTokenOfType<T extends Token>(type: Constructor<T>): T {
|
||||||
this.currentToken = this.tokens[this.pointer];
|
this.currentToken = this.tokens[this.pointer];
|
||||||
|
|
||||||
VERIFY(this.currentToken.type === type, `Expected '${type}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
this.pointer++;
|
this.pointer++;
|
||||||
|
|
||||||
return this.currentToken as Token & { type: T };
|
return this.currentToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private consumeNextTokenOfEitherType<T extends Type, U extends Type>(a: T, b: U): Token & { type: T | U } {
|
private consumeNextTokenOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
|
||||||
this.currentToken = this.tokens[this.pointer];
|
this.currentToken = this.tokens[this.pointer];
|
||||||
|
|
||||||
VERIFY(this.currentToken.type === a || this.currentToken.type === b, `Expected '${a}' or '${b}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
this.pointer++;
|
this.pointer++;
|
||||||
|
|
||||||
return this.currentToken as Token & { type: T };
|
return this.currentToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private consumeNextTokenType(): Type {
|
private consumeNextTokenType(): Type {
|
||||||
|
@ -211,16 +212,16 @@ export class Highlighter {
|
||||||
return this.currentToken;
|
return this.currentToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private currentTokenOfType<T extends Type>(type: T): Token & { type: T } {
|
private currentTokenOfType<T extends Token>(type: Constructor<T>): T {
|
||||||
VERIFY(this.currentToken.type === type, `Expected '${type}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
return this.currentToken as Token & { type: T };
|
return this.currentToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private currentTokenOfEitherType<T extends Type, U extends Type>(a: T, b: U): Token & { type: T | U } {
|
private currentTokenOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
|
||||||
VERIFY(this.currentToken.type === a || this.currentToken.type === b, `Expected '${a}' or '${b}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
return this.currentToken as Token & { type: T };
|
return this.currentToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private reconsumeIn(state: State): void {
|
private reconsumeIn(state: State): void {
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
import { TODO, VERIFY } from "../util/assertions.js";
|
import { TODO, VERIFY, VERIFY_NOT_REACHED } from "../util/assertions.js";
|
||||||
|
import { Constructor } from "../util/guards.js";
|
||||||
import { ParseError } from "./errors.js";
|
import { ParseError } from "./errors.js";
|
||||||
import { entities } from "./tokenizer/entities.js";
|
import { entities } from "./tokenizer/entities.js";
|
||||||
import { State } from "./tokenizer/state.js";
|
import { State } from "./tokenizer/state.js";
|
||||||
import { AttributeList, Token, Type } from "./tokenizer/token.js";
|
import { Attribute, CharacterToken, CommentToken, DOCTYPEToken, EndOfFileToken, EndTagToken, Position, StartTagToken, Token } from "./tokenizer/token.js";
|
||||||
|
|
||||||
export class Tokenizer {
|
export class Tokenizer {
|
||||||
private state: State = State.Data;
|
private state: State = State.Data;
|
||||||
|
@ -13,6 +14,8 @@ export class Tokenizer {
|
||||||
private currentToken!: Token;
|
private currentToken!: Token;
|
||||||
private currentInputCharacter!: string;
|
private currentInputCharacter!: string;
|
||||||
|
|
||||||
|
private currentPosition: Position = { line: 0, column: 0, index: 0 };
|
||||||
|
|
||||||
public tokens: Array<Token> = new Array<Token>();
|
public tokens: Array<Token> = new Array<Token>();
|
||||||
private pointer: number = 0;
|
private pointer: number = 0;
|
||||||
|
|
||||||
|
@ -30,10 +33,10 @@ export class Tokenizer {
|
||||||
case '\u003C': this.state = State.TagOpen; break;
|
case '\u003C': this.state = State.TagOpen; break;
|
||||||
case '\u0000':
|
case '\u0000':
|
||||||
this.parseError('unexpected-null-character');
|
this.parseError('unexpected-null-character');
|
||||||
this.emit({ type: Type.Character, data: this.currentInputCharacter });
|
this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
|
||||||
break;
|
break;
|
||||||
case undefined: this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.emit({ type: Type.Character, data: this.currentInputCharacter });
|
default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -41,9 +44,9 @@ export class Tokenizer {
|
||||||
case State.RCDATA: {
|
case State.RCDATA: {
|
||||||
switch (this.consumeNext()) {
|
switch (this.consumeNext()) {
|
||||||
case '\u003C': this.state = State.RAWTEXTLessThan; break;
|
case '\u003C': this.state = State.RAWTEXTLessThan; break;
|
||||||
case '\u0000': this.parseError('unexpected-null-character'); this.emit({ type: Type.Character, data: '\uFFFD' }); break;
|
case '\u0000': this.parseError('unexpected-null-character'); this.emit(CharacterToken.createReplacementCharacter().at(this.currentPosition)); break;
|
||||||
case undefined: this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.emit({ type: Type.Character, data: this.currentInputCharacter });
|
default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -54,23 +57,23 @@ export class Tokenizer {
|
||||||
case '\u002F': this.state = State.EndTagOpen; break;
|
case '\u002F': this.state = State.EndTagOpen; break;
|
||||||
case '\u003F':
|
case '\u003F':
|
||||||
this.parseError('unexpected-question-mark-instead-of-tag-name');
|
this.parseError('unexpected-question-mark-instead-of-tag-name');
|
||||||
this.create({ type: Type.Comment, data: '' });
|
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
|
||||||
this.reconsumeIn(State.BogusComment);
|
this.reconsumeIn(State.BogusComment);
|
||||||
break;
|
break;
|
||||||
case undefined:
|
case undefined:
|
||||||
this.parseError('eof-before-tag-name');
|
this.parseError('eof-before-tag-name');
|
||||||
this.emit({ type: Type.Character, data: '\u003C' });
|
this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
|
||||||
this.emit({ type: Type.EndOfFile });
|
this.emit(EndOfFileToken.create());
|
||||||
break;
|
break;
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiAlpha(this.currentInputCharacter)) {
|
if (this.asciiAlpha(this.currentInputCharacter)) {
|
||||||
this.create({ type: Type.StartTag, name: '', attributes: new AttributeList() });
|
this.create(StartTagToken.createEmpty().startingAt(this.currentPosition));
|
||||||
this.reconsumeIn(State.TagName);
|
this.reconsumeIn(State.TagName);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.parseError('invalid-first-character-of-tag-name');
|
this.parseError('invalid-first-character-of-tag-name');
|
||||||
this.emit({ type: Type.Character, data: '\u003C' });
|
this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
|
||||||
this.reconsumeIn(State.Data);
|
this.reconsumeIn(State.Data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -82,19 +85,19 @@ export class Tokenizer {
|
||||||
case '\u003E': this.parseError('missing-end-tag-name'); this.state = State.Data; break;
|
case '\u003E': this.parseError('missing-end-tag-name'); this.state = State.Data; break;
|
||||||
case undefined:
|
case undefined:
|
||||||
this.parseError('eof-before-tag-name');
|
this.parseError('eof-before-tag-name');
|
||||||
this.emit({ type: Type.Character, data: '\u003C' });
|
this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
|
||||||
this.emit({ type: Type.Character, data: '\u002F' });
|
this.emit(CharacterToken.createWith('\u002F').at(this.currentPosition));
|
||||||
this.emit({ type: Type.EndOfFile });
|
this.emit(EndOfFileToken.create());
|
||||||
break;
|
break;
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiAlpha(this.currentInputCharacter)) {
|
if (this.asciiAlpha(this.currentInputCharacter)) {
|
||||||
this.create({ type: Type.EndTag, name: '', attributes: new AttributeList() });
|
this.create(EndTagToken.createEmpty().startingAt(this.currentPosition));
|
||||||
this.reconsumeIn(State.TagName);
|
this.reconsumeIn(State.TagName);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.parseError('invalid-first-character-of-tag-name');
|
this.parseError('invalid-first-character-of-tag-name');
|
||||||
this.create({ type: Type.Comment, data: '' });
|
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
|
||||||
this.reconsumeIn(State.BogusComment);
|
this.reconsumeIn(State.BogusComment);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -104,7 +107,7 @@ export class Tokenizer {
|
||||||
case State.MarkupDeclarationOpen: {
|
case State.MarkupDeclarationOpen: {
|
||||||
if (this.matchNextFew('--')) {
|
if (this.matchNextFew('--')) {
|
||||||
this.consumeNextFew('--');
|
this.consumeNextFew('--');
|
||||||
this.create({ type: Type.Comment, data: '' });
|
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
|
||||||
this.state = State.CommentStart;
|
this.state = State.CommentStart;
|
||||||
} else if (this.matchNextFewCaseInsensitive('DOCTYPE')) {
|
} else if (this.matchNextFewCaseInsensitive('DOCTYPE')) {
|
||||||
this.consumeNextFewCaseInsensitive('DOCTYPE');
|
this.consumeNextFewCaseInsensitive('DOCTYPE');
|
||||||
|
@ -114,11 +117,11 @@ export class Tokenizer {
|
||||||
// NOTE: This parser will never be generated as part of the fragment parsing algorithm, as such the CDATA section state does not
|
// NOTE: This parser will never be generated as part of the fragment parsing algorithm, as such the CDATA section state does not
|
||||||
// exist and will not be started here.
|
// exist and will not be started here.
|
||||||
this.parseError('cdata-in-html-content');
|
this.parseError('cdata-in-html-content');
|
||||||
this.create({ type: Type.Comment, data: '[CDATA[' });
|
this.create(CommentToken.createWith('[CDATA[').startingAt(this.currentPosition));
|
||||||
this.state = State.BogusComment;
|
this.state = State.BogusComment;
|
||||||
} else {
|
} else {
|
||||||
this.parseError('incorrectly-opened-comment');
|
this.parseError('incorrectly-opened-comment');
|
||||||
this.create({ type: Type.Comment, data: '' });
|
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
|
||||||
this.state = State.BogusComment;
|
this.state = State.BogusComment;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,8 +136,8 @@ export class Tokenizer {
|
||||||
case '\u003E': this.reconsumeIn(State.BeforeDOCTYPEName); break;
|
case '\u003E': this.reconsumeIn(State.BeforeDOCTYPEName); break;
|
||||||
case undefined:
|
case undefined:
|
||||||
this.parseError('eof-in-doctype');
|
this.parseError('eof-in-doctype');
|
||||||
this.emit({ type: Type.DOCTYPE, forceQuirks: true });
|
this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
|
||||||
this.emit({ type: Type.EndOfFile });
|
this.emit(EndOfFileToken.create());
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
this.parseError('missing-whitespace-before-doctype-name');
|
this.parseError('missing-whitespace-before-doctype-name');
|
||||||
|
@ -151,22 +154,22 @@ export class Tokenizer {
|
||||||
case '\u0020': break;
|
case '\u0020': break;
|
||||||
case '\u0000':
|
case '\u0000':
|
||||||
this.parseError('unexpected-null-character');
|
this.parseError('unexpected-null-character');
|
||||||
this.create({ type: Type.DOCTYPE, name: '\uFFFD' });
|
this.create(DOCTYPEToken.createWithName('\uFFFD').startingAt(this.currentPosition));
|
||||||
this.state = State.DOCTYPEName;
|
this.state = State.DOCTYPEName;
|
||||||
break;
|
break;
|
||||||
case undefined:
|
case undefined:
|
||||||
this.parseError('eof-in-doctype');
|
this.parseError('eof-in-doctype');
|
||||||
this.emit({ type: Type.DOCTYPE, forceQuirks: true });
|
this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
|
||||||
this.emit({ type: Type.EndOfFile });
|
this.emit(EndOfFileToken.create());
|
||||||
break;
|
break;
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||||
this.create({ type: Type.DOCTYPE, name: this.currentInputCharacter.toLowerCase()});
|
this.create(DOCTYPEToken.createWithName(this.currentInputCharacter.toLowerCase()).startingAt(this.currentPosition));
|
||||||
this.state = State.DOCTYPEName;
|
this.state = State.DOCTYPEName;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.create({ type: Type.DOCTYPE, name: this.currentInputCharacter });
|
this.create(DOCTYPEToken.createWithName(this.currentInputCharacter).startingAt(this.currentPosition));
|
||||||
this.state = State.DOCTYPE;
|
this.state = State.DOCTYPE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,21 +182,21 @@ export class Tokenizer {
|
||||||
case '\u000A':
|
case '\u000A':
|
||||||
case '\u000C':
|
case '\u000C':
|
||||||
case '\u0020': this.state = State.AfterDOCTYPEName; break;
|
case '\u0020': this.state = State.AfterDOCTYPEName; break;
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfType(Type.DOCTYPE); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfType(DOCTYPEToken); break;
|
||||||
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(Type.DOCTYPE)!.name += '\uFFFD'; break;
|
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(DOCTYPEToken).appendReplacementCharacterToName(); break;
|
||||||
case undefined:
|
case undefined:
|
||||||
this.parseError('eof-in-doctype');
|
this.parseError('eof-in-doctype');
|
||||||
this.currentOfType(Type.DOCTYPE).forceQuirks = true;
|
this.currentOfType(DOCTYPEToken).forceQuirks = true;
|
||||||
this.emitCurrentOfType(Type.DOCTYPE);
|
this.emitCurrentOfType(DOCTYPEToken);
|
||||||
this.emit({ type: Type.EndOfFile });
|
this.emit(EndOfFileToken.create());
|
||||||
break;
|
break;
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||||
this.currentOfType(Type.DOCTYPE)!.name += this.currentInputCharacter.toLowerCase();
|
this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter.toLowerCase());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.currentOfType(Type.DOCTYPE)!.name += this.currentInputCharacter;
|
this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -206,19 +209,19 @@ export class Tokenizer {
|
||||||
case '\u000C':
|
case '\u000C':
|
||||||
case '\u0020': this.state = State.BeforeAttributeName; break;
|
case '\u0020': this.state = State.BeforeAttributeName; break;
|
||||||
case '\u002F': this.state = State.SelfClosingStartTag; break;
|
case '\u002F': this.state = State.SelfClosingStartTag; break;
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
|
||||||
case '\u0000':
|
case '\u0000':
|
||||||
this.parseError('unexpected-null-character');
|
this.parseError('unexpected-null-character');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).name += '\uFFFD';
|
this.currentOfEitherType(StartTagToken, EndTagToken).appendReplacementCharacterToName();
|
||||||
break;
|
break;
|
||||||
case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).name += this.currentInputCharacter.toLowerCase();
|
this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter.toLowerCase());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).name += this.currentInputCharacter;
|
this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,12 +238,12 @@ export class Tokenizer {
|
||||||
case undefined: this.reconsumeIn(State.AfterAttributeName); break;
|
case undefined: this.reconsumeIn(State.AfterAttributeName); break;
|
||||||
case '\u003D': {
|
case '\u003D': {
|
||||||
this.parseError('unexpected-equals-sign-before-attribute-name');
|
this.parseError('unexpected-equals-sign-before-attribute-name');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.append({ name: this.currentInputCharacter, value: '' });
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyValue(this.currentInputCharacter));
|
||||||
this.state = State.AttributeName;
|
this.state = State.AttributeName;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.append({ name: '', value: '' });
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue());
|
||||||
this.reconsumeIn(State.AttributeName);
|
this.reconsumeIn(State.AttributeName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -258,21 +261,21 @@ export class Tokenizer {
|
||||||
case undefined: this.reconsumeIn(State.AfterAttributeName); break;
|
case undefined: this.reconsumeIn(State.AfterAttributeName); break;
|
||||||
case '\u003D': this.state = State.BeforeAttributeValue; break;
|
case '\u003D': this.state = State.BeforeAttributeValue; break;
|
||||||
case '\u0000': this.parseError('unexpected-null-character');
|
case '\u0000': this.parseError('unexpected-null-character');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.name += '\uFFFD';
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToName();
|
||||||
break;
|
break;
|
||||||
case '\u0022':
|
case '\u0022':
|
||||||
case '\u0027':
|
case '\u0027':
|
||||||
case '\u003C':
|
case '\u003C':
|
||||||
this.parseError('unexpected-character-in-attribute-name');
|
this.parseError('unexpected-character-in-attribute-name');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.name += this.currentInputCharacter;
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
|
||||||
break;
|
break;
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.name += this.currentInputCharacter.toLowerCase();
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter.toLowerCase());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.name += this.currentInputCharacter;
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,10 +289,10 @@ export class Tokenizer {
|
||||||
case '\u0020': break;
|
case '\u0020': break;
|
||||||
case '\u002F': this.state = State.SelfClosingStartTag; break;
|
case '\u002F': this.state = State.SelfClosingStartTag; break;
|
||||||
case '\u003D': this.state = State.BeforeAttributeValue; break;
|
case '\u003D': this.state = State.BeforeAttributeValue; break;
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
|
||||||
case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
|
||||||
default:
|
default:
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.append({ name: '', value: '' });
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue());
|
||||||
this.reconsumeIn(State.AttributeName);
|
this.reconsumeIn(State.AttributeName);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -307,7 +310,7 @@ export class Tokenizer {
|
||||||
case '\u003E':
|
case '\u003E':
|
||||||
this.parseError('missing-attribute-value');
|
this.parseError('missing-attribute-value');
|
||||||
this.state = State.Data;
|
this.state = State.Data;
|
||||||
this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag);
|
this.emitCurrentOfEitherType(StartTagToken, EndTagToken);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
this.reconsumeIn(State.AttributeValueUnquoted);
|
this.reconsumeIn(State.AttributeValueUnquoted);
|
||||||
|
@ -321,10 +324,10 @@ export class Tokenizer {
|
||||||
case '\u0026': this.returnState = State.AttributeValueDouble; this.state = State.CharacterReference; break;
|
case '\u0026': this.returnState = State.AttributeValueDouble; this.state = State.CharacterReference; break;
|
||||||
case '\u0000':
|
case '\u0000':
|
||||||
this.parseError('unexpected-null-character');
|
this.parseError('unexpected-null-character');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += '\uFFFD';
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
|
||||||
break;
|
break;
|
||||||
case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += this.currentInputCharacter;
|
default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -335,10 +338,10 @@ export class Tokenizer {
|
||||||
case '\u0026': this.returnState = State.AttributeValueSingle; this.state = State.CharacterReference; break;
|
case '\u0026': this.returnState = State.AttributeValueSingle; this.state = State.CharacterReference; break;
|
||||||
case '\u0000':
|
case '\u0000':
|
||||||
this.parseError('unexpected-null-character');
|
this.parseError('unexpected-null-character');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += '\uFFFD';
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
|
||||||
break;
|
break;
|
||||||
case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += this.currentInputCharacter;
|
default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -350,10 +353,10 @@ export class Tokenizer {
|
||||||
case '\u000C':
|
case '\u000C':
|
||||||
case '\u0020': this.state = State.BeforeAttributeName; break;
|
case '\u0020': this.state = State.BeforeAttributeName; break;
|
||||||
case '\u0026': this.returnState = State.AttributeValueUnquoted; this.state = State.CharacterReference; break;
|
case '\u0026': this.returnState = State.AttributeValueUnquoted; this.state = State.CharacterReference; break;
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
|
||||||
case '\u0000':
|
case '\u0000':
|
||||||
this.parseError('unexpected-null-character');
|
this.parseError('unexpected-null-character');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += '\uFFFD';
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
|
||||||
break;
|
break;
|
||||||
case '\u0022':
|
case '\u0022':
|
||||||
case '\u0027':
|
case '\u0027':
|
||||||
|
@ -361,10 +364,10 @@ export class Tokenizer {
|
||||||
case '\u003D':
|
case '\u003D':
|
||||||
case '\u0060':
|
case '\u0060':
|
||||||
this.parseError('unexpected-character-in-unquoted-attribute-value');
|
this.parseError('unexpected-character-in-unquoted-attribute-value');
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += this.currentInputCharacter;
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
|
||||||
break;
|
break;
|
||||||
case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += this.currentInputCharacter;
|
default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -376,8 +379,8 @@ export class Tokenizer {
|
||||||
case '\u000C':
|
case '\u000C':
|
||||||
case '\u0020': this.state = State.BeforeAttributeName; break;
|
case '\u0020': this.state = State.BeforeAttributeName; break;
|
||||||
case '\u002F': this.state = State.SelfClosingStartTag; break;
|
case '\u002F': this.state = State.SelfClosingStartTag; break;
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
|
||||||
case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.parseError('missing-whitespace-between-attributes'); this.reconsumeIn(State.BeforeAttributeName);
|
default: this.parseError('missing-whitespace-between-attributes'); this.reconsumeIn(State.BeforeAttributeName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,7 +389,7 @@ export class Tokenizer {
|
||||||
case State.CommentStart: {
|
case State.CommentStart: {
|
||||||
switch (this.consumeNext()) {
|
switch (this.consumeNext()) {
|
||||||
case '\u002D': this.state = State.CommentStartDash; break;
|
case '\u002D': this.state = State.CommentStartDash; break;
|
||||||
case '\u003E': this.parseError('abrupt-closing-of-empty-comment'); this.state = State.Data; this.emitCurrentOfType(Type.Comment); break;
|
case '\u003E': this.parseError('abrupt-closing-of-empty-comment'); this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
|
||||||
default: this.reconsumeIn(State.Comment);
|
default: this.reconsumeIn(State.Comment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -395,11 +398,11 @@ export class Tokenizer {
|
||||||
// FIXME: Possible improvement to https://html.spec.whatwg.org/multipage/parsing.html#comment-state (adding **current** in some places)
|
// FIXME: Possible improvement to https://html.spec.whatwg.org/multipage/parsing.html#comment-state (adding **current** in some places)
|
||||||
case State.Comment: {
|
case State.Comment: {
|
||||||
switch (this.consumeNext()) {
|
switch (this.consumeNext()) {
|
||||||
case '\u003C': this.currentOfType(Type.Comment).data += this.currentInputCharacter; this.state = State.CommentLessThanSign; break;
|
case '\u003C': this.currentOfType(CommentToken).append(this.currentInputCharacter); this.state = State.CommentLessThanSign; break;
|
||||||
case '\u002D': this.state = State.CommentEndDash; break;
|
case '\u002D': this.state = State.CommentEndDash; break;
|
||||||
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(Type.Comment).data += '\uFFFD'; break;
|
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
|
||||||
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.currentOfType(Type.Comment).data += this.currentInputCharacter;
|
default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -407,8 +410,8 @@ export class Tokenizer {
|
||||||
case State.CommentEndDash: {
|
case State.CommentEndDash: {
|
||||||
switch (this.consumeNext()) {
|
switch (this.consumeNext()) {
|
||||||
case '\u002D': this.state = State.CommentEnd; break;
|
case '\u002D': this.state = State.CommentEnd; break;
|
||||||
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.currentOfType(Type.Comment).data += '\u002D'; this.reconsumeIn(State.Comment);
|
default: this.currentOfType(CommentToken).append('\u002D'); this.reconsumeIn(State.Comment);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -416,11 +419,11 @@ export class Tokenizer {
|
||||||
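// FIXME: Same possible spec improvement as for the comment state (adding **current** in some places): https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
|
// FIXME: Same possible spec improvement as for the comment state (adding **current** in some places): https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state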
|
||||||
case State.CommentEnd: {
|
case State.CommentEnd: {
|
||||||
switch (this.consumeNext()) {
|
switch (this.consumeNext()) {
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfType(Type.Comment); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
|
||||||
case '\u0021': this.state = State.CommentEndBang; break;
|
case '\u0021': this.state = State.CommentEndBang; break;
|
||||||
case '\u002D': this.currentOfType(Type.Comment).data += '\u002D'; break;
|
case '\u002D': this.currentOfType(CommentToken).append('\u002D'); break;
|
||||||
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
|
||||||
default: this.currentOfType(Type.Comment).data += '\u002D\u002D'; this.reconsumeIn(State.Comment);
|
default: this.currentOfType(CommentToken).append('\u002D\u002D'); this.reconsumeIn(State.Comment);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -428,10 +431,10 @@ export class Tokenizer {
|
||||||
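// FIXME: Same possible spec improvement as for the comment state (adding **current** in some places): https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
|
// FIXME: Same possible spec improvement as for the comment state (adding **current** in some places): https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state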
|
||||||
case State.BogusComment: {
|
case State.BogusComment: {
|
||||||
switch (this.consumeNext()) {
|
switch (this.consumeNext()) {
|
||||||
case '\u003E': this.state = State.Data; this.emitCurrentOfType(Type.Comment); break;
|
case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
|
||||||
case undefined: this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
|
case undefined: this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
|
||||||
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(Type.Comment).data += '\uFFFD'; break;
|
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
|
||||||
default: this.currentOfType(Type.Comment).data += this.currentInputCharacter;
|
default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -496,9 +499,9 @@ export class Tokenizer {
|
||||||
default: {
|
default: {
|
||||||
if (this.asciiAlphanumeric(this.currentInputCharacter)) {
|
if (this.asciiAlphanumeric(this.currentInputCharacter)) {
|
||||||
if (this.consumedAsPartOfAnAttribute()) {
|
if (this.consumedAsPartOfAnAttribute()) {
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += this.currentInputCharacter;
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
|
||||||
} else {
|
} else {
|
||||||
this.emit({ type: Type.Character, data: this.currentInputCharacter });
|
this.emit(CharacterToken.createWith(this.currentInputCharacter));
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -516,12 +519,12 @@ export class Tokenizer {
|
||||||
|
|
||||||
private flushCodePointsConsumedAsCharacterReference(): void {
|
private flushCodePointsConsumedAsCharacterReference(): void {
|
||||||
if (this.consumedAsPartOfAnAttribute()) {
|
if (this.consumedAsPartOfAnAttribute()) {
|
||||||
this.currentOfEitherType(Type.StartTag, Type.EndTag).attributes.current.value += this.temporaryBuffer;
|
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.temporaryBuffer);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const codePoint of this.temporaryBuffer)
|
for (const codePoint of this.temporaryBuffer)
|
||||||
this.emit({ type: Type.Character, data: codePoint });
|
this.emit(CharacterToken.createWith(codePoint));
|
||||||
}
|
}
|
||||||
|
|
||||||
private consumedAsPartOfAnAttribute(): boolean {
|
private consumedAsPartOfAnAttribute(): boolean {
|
||||||
|
@ -562,6 +565,14 @@ export class Tokenizer {
|
||||||
this.currentInputCharacter = this.input[this.pointer];
|
this.currentInputCharacter = this.input[this.pointer];
|
||||||
this.pointer++;
|
this.pointer++;
|
||||||
|
|
||||||
|
this.currentPosition.column++;
|
||||||
|
this.currentPosition.index++;
|
||||||
|
|
||||||
|
if (this.currentInputCharacter === '\n') {
|
||||||
|
this.currentPosition.column = 0;
|
||||||
|
this.currentPosition.line++;
|
||||||
|
}
|
||||||
|
|
||||||
return this.currentInputCharacter;
|
return this.currentInputCharacter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -594,34 +605,53 @@ export class Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private emit(token: Token): void {
|
private emit(token: Token): void {
|
||||||
|
this.populateRangeOnEmit(token);
|
||||||
this.tokens.push(token);
|
this.tokens.push(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
private emitCurrentOfType(type: Type): void {
|
private emitCurrentOfType(type: Constructor<Token>): void {
|
||||||
VERIFY(this.currentToken.type === type, `Expected '${type}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
|
this.populateRangeOnEmit(this.currentToken);
|
||||||
this.tokens.push(this.currentToken);
|
this.tokens.push(this.currentToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
private emitCurrentOfEitherType(a: Type, b: Type): void {
|
private emitCurrentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): void {
|
||||||
VERIFY(this.currentToken.type === a || this.currentToken.type === b, `Expected '${a}' or '${b}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
|
this.populateRangeOnEmit(this.currentToken);
|
||||||
this.tokens.push(this.currentToken);
|
this.tokens.push(this.currentToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
private currentOfType<T extends Type>(type: T): Token & { type: T } {
|
private currentOfType<T extends Token>(type: Constructor<T>): T {
|
||||||
VERIFY(this.currentToken.type === type, `Expected '${type}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
return this.currentToken as Token & { type: T };
|
this.populateRangeOnEmit(this.currentToken);
|
||||||
|
return this.currentToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
private currentOfEitherType<T extends Type, U extends Type>(a: T, b: U): Token & { type: T | U } {
|
private currentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
|
||||||
VERIFY(this.currentToken.type === a || this.currentToken.type === b, `Expected '${a}' or '${b}', got '${this.currentToken.type}' instead`);
|
VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);
|
||||||
|
|
||||||
return this.currentToken as Token & { type: T };
|
this.populateRangeOnEmit(this.currentToken);
|
||||||
|
return this.currentToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
private populateRangeOnEmit(token: Token): void {
|
||||||
|
if (token.range.start === undefined && token.range.end === undefined)
|
||||||
|
token.at(this.currentPosition);
|
||||||
|
|
||||||
|
if (token.range.start !== undefined && token.range.end === undefined)
|
||||||
|
token.endingAt(this.currentPosition);
|
||||||
|
|
||||||
|
if (token.range.start === undefined && token.range.end !== undefined)
|
||||||
|
VERIFY_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
|
||||||
private create(token: Token): Token {
|
private create(token: Token): Token {
|
||||||
|
if (token.range.start === undefined)
|
||||||
|
token.startingAt(this.currentPosition);
|
||||||
|
|
||||||
return this.currentToken = token;
|
return this.currentToken = token;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import { VERIFY, VERIFY_NOT_REACHED } from "../../util/assertions.js";
|
||||||
|
|
||||||
export const enum Type {
|
export const enum Type {
|
||||||
DOCTYPE = 'DOCTYPE',
|
DOCTYPE = 'DOCTYPE',
|
||||||
StartTag = 'start tag',
|
StartTag = 'start tag',
|
||||||
|
@ -7,7 +9,52 @@ export const enum Type {
|
||||||
EndOfFile = 'end-of-file'
|
EndOfFile = 'end-of-file'
|
||||||
}
|
}
|
||||||
|
|
||||||
export type Attribute = { name: NonNullable<string>, value: NonNullable<string> };
|
export const REPLACEMENT_CHARACTER = '\uFFFD';
|
||||||
|
|
||||||
|
export type Range = {
|
||||||
|
start: Position,
|
||||||
|
end: Position
|
||||||
|
}
|
||||||
|
|
||||||
|
export type Position = {
|
||||||
|
line: number,
|
||||||
|
column: number,
|
||||||
|
index: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export class Attribute {
|
||||||
|
public name: string;
|
||||||
|
public value: string;
|
||||||
|
|
||||||
|
public constructor(name: string, value: string) {
|
||||||
|
this.name = name;
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendToName(characters: string): void {
|
||||||
|
this.name += characters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendReplacementCharacterToName(): void {
|
||||||
|
this.appendToName(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendToValue(characters: string): void {
|
||||||
|
this.value += characters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendReplacementCharacterToValue(): void {
|
||||||
|
this.appendToValue(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createWithEmptyNameAndValue(): Attribute {
|
||||||
|
return new Attribute('', '');
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createWithEmptyValue(name: string): Attribute {
|
||||||
|
return new Attribute(name, '');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export class AttributeList {
|
export class AttributeList {
|
||||||
private attributes: Array<Attribute>;
|
private attributes: Array<Attribute>;
|
||||||
|
@ -33,29 +80,200 @@ export class AttributeList {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export type Token = { type: Type.DOCTYPE, name?: string, publicIdentifier?: string, systemIdentifier?: string, forceQuirks?: true } |
|
export abstract class Token {
|
||||||
{ type: Type.StartTag, name: NonNullable<string>, selfClosing?: true, attributes: AttributeList } |
|
#type: Type;
|
||||||
{ type: Type.EndTag, name: NonNullable<string>, selfClosing?: true, attributes: AttributeList } |
|
#range!: Range;
|
||||||
{ type: Type.Comment, data: NonNullable<string> } |
|
|
||||||
{ type: Type.Character, data: NonNullable<string> } |
|
protected constructor(type: Type) {
|
||||||
{ type: Type.EndOfFile };
|
this.#type = type;
|
||||||
|
|
||||||
|
// @ts-expect-error
|
||||||
|
this.#range = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
public startingAt(position: Position): this {
|
||||||
|
this.#range.start = { line: position.line, column: position.column, index: position.index };
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public endingAt(position: Position): this {
|
||||||
|
this.#range.end = { line: position.line, column: position.column, index: position.index };
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public at(position: Position): this {
|
||||||
|
this.#range.start = { line: position.line, column: position.column, index: position.index };
|
||||||
|
this.#range.end = { line: position.line, column: position.column, index: position.index };
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public get range(): Range {
|
||||||
|
return this.#range;
|
||||||
|
}
|
||||||
|
|
||||||
|
public get type(): Type {
|
||||||
|
return this.#type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class CharacterToken extends Token {
|
||||||
|
public readonly data: NonNullable<string>;
|
||||||
|
|
||||||
|
public constructor(data: NonNullable<string>) {
|
||||||
|
super(Type.Character);
|
||||||
|
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createWith(data: NonNullable<string>): CharacterToken {
|
||||||
|
return new CharacterToken(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createReplacementCharacter(): CharacterToken {
|
||||||
|
return new CharacterToken(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class CommentToken extends Token {
|
||||||
|
public data: NonNullable<string>;
|
||||||
|
|
||||||
|
public constructor(data: NonNullable<string>) {
|
||||||
|
super(Type.Comment);
|
||||||
|
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public append(characters: string): void {
|
||||||
|
this.data += characters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendReplacementCharacter(): void {
|
||||||
|
this.append(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createEmpty(): CommentToken {
|
||||||
|
return new CommentToken('');
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createWith(data: string): CommentToken {
|
||||||
|
return new CommentToken(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class EndOfFileToken extends Token {
|
||||||
|
public constructor() {
|
||||||
|
super(Type.EndOfFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static create(): EndOfFileToken {
|
||||||
|
return new EndOfFileToken();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class StartTagToken extends Token {
|
||||||
|
public name: NonNullable<string>;
|
||||||
|
public readonly attributes: AttributeList;
|
||||||
|
|
||||||
|
public constructor(name: NonNullable<string>, attributes: AttributeList) {
|
||||||
|
super(Type.StartTag);
|
||||||
|
|
||||||
|
this.name = name;
|
||||||
|
this.attributes = attributes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendToName(characters: string): void {
|
||||||
|
this.name += characters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendReplacementCharacterToName(): void {
|
||||||
|
this.appendToName(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createEmpty(): StartTagToken {
|
||||||
|
return new StartTagToken('', new AttributeList());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class EndTagToken extends Token {
|
||||||
|
public name: NonNullable<string>;
|
||||||
|
public readonly attributes: AttributeList;
|
||||||
|
|
||||||
|
public constructor(name: NonNullable<string>, attributes: AttributeList) {
|
||||||
|
super(Type.EndTag);
|
||||||
|
|
||||||
|
this.name = name;
|
||||||
|
this.attributes = attributes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendToName(characters: string): void {
|
||||||
|
this.name += characters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendReplacementCharacterToName(): void {
|
||||||
|
this.appendToName(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createEmpty(): EndTagToken {
|
||||||
|
return new EndTagToken('', new AttributeList());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class DOCTYPEToken extends Token {
|
||||||
|
public name?: string;
|
||||||
|
public publicIdentifier?: string;
|
||||||
|
public systemIdentifier?: string;
|
||||||
|
public forceQuirks?: true;
|
||||||
|
|
||||||
|
public constructor(name?: string, publicIdentifier?: string, systemIdentifier?: string, forceQuirks?: true) {
|
||||||
|
super(Type.DOCTYPE);
|
||||||
|
|
||||||
|
this.name = name;
|
||||||
|
this.publicIdentifier = publicIdentifier;
|
||||||
|
this.systemIdentifier = systemIdentifier;
|
||||||
|
this.forceQuirks = forceQuirks;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendToName(characters: string): void {
|
||||||
|
VERIFY(this.name !== undefined);
|
||||||
|
|
||||||
|
this.name += characters;
|
||||||
|
}
|
||||||
|
|
||||||
|
public appendReplacementCharacterToName(): void {
|
||||||
|
this.appendToName(REPLACEMENT_CHARACTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createWithForcedQuirks(): DOCTYPEToken {
|
||||||
|
return new DOCTYPEToken(undefined, undefined, undefined, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static createWithName(name: string): DOCTYPEToken {
|
||||||
|
return new DOCTYPEToken(name, undefined, undefined, undefined);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export function stringify(token: Token): string {
|
export function stringify(token: Token): string {
|
||||||
switch (token.type) {
|
if (token instanceof CharacterToken) return token.data;
|
||||||
case Type.Character: return token.data;
|
if (token instanceof CommentToken) return `<!--${token.data}-->`;
|
||||||
case Type.Comment: return `<!--${token.data}-->`;
|
if (token instanceof DOCTYPEToken) return `<!DOCTYPE ${token.name}>`;
|
||||||
case Type.DOCTYPE: return `<!DOCTYPE ${token.name}>`;
|
if (token instanceof EndOfFileToken) return 'EOF';
|
||||||
case Type.EndOfFile: return 'EOF';
|
if (token instanceof EndTagToken) return `</${token.name}>`;
|
||||||
case Type.EndTag: return `</${token.name}>`;
|
if (token instanceof StartTagToken) {
|
||||||
case Type.StartTag: {
|
|
||||||
let string = `<${token.name}`;
|
let string = `<${token.name}`;
|
||||||
|
|
||||||
for (const attribute of token.attributes.list)
|
for (const attribute of token.attributes.list)
|
||||||
string += ` ${attribute.name}="${attribute.value}"`;
|
string += ` ${attribute.name}="${attribute.value}"`;
|
||||||
|
|
||||||
if (token.selfClosing) return `${string} />`;
|
// TODO: Implement selfClosing
|
||||||
|
// if (token.selfClosing) return `${string} />`;
|
||||||
|
|
||||||
return `${string}>`;
|
return `${string}>`;
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VERIFY_NOT_REACHED(token.constructor.name);
|
||||||
|
|
||||||
|
return '';
|
||||||
}
|
}
|
Loading…
Reference in a new issue