657 lines
29 KiB
TypeScript
657 lines
29 KiB
TypeScript
import { TODO, VERIFY, VERIFY_NOT_REACHED } from "../util/assertions.js";
|
|
import { Constructor } from "../util/guards.js";
|
|
import { ParseError } from "./errors.js";
|
|
import { entities } from "./tokenizer/entities.js";
|
|
import { State } from "./tokenizer/state.js";
|
|
import { Attribute, CharacterToken, CommentToken, DOCTYPEToken, EndOfFileToken, EndTagToken, Position, StartTagToken, Token } from "./tokenizer/token.js";
|
|
|
|
export class Tokenizer {
|
|
/** State the machine is currently in; tokenization begins in the data state. */
private state: State = State.Data;

/** State to switch back to once a character reference has been handled. */
private returnState!: State;

/** Scratch buffer holding the code points collected while reading a character reference. */
private temporaryBuffer!: string;

/** Token currently under construction; installed by `create()`. */
private currentToken!: Token;

/** Character most recently returned by `consumeNext()`. */
private currentInputCharacter!: string;

/** Line/column/index bookkeeping that `consumeNext()` advances for every consumed character. */
private currentPosition: Position = { line: 0, column: 0, index: 0 };

/** Every token emitted so far, in emission order. */
public tokens: Token[] = [];

/** Offset into `input` of the next character to consume. */
private pointer = 0;
|
|
|
|
/**
 * Creates a tokenizer over `input`.
 *
 * @param input The markup to tokenize; stored as a private parameter property.
 */
public constructor(private input: string) {}
|
|
|
|
/**
 * Runs one step of the tokenizer state machine.
 *
 * Dispatches on `this.state`, usually consumes one input character via `consumeNext()`, and
 * then emits tokens into `this.tokens`, mutates the token under construction and/or moves to
 * another state. States that "reconsume" call `reconsumeIn()`, which re-enters `spin()`
 * immediately, so a single call may process more than one state.
 *
 * States without a `case` below fall through to `TODO(...)`.
 *
 * The state handlers follow https://html.spec.whatwg.org/multipage/parsing.html#tokenization
 */
public spin(): void {
    switch (this.state) {
        case State.Data: {
            switch (this.consumeNext()) {
                case '\u0026':
                    this.returnState = State.Data;
                    this.state = State.CharacterReference;
                    break;
                case '\u003C': this.state = State.TagOpen; break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
                    break;
                case undefined: this.emit(EndOfFileToken.create()); break;
                default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
            }

            break;
        }
        case State.RCDATA: {
            switch (this.consumeNext()) {
                // Character references are decoded inside RCDATA content, so '&' has to enter the
                // character reference state with RCDATA as the return state.
                case '\u0026':
                    this.returnState = State.RCDATA;
                    this.state = State.CharacterReference;
                    break;
                // NOTE(review): the specification switches to the *RCDATA* less-than sign state here;
                // this targets the RAWTEXT one. Left as-is because the matching State member is not
                // visible from this file - confirm against State and adjust.
                case '\u003C': this.state = State.RAWTEXTLessThan; break;
                case '\u0000': this.parseError('unexpected-null-character'); this.emit(CharacterToken.createReplacementCharacter().at(this.currentPosition)); break;
                case undefined: this.emit(EndOfFileToken.create()); break;
                default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
            }

            break;
        }
        case State.TagOpen: {
            switch (this.consumeNext()) {
                case '\u0021': this.state = State.MarkupDeclarationOpen; break;
                case '\u002F': this.state = State.EndTagOpen; break;
                case '\u003F':
                    this.parseError('unexpected-question-mark-instead-of-tag-name');
                    this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
                    this.reconsumeIn(State.BogusComment);
                    break;
                case undefined:
                    this.parseError('eof-before-tag-name');
                    this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
                    this.emit(EndOfFileToken.create());
                    break;
                default: {
                    if (this.asciiAlpha(this.currentInputCharacter)) {
                        this.create(StartTagToken.createEmpty().startingAt(this.currentPosition));
                        this.reconsumeIn(State.TagName);
                        break;
                    }

                    this.parseError('invalid-first-character-of-tag-name');
                    this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
                    this.reconsumeIn(State.Data);
                }
            }

            break;
        }
        case State.EndTagOpen: {
            switch (this.consumeNext()) {
                case '\u003E': this.parseError('missing-end-tag-name'); this.state = State.Data; break;
                case undefined:
                    this.parseError('eof-before-tag-name');
                    this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
                    this.emit(CharacterToken.createWith('\u002F').at(this.currentPosition));
                    this.emit(EndOfFileToken.create());
                    break;
                default: {
                    if (this.asciiAlpha(this.currentInputCharacter)) {
                        this.create(EndTagToken.createEmpty().startingAt(this.currentPosition));
                        this.reconsumeIn(State.TagName);
                        break;
                    }

                    this.parseError('invalid-first-character-of-tag-name');
                    this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
                    this.reconsumeIn(State.BogusComment);
                }
            }

            break;
        }
        case State.MarkupDeclarationOpen: {
            if (this.matchNextFew('--')) {
                this.consumeNextFew('--');
                this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
                this.state = State.CommentStart;
            } else if (this.matchNextFewCaseInsensitive('DOCTYPE')) {
                this.consumeNextFewCaseInsensitive('DOCTYPE');
                this.state = State.DOCTYPE;
            } else if (this.matchNextFew('[CDATA[')) {
                this.consumeNextFew('[CDATA[');
                // NOTE: This parser will never be generated as part of the fragment parsing algorithm, as such the CDATA section state does not
                // exist and will not be started here.
                this.parseError('cdata-in-html-content');
                this.create(CommentToken.createWith('[CDATA[').startingAt(this.currentPosition));
                this.state = State.BogusComment;
            } else {
                this.parseError('incorrectly-opened-comment');
                this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
                this.state = State.BogusComment;
            }

            break;
        }
        case State.DOCTYPE: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': this.state = State.BeforeDOCTYPEName; break;
                case '\u003E': this.reconsumeIn(State.BeforeDOCTYPEName); break;
                case undefined:
                    this.parseError('eof-in-doctype');
                    this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
                    this.emit(EndOfFileToken.create());
                    break;
                default:
                    this.parseError('missing-whitespace-before-doctype-name');
                    this.reconsumeIn(State.BeforeDOCTYPEName);
            }

            break;
        }
        case State.BeforeDOCTYPEName: {
            // NOTE(review): the specification also has a dedicated '>' branch here
            // (missing-doctype-name, force-quirks token, back to data); it is not implemented,
            // so '>' currently becomes the first character of the DOCTYPE name.
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.create(DOCTYPEToken.createWithName('\uFFFD').startingAt(this.currentPosition));
                    this.state = State.DOCTYPEName;
                    break;
                case undefined:
                    this.parseError('eof-in-doctype');
                    this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
                    this.emit(EndOfFileToken.create());
                    break;
                default: {
                    if (this.asciiUpperAlpha(this.currentInputCharacter)) {
                        this.create(DOCTYPEToken.createWithName(this.currentInputCharacter.toLowerCase()).startingAt(this.currentPosition));
                        this.state = State.DOCTYPEName;
                        break;
                    }

                    this.create(DOCTYPEToken.createWithName(this.currentInputCharacter).startingAt(this.currentPosition));
                    // The name has been started, so continue in the DOCTYPE *name* state. Going back to
                    // the DOCTYPE state would report a bogus missing-whitespace error and replace the
                    // token that was just created.
                    this.state = State.DOCTYPEName;
                }
            }

            break;
        }
        case State.DOCTYPEName: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': this.state = State.AfterDOCTYPEName; break;
                case '\u003E': this.state = State.Data; this.emitCurrentOfType(DOCTYPEToken); break;
                case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(DOCTYPEToken).appendReplacementCharacterToName(); break;
                case undefined:
                    this.parseError('eof-in-doctype');
                    this.currentOfType(DOCTYPEToken).forceQuirks = true;
                    this.emitCurrentOfType(DOCTYPEToken);
                    this.emit(EndOfFileToken.create());
                    break;
                default: {
                    if (this.asciiUpperAlpha(this.currentInputCharacter)) {
                        this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter.toLowerCase());
                        break;
                    }

                    this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter);
                }
            }

            break;
        }
        case State.TagName: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': this.state = State.BeforeAttributeName; break;
                case '\u002F': this.state = State.SelfClosingStartTag; break;
                case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.currentOfEitherType(StartTagToken, EndTagToken).appendReplacementCharacterToName();
                    break;
                case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
                default: {
                    if (this.asciiUpperAlpha(this.currentInputCharacter)) {
                        this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter.toLowerCase());
                        break;
                    }

                    this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter);
                }
            }

            break;
        }
        case State.BeforeAttributeName: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': break;
                case '\u002F':
                case '\u003E':
                case undefined: this.reconsumeIn(State.AfterAttributeName); break;
                case '\u003D': {
                    this.parseError('unexpected-equals-sign-before-attribute-name');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyValue(this.currentInputCharacter));
                    this.state = State.AttributeName;
                    break;
                }
                default: {
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue());
                    this.reconsumeIn(State.AttributeName);
                }
            }

            break;
        }
        case State.AttributeName: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020':
                case '\u002F':
                case '\u003E':
                case undefined: this.reconsumeIn(State.AfterAttributeName); break;
                case '\u003D': this.state = State.BeforeAttributeValue; break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToName();
                    break;
                case '\u0022':
                case '\u0027':
                case '\u003C':
                    this.parseError('unexpected-character-in-attribute-name');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
                    break;
                default: {
                    if (this.asciiUpperAlpha(this.currentInputCharacter)) {
                        this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter.toLowerCase());
                        break;
                    }

                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
                }
            }

            break;
        }
        case State.AfterAttributeName: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': break;
                case '\u002F': this.state = State.SelfClosingStartTag; break;
                case '\u003D': this.state = State.BeforeAttributeValue; break;
                case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
                case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
                default:
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue());
                    this.reconsumeIn(State.AttributeName);
                    break;
            }

            break;
        }
        case State.BeforeAttributeValue: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': break;
                case '\u0022': this.state = State.AttributeValueDouble; break;
                case '\u0027': this.state = State.AttributeValueSingle; break;
                case '\u003E':
                    this.parseError('missing-attribute-value');
                    this.state = State.Data;
                    this.emitCurrentOfEitherType(StartTagToken, EndTagToken);
                    break;
                default:
                    this.reconsumeIn(State.AttributeValueUnquoted);
            }

            break;
        }
        case State.AttributeValueDouble: {
            switch (this.consumeNext()) {
                case '\u0022': this.state = State.AfterAttributeValue; break;
                case '\u0026': this.returnState = State.AttributeValueDouble; this.state = State.CharacterReference; break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
                    break;
                case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
                default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
            }

            break;
        }
        case State.AttributeValueSingle: {
            switch (this.consumeNext()) {
                case '\u0027': this.state = State.AfterAttributeValue; break;
                case '\u0026': this.returnState = State.AttributeValueSingle; this.state = State.CharacterReference; break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
                    break;
                case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
                default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
            }

            break;
        }
        case State.AttributeValueUnquoted: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': this.state = State.BeforeAttributeName; break;
                case '\u0026': this.returnState = State.AttributeValueUnquoted; this.state = State.CharacterReference; break;
                case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
                case '\u0000':
                    this.parseError('unexpected-null-character');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
                    break;
                case '\u0022':
                case '\u0027':
                case '\u003C':
                case '\u003D':
                case '\u0060':
                    this.parseError('unexpected-character-in-unquoted-attribute-value');
                    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
                    break;
                case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
                default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
            }

            break;
        }
        case State.AfterAttributeValue: {
            switch (this.consumeNext()) {
                case '\u0009':
                case '\u000A':
                case '\u000C':
                case '\u0020': this.state = State.BeforeAttributeName; break;
                case '\u002F': this.state = State.SelfClosingStartTag; break;
                case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
                case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
                default: this.parseError('missing-whitespace-between-attributes'); this.reconsumeIn(State.BeforeAttributeName);
            }

            break;
        }
        case State.CommentStart: {
            switch (this.consumeNext()) {
                case '\u002D': this.state = State.CommentStartDash; break;
                case '\u003E': this.parseError('abrupt-closing-of-empty-comment'); this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
                default: this.reconsumeIn(State.Comment);
            }

            break;
        }
        // FIXME: Possible improvement to https://html.spec.whatwg.org/multipage/parsing.html#comment-state (adding **current** in some places)
        case State.Comment: {
            switch (this.consumeNext()) {
                case '\u003C': this.currentOfType(CommentToken).append(this.currentInputCharacter); this.state = State.CommentLessThanSign; break;
                case '\u002D': this.state = State.CommentEndDash; break;
                case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
                case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
                default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
            }

            break;
        }
        case State.CommentEndDash: {
            switch (this.consumeNext()) {
                case '\u002D': this.state = State.CommentEnd; break;
                case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
                default: this.currentOfType(CommentToken).append('\u002D'); this.reconsumeIn(State.Comment);
            }

            break;
        }
        // Same as above fixme https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
        case State.CommentEnd: {
            switch (this.consumeNext()) {
                case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
                case '\u0021': this.state = State.CommentEndBang; break;
                case '\u002D': this.currentOfType(CommentToken).append('\u002D'); break;
                case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
                default: this.currentOfType(CommentToken).append('\u002D\u002D'); this.reconsumeIn(State.Comment);
            }

            break;
        }
        // Same as above https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
        case State.BogusComment: {
            switch (this.consumeNext()) {
                case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
                case undefined: this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
                case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
                default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
            }

            break;
        }
        case State.CharacterReference: {
            this.temporaryBuffer = '';
            this.temporaryBuffer += '\u0026';

            switch (this.consumeNext()) {
                case '\u0023': this.temporaryBuffer += this.currentInputCharacter; this.state = State.NumericCharacterReference; break;
                default: {
                    if (this.asciiAlphanumeric(this.currentInputCharacter)) {
                        this.reconsumeIn(State.NamedCharacterReference);
                        break;
                    }

                    this.flushCodePointsConsumedAsCharacterReference();
                    this.reconsumeIn(this.returnState);
                }
            }

            break;
        }
        case State.NamedCharacterReference: {
            // The specification requires consuming the *maximum* number of characters that form a
            // known identifier, so scan every entry and keep the longest match instead of stopping
            // at the first one (which would e.g. resolve a longer name through one of its prefixes).
            let longest: string | undefined;

            for (const entry in entities) {
                if ((longest === undefined || entry.length > longest.length) && this.matchNextFew(entry))
                    longest = entry;
            }

            if (longest === undefined) {
                this.flushCodePointsConsumedAsCharacterReference();
                this.state = State.AmbiguousAmpersand;
                break;
            }

            this.consumeNextFew(longest);
            this.temporaryBuffer += longest;

            const endsWithSemicolon = longest[longest.length - 1] === '\u003B';

            // Historical quirk: inside an attribute value, a reference without the trailing ';' that
            // is followed by '=' or an alphanumeric is left as literal text.
            if (this.consumedAsPartOfAnAttribute() && !endsWithSemicolon && (this.next() === '\u003D' || this.asciiAlphanumeric(this.next() ?? ''))) {
                this.flushCodePointsConsumedAsCharacterReference();
                this.state = this.returnState;
                break;
            }

            if (!endsWithSemicolon)
                this.parseError('missing-semicolon-after-character-reference');

            this.temporaryBuffer = '';
            this.temporaryBuffer += entities[longest].characters;
            this.flushCodePointsConsumedAsCharacterReference();
            this.state = this.returnState;

            break;
        }
        case State.AmbiguousAmpersand: {
            switch (this.consumeNext()) {
                case '\u003B': this.parseError('unknown-named-character-reference'); this.reconsumeIn(this.returnState); break;
                default: {
                    if (this.asciiAlphanumeric(this.currentInputCharacter)) {
                        if (this.consumedAsPartOfAnAttribute()) {
                            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
                        } else {
                            this.emit(CharacterToken.createWith(this.currentInputCharacter));
                        }

                        break;
                    }

                    this.reconsumeIn(this.returnState);
                }
            }

            break;
        }
        default: TODO(`Unimplemented state '${this.state}'`);
    }
}
|
|
|
|
/**
 * Flushes the contents of `temporaryBuffer`.
 *
 * When the character reference was read as part of an attribute value the buffer is appended
 * to the current attribute's value; otherwise one character token is emitted per code point.
 */
private flushCodePointsConsumedAsCharacterReference(): void {
    if (!this.consumedAsPartOfAnAttribute()) {
        for (const codePoint of this.temporaryBuffer) {
            this.emit(CharacterToken.createWith(codePoint));
        }

        return;
    }

    const tag = this.currentOfEitherType(StartTagToken, EndTagToken);
    tag.attributes.current.appendToValue(this.temporaryBuffer);
}
|
|
|
|
/**
 * Tells whether the character reference being processed started inside an attribute value,
 * i.e. whether the return state is one of the three attribute value states.
 */
private consumedAsPartOfAnAttribute(): boolean {
    switch (this.returnState) {
        case State.AttributeValueDouble:
        case State.AttributeValueSingle:
        case State.AttributeValueUnquoted:
            return true;
        default:
            return false;
    }
}
|
|
|
|
/** Returns whether `input` is accepted by `asciiAlpha` or, failing that, by `asciiDigit`. */
private asciiAlphanumeric(input: string): boolean {
    if (this.asciiAlpha(input)) {
        return true;
    }

    return this.asciiDigit(input);
}
|
|
|
|
/** Returns whether `input` is accepted by `asciiUpperAlpha` or, failing that, by `asciiLowerAlpha`. */
private asciiAlpha(input: string): boolean {
    if (this.asciiUpperAlpha(input)) {
        return true;
    }

    return this.asciiLowerAlpha(input);
}
|
|
|
|
/**
 * Returns whether `input` is exactly one ASCII upper alpha (U+0041 'A' to U+005A 'Z').
 *
 * The pattern is anchored and not negated: the previous `[^A-Z]` class answered the opposite
 * question (true for anything that is *not* an upper-case letter). Anchoring also makes the
 * check false for the empty string and for the `undefined` that reaches here at end of input
 * (`RegExp#test` would otherwise match against the text "undefined").
 *
 * @param input The character to classify.
 */
private asciiUpperAlpha(input: string): boolean {
    return /^[\u0041-\u005A]$/.test(input);
}
|
|
|
|
/**
 * Returns whether `input` is exactly one ASCII lower alpha (U+0061 'a' to U+007A 'z').
 *
 * The pattern is anchored and not negated: the previous `[^a-z]` class answered the opposite
 * question. Anchoring also keeps the check false for the empty string and for the `undefined`
 * that reaches here at end of input (`RegExp#test` would otherwise match against the text
 * "undefined", which consists of lower-case letters).
 *
 * @param input The character to classify.
 */
private asciiLowerAlpha(input: string): boolean {
    return /^[\u0061-\u007A]$/.test(input);
}
|
|
|
|
/**
 * Returns whether `input` is exactly one ASCII digit (U+0030 '0' to U+0039 '9').
 *
 * The previous pattern was negated and its range ended at U+0030, so it only ever excluded '0'
 * and reported every other character as a digit.
 *
 * @param input The character to classify.
 */
private asciiDigit(input: string): boolean {
    return /^[\u0030-\u0039]$/.test(input);
}
|
|
|
|
/**
 * Reconsumes the current input character in `state`: un-reads the character most recently
 * returned by `consumeNext()`, switches to `state` and immediately runs `spin()` again.
 *
 * @param state The state in which the character should be processed again.
 */
private reconsumeIn(state: State): void {
    // Step back so that the next consumeNext() yields the same character again.
    this.pointer--;

    // consumeNext() also advanced the position bookkeeping for that character. Undo that as
    // well, otherwise index/column run one further ahead of the pointer for every reconsume.
    this.currentPosition.index--;

    if (this.currentInputCharacter === '\n') {
        // Consuming the newline moved to column 0 of the next line; go back to the end of the
        // line the newline terminates. The column there equals the number of characters
        // between the start of that line and the newline itself.
        const lineStart = this.pointer > 0 ? this.input.lastIndexOf('\n', this.pointer - 1) + 1 : 0;

        this.currentPosition.line--;
        this.currentPosition.column = this.pointer - lineStart;
    } else {
        this.currentPosition.column--;
    }

    this.state = state;
    this.spin();
}
|
|
|
|
/**
 * Reports a parse error by writing it to `console.error`; tokenization continues afterwards.
 *
 * @param error The parse error to report.
 */
private parseError(error: ParseError): void {
    const message = 'Parse error: ' + error;

    console.error(message);
}
|
|
|
|
/**
 * Consumes the character at `pointer`, records it as the current input character and advances
 * both the pointer and the position bookkeeping.
 *
 * @returns The consumed character, or `undefined` once the pointer is past the end of the input.
 */
private consumeNext(): string | undefined {
    const consumed = this.input[this.pointer];

    this.currentInputCharacter = consumed;
    this.pointer += 1;

    const position = this.currentPosition;
    position.index += 1;

    if (consumed === '\n') {
        // A newline starts a fresh line: the column restarts at 0.
        position.column = 0;
        position.line += 1;
    } else {
        position.column += 1;
    }

    return consumed;
}
|
|
|
|
/**
 * Peeks at the character `consumeNext()` would return, without consuming it.
 *
 * @returns The upcoming character, or `undefined` when the pointer is past the end of the input.
 */
private next(): string | undefined {
    const { input, pointer } = this;

    return input[pointer];
}
|
|
|
|
/**
 * Checks, without consuming anything, whether the upcoming input starts with `input`
 * (case-sensitive).
 *
 * Uses `slice` rather than the deprecated `String.prototype.substr`.
 *
 * @param input The exact text expected at the current pointer.
 */
private matchNextFew(input: string): boolean {
    return this.input.slice(this.pointer, this.pointer + input.length) === input;
}
|
|
|
|
/**
 * Checks, without consuming anything, whether the upcoming input starts with `input` when both
 * sides are compared in lower case.
 *
 * Uses `slice` rather than the deprecated `String.prototype.substr`.
 *
 * @param input The text expected at the current pointer, in any letter case.
 */
private matchNextFewCaseInsensitive(input: string): boolean {
    const upcoming = this.input.slice(this.pointer, this.pointer + input.length);

    return upcoming.toLowerCase() === input.toLowerCase();
}
|
|
|
|
/**
 * Consumes `input.length` characters and verifies that each one equals the corresponding
 * character of `input`.
 *
 * @param input The exact text expected at the current pointer.
 */
private consumeNextFew(input: string): void {
    let offset = 0;

    while (offset < input.length) {
        const expected = input[offset];
        const consumed = this.consumeNext();

        VERIFY(consumed === expected, `Expected '${expected}' (${input} at ${offset}), got ${consumed} instead`);

        offset += 1;
    }
}
|
|
|
|
/**
 * Consumes `input.length` characters and verifies that each one equals the corresponding
 * character of `input` when both are lower-cased.
 *
 * @param input The text expected at the current pointer, in any letter case.
 */
private consumeNextFewCaseInsensitive(input: string): void {
    let offset = 0;

    while (offset < input.length) {
        const consumed = this.consumeNext()?.toLowerCase();
        const expected = input[offset].toLowerCase();

        VERIFY(consumed === expected, `Expected '${expected}' (${input.toLowerCase()} at ${offset}), got ${consumed} instead`);

        offset += 1;
    }
}
|
|
|
|
/**
 * Emits `token`: fills in its range via `populateRangeOnEmit` and then appends it to `tokens`.
 *
 * @param token The token to emit.
 */
private emit(token: Token): void {
    // The range must be populated before the token becomes visible in the output list.
    this.populateRangeOnEmit(token);

    const emitted = this.tokens;
    emitted.push(token);
}
|
|
|
|
/**
 * Emits the token under construction after verifying that it is an instance of `type`.
 *
 * @param type The constructor the current token is expected to be an instance of.
 */
private emitCurrentOfType(type: Constructor<Token>): void {
    const current = this.currentToken;

    VERIFY(current instanceof type, `Expected '${type.name}', got '${current.constructor.name}' instead`);

    // emit() populates the range and appends the token to the output list.
    this.emit(current);
}
|
|
|
|
/**
 * Emits the token under construction after verifying that it is an instance of `a` or of `b`.
 *
 * @param a First accepted constructor.
 * @param b Second accepted constructor.
 */
private emitCurrentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): void {
    const current = this.currentToken;
    const isExpected = current instanceof a || current instanceof b;

    VERIFY(isExpected, `Expected '${a.name}' or '${b.name}', got '${current.constructor.name}' instead`);

    // emit() populates the range and appends the token to the output list.
    this.emit(current);
}
|
|
|
|
/**
 * Returns the token under construction, verified to be an instance of `type`.
 *
 * This is a pure accessor: it no longer calls `populateRangeOnEmit`. Doing so fixed the token's
 * range end at whatever position the tokenizer had reached on the *first* access (typically
 * while appending the first character), and because the end is only filled in while it is
 * still undefined, the position at emission never replaced it.
 *
 * @param type The constructor the current token is expected to be an instance of.
 * @returns The current token, narrowed to `T`.
 */
private currentOfType<T extends Token>(type: Constructor<T>): T {
    VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);

    return this.currentToken;
}
|
|
|
|
/**
 * Returns the token under construction, verified to be an instance of `a` or of `b`.
 *
 * This is a pure accessor: it no longer calls `populateRangeOnEmit`. Doing so fixed the token's
 * range end at whatever position the tokenizer had reached on the *first* access (typically
 * while appending the first character), and because the end is only filled in while it is
 * still undefined, the position at emission never replaced it.
 *
 * @param a First accepted constructor.
 * @param b Second accepted constructor.
 * @returns The current token, narrowed to `T | U`.
 */
private currentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
    VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);

    return this.currentToken;
}
|
|
|
|
/**
 * Completes the range of `token` using the current position.
 *
 * - neither start nor end set: the token is placed with `at(currentPosition)`;
 * - only the start set: the end is filled in with `endingAt(currentPosition)`;
 * - only the end set: treated as unreachable.
 *
 * The three checks run one after another and re-read the range each time.
 *
 * @param token The token whose range should be completed.
 */
private populateRangeOnEmit(token: Token): void {
    const hasStart = (): boolean => token.range.start !== undefined;
    const hasEnd = (): boolean => token.range.end !== undefined;

    if (!hasStart() && !hasEnd()) {
        token.at(this.currentPosition);
    }

    if (hasStart() && !hasEnd()) {
        token.endingAt(this.currentPosition);
    }

    if (!hasStart() && hasEnd()) {
        VERIFY_NOT_REACHED();
    }
}
|
|
|
|
/**
 * Installs `token` as the token under construction, giving it the current position as its
 * range start when it does not have one yet.
 *
 * @param token The freshly created token.
 * @returns The same token.
 */
private create(token: Token): Token {
    const lacksStart = token.range.start === undefined;

    if (lacksStart) {
        token.startingAt(this.currentPosition);
    }

    this.currentToken = token;

    return token;
}
|
|
}
|