networkException
586546ee57
This patch removes the Next.js React project that was previously contained in this repository. The replacement is a vanilla HTML page with TypeScript that parses its own HTML source and highlights it on load. The concept will be iterated on in following commits; planned are on-hover tooltips showing metadata about HTML tokens, as well as tokenizing (perhaps parsing) of JavaScript and CSS to be able to highlight those sections as well. To properly determine the range of script and style sections it might be required to also implement HTML tree building; however, on-read execution of JavaScript, on-the-fly parsing, and fragment parsing are not required for the site. This commit merely represents a start and is made to better track the progress of changes.
639 lines
28 KiB
TypeScript
639 lines
28 KiB
TypeScript
import { ParseError } from "./errors.js";
|
|
import { entities } from "./tokenizer/entities.js";
|
|
import { State } from "./tokenizer/state.js";
|
|
import { AttributeList, Token, Type } from "./tokenizer/token.js";
|
|
|
|
// FIXME: Replace console.assert calls with thrown errors
|
|
/**
 * Step-wise tokenizer for HTML source text, following the tokenization state
 * machine of the WHATWG HTML parsing specification.
 *
 * Usage: construct with the full input string, then call {@link Tokenizer.spin}
 * repeatedly. Every call advances the state machine and appends any finished
 * tokens to {@link Tokenizer.tokens}. Once the input is exhausted, `spin()`
 * emits an `EndOfFile` token (and will emit another one on every further call).
 *
 * Only a subset of the specification's states is implemented; reaching any
 * other state makes `spin()` throw.
 */
export class Tokenizer {
  /** State the next call to `spin()` will run. */
  private state: State = State.Data;
  /** State to return to once a character reference has been processed. */
  private returnState!: State;

  /** Scratch buffer holding the characters of an in-progress character reference. */
  private temporaryBuffer!: string;

  /** Token currently under construction (set by `create`). */
  private currentToken!: Token;
  /** Result of the last `consumeNext()`; `undefined` at runtime once past the end of input. */
  private currentInputCharacter!: string;

  /** All tokens emitted so far, in emission order. */
  public tokens: Array<Token> = new Array<Token>();
  /** Index into `input` of the next character to consume. */
  private pointer: number = 0;

  /**
   * @param input Complete source text to tokenize.
   */
  public constructor(private input: string) {
  }

  /**
   * Runs the handler of the current state once.
   *
   * A handler that reconsumes the current character in another state runs
   * that state's handler immediately (see `reconsumeIn`), so a single call
   * may pass through more than one state.
   *
   * @throws Error when the current state has no handler.
   */
  public spin(): void {
    switch (this.state) {
      case State.Data: {
        switch (this.consumeNext()) {
          case '\u0026': // &
            this.returnState = State.Data;
            this.state = State.CharacterReference;
            break;
          case '\u003C': this.state = State.TagOpen; break; // <
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.emit({ type: Type.Character, data: this.currentInputCharacter });
            break;
          case undefined: this.emit({ type: Type.EndOfFile }); break;
          default: this.emit({ type: Type.Character, data: this.currentInputCharacter });
        }

        break;
      }
      case State.RCDATA: {
        switch (this.consumeNext()) {
          // NOTE(review): the specification switches to the *RCDATA* less-than sign state here and
          // also handles '&' in RCDATA; the RAWTEXT state below looks like a copy/paste slip — confirm
          // against the members of State before changing.
          case '\u003C': this.state = State.RAWTEXTLessThan; break; // <
          case '\u0000': this.parseError('unexpected-null-character'); this.emit({ type: Type.Character, data: '\uFFFD' }); break;
          case undefined: this.emit({ type: Type.EndOfFile }); break;
          default: this.emit({ type: Type.Character, data: this.currentInputCharacter });
        }

        break;
      }
      case State.TagOpen: {
        switch (this.consumeNext()) {
          case '\u0021': this.state = State.MarkupDeclarationOpen; break; // !
          case '\u002F': this.state = State.EndTagOpen; break; // /
          case '\u003F': // ?
            this.parseError('unexpected-question-mark-instead-of-tag-name');
            this.create({ type: Type.Comment, data: '' });
            this.reconsumeIn(State.BogusComment);
            break;
          case undefined:
            this.parseError('eof-before-tag-name');
            this.emit({ type: Type.Character, data: '\u003C' });
            this.emit({ type: Type.EndOfFile });
            break;
          default: {
            if (this.asciiAlpha(this.currentInputCharacter)) {
              this.create({ type: Type.StartTag, name: '', attributes: new AttributeList() });
              this.reconsumeIn(State.TagName);
              break;
            }

            // Not a tag after all: the '<' is ordinary text.
            this.parseError('invalid-first-character-of-tag-name');
            this.emit({ type: Type.Character, data: '\u003C' });
            this.reconsumeIn(State.Data);
          }
        }

        break;
      }
      case State.EndTagOpen: {
        switch (this.consumeNext()) {
          case '\u003E': this.parseError('missing-end-tag-name'); this.state = State.Data; break; // >
          case undefined:
            this.parseError('eof-before-tag-name');
            this.emit({ type: Type.Character, data: '\u003C' });
            this.emit({ type: Type.Character, data: '\u002F' });
            this.emit({ type: Type.EndOfFile });
            break;
          default: {
            if (this.asciiAlpha(this.currentInputCharacter)) {
              this.create({ type: Type.EndTag, name: '', attributes: new AttributeList() });
              this.reconsumeIn(State.TagName);
              break;
            }

            this.parseError('invalid-first-character-of-tag-name');
            this.create({ type: Type.Comment, data: '' });
            this.reconsumeIn(State.BogusComment);
          }
        }

        break;
      }
      case State.MarkupDeclarationOpen: {
        if (this.matchNextFew('--')) {
          this.consumeNextFew('--');
          this.create({ type: Type.Comment, data: '' });
          this.state = State.CommentStart;
        } else if (this.matchNextFewCaseInsensitive('DOCTYPE')) {
          this.consumeNextFewCaseInsensitive('DOCTYPE');
          this.state = State.DOCTYPE;
        } else if (this.matchNextFew('[CDATA[')) {
          this.consumeNextFew('[CDATA[');
          // NOTE: This parser will never be generated as part of the fragment parsing algorithm, as such the CDATA section state does not
          // exist and will not be started here.
          this.parseError('cdata-in-html-content');
          this.create({ type: Type.Comment, data: '[CDATA[' });
          this.state = State.BogusComment;
        } else {
          this.parseError('incorrectly-opened-comment');
          this.create({ type: Type.Comment, data: '' });
          this.state = State.BogusComment;
        }

        break;
      }
      case State.DOCTYPE: {
        switch (this.consumeNext()) {
          case '\u0009': // tab
          case '\u000A': // line feed
          case '\u000C': // form feed
          case '\u0020': this.state = State.BeforeDOCTYPEName; break; // space
          case '\u003E': this.reconsumeIn(State.BeforeDOCTYPEName); break; // >
          case undefined:
            this.parseError('eof-in-doctype');
            this.emit({ type: Type.DOCTYPE, forceQuirks: true });
            this.emit({ type: Type.EndOfFile });
            break;
          default:
            this.parseError('missing-whitespace-before-doctype-name');
            this.reconsumeIn(State.BeforeDOCTYPEName);
        }

        break;
      }
      case State.BeforeDOCTYPEName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break; // Leading whitespace is ignored.
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.create({ type: Type.DOCTYPE, name: '\uFFFD' });
            this.state = State.DOCTYPEName;
            break;
          case '\u003E': // > — "<!DOCTYPE>" without a name
            this.parseError('missing-doctype-name');
            this.create({ type: Type.DOCTYPE, forceQuirks: true });
            this.state = State.Data;
            this.emitCurrentOfType(Type.DOCTYPE);
            break;
          case undefined:
            this.parseError('eof-in-doctype');
            this.emit({ type: Type.DOCTYPE, forceQuirks: true });
            this.emit({ type: Type.EndOfFile });
            break;
          default: {
            const first = this.asciiUpperAlpha(this.currentInputCharacter)
              ? this.currentInputCharacter.toLowerCase()
              : this.currentInputCharacter;

            this.create({ type: Type.DOCTYPE, name: first });
            // The remaining characters of the name are collected in the DOCTYPE name state.
            this.state = State.DOCTYPEName;
          }
        }

        break;
      }
      case State.DOCTYPEName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.AfterDOCTYPEName; break;
          case '\u003E': this.state = State.Data; this.emitCurrentOfType(Type.DOCTYPE); break; // >
          case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(Type.DOCTYPE).name += '\uFFFD'; break;
          case undefined:
            this.parseError('eof-in-doctype');
            this.currentOfType(Type.DOCTYPE).forceQuirks = true;
            this.emitCurrentOfType(Type.DOCTYPE);
            this.emit({ type: Type.EndOfFile });
            break;
          default:
            this.currentOfType(Type.DOCTYPE).name += this.asciiUpperAlpha(this.currentInputCharacter)
              ? this.currentInputCharacter.toLowerCase()
              : this.currentInputCharacter;
        }

        break;
      }
      case State.TagName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeAttributeName; break;
          case '\u002F': this.state = State.SelfClosingStartTag; break; // /
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break; // >
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentTag().name += '\uFFFD';
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
          default:
            // Tag names are stored lower-cased (ASCII letters only).
            this.currentTag().name += this.asciiUpperAlpha(this.currentInputCharacter)
              ? this.currentInputCharacter.toLowerCase()
              : this.currentInputCharacter;
        }

        break;
      }
      case State.BeforeAttributeName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u002F': // /
          case '\u003E': // >
          case undefined: this.reconsumeIn(State.AfterAttributeName); break;
          case '\u003D': { // = — becomes the first character of the attribute name
            this.parseError('unexpected-equals-sign-before-attribute-name');
            this.currentTag().attributes.append({ name: this.currentInputCharacter, value: '' });
            this.state = State.AttributeName;
            break;
          }
          default: {
            this.currentTag().attributes.append({ name: '', value: '' });
            this.reconsumeIn(State.AttributeName);
          }
        }

        break;
      }
      case State.AttributeName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020':
          case '\u002F': // /
          case '\u003E': // >
          case undefined: this.reconsumeIn(State.AfterAttributeName); break;
          case '\u003D': this.state = State.BeforeAttributeValue; break; // =
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentTag().attributes.current.name += '\uFFFD';
            break;
          case '\u0022': // "
          case '\u0027': // '
          case '\u003C': // <
            this.parseError('unexpected-character-in-attribute-name');
            this.currentTag().attributes.current.name += this.currentInputCharacter;
            break;
          default:
            // Attribute names are stored lower-cased (ASCII letters only).
            this.currentTag().attributes.current.name += this.asciiUpperAlpha(this.currentInputCharacter)
              ? this.currentInputCharacter.toLowerCase()
              : this.currentInputCharacter;
        }

        break;
      }
      case State.AfterAttributeName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u002F': this.state = State.SelfClosingStartTag; break; // /
          case '\u003D': this.state = State.BeforeAttributeValue; break; // =
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break; // >
          case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
          default:
            this.currentTag().attributes.append({ name: '', value: '' });
            this.reconsumeIn(State.AttributeName);
            break;
        }

        break;
      }
      case State.BeforeAttributeValue: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u0022': this.state = State.AttributeValueDouble; break; // "
          case '\u0027': this.state = State.AttributeValueSingle; break; // '
          case '\u003E': // >
            this.parseError('missing-attribute-value');
            this.state = State.Data;
            this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag);
            break;
          default:
            this.reconsumeIn(State.AttributeValueUnquoted);
        }

        break;
      }
      case State.AttributeValueDouble: {
        switch (this.consumeNext()) {
          case '\u0022': this.state = State.AfterAttributeValue; break; // "
          case '\u0026': this.returnState = State.AttributeValueDouble; this.state = State.CharacterReference; break; // &
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentTag().attributes.current.value += '\uFFFD';
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
          default: this.currentTag().attributes.current.value += this.currentInputCharacter;
        }

        break;
      }
      case State.AttributeValueSingle: {
        switch (this.consumeNext()) {
          case '\u0027': this.state = State.AfterAttributeValue; break; // '
          case '\u0026': this.returnState = State.AttributeValueSingle; this.state = State.CharacterReference; break; // &
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentTag().attributes.current.value += '\uFFFD';
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
          default: this.currentTag().attributes.current.value += this.currentInputCharacter;
        }

        break;
      }
      case State.AttributeValueUnquoted: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeAttributeName; break;
          case '\u0026': this.returnState = State.AttributeValueUnquoted; this.state = State.CharacterReference; break; // &
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break; // >
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentTag().attributes.current.value += '\uFFFD';
            break;
          case '\u0022': // "
          case '\u0027': // '
          case '\u003C': // <
          case '\u003D': // =
          case '\u0060': // `
            this.parseError('unexpected-character-in-unquoted-attribute-value');
            this.currentTag().attributes.current.value += this.currentInputCharacter;
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
          default: this.currentTag().attributes.current.value += this.currentInputCharacter;
        }

        break;
      }
      case State.AfterAttributeValue: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeAttributeName; break;
          case '\u002F': this.state = State.SelfClosingStartTag; break; // /
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(Type.StartTag, Type.EndTag); break; // >
          case undefined: this.parseError('eof-in-tag'); this.emit({ type: Type.EndOfFile }); break;
          default: this.parseError('missing-whitespace-between-attributes'); this.reconsumeIn(State.BeforeAttributeName);
        }

        break;
      }
      case State.CommentStart: {
        switch (this.consumeNext()) {
          case '\u002D': this.state = State.CommentStartDash; break; // -
          case '\u003E': this.parseError('abrupt-closing-of-empty-comment'); this.state = State.Data; this.emitCurrentOfType(Type.Comment); break; // >
          default: this.reconsumeIn(State.Comment);
        }

        break;
      }
      // FIXME: Possible improvement to https://html.spec.whatwg.org/multipage/parsing.html#comment-state (adding **current** in some places)
      case State.Comment: {
        switch (this.consumeNext()) {
          case '\u003C': this.currentOfType(Type.Comment).data += this.currentInputCharacter; this.state = State.CommentLessThanSign; break; // <
          case '\u002D': this.state = State.CommentEndDash; break; // -
          case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(Type.Comment).data += '\uFFFD'; break;
          case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
          default: this.currentOfType(Type.Comment).data += this.currentInputCharacter;
        }

        break;
      }
      case State.CommentEndDash: {
        switch (this.consumeNext()) {
          case '\u002D': this.state = State.CommentEnd; break; // -
          case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
          // A single '-' was ordinary comment text after all.
          default: this.currentOfType(Type.Comment).data += '\u002D'; this.reconsumeIn(State.Comment);
        }

        break;
      }
      // Same as above fixme https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
      case State.CommentEnd: {
        switch (this.consumeNext()) {
          case '\u003E': this.state = State.Data; this.emitCurrentOfType(Type.Comment); break; // >
          case '\u0021': this.state = State.CommentEndBang; break; // !
          case '\u002D': this.currentOfType(Type.Comment).data += '\u002D'; break; // -
          case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
          // "--" not followed by '>' was ordinary comment text after all.
          default: this.currentOfType(Type.Comment).data += '\u002D\u002D'; this.reconsumeIn(State.Comment);
        }

        break;
      }
      // Same as above https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
      case State.BogusComment: {
        switch (this.consumeNext()) {
          case '\u003E': this.state = State.Data; this.emitCurrentOfType(Type.Comment); break; // >
          case undefined: this.emitCurrentOfType(Type.Comment); this.emit({ type: Type.EndOfFile }); break;
          case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(Type.Comment).data += '\uFFFD'; break;
          default: this.currentOfType(Type.Comment).data += this.currentInputCharacter;
        }

        break;
      }
      case State.CharacterReference: {
        // The '&' that led here has already been consumed by the previous state.
        this.temporaryBuffer = '';
        this.temporaryBuffer += '\u0026';

        switch (this.consumeNext()) {
          case '\u0023': this.temporaryBuffer += this.currentInputCharacter; this.state = State.NumericCharacterReference; break; // #
          default: {
            if (this.asciiAlphanumeric(this.currentInputCharacter)) {
              this.reconsumeIn(State.NamedCharacterReference);
              break;
            }

            // Not a reference: the '&' is passed through literally.
            this.flushCodePointsConsumedAsCharacterReference();
            this.reconsumeIn(this.returnState);
          }
        }

        break;
      }
      case State.NamedCharacterReference: {
        // Pick the LONGEST entity name matching at the current position, so that a name such as
        // "amp;" is preferred over its semicolon-less prefix "amp" regardless of key order.
        let longest: string | undefined;

        for (const entry in entities) {
          if ((longest === undefined || entry.length > longest.length) && this.matchNextFew(entry))
            longest = entry;
        }

        if (longest === undefined) {
          this.flushCodePointsConsumedAsCharacterReference();
          this.state = State.AmbiguousAmpersand;
          break;
        }

        this.consumeNextFew(longest);
        this.temporaryBuffer += longest;

        const terminated = longest.endsWith('\u003B'); // ;
        const following = this.next() ?? '';

        // Inside attribute values an unterminated reference followed by '=' or an alphanumeric
        // character is kept as literal text.
        if (this.consumedAsPartOfAnAttribute() && !terminated && (following === '\u003D' || this.asciiAlphanumeric(following))) {
          this.flushCodePointsConsumedAsCharacterReference();
          this.state = this.returnState;
          break;
        }

        if (!terminated)
          this.parseError('missing-semicolon-after-character-reference');

        // Replace the raw reference text with the characters it stands for.
        this.temporaryBuffer = '';
        this.temporaryBuffer += entities[longest].characters;
        this.flushCodePointsConsumedAsCharacterReference();
        this.state = this.returnState;

        break;
      }
      case State.AmbiguousAmpersand: {
        switch (this.consumeNext()) {
          case '\u003B': this.parseError('unknown-named-character-reference'); this.reconsumeIn(this.returnState); break; // ;
          default: {
            if (this.asciiAlphanumeric(this.currentInputCharacter)) {
              if (this.consumedAsPartOfAnAttribute()) {
                this.currentTag().attributes.current.value += this.currentInputCharacter;
              } else {
                this.emit({ type: Type.Character, data: this.currentInputCharacter });
              }

              break;
            }

            this.reconsumeIn(this.returnState);
          }
        }

        break;
      }
      default: throw new Error(`FIXME (Tokenizer#iterate, Unimplemented state '${this.state}')`);
    }
  }

  /**
   * Outputs the contents of the temporary buffer: appended to the current
   * attribute's value when the reference started inside an attribute value,
   * otherwise emitted as one `Character` token per code point.
   */
  private flushCodePointsConsumedAsCharacterReference(): void {
    if (this.consumedAsPartOfAnAttribute()) {
      this.currentTag().attributes.current.value += this.temporaryBuffer;
      return;
    }

    for (const codePoint of this.temporaryBuffer)
      this.emit({ type: Type.Character, data: codePoint });
  }

  /** Whether the character reference being processed started inside an attribute value. */
  private consumedAsPartOfAnAttribute(): boolean {
    return this.returnState === State.AttributeValueDouble || this.returnState === State.AttributeValueSingle || this.returnState === State.AttributeValueUnquoted;
  }

  /** True when `input` is exactly one ASCII letter or digit. */
  private asciiAlphanumeric(input: string): boolean {
    return this.asciiAlpha(input) || this.asciiDigit(input);
  }

  /** True when `input` is exactly one ASCII letter (A–Z or a–z). */
  private asciiAlpha(input: string): boolean {
    return this.asciiUpperAlpha(input) || this.asciiLowerAlpha(input);
  }

  /**
   * True when `input` is exactly one ASCII upper-case letter (A–Z).
   * The pattern is anchored, so the empty string and multi-character strings
   * (including a stringified `undefined` at end of input) yield false.
   */
  private asciiUpperAlpha(input: string): boolean {
    return /^[\u0041-\u005A]$/.test(input);
  }

  /** True when `input` is exactly one ASCII lower-case letter (a–z). */
  private asciiLowerAlpha(input: string): boolean {
    return /^[\u0061-\u007A]$/.test(input);
  }

  /** True when `input` is exactly one ASCII digit (0–9). */
  private asciiDigit(input: string): boolean {
    return /^[\u0030-\u0039]$/.test(input);
  }

  /**
   * Un-consumes the current input character, switches to `state` and runs
   * that state's handler right away so it sees the same character again.
   */
  private reconsumeIn(state: State): void {
    this.pointer--;
    this.state = state;
    this.spin();
  }

  /** Reports a parse error; tokenization continues afterwards. */
  private parseError(error: ParseError): void {
    console.error('Parse error: ' + error);
  }

  /**
   * Consumes one UTF-16 code unit from the input and remembers it as the
   * current input character.
   *
   * @returns The consumed character, or `undefined` past the end of input.
   *          The pointer is advanced either way so `reconsumeIn` stays balanced.
   */
  private consumeNext(): string | undefined {
    this.currentInputCharacter = this.input[this.pointer];
    this.pointer++;

    return this.currentInputCharacter;
  }

  /** Peeks at the next input character without consuming it. */
  private next(): string | undefined {
    return this.input[this.pointer];
  }

  /** Whether the unconsumed input starts with `input` (case-sensitive). */
  private matchNextFew(input: string): boolean {
    return this.input.startsWith(input, this.pointer);
  }

  /** Whether the unconsumed input starts with `input`, comparing lower-cased text. */
  private matchNextFewCaseInsensitive(input: string): boolean {
    return this.input.slice(this.pointer, this.pointer + input.length).toLowerCase() === input.toLowerCase();
  }

  /**
   * Consumes `input.length` characters, asserting each equals the
   * corresponding character of `input`. Call only after `matchNextFew`.
   */
  private consumeNextFew(input: string): void {
    for (let i = 0; i < input.length; i++) {
      const consumed = this.consumeNext();

      console.assert(consumed === input[i], {
        message: `Tokenizer#consumeNextFew: Expected '${input[i]}' (${input} at ${i}), got ${consumed} instead`
      });
    }
  }

  /**
   * Case-insensitive counterpart of `consumeNextFew`. Call only after
   * `matchNextFewCaseInsensitive`.
   */
  private consumeNextFewCaseInsensitive(input: string): void {
    for (let i = 0; i < input.length; i++) {
      const consumed = this.consumeNext()?.toLowerCase();

      console.assert(consumed === input[i].toLowerCase(), {
        message: `Tokenizer#consumeNextFewCaseInsensitive: Expected '${input[i].toLowerCase()}' (${input.toLowerCase()} at ${i}), got ${consumed} instead`
      });
    }
  }

  /** Appends `token` to the public token list. */
  private emit(token: Token): void {
    this.tokens.push(token);
  }

  /** Emits the current token, asserting it has the given type. */
  private emitCurrentOfType(type: Type): void {
    console.assert(this.currentToken.type === type, {
      message: `Tokenizer#emitCurrentOfType: Expected '${type}', got '${this.currentToken.type}' instead`
    });

    this.tokens.push(this.currentToken);
  }

  /** Emits the current token, asserting it has one of the two given types. */
  private emitCurrentOfEitherType(a: Type, b: Type): void {
    console.assert(this.currentToken.type === a || this.currentToken.type === b, {
      message: `Tokenizer#emitCurrentOfEitherType: Expected '${a}' or '${b}', got '${this.currentToken.type}' instead`
    });

    this.tokens.push(this.currentToken);
  }

  /**
   * Returns the current token typed as the given token type.
   * NOTE(review): a mismatch is only reported through console.assert; the
   * token is returned regardless.
   */
  private currentOfType<T extends Type>(type: T): Token & { type: T } {
    console.assert(this.currentToken.type === type, {
      message: `Tokenizer#currentOfType: Expected '${type}', got '${this.currentToken.type}' instead`
    });

    return this.currentToken as Token & { type: T };
  }

  /** Returns the current token typed as either of the two given token types. */
  private currentOfEitherType<T extends Type, U extends Type>(a: T, b: U): Token & { type: T | U } {
    console.assert(this.currentToken.type === a || this.currentToken.type === b, {
      message: `Tokenizer#currentOfEitherType: Expected '${a}' or '${b}', got '${this.currentToken.type}' instead`
    });

    return this.currentToken as Token & { type: T | U };
  }

  /** Shorthand for the current token when it must be a start or end tag. */
  private currentTag() {
    return this.currentOfEitherType(Type.StartTag, Type.EndTag);
  }

  /** Makes `token` the current token and returns it. */
  private create(token: Token): Token {
    return this.currentToken = token;
  }
}
|