Everywhere: Move files into public and src directories

This commit is contained in:
networkException 2022-01-05 15:22:15 +01:00
parent fa85024316
commit b4d1a28824
42 changed files with 3865 additions and 3488 deletions

View file

@ -1,232 +0,0 @@
import { Palette } from "./highlighter/palette";
import { Node, Position } from "./highlighter/node";
import { State } from "./highlighter/state";
import { Token, Type } from "./tokenizer/token";
import { TODO, VERIFY, VERIFY_NOT_REACHED } from "../util/assertions.js";
/**
 * Turns a list of tokenizer {@link Token}s into a flat list of colored
 * {@link Node}s.
 *
 * The class is a small state machine: every call to {@link Highlighter.spin}
 * handles the current state, consuming tokens through `pointer` and pushing
 * finished nodes onto `nodes`. `finished` becomes `true` once an end-of-file
 * token is seen in the `Undefined` state.
 *
 * Every emitted node currently carries the placeholder position
 * `{ line: 0, character: 0 }`.
 *
 * NOTE(review): this class references `CharacterToken`, `StartTagToken`,
 * `EndTagToken`, `DOCTYPEToken`, `CommentToken` and `Constructor`; confirm
 * they are imported at the top of the file.
 */
export class Highlighter {
  private state: State = State.Undefined;
  private returnState!: State;
  private currentToken!: Token;
  private currentNode!: Node;
  public nodes: Array<Node> = new Array<Node>();
  private pointer: number = 0;
  public finished: boolean = false;

  /**
   * @param tokens Tokens to highlight, in document order.
   */
  public constructor(private tokens: Array<Token>) {
  }

  /**
   * Runs one step of the state machine. Callers are expected to call this
   * repeatedly until `finished` is `true`.
   *
   * Error paths use `this.currentToken?.…` because `currentToken` is
   * `undefined` once `pointer` has run past the end of `tokens`; the intended
   * TODO/VERIFY diagnostic should fire instead of a bare TypeError.
   */
  public spin(): void {
    switch (this.state) {
      case State.Undefined: {
        // Dispatch on the type of the next token; the token itself is handed
        // back (reconsumed) so the target state can read it again.
        switch (this.consumeNextTokenType()) {
          case Type.Character: this.reconsumeIn(State.BeforePlain); break;
          case Type.StartTag: this.reconsumeIn(State.StartTag); break;
          case Type.EndTag: this.reconsumeIn(State.EndTag); break;
          case Type.DOCTYPE: this.reconsumeIn(State.DOCTYPE); break;
          case Type.Comment: this.reconsumeIn(State.Comment); break;
          case Type.EndOfFile: this.finished = true; break;
          default: TODO(`Unimplemented token type '${this.currentToken?.type}'`);
        }
        break;
      }
      case State.BeforePlain: {
        switch (this.consumeNextTokenType()) {
          case Type.Character:
            // Open an empty plain-text node; the Plain state fills it.
            this.createNode({ position: { line: 0, character: 0 }, color: Palette.Plain, content: '' });
            this.reconsumeIn(State.Plain);
            break;
          default: VERIFY_NOT_REACHED(this.currentToken?.type);
        }
        break;
      }
      case State.Plain: {
        switch (this.consumeNextTokenType()) {
          case Type.Character: this.currentNode.content += this.currentTokenOfType(CharacterToken).data; break;
          default:
            // Any non-character token ends the run of plain text.
            this.emitNode(this.currentNode);
            this.reconsumeIn(State.Undefined);
        }
        break;
      }
      case State.StartTag: {
        // A `script` start tag routes the following tokens through the
        // script states; every other tag returns to Undefined.
        switch (this.consumeNextTokenOfType(StartTagToken).name) {
          case 'script': this.returnState = State.BeforeScript; break;
          default: this.returnState = State.Undefined; break;
        }
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: `<` });
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: this.currentTokenOfType(StartTagToken).name });
        if (this.currentTokenOfType(StartTagToken).attributes.nonEmpty()) {
          this.emitSpace({ line: 0, character: 0 });
          // Runs the Attributes state synchronously on the same token.
          this.reconsumeIn(State.Attributes);
        }
        // Emits the closing '>' for the same token.
        this.reconsumeIn(State.AfterAttributes);
        this.state = this.returnState;
        break;
      }
      case State.EndTag: {
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '</' });
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: this.consumeNextTokenOfType(EndTagToken).name });
        this.reconsumeIn(State.AfterAttributes);
        this.state = State.Undefined;
        break;
      }
      case State.Attributes: {
        const attributes = this.consumeNextTokenOfEitherType(StartTagToken, EndTagToken).attributes.list;
        for (let i = 0; i < attributes.length; i++) {
          const attribute = attributes[i];
          this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Attribute, content: attribute.name });
          this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '=' });
          this.emitNode({ position: { line: 0, character: 0 }, color: Palette.String, content: `"${attribute.value}"` });
          // Separate attributes with a single space, but not after the last.
          if (i !== attributes.length - 1) this.emitSpace({ line: 0, character: 0 });
        }
        break;
      }
      case State.AfterAttributes: {
        switch (this.consumeNextTokenType()) {
          case Type.StartTag:
            // FIXME: StartTagToken does not support selfClosing as of now
            // if (this.currentTokenOfType(StartTagToken).selfClosing === undefined) {
            this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
            // } else {
            //     this.emitSpace({ line: 0, character: 0 });
            //     this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '/>' });
            // }
            break;
          case Type.EndTag:
            this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
            break;
          default: VERIFY_NOT_REACHED(this.currentToken?.type);
        }
        break;
      }
      case State.BeforeScript: {
        switch (this.consumeNextTokenType()) {
          case Type.Character:
            // Script content is collected into a single String-colored node.
            this.createNode({ position: { line: 0, character: 0 }, color: Palette.String, content: '' });
            this.reconsumeIn(State.Script);
            break;
          case Type.EndTag: this.reconsumeIn(State.EndTag); break;
          default: VERIFY_NOT_REACHED(this.currentToken?.type);
        }
        break;
      }
      case State.Script: {
        switch (this.consumeNextTokenType()) {
          case Type.Character: this.currentNode.content += this.currentTokenOfType(CharacterToken).data; break;
          default:
            this.emitNode(this.currentNode);
            this.reconsumeIn(State.Undefined);
        }
        break;
      }
      case State.DOCTYPE: {
        const doctype = this.consumeNextTokenOfType(DOCTYPEToken);
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '<!' });
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Tag, content: 'DOCTYPE' });
        this.emitSpace({ line: 0, character: 0 });
        // FIXME: Implement more doctype values
        if (doctype.name !== undefined) this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Attribute, content: doctype.name })
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Punctuator, content: '>' });
        this.state = State.Undefined;
        break;
      }
      case State.Comment:
        this.emitNode({ position: { line: 0, character: 0 }, color: Palette.Comment, content: `<!--${this.consumeNextTokenOfType(CommentToken).data}-->` });
        this.state = State.Undefined;
        break;
      default: TODO(`Unimplemented state '${this.state}'`);
    }
  }

  /** Appends a finished node to the output list. */
  private emitNode(node: Node): void {
    this.nodes.push(node);
  }

  /** Appends a single plain-colored space at the given position. */
  private emitSpace(position: Position): void {
    this.nodes.push({ position, color: Palette.Plain, content: ' ' });
  }

  /** Makes `node` the node under construction and returns it. */
  private createNode(node: Node): Node {
    return this.currentNode = node;
  }

  /** Consumes the next token and verifies it is an instance of `type`. */
  private consumeNextTokenOfType<T extends Token>(type: Constructor<T>): T {
    this.currentToken = this.tokens[this.pointer];
    VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken?.constructor.name}' instead`);
    this.pointer++;
    return this.currentToken;
  }

  /** Consumes the next token and verifies it is an instance of `a` or `b`. */
  private consumeNextTokenOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
    this.currentToken = this.tokens[this.pointer];
    VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken?.constructor.name}' instead`);
    this.pointer++;
    return this.currentToken;
  }

  /**
   * Consumes the next token and returns its type, or `undefined` when
   * `pointer` is past the end of `tokens`.
   */
  private consumeNextTokenType(): Type {
    this.currentToken = this.tokens[this.pointer];
    this.pointer++;
    return this.currentToken?.type;
  }

  /** Consumes and returns the next token without any type check. */
  private consumeNextToken(): Token {
    this.currentToken = this.tokens[this.pointer];
    this.pointer++;
    return this.currentToken;
  }

  /** Returns the current token after verifying it is an instance of `type`. */
  private currentTokenOfType<T extends Token>(type: Constructor<T>): T {
    VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken?.constructor.name}' instead`);
    return this.currentToken;
  }

  /** Returns the current token after verifying it is an instance of `a` or `b`. */
  private currentTokenOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
    VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken?.constructor.name}' instead`);
    return this.currentToken;
  }

  /**
   * Hands the most recently consumed token back, switches to `state` and
   * immediately runs that state (note: this recurses into `spin`).
   */
  private reconsumeIn(state: State): void {
    this.pointer--;
    this.state = state;
    this.spin();
  }
}

View file

@ -1,12 +0,0 @@
import { Palette } from "./palette"
/** One highlighted fragment of output text, as produced by the highlighter. */
export type Node = {
/** Text of the fragment. */
content: string;
/** Location attached to the fragment. */
position: Position;
/** Palette color assigned to the fragment. */
color: Palette;
}
/** A location expressed as a line plus a character offset. */
export type Position = {
/** Line number. */
line: number;
/** Character offset — presumably within `line`; TODO confirm against consumers. */
character: number;
}

View file

@ -1,8 +0,0 @@
/** Hex color strings used for the different kinds of highlighted text. */
export const enum Palette {
/** Plain text and separating spaces. */
Plain = '#a6accd',
/** Markup punctuation such as `<`, `</`, `>`, `=` and `<!`. */
Punctuator = '#89ddff',
/** Tag names and the `DOCTYPE` keyword. */
Tag = '#f07178',
/** Attribute names and the doctype name. */
Attribute = '#c792ea',
/** Quoted attribute values and script content. */
String = '#c3e88d',
/** Comments. */
Comment = '#676e95',
}

View file

@ -1,13 +0,0 @@
/** States of the highlighter's state machine. */
export const enum State {
/** Initial/dispatch state: the next token's type selects the state to enter. */
Undefined = 'undefined',
/** A character token was seen; a new plain-text node is about to be opened. */
BeforePlain = 'before plain',
/** Accumulating consecutive character tokens into the plain-text node. */
Plain = 'plain',
/** Emitting a start tag. */
StartTag = 'start tag',
/** Emitting an end tag. */
EndTag = 'end tag',
/** Emitting the attributes of the current tag token. */
Attributes = 'attributes',
/** Entered after a `script` start tag, before its content. */
BeforeScript = 'before script',
/** Emitting the closing `>` of a tag. */
AfterAttributes = 'after attributes',
/** Emitting a doctype. */
DOCTYPE = 'DOCTYPE',
/** Accumulating consecutive character tokens into the script node. */
Script = 'script',
/** Emitting a comment. */
Comment = 'comment'
}

View file

@ -1,657 +0,0 @@
import { TODO, VERIFY, VERIFY_NOT_REACHED } from "../util/assertions.js";
import { Constructor } from "../util/guards.js";
import { ParseError } from "./errors.js";
import { entities } from "./tokenizer/entities.js";
import { State } from "./tokenizer/state.js";
import { Attribute, CharacterToken, CommentToken, DOCTYPEToken, EndOfFileToken, EndTagToken, Position, StartTagToken, Token } from "./tokenizer/token.js";
/**
 * HTML tokenizer implemented as a state machine whose states and parse-error
 * names follow the WHATWG HTML tokenization algorithm.
 *
 * Each call to {@link Tokenizer.spin} handles the current state once,
 * consuming characters from `input` through `pointer` and pushing finished
 * tokens onto `tokens`. States without a `case` in `spin` end up in the
 * `TODO` default branch.
 *
 * NOTE(review): `reconsumeIn` rewinds `pointer` but not `currentPosition`,
 * so reported positions drift by one for every reconsumed character.
 */
export class Tokenizer {
  private state: State = State.Data;
  private returnState!: State;
  private temporaryBuffer!: string;
  private currentToken!: Token;
  private currentInputCharacter!: string;
  private currentPosition: Position = { line: 0, column: 0, index: 0 };
  public tokens: Array<Token> = new Array<Token>();
  private pointer: number = 0;

  /**
   * @param input The complete HTML source text to tokenize.
   */
  public constructor(private input: string) {
  }

  /**
   * Runs one step of the state machine for the current state. A step may
   * recurse into itself through `reconsumeIn`.
   */
  public spin(): void {
    switch (this.state) {
      case State.Data: {
        switch (this.consumeNext()) {
          case '\u0026':
            this.returnState = State.Data;
            this.state = State.CharacterReference;
            break;
          case '\u003C': this.state = State.TagOpen; break;
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
            break;
          case undefined: this.emit(EndOfFileToken.create()); break;
          default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
        }
        break;
      }
      case State.RCDATA: {
        switch (this.consumeNext()) {
          // NOTE(review): the WHATWG algorithm switches to the *RCDATA*
          // less-than sign state here; confirm RAWTEXTLessThan is intended.
          case '\u003C': this.state = State.RAWTEXTLessThan; break;
          case '\u0000': this.parseError('unexpected-null-character'); this.emit(CharacterToken.createReplacementCharacter().at(this.currentPosition)); break;
          case undefined: this.emit(EndOfFileToken.create()); break;
          default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
        }
        break;
      }
      case State.TagOpen: {
        switch (this.consumeNext()) {
          case '\u0021': this.state = State.MarkupDeclarationOpen; break;
          case '\u002F': this.state = State.EndTagOpen; break;
          case '\u003F':
            this.parseError('unexpected-question-mark-instead-of-tag-name');
            this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
            this.reconsumeIn(State.BogusComment);
            break;
          case undefined:
            this.parseError('eof-before-tag-name');
            this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
            this.emit(EndOfFileToken.create());
            break;
          default: {
            if (this.asciiAlpha(this.currentInputCharacter)) {
              this.create(StartTagToken.createEmpty().startingAt(this.currentPosition));
              this.reconsumeIn(State.TagName);
              break;
            }
            this.parseError('invalid-first-character-of-tag-name');
            this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
            this.reconsumeIn(State.Data);
          }
        }
        break;
      }
      case State.EndTagOpen: {
        switch (this.consumeNext()) {
          case '\u003E': this.parseError('missing-end-tag-name'); this.state = State.Data; break;
          case undefined:
            this.parseError('eof-before-tag-name');
            this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
            this.emit(CharacterToken.createWith('\u002F').at(this.currentPosition));
            this.emit(EndOfFileToken.create());
            break;
          default: {
            if (this.asciiAlpha(this.currentInputCharacter)) {
              this.create(EndTagToken.createEmpty().startingAt(this.currentPosition));
              this.reconsumeIn(State.TagName);
              break;
            }
            this.parseError('invalid-first-character-of-tag-name');
            this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
            this.reconsumeIn(State.BogusComment);
          }
        }
        break;
      }
      case State.MarkupDeclarationOpen: {
        if (this.matchNextFew('--')) {
          this.consumeNextFew('--');
          this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
          this.state = State.CommentStart;
        } else if (this.matchNextFewCaseInsensitive('DOCTYPE')) {
          this.consumeNextFewCaseInsensitive('DOCTYPE');
          this.state = State.DOCTYPE;
        } else if (this.matchNextFew('[CDATA[')) {
          this.consumeNextFew('[CDATA[');
          // NOTE: This parser will never be generated as part of the fragment parsing algorithm, as such the CDATA section state does not
          //       exist and will not be started here.
          this.parseError('cdata-in-html-content');
          this.create(CommentToken.createWith('[CDATA[').startingAt(this.currentPosition));
          this.state = State.BogusComment;
        } else {
          this.parseError('incorrectly-opened-comment');
          this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
          this.state = State.BogusComment;
        }
        break;
      }
      case State.DOCTYPE: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeDOCTYPEName; break;
          case '\u003E': this.reconsumeIn(State.BeforeDOCTYPEName); break;
          case undefined:
            this.parseError('eof-in-doctype');
            this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
            this.emit(EndOfFileToken.create());
            break;
          default:
            this.parseError('missing-whitespace-before-doctype-name');
            this.reconsumeIn(State.BeforeDOCTYPEName);
        }
        break;
      }
      case State.BeforeDOCTYPEName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.create(DOCTYPEToken.createWithName('\uFFFD').startingAt(this.currentPosition));
            this.state = State.DOCTYPEName;
            break;
          case undefined:
            this.parseError('eof-in-doctype');
            this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
            this.emit(EndOfFileToken.create());
            break;
          default: {
            if (this.asciiUpperAlpha(this.currentInputCharacter)) {
              this.create(DOCTYPEToken.createWithName(this.currentInputCharacter.toLowerCase()).startingAt(this.currentPosition));
              this.state = State.DOCTYPEName;
              break;
            }
            this.create(DOCTYPEToken.createWithName(this.currentInputCharacter).startingAt(this.currentPosition));
            this.state = State.DOCTYPEName;
          }
        }
        break;
      }
      case State.DOCTYPEName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.AfterDOCTYPEName; break;
          case '\u003E': this.state = State.Data; this.emitCurrentOfType(DOCTYPEToken); break;
          case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(DOCTYPEToken).appendReplacementCharacterToName(); break;
          case undefined:
            this.parseError('eof-in-doctype');
            this.currentOfType(DOCTYPEToken).forceQuirks = true;
            this.emitCurrentOfType(DOCTYPEToken);
            this.emit(EndOfFileToken.create());
            break;
          default: {
            if (this.asciiUpperAlpha(this.currentInputCharacter)) {
              this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter.toLowerCase());
              break;
            }
            this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter);
          }
        }
        break;
      }
      case State.TagName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeAttributeName; break;
          case '\u002F': this.state = State.SelfClosingStartTag; break;
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentOfEitherType(StartTagToken, EndTagToken).appendReplacementCharacterToName();
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
          default: {
            if (this.asciiUpperAlpha(this.currentInputCharacter)) {
              this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter.toLowerCase());
              break;
            }
            this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter);
          }
        }
        break;
      }
      case State.BeforeAttributeName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u002F':
          case '\u003E':
          case undefined: this.reconsumeIn(State.AfterAttributeName); break;
          case '\u003D': {
            this.parseError('unexpected-equals-sign-before-attribute-name');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyValue(this.currentInputCharacter));
            this.state = State.AttributeName;
            break;
          }
          default: {
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue());
            this.reconsumeIn(State.AttributeName);
          }
        }
        break;
      }
      case State.AttributeName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020':
          case '\u002F':
          case '\u003E':
          case undefined: this.reconsumeIn(State.AfterAttributeName); break;
          case '\u003D': this.state = State.BeforeAttributeValue; break;
          case '\u0000': this.parseError('unexpected-null-character');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToName();
            break;
          case '\u0022':
          case '\u0027':
          case '\u003C':
            this.parseError('unexpected-character-in-attribute-name');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
            break;
          default: {
            if (this.asciiUpperAlpha(this.currentInputCharacter)) {
              this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter.toLowerCase());
              break;
            }
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
          }
        }
        break;
      }
      case State.AfterAttributeName: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u002F': this.state = State.SelfClosingStartTag; break;
          case '\u003D': this.state = State.BeforeAttributeValue; break;
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
          case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
          default:
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue());
            this.reconsumeIn(State.AttributeName);
            break;
        }
        break;
      }
      case State.BeforeAttributeValue: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': break;
          case '\u0022': this.state = State.AttributeValueDouble; break;
          case '\u0027': this.state = State.AttributeValueSingle; break;
          case '\u003E':
            this.parseError('missing-attribute-value');
            this.state = State.Data;
            this.emitCurrentOfEitherType(StartTagToken, EndTagToken);
            break;
          default:
            this.reconsumeIn(State.AttributeValueUnquoted);
        }
        break;
      }
      case State.AttributeValueDouble: {
        switch (this.consumeNext()) {
          case '\u0022': this.state = State.AfterAttributeValue; break;
          case '\u0026': this.returnState = State.AttributeValueDouble; this.state = State.CharacterReference; break;
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
          default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
        }
        break;
      }
      case State.AttributeValueSingle: {
        switch (this.consumeNext()) {
          case '\u0027': this.state = State.AfterAttributeValue; break;
          case '\u0026': this.returnState = State.AttributeValueSingle; this.state = State.CharacterReference; break;
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
          default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
        }
        break;
      }
      case State.AttributeValueUnquoted: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeAttributeName; break;
          case '\u0026': this.returnState = State.AttributeValueUnquoted; this.state = State.CharacterReference; break;
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
          case '\u0000':
            this.parseError('unexpected-null-character');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
            break;
          case '\u0022':
          case '\u0027':
          case '\u003C':
          case '\u003D':
          case '\u0060':
            this.parseError('unexpected-character-in-unquoted-attribute-value');
            this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
            break;
          case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
          default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
        }
        break;
      }
      case State.AfterAttributeValue: {
        switch (this.consumeNext()) {
          case '\u0009':
          case '\u000A':
          case '\u000C':
          case '\u0020': this.state = State.BeforeAttributeName; break;
          case '\u002F': this.state = State.SelfClosingStartTag; break;
          case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
          case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
          default: this.parseError('missing-whitespace-between-attributes'); this.reconsumeIn(State.BeforeAttributeName);
        }
        break;
      }
      case State.CommentStart: {
        switch (this.consumeNext()) {
          case '\u002D': this.state = State.CommentStartDash; break;
          case '\u003E': this.parseError('abrupt-closing-of-empty-comment'); this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
          default: this.reconsumeIn(State.Comment);
        }
        break;
      }
      // FIXME: Possible improvement to https://html.spec.whatwg.org/multipage/parsing.html#comment-state (adding **current** in some places)
      case State.Comment: {
        switch (this.consumeNext()) {
          case '\u003C': this.currentOfType(CommentToken).append(this.currentInputCharacter); this.state = State.CommentLessThanSign; break;
          case '\u002D': this.state = State.CommentEndDash; break;
          case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
          case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
          default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
        }
        break;
      }
      case State.CommentEndDash: {
        switch (this.consumeNext()) {
          case '\u002D': this.state = State.CommentEnd; break;
          case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
          default: this.currentOfType(CommentToken).append('\u002D'); this.reconsumeIn(State.Comment);
        }
        break;
      }
      // Same as above fixme https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
      case State.CommentEnd: {
        switch (this.consumeNext()) {
          case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
          case '\u0021': this.state = State.CommentEndBang; break;
          case '\u002D': this.currentOfType(CommentToken).append('\u002D'); break;
          case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
          default: this.currentOfType(CommentToken).append('\u002D\u002D'); this.reconsumeIn(State.Comment);
        }
        break;
      }
      // Same as above https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
      case State.BogusComment: {
        switch (this.consumeNext()) {
          case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
          case undefined: this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
          case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
          default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
        }
        break;
      }
      case State.CharacterReference: {
        this.temporaryBuffer = '';
        this.temporaryBuffer += '\u0026';
        switch (this.consumeNext()) {
          case '\u0023': this.temporaryBuffer += this.currentInputCharacter; this.state = State.NumericCharacterReference; break;
          default: {
            // At end of input currentInputCharacter is undefined; the
            // anchored ASCII helpers reject it, so the '&' is flushed and
            // the return state gets to see the EOF.
            if (this.asciiAlphanumeric(this.currentInputCharacter)) {
              this.reconsumeIn(State.NamedCharacterReference);
              break;
            }
            this.flushCodePointsConsumedAsCharacterReference();
            this.reconsumeIn(this.returnState);
          }
        }
        break;
      }
      case State.NamedCharacterReference: {
        let match = false;
        // The first entity name that prefixes the remaining input wins.
        for (const entry in entities) {
          if (this.matchNextFew(entry)) {
            match = true;
            this.consumeNextFew(entry);
            this.temporaryBuffer += entry;
            // Inside an attribute value, a reference without ';' that is
            // followed by '=' or an alphanumeric is kept as literal text.
            if (this.consumedAsPartOfAnAttribute() && entry[entry.length - 1] !== '\u003B' && (this.next() === '\u003D' || this.asciiAlphanumeric(this.next() ?? ''))) {
              this.flushCodePointsConsumedAsCharacterReference();
              this.state = this.returnState;
              break;
            }
            if (entry[entry.length - 1] !== '\u003B')
              this.parseError('missing-semicolon-after-character-reference');
            this.temporaryBuffer = '';
            this.temporaryBuffer += entities[entry].characters;
            this.flushCodePointsConsumedAsCharacterReference();
            this.state = this.returnState;
            break;
          }
        }
        if (!match) {
          this.flushCodePointsConsumedAsCharacterReference();
          this.state = State.AmbiguousAmpersand;
        }
        break;
      }
      case State.AmbiguousAmpersand: {
        switch (this.consumeNext()) {
          case '\u003B': this.parseError('unknown-named-character-reference'); this.reconsumeIn(this.returnState); break;
          default: {
            if (this.asciiAlphanumeric(this.currentInputCharacter)) {
              if (this.consumedAsPartOfAnAttribute()) {
                this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
              } else {
                this.emit(CharacterToken.createWith(this.currentInputCharacter));
              }
              break;
            }
            this.reconsumeIn(this.returnState);
          }
        }
        break;
      }
      default: TODO(`Unimplemented state '${this.state}'`);
    }
  }

  /**
   * Flushes `temporaryBuffer`: appended to the current attribute's value when
   * the reference started inside an attribute value, otherwise emitted as one
   * character token per code point.
   */
  private flushCodePointsConsumedAsCharacterReference(): void {
    if (this.consumedAsPartOfAnAttribute()) {
      this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.temporaryBuffer);
      return;
    }
    for (const codePoint of this.temporaryBuffer)
      this.emit(CharacterToken.createWith(codePoint));
  }

  /** Whether the return state is one of the three attribute-value states. */
  private consumedAsPartOfAnAttribute(): boolean {
    return this.returnState === State.AttributeValueDouble || this.returnState === State.AttributeValueSingle || this.returnState === State.AttributeValueUnquoted;
  }

  /** Whether `input` is exactly one ASCII letter or digit. */
  private asciiAlphanumeric(input: string): boolean {
    return this.asciiAlpha(input) || this.asciiDigit(input);
  }

  /** Whether `input` is exactly one ASCII letter. */
  private asciiAlpha(input: string): boolean {
    return this.asciiUpperAlpha(input) || this.asciiLowerAlpha(input);
  }

  /**
   * Whether `input` is exactly one character in A-Z.
   *
   * The pattern is anchored on purpose: at end of input callers pass
   * `undefined`, which `RegExp.prototype.test` coerces to the string
   * "undefined"; an unanchored character class would match its letters.
   */
  private asciiUpperAlpha(input: string): boolean {
    return /^[\u0041-\u005A]$/.test(input);
  }

  /** Whether `input` is exactly one character in a-z (anchored, see asciiUpperAlpha). */
  private asciiLowerAlpha(input: string): boolean {
    return /^[\u0061-\u007A]$/.test(input);
  }

  /** Whether `input` is exactly one character in 0-9 (U+0030 to U+0039). */
  private asciiDigit(input: string): boolean {
    return /^[\u0030-\u0039]$/.test(input);
  }

  /**
   * Hands the last consumed character back, switches to `state` and runs it
   * immediately. Only `pointer` is rewound, `currentPosition` is not.
   */
  private reconsumeIn(state: State): void {
    this.pointer--;
    this.state = state;
    this.spin();
  }

  /** Reports a parse error on the console; tokenization continues. */
  private parseError(error: ParseError): void {
    console.error('Parse error: ' + error);
  }

  /**
   * Consumes the next input character and advances the position bookkeeping.
   *
   * @returns The consumed character, or `undefined` at end of input (the
   *          pointer and position still advance in that case).
   */
  private consumeNext(): string | undefined {
    this.currentInputCharacter = this.input[this.pointer];
    this.pointer++;
    this.currentPosition.column++;
    this.currentPosition.index++;
    if (this.currentInputCharacter === '\n') {
      this.currentPosition.column = 0;
      this.currentPosition.line++;
    }
    return this.currentInputCharacter;
  }

  /** Peeks at the next input character without consuming it. */
  private next(): string | undefined {
    return this.input[this.pointer];
  }

  /** Whether the unconsumed input starts with `input` (case-sensitive). */
  private matchNextFew(input: string): boolean {
    return this.input.startsWith(input, this.pointer);
  }

  /** Whether the unconsumed input starts with `input`, ignoring case. */
  private matchNextFewCaseInsensitive(input: string): boolean {
    return this.input.slice(this.pointer, this.pointer + input.length).toLowerCase() === input.toLowerCase();
  }

  /** Consumes `input.length` characters, verifying each against `input`. */
  private consumeNextFew(input: string): void {
    for (let i = 0; i < input.length; i++) {
      const consumed = this.consumeNext();
      VERIFY(consumed === input[i], `Expected '${input[i]}' (${input} at ${i}), got ${consumed} instead`);
    }
  }

  /** Like `consumeNextFew`, but compares characters case-insensitively. */
  private consumeNextFewCaseInsensitive(input: string): void {
    for (let i = 0; i < input.length; i++) {
      const consumed = this.consumeNext()?.toLowerCase();
      VERIFY(consumed === input[i].toLowerCase(), `Expected '${input[i].toLowerCase()}' (${input.toLowerCase()} at ${i}), got ${consumed} instead`);
    }
  }

  /** Completes the token's range if necessary and appends it to `tokens`. */
  private emit(token: Token): void {
    this.populateRangeOnEmit(token);
    this.tokens.push(token);
  }

  /** Emits the current token after verifying it is an instance of `type`. */
  private emitCurrentOfType(type: Constructor<Token>): void {
    VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);
    this.populateRangeOnEmit(this.currentToken);
    this.tokens.push(this.currentToken);
  }

  /** Emits the current token after verifying it is an instance of `a` or `b`. */
  private emitCurrentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): void {
    VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);
    this.populateRangeOnEmit(this.currentToken);
    this.tokens.push(this.currentToken);
  }

  /**
   * Returns the token under construction after verifying its type.
   *
   * This accessor deliberately does not touch the token's range: the range
   * end is only filled in when the token is emitted, so it reflects where the
   * token finishes rather than where it was first accessed.
   */
  private currentOfType<T extends Token>(type: Constructor<T>): T {
    VERIFY(this.currentToken instanceof type, `Expected '${type.name}', got '${this.currentToken.constructor.name}' instead`);
    return this.currentToken;
  }

  /** Same as `currentOfType`, accepting either of two token classes. */
  private currentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
    VERIFY(this.currentToken instanceof a || this.currentToken instanceof b, `Expected '${a.name}' or '${b.name}', got '${this.currentToken.constructor.name}' instead`);
    return this.currentToken;
  }

  /**
   * Fills in whatever part of the token's range is still missing, using the
   * current position: both ends when neither is set, only the end when the
   * start is already set. An end without a start is treated as unreachable.
   */
  private populateRangeOnEmit(token: Token): void {
    if (token.range.start === undefined && token.range.end === undefined)
      token.at(this.currentPosition);
    if (token.range.start !== undefined && token.range.end === undefined)
      token.endingAt(this.currentPosition);
    if (token.range.start === undefined && token.range.end !== undefined)
      VERIFY_NOT_REACHED();
  }

  /**
   * Makes `token` the token under construction, defaulting its range start
   * to the current position when the caller has not set one.
   */
  private create(token: Token): Token {
    if (token.range.start === undefined)
      token.startingAt(this.currentPosition);
    return this.currentToken = token;
  }
}

File diff suppressed because it is too large Load diff

View file

@ -1,279 +0,0 @@
import { VERIFY, VERIFY_NOT_REACHED } from "../../util/assertions.js";
/** Discriminator for the kinds of token the tokenizer produces. */
export const enum Type {
/** A doctype token. */
DOCTYPE = 'DOCTYPE',
/** A start tag token. */
StartTag = 'start tag',
/** An end tag token. */
EndTag = 'end tag',
/** A comment token. */
Comment = 'comment',
/** A character token. */
Character = 'character',
/** The end-of-file token. */
EndOfFile = 'end-of-file'
}
/** U+FFFD REPLACEMENT CHARACTER, used by the `appendReplacementCharacter…`/`createReplacementCharacter` helpers. */
export const REPLACEMENT_CHARACTER = '\uFFFD';
/** Span of a token in the input, as a start and an end position. */
export type Range = {
/** Position recorded for the beginning of the token. */
start: Position,
/** Position recorded for the end of the token. */
end: Position
}
/** A location in the tokenizer input. */
export type Position = {
/** Line counter; the tokenizer increments it for every consumed `\n`. */
line: number,
/** Column counter; the tokenizer increments it per consumed character and resets it to 0 on `\n`. */
column: number,
/** Running counter incremented for every consumed character. */
index: number
}
/**
 * A mutable name/value pair belonging to a tag token. Both parts are built
 * up incrementally through the `append…` methods.
 */
export class Attribute {
  /** Creates an attribute whose name and value are both the empty string. */
  public static createWithEmptyNameAndValue(): Attribute {
    return Attribute.createWithEmptyValue('');
  }

  /** Creates an attribute with the given name and an empty value. */
  public static createWithEmptyValue(name: string): Attribute {
    return new Attribute(name, '');
  }

  /**
   * @param name  Initial attribute name.
   * @param value Initial attribute value.
   */
  public constructor(public name: string, public value: string) {
  }

  /** Appends `characters` to the end of the name. */
  public appendToName(characters: string): void {
    this.name = this.name + characters;
  }

  /** Appends the replacement character to the end of the name. */
  public appendReplacementCharacterToName(): void {
    this.appendToName(REPLACEMENT_CHARACTER);
  }

  /** Appends `characters` to the end of the value. */
  public appendToValue(characters: string): void {
    this.value = this.value + characters;
  }

  /** Appends the replacement character to the end of the value. */
  public appendReplacementCharacterToValue(): void {
    this.appendToValue(REPLACEMENT_CHARACTER);
  }
}
/** Ordered, append-only collection of the attributes of one tag token. */
export class AttributeList {
  private attributes: Array<Attribute> = [];

  /** The most recently appended attribute (`undefined` while the list is empty). */
  public get current(): Attribute {
    const lastIndex = this.attributes.length - 1;
    return this.attributes[lastIndex];
  }

  /** The underlying array itself, not a copy. */
  public get list(): Array<Attribute> {
    return this.attributes;
  }

  /** Whether at least one attribute has been appended. */
  public nonEmpty(): boolean {
    return this.attributes.length !== 0;
  }

  /** Adds `attribute` to the end of the list, making it `current`. */
  public append(attribute: Attribute): void {
    this.attributes.push(attribute);
  }
}
/**
 * Base class of all tokens: stores the token's type and the range it covers.
 * The range starts out as an empty object; `start` and `end` stay `undefined`
 * until one of the positioning methods sets them.
 */
export abstract class Token {
  #type: Type;
  #range: Range;

  /**
   * @param type Discriminator reported through the `type` getter.
   */
  protected constructor(type: Type) {
    this.#type = type;
    // Intentionally incomplete: both ends are filled in later.
    this.#range = {} as Range;
  }

  /** Stores a copy of `position` as the range start and returns `this`. */
  public startingAt(position: Position): this {
    const { line, column, index } = position;
    this.#range.start = { line, column, index };
    return this;
  }

  /** Stores a copy of `position` as the range end and returns `this`. */
  public endingAt(position: Position): this {
    const { line, column, index } = position;
    this.#range.end = { line, column, index };
    return this;
  }

  /** Stores separate copies of `position` as both start and end and returns `this`. */
  public at(position: Position): this {
    const { line, column, index } = position;
    this.#range.start = { line, column, index };
    this.#range.end = { line, column, index };
    return this;
  }

  /** The live range object of this token. */
  public get range(): Range {
    return this.#range;
  }

  /** The token's type discriminator. */
  public get type(): Type {
    return this.#type;
  }
}
/**
 * A token of kind `Type.Character` carrying immutable character data.
 */
export class CharacterToken extends Token {
    /**
     * @param data The character data carried by the token.
     */
    public constructor(public readonly data: NonNullable<string>) {
        super(Type.Character);
    }

    /** Creates a character token carrying `REPLACEMENT_CHARACTER`. */
    public static createReplacementCharacter(): CharacterToken {
        return new CharacterToken(REPLACEMENT_CHARACTER);
    }

    /** Creates a character token carrying `data`. */
    public static createWith(data: NonNullable<string>): CharacterToken {
        return new CharacterToken(data);
    }
}
/**
 * A token of kind `Type.Comment`. Its data is mutable and can be extended
 * after construction.
 */
export class CommentToken extends Token {
    /**
     * @param data Initial comment data.
     */
    public constructor(public data: NonNullable<string>) {
        super(Type.Comment);
    }

    /** Creates a comment token whose data is `data`. */
    public static createWith(data: string): CommentToken {
        return new CommentToken(data);
    }

    /** Creates a comment token with empty data. */
    public static createEmpty(): CommentToken {
        return new CommentToken('');
    }

    /** Appends `characters` to the comment data. */
    public append(characters: string): void {
        this.data = this.data + characters;
    }

    /** Appends `REPLACEMENT_CHARACTER` to the comment data. */
    public appendReplacementCharacter(): void {
        this.append(REPLACEMENT_CHARACTER);
    }
}
/**
 * A token of kind `Type.EndOfFile`. It carries no data of its own.
 */
export class EndOfFileToken extends Token {
public constructor() {
super(Type.EndOfFile);
}
/** Creates a new end-of-file token. */
public static create(): EndOfFileToken {
return new EndOfFileToken();
}
}
/**
 * A token of kind `Type.StartTag`: a mutable tag name plus the list of the
 * tag's attributes.
 */
export class StartTagToken extends Token {
    /**
     * @param name       Initial tag name (may be empty).
     * @param attributes The list that collects the tag's attributes.
     */
    public constructor(public name: NonNullable<string>, public readonly attributes: AttributeList) {
        super(Type.StartTag);
    }

    /** Creates a start tag with an empty name and a fresh, empty attribute list. */
    public static createEmpty(): StartTagToken {
        const attributes = new AttributeList();

        return new StartTagToken('', attributes);
    }

    /** Appends `characters` to the tag name. */
    public appendToName(characters: string): void {
        this.name = this.name + characters;
    }

    /** Appends `REPLACEMENT_CHARACTER` to the tag name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }
}
/**
 * A token of kind `Type.EndTag`: a mutable tag name plus an attribute list,
 * mirroring the shape of `StartTagToken`.
 */
export class EndTagToken extends Token {
    /**
     * @param name       Initial tag name (may be empty).
     * @param attributes The list that collects the tag's attributes.
     */
    public constructor(public name: NonNullable<string>, public readonly attributes: AttributeList) {
        super(Type.EndTag);
    }

    /** Creates an end tag with an empty name and a fresh, empty attribute list. */
    public static createEmpty(): EndTagToken {
        const attributes = new AttributeList();

        return new EndTagToken('', attributes);
    }

    /** Appends `characters` to the tag name. */
    public appendToName(characters: string): void {
        this.name = this.name + characters;
    }

    /** Appends `REPLACEMENT_CHARACTER` to the tag name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }
}
/**
 * A token of kind `Type.DOCTYPE`. Every field is optional; a field that is
 * `undefined` has simply not been set on this token.
 */
export class DOCTYPEToken extends Token {
    /**
     * @param name             The doctype name, if any.
     * @param publicIdentifier The public identifier, if any.
     * @param systemIdentifier The system identifier, if any.
     * @param forceQuirks      `true` when the force-quirks flag is set, otherwise left undefined.
     */
    public constructor(
        public name?: string,
        public publicIdentifier?: string,
        public systemIdentifier?: string,
        public forceQuirks?: true
    ) {
        super(Type.DOCTYPE);
    }

    /** Creates a doctype token with the given name and nothing else set. */
    public static createWithName(name: string): DOCTYPEToken {
        return new DOCTYPEToken(name, undefined, undefined, undefined);
    }

    /** Creates a doctype token that has no name and has the force-quirks flag set. */
    public static createWithForcedQuirks(): DOCTYPEToken {
        return new DOCTYPEToken(undefined, undefined, undefined, true);
    }

    /**
     * Appends `characters` to the name. The name must already be set; this is
     * checked with `VERIFY` before appending.
     */
    public appendToName(characters: string): void {
        VERIFY(this.name !== undefined);

        this.name += characters;
    }

    /** Appends `REPLACEMENT_CHARACTER` to the name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }
}
/**
 * Renders a token back into an HTML-like string, mainly useful for debugging
 * output.
 *
 * - character tokens yield their data unchanged,
 * - comment tokens yield `<!--data-->`,
 * - DOCTYPE tokens yield `<!DOCTYPE name>`, or `<!DOCTYPE>` when the token
 *   has no name (for example one created by `createWithForcedQuirks`),
 * - the end-of-file token yields the literal text `EOF`,
 * - end tags yield `</name>`; their attributes are not rendered,
 * - start tags yield `<name attr="value" …>`; attribute values are emitted
 *   verbatim, without escaping.
 *
 * @param token The token to render.
 * @returns The textual form of `token`. Any other token class is reported
 *          through `VERIFY_NOT_REACHED`; the trailing `''` only exists to
 *          satisfy the declared return type.
 */
export function stringify(token: Token): string {
    if (token instanceof CharacterToken) return token.data;
    if (token instanceof CommentToken) return `<!--${token.data}-->`;

    if (token instanceof DOCTYPEToken) {
        // A nameless DOCTYPE must not be interpolated directly: that would
        // print the text "undefined" as if it were the doctype name.
        if (token.name === undefined) return '<!DOCTYPE>';

        return `<!DOCTYPE ${token.name}>`;
    }

    if (token instanceof EndOfFileToken) return 'EOF';
    if (token instanceof EndTagToken) return `</${token.name}>`;

    if (token instanceof StartTagToken) {
        let string = `<${token.name}`;

        for (const attribute of token.attributes.list)
            string += ` ${attribute.name}="${attribute.value}"`;

        // TODO: Implement selfClosing
        // if (token.selfClosing) return `${string} />`;

        return `${string}>`;
    }

    VERIFY_NOT_REACHED(token.constructor.name);
    return '';
}

View file

@ -1,25 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>nwex.de</title>
</head>
<body style="background: #292D3E;">
<h1>networkException</h1>
<p></p>
<script type="module">
// Pipeline entry points and the renderer, loaded as ES modules.
import { tokenize, normalizeNewlines, highlight } from './html.js';
import { render } from './view.js';
// Request the current page again to obtain its markup as text.
const response = await fetch(window.location.href);
const text = await response.text();
// Newlines are normalized before the text is tokenized.
const tokens = tokenize(normalizeNewlines(text));
const nodes = highlight(tokens);
// Swap everything inside the body for whatever render() returns.
document.body.replaceChildren(render(nodes));
</script>
</body>
</html>

View file

@ -3,7 +3,7 @@
"version": "1.0.0",
"description": "Landing page for nwex.de",
"scripts": {
"start": "concurrently --kill-others 'tsc -p tsconfig.json --watch' 'reload -b'"
"start": "concurrently --kill-others 'tsc -p tsconfig.json --watch' 'reload -b -d public'"
},
"repository": {
"type": "git",

69
public/index.html Normal file
View file

@ -0,0 +1,69 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<link rel="stylesheet" href="style/index.css">
<title>nwex.de</title>
</head>
<body>
<h1>networkException</h1>
<h2>try to catch(this: Exception);</h2>
<main>
I'm a TypeScript developer working on backend code, libraries and anything that scales.
</main>
<section>
<h3>Projects I maintain in my free time</h3>
<ul>
<li><a href="https://github.com/t2linux/wiki">The t2linux.org wiki - A project to run Linux on Apple T2 devices</a></li>
<li><a href="https://github.com/Eloston/ungoogled-chromium">Chromium sans integration with Google</a></li>
<li><a href="https://github.com/ungoogled-software/ungoogled-chromium-archlinux">Arch Linux packaging for ungoogled-chromium</a></li>
</ul>
</section>
<section>
<h3>Links</h3>
<ul>
<li><a href="https://github.com/networkException">github.com</a></li>
<li><a href="https://gitlab.upi.li/networkException">gitlab.upi.li</a></li>
<li><a href="https://twitter.com/netwrkException">twitter.com</a></li>
<li><a href="https://matrix.to/#/@networkexception:chat.upi.li">matrix.org</a></li>
<li><a href="https://chaos.social/@networkexception">mastodon.social</a></li>
<li><a href="mailto:hello@nwex.de">email</a></li>
<li><a href="/gpg.key">My GPG key</a></li>
</ul>
</section>
<section>
<h3>This website</h3>
<p>
Although I know my way around frontend development and design, I'm far less skilled at it.
As such this website is trying to impress in a different way:
</p>
<p>
It implements parts of the <a href="https://html.spec.whatwg.org/multipage/parsing.html#tokenization">HTML parser spec</a>
to tokenize and highlight its own source code.
</p>
</section>
<!-- Inner workings of the page -->
<script type="module">
// Pipeline entry points, the renderer and the hover inspector, loaded as ES modules.
import { tokenize, normalizeNewlines, highlight } from './script/html.js';
import { render } from './script/view.js';
import { Inspector } from './script/html/inspector.js';
// Request the current page again to obtain its markup as text.
const response = await fetch(window.location.href);
const text = await response.text();
// Newlines are normalized before the text is tokenized.
const tokens = tokenize(normalizeNewlines(text));
const spans = highlight(tokens);
const inspector = new Inspector();
// Hand the fetched text, the highlight spans and the inspector to the renderer.
render(text, spans, inspector);
</script>
</body>
</html>

33
public/style/index.css Normal file
View file

@ -0,0 +1,33 @@
/* Page-wide background and typeface. */
body {
background: #292D3E;
font-family: monospace;
}
/* Remove the default margins of preformatted blocks. */
pre {
margin: 0;
}
/* Spans inside a pre that is a direct child of the body keep the default cursor. */
body > pre > span {
cursor: default;
}
a {
color: white;
}
/* Page-wide text colour; the font size is tied to the larger viewport dimension. */
body {
color: white;
font-size: 1vmax;
}
/* The element with id "inspector": fixed to the viewport and transparent to pointer events. */
#inspector {
position: fixed;
pointer-events: none;
background: inherit;
color: white;
border: 1px #424864 solid;
padding: .5vw;
}

View file

@ -1,7 +1,7 @@
import { Highlighter } from "./html/highlighter.js";
import { Node } from "./html/highlighter/node.js";
import { Tokenizer } from "./html/tokenizer.js";
import { Token, Type } from "./html/tokenizer/token.js";
import { Highlighter } from './html/highlighter.js';
import { Span } from './html/highlighter/span.js';
import { Tokenizer } from './html/tokenizer.js';
import { Token, Type } from './html/tokenizer/token.js';
export function normalizeNewlines(input: string): string {
return input.replaceAll('\u000D\u000A', '\u000A').replaceAll('\u000D', '\u000A');
@ -20,7 +20,7 @@ export function tokenize(input: string): Array<Token> {
return tokenizer.tokens;
}
export function highlight(tokens: Array<Token>): Array<Node> {
export function highlight(tokens: Array<Token>): Array<Span> {
const highlighter = new Highlighter(tokens);
console.time('html highlighter');
@ -30,5 +30,5 @@ export function highlight(tokens: Array<Token>): Array<Node> {
console.timeEnd('html highlighter');
return highlighter.nodes;
return highlighter.spans;
}

View file

@ -17,4 +17,4 @@ export type ParseError = 'unexpected-null-character' |
'abrupt-closing-of-empty-comment' |
'eof-in-comment' |
'missing-semicolon-after-character-reference' |
'unknown-named-character-reference';
'unknown-named-character-reference';

70
src/html/highlighter.ts Normal file
View file

@ -0,0 +1,70 @@
import { Color } from './highlighter/properties/color.js';
import { Cursor } from './highlighter/properties/cursor.js';
import { Font } from './highlighter/properties/font.js';
import { Link } from './highlighter/properties/link.js';
import { Span } from './highlighter/span.js';
import { Token } from './tokenizer/token.js';
import { CommentToken } from './tokenizer/tokens/comment.js';
import { DOCTYPEToken } from './tokenizer/tokens/doctype.js';
import { EndTagToken } from './tokenizer/tokens/endTag.js';
import { StartTagToken } from './tokenizer/tokens/startTag.js';
/**
 * Turns a list of tokens into a flat list of styled `Span`s.
 *
 * Comment, DOCTYPE, start tag and end tag tokens produce spans; every other
 * token is ignored. A single call to `spin` processes all tokens and then
 * sets `finished`.
 */
export class Highlighter {
    public spans: Array<Span> = new Array<Span>();
    public finished: boolean = false;

    /**
     * @param tokens The tokens to highlight, in input order.
     */
    public constructor(private tokens: Array<Token>) {
    }

    /**
     * Walks over every token once, appends the resulting spans to `spans`
     * and marks the highlighter as finished.
     */
    public spin(): void {
        for (const token of this.tokens) {
            if (token instanceof CommentToken) this.highlightComment(token);
            if (token instanceof DOCTYPEToken) this.highlightDoctype(token);
            if (token instanceof StartTagToken || token instanceof EndTagToken) this.highlightTag(token);
        }

        this.finished = true;
    }

    /** A comment becomes one span over the token's whole range. */
    private highlightComment(token: CommentToken): void {
        this.spans.push(Span.createFromRange(token, token.range, Color.Comment));
    }

    /**
     * A DOCTYPE becomes four spans, all positioned relative to the token's
     * range.
     *
     * NOTE(review): the offsets 8, 9 and 10 presumably step back from the
     * first character of the name over `DOCTYPE ` and `<!`, which assumes
     * exactly one space before the name; confirm against the tokenizer.
     */
    private highlightDoctype(token: DOCTYPEToken): void {
        const { start, end } = token.range;

        this.spans.push(
            Span.createFromRange(token, { start: start.copy().decrement(8), end: start }, Color.Tag),
            Span.createFromRange(token, { start, end }, Color.Attribute, Font.Italic),
            Span.createFromRange(token, { start: start.copy().decrement(10), end: start.copy().decrement(9) }, Color.Punctuator),
            Span.createAt(token, end, Color.Punctuator)
        );
    }

    /**
     * A tag becomes one span over its range, followed by the spans of each
     * attribute, followed by the punctuation around the tag.
     */
    private highlightTag(token: StartTagToken | EndTagToken): void {
        this.spans.push(Span.createFromRange(token, token.range, Color.Tag));

        for (const attribute of token.attributes.list) {
            this.spans.push(Span.createFromRange(attribute, attribute.nameRange, Color.Attribute, Font.Italic));

            const valueRange = attribute.valueRange;

            if (valueRange !== undefined) {
                // The value of an href attribute is rendered as a real, clickable anchor.
                const valueSpan = attribute.name === 'href'
                    ? Span.createAnchorFromRange(attribute, valueRange, Color.String, Font.Underline, Cursor.Pointer, Link.of(attribute.value))
                    : Span.createFromRange(attribute, valueRange, Color.String);

                this.spans.push(valueSpan);

                // For quoted values both ends of the value range get punctuation colouring.
                if (attribute.quoted) {
                    this.spans.push(
                        Span.createAt(attribute, valueRange.start, Color.Punctuator),
                        Span.createAt(attribute, valueRange.end, Color.Punctuator)
                    );
                }
            }

            if (attribute.equalsPosition !== undefined)
                this.spans.push(Span.createAt(attribute, attribute.equalsPosition, Color.Punctuator));
        }

        const { start, end } = token.range;

        if (token instanceof StartTagToken) {
            // One position before the range start, plus the range end.
            this.spans.push(
                Span.createAt(token, start.copy().decrement(1), Color.Punctuator),
                Span.createAt(token, end, Color.Punctuator)
            );
        }

        if (token instanceof EndTagToken) {
            // The two positions before the range start, plus the range end.
            this.spans.push(
                Span.createFromRange(token, { start: start.copy().decrement(2), end: start.copy().decrement(1) }, Color.Punctuator),
                Span.createAt(token, end, Color.Punctuator)
            );
        }
    }
}

View file

@ -0,0 +1,3 @@
/**
 * Base class for objects that can describe themselves as text.
 */
export abstract class Inspectable {
/**
 * Returns a textual description of this object.
 *
 * @param indent NOTE(review): presumably the indentation to use for nested
 *               output; the implementations visible alongside this class do
 *               not read it.
 */
public abstract inspect(indent: number): string;
}

View file

@ -0,0 +1,36 @@
import { Property } from '../property.js';
/**
 * A text colour property. The constructor is private, so the static members
 * below are the only instances.
 */
export class Color extends Property {
    #color: string;

    public static Plain = new Color('#a6accd');
    public static Punctuator = new Color('#89ddff');
    public static Tag = new Color('#f07178');
    public static Attribute = new Color('#c792ea');
    public static String = new Color('#c3e88d');
    public static Comment = new Color('#676e95');

    private constructor(hex: string) {
        super();

        this.#color = hex;
    }

    /** The colour value as passed to the constructor. */
    public get color(): string {
        return this.#color;
    }

    /** Sets the element's text colour to this colour. */
    public override apply(element: HTMLElement): void {
        element.style.color = this.#color;
    }

    /** Two colours are equal when both are `Color`s holding the same value. */
    public override equals(other: Property): boolean {
        return other instanceof Color && other.#color === this.#color;
    }

    /** Describes this colour as `Color { value }`; `indent` is not used. */
    public override inspect(indent: number): string {
        return `Color { ${this.#color} }`;
    }
}

View file

@ -0,0 +1,28 @@
import { Property } from '../property.js';
/**
 * A mouse cursor property. The constructor is private, so the static members
 * below are the only instances.
 */
export class Cursor extends Property {
    #value: string;

    public static Default = new Cursor('default');
    public static Pointer = new Cursor('pointer');

    private constructor(keyword: string) {
        super();

        this.#value = keyword;
    }

    /** Sets the element's `cursor` style to this cursor's value. */
    public override apply(element: HTMLElement): void {
        element.style.cursor = this.#value;
    }

    /** Two cursors are equal when both are `Cursor`s holding the same value. */
    public override equals(other: Property): boolean {
        return other instanceof Cursor && other.#value === this.#value;
    }

    /** Describes this cursor as `Cursor { value }`; `indent` is not used. */
    public override inspect(indent: number): string {
        return `Cursor { ${this.#value} }`;
    }
}

View file

@ -0,0 +1,30 @@
import { Property } from '../property.js';
/**
 * A font styling property. Each instance pairs a function that mutates an
 * element's style declaration with a label used when inspecting it. The
 * constructor is private, so the static members below are the only
 * instances.
 */
export class Font extends Property {
    #implementation: (style: CSSStyleDeclaration) => void;
    #value: string;

    public static Italic = new Font(style => { style.fontStyle = 'italic'; }, 'italic');
    public static Underline = new Font(style => { style.textDecoration = ' underline'; }, 'underline');

    private constructor(implementation: (style: CSSStyleDeclaration) => void, value: string) {
        super();

        this.#value = value;
        this.#implementation = implementation;
    }

    /** Runs this font's styling function on the element's style. */
    public override apply(element: HTMLElement): void {
        this.#implementation(element.style);
    }

    /**
     * Two fonts are equal when both are `Font`s sharing the very same styling
     * function (compared by identity, not by label).
     */
    public override equals(other: Property): boolean {
        return other instanceof Font && other.#implementation === this.#implementation;
    }

    /** Describes this font as `Font { label }`; `indent` is not used. */
    public override inspect(indent: number): string {
        return `Font { ${this.#value} }`;
    }
}

View file

@ -0,0 +1,29 @@
import { Property } from '../property.js';
/**
 * A hyperlink target property. Instances are created through `Link.of`.
 */
export class Link extends Property {
    #href: string;

    private constructor(target: string) {
        super();

        this.#href = target;
    }

    /** Creates a link property pointing at `href`. */
    public static of(href: string): Link {
        return new Link(href);
    }

    /** Sets the anchor element's `href` to this link's target. */
    public override apply(element: HTMLAnchorElement): void {
        element.href = this.#href;
    }

    /** Two links are equal when both are `Link`s with the same target. */
    public override equals(other: Property): boolean {
        return other instanceof Link && other.#href === this.#href;
    }

    /** Describes this link as `Link { href: 'target' }`; `indent` is not used. */
    public override inspect(indent: number): string {
        return `Link { href: '${this.#href}' }`;
    }
}

View file

@ -0,0 +1,6 @@
import { Inspectable } from './inspectable.js';
/**
 * Base class of the styling properties that can be attached to a span.
 */
export abstract class Property extends Inspectable {
/** Whether `other` represents the same property as this one. */
public abstract equals(other: Property): boolean;
/** Applies this property to `element`. */
public abstract apply(element: HTMLElement): void;
}

View file

@ -0,0 +1,72 @@
import { Attribute } from '../tokenizer/attribute.js';
import { Position } from '../tokenizer/position.js';
import { Range } from '../tokenizer/range.js';
import { Token } from '../tokenizer/token.js';
import { Inspectable } from './inspectable.js';
import { Property } from './property.js';
export type Source = Attribute | Token | null;
/**
 * A stretch of the input between two positions, together with the
 * properties to apply to it, the tag name of the element to create for it
 * and the attribute or token it was derived from.
 *
 * Instances are created through the static `create…` factories.
 */
export class Span extends Inspectable {
    #source: Source;
    #from: Position;
    #to: Position;
    #tagName: keyof HTMLElementTagNameMap;
    #properties: Array<Property>;

    private constructor(source: Source, from: Position, to: Position, tagName: keyof HTMLElementTagNameMap, properties: Array<Property>) {
        super();

        this.#source = source;
        this.#from = from;
        this.#to = to;
        this.#tagName = tagName;
        this.#properties = properties;
    }

    /** Creates a `span` element span covering `range`. */
    public static createFromRange(source: Source, range: Range, ...properties: Array<Property>): Span {
        return new Span(source, range.start, range.end, 'span', properties);
    }

    /** Creates an `a` element span covering `range`. */
    public static createAnchorFromRange(source: Source, range: Range, ...properties: Array<Property>): Span {
        return new Span(source, range.start, range.end, 'a', properties);
    }

    /** Creates a `span` element span that starts and ends at `position`. */
    public static createAt(source: Source, position: Position, ...properties: Array<Property>): Span {
        return new Span(source, position, position, 'span', properties);
    }

    /** Creates an `a` element span that starts and ends at `position`. */
    public static createAnchorAt(source: Source, position: Position, ...properties: Array<Property>): Span {
        return new Span(source, position, position, 'a', properties);
    }

    /** The attribute or token this span was created for, or `null`. */
    public get source(): Source {
        return this.#source;
    }

    /** The tag name of the element to create for this span. */
    public get tagName(): keyof HTMLElementTagNameMap {
        return this.#tagName;
    }

    /** The properties array itself (not a copy). */
    public get properties(): Array<Property> {
        return this.#properties;
    }

    /** Whether `index` lies between the two positions' indices, both ends included. */
    public contains(index: number): boolean {
        return index >= this.#from.index && index <= this.#to.index;
    }

    /**
     * Describes this span on several lines: both positions, the properties
     * and, unless it is `null`, the source. `indent` is not used.
     */
    public override inspect(indent: number): string {
        const lines: Array<string> = ['Span {'];

        lines.push(` from: ${this.#from.inspect(0)}`);
        lines.push(` to: ${this.#to.inspect(0)}`);
        lines.push(` properties: [ ${this.#properties.map(property => property.inspect(0)).join(', ')} ]`);

        if (this.#source !== null) lines.push(` source: ${this.#source.inspect(0)}`);

        lines.push('}');

        return lines.join('\n');
    }
}

48
src/html/inspector.ts Normal file
View file

@ -0,0 +1,48 @@
import { Color } from './highlighter/properties/color.js';
import { Span } from './highlighter/span.js';
/**
 * A box that follows the mouse pointer and shows the description of the
 * span belonging to the element currently hovered.
 */
export class Inspector {
    #element: HTMLDivElement;

    /**
     * Creates the box (a `div` with id `inspector`), appends it to the
     * document body and starts moving it along with the mouse.
     */
    public constructor() {
        const box = document.createElement('div');
        box.id = 'inspector';
        document.body.appendChild(box);

        this.#element = box;

        // Keep the box 10px right of and below the pointer.
        document.addEventListener('mousemove', event => {
            this.#element.style.transform = `translate(${event.clientX + 10}px, ${event.clientY + 10}px)`;
        });
    }

    /** The box element itself. */
    public get element(): HTMLDivElement {
        return this.#element;
    }

    /**
     * Makes `element` show the description of the last entry of `spans`
     * while hovered, and highlights its background for that time. Does
     * nothing when `spans` is empty.
     */
    public instrument(element: HTMLElement, spans: Array<Span>): void {
        if (spans.length === 0)
            return;

        // The description is rendered once, up front, and reused on every hover.
        const description = document.createElement('pre');
        description.textContent = spans[spans.length - 1].inspect(0);

        const onEnter = (): void => {
            element.style.background = Color.Comment.color;
            this.show(description);
        };

        const onLeave = (): void => {
            element.style.background = 'none';
            this.hide();
        };

        element.addEventListener('mouseenter', onEnter);
        element.addEventListener('mouseleave', onLeave);
    }

    /** Makes the box visible with `element` as its only child. */
    public show(element: HTMLElement): void {
        const box = this.#element;

        box.style.display = 'block';

        // Skip the DOM update when the requested content is already in place.
        const alreadyShown = box.children[0] === element;
        if (!alreadyShown)
            box.replaceChildren(element);
    }

    /** Hides the box. */
    public hide(): void {
        this.#element.style.display = 'none';
    }
}

698
src/html/tokenizer.ts Normal file
View file

@ -0,0 +1,698 @@
import { TODO, VERIFY, VERIFY_NOT_REACHED } from '../util/assertions.js';
import { Constructor } from '../util/guards.js';
import { ParseError } from './errors.js';
import { Attribute } from './tokenizer/attribute.js';
import { entities } from './tokenizer/entities.js';
import { Position } from './tokenizer/position.js';
import { State } from './tokenizer/state.js';
import { Token } from './tokenizer/token.js';
import { CharacterToken } from './tokenizer/tokens/character.js';
import { CommentToken } from './tokenizer/tokens/comment.js';
import { DOCTYPEToken } from './tokenizer/tokens/doctype.js';
import { EndOfFileToken } from './tokenizer/tokens/endOfFile.js';
import { EndTagToken } from './tokenizer/tokens/endTag.js';
import { StartTagToken } from './tokenizer/tokens/startTag.js';
export class Tokenizer {
private state: State = State.Data;
private returnState!: State;
private temporaryBuffer!: string;
private currentToken!: Token;
private currentInputCharacter!: string;
private currentPosition: Position = Position.createStarting();
public tokens: Array<Token> = new Array<Token>();
private pointer: number = 0;
public constructor(private input: string) {
}
public spin(): void {
switch (this.state) {
case State.Data: {
switch (this.consumeNext()) {
case '\u0026':
this.returnState = State.Data;
this.state = State.CharacterReference;
break;
case '\u003C': this.state = State.TagOpen; break;
case '\u0000':
this.parseError('unexpected-null-character');
this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
break;
case undefined: this.emit(EndOfFileToken.create()); break;
default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
}
break;
}
case State.RCDATA: {
switch (this.consumeNext()) {
case '\u003C': this.state = State.RAWTEXTLessThan; break;
case '\u0000': this.parseError('unexpected-null-character'); this.emit(CharacterToken.createReplacementCharacter().at(this.currentPosition)); break;
case undefined: this.emit(EndOfFileToken.create()); break;
default: this.emit(CharacterToken.createWith(this.currentInputCharacter).at(this.currentPosition));
}
break;
}
case State.TagOpen: {
switch (this.consumeNext()) {
case '\u0021': this.state = State.MarkupDeclarationOpen; break;
case '\u002F': this.state = State.EndTagOpen; break;
case '\u003F':
this.parseError('unexpected-question-mark-instead-of-tag-name');
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
this.reconsumeIn(State.BogusComment);
break;
case undefined:
this.parseError('eof-before-tag-name');
this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
this.emit(EndOfFileToken.create());
break;
default: {
if (this.asciiAlpha(this.currentInputCharacter)) {
this.create(StartTagToken.createEmpty().startingAt(this.currentPosition));
this.reconsumeIn(State.TagName);
break;
}
this.parseError('invalid-first-character-of-tag-name');
this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
this.reconsumeIn(State.Data);
}
}
break;
}
case State.EndTagOpen: {
switch (this.consumeNext()) {
case '\u003E': this.parseError('missing-end-tag-name'); this.state = State.Data; break;
case undefined:
this.parseError('eof-before-tag-name');
this.emit(CharacterToken.createWith('\u003C').at(this.currentPosition));
this.emit(CharacterToken.createWith('\u002F').at(this.currentPosition));
this.emit(EndOfFileToken.create());
break;
default: {
if (this.asciiAlpha(this.currentInputCharacter)) {
this.create(EndTagToken.createEmpty().startingAt(this.currentPosition));
this.reconsumeIn(State.TagName);
break;
}
this.parseError('invalid-first-character-of-tag-name');
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
this.reconsumeIn(State.BogusComment);
}
}
break;
}
case State.MarkupDeclarationOpen: {
if (this.matchNextFew('--')) {
this.consumeNextFew('--');
this.create(CommentToken.createEmpty().startingAt(this.currentPosition.copy().decrement(4)));
this.state = State.CommentStart;
} else if (this.matchNextFewCaseInsensitive('DOCTYPE')) {
this.consumeNextFewCaseInsensitive('DOCTYPE');
this.state = State.DOCTYPE;
} else if (this.matchNextFew('[CDATA[')) {
this.consumeNextFew('[CDATA[');
// NOTE: This parser will never be generated as part of the fragment parsing algorithm, as such the CDATA section state does not
// exist and will not be started here.
this.parseError('cdata-in-html-content');
this.create(CommentToken.createWith('[CDATA[').startingAt(this.currentPosition));
this.state = State.BogusComment;
} else {
this.parseError('incorrectly-opened-comment');
this.create(CommentToken.createEmpty().startingAt(this.currentPosition));
this.state = State.BogusComment;
}
break;
}
case State.DOCTYPE: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': this.state = State.BeforeDOCTYPEName; break;
case '\u003E': this.reconsumeIn(State.BeforeDOCTYPEName); break;
case undefined:
this.parseError('eof-in-doctype');
this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
this.emit(EndOfFileToken.create());
break;
default:
this.parseError('missing-whitespace-before-doctype-name');
this.reconsumeIn(State.BeforeDOCTYPEName);
}
break;
}
case State.BeforeDOCTYPEName: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': break;
case '\u0000':
this.parseError('unexpected-null-character');
this.create(DOCTYPEToken.createWithName('\uFFFD').startingAt(this.currentPosition));
this.state = State.DOCTYPEName;
break;
case undefined:
this.parseError('eof-in-doctype');
this.emit(DOCTYPEToken.createWithForcedQuirks().at(this.currentPosition));
this.emit(EndOfFileToken.create());
break;
default: {
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
this.create(DOCTYPEToken.createWithName(this.currentInputCharacter.toLowerCase()).startingAt(this.currentPosition));
this.state = State.DOCTYPEName;
break;
}
this.create(DOCTYPEToken.createWithName(this.currentInputCharacter).startingAt(this.currentPosition));
this.state = State.DOCTYPEName;
}
}
break;
}
case State.DOCTYPEName: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': this.state = State.AfterDOCTYPEName; break;
case '\u003E': this.state = State.Data; this.emitCurrentOfType(DOCTYPEToken); break;
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(DOCTYPEToken).appendReplacementCharacterToName(); break;
case undefined:
this.parseError('eof-in-doctype');
this.currentOfType(DOCTYPEToken).forceQuirks = true;
this.emitCurrentOfType(DOCTYPEToken);
this.emit(EndOfFileToken.create());
break;
default: {
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter.toLowerCase());
break;
}
this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter);
}
}
break;
}
case State.TagName: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': this.state = State.BeforeAttributeName; break;
case '\u002F': this.state = State.SelfClosingStartTag; break;
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
case '\u0000':
this.parseError('unexpected-null-character');
this.currentOfEitherType(StartTagToken, EndTagToken).appendReplacementCharacterToName();
break;
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
default: {
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter.toLowerCase());
break;
}
this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter);
}
}
break;
}
case State.BeforeAttributeName: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': break;
case '\u002F':
case '\u003E':
case undefined: this.reconsumeIn(State.AfterAttributeName); break;
case '\u003D': {
this.parseError('unexpected-equals-sign-before-attribute-name');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyValue(this.currentInputCharacter).startingNameAt(this.currentPosition));
this.state = State.AttributeName;
break;
}
default: {
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue().startingNameAt(this.currentPosition));
this.reconsumeIn(State.AttributeName);
}
}
break;
}
case State.AttributeName: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020':
case '\u002F':
case '\u003E':
case undefined: this.reconsumeIn(State.AfterAttributeName); break;
case '\u003D':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingNameAt(this.currentPosition.copy().decrement(1));
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.equalsAt(this.currentPosition);
this.state = State.BeforeAttributeValue;
break;
case '\u0000': this.parseError('unexpected-null-character');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToName();
break;
case '\u0022':
case '\u0027':
case '\u003C':
this.parseError('unexpected-character-in-attribute-name');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
break;
default: {
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter.toLowerCase());
break;
}
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter);
}
}
break;
}
case State.AfterAttributeName: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': break;
case '\u002F':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingNameAt(this.currentPosition);
this.state = State.SelfClosingStartTag;
break;
case '\u003D':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingNameAt(this.currentPosition);
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.equalsAt(this.currentPosition);
this.state = State.BeforeAttributeValue;
break;
case '\u003E':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingNameAt(this.currentPosition);
this.state = State.Data;
this.emitCurrentOfEitherType(StartTagToken, EndTagToken);
break;
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
default:
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.append(Attribute.createWithEmptyNameAndValue().startingNameAt(this.currentPosition));
this.reconsumeIn(State.AttributeName);
break;
}
break;
}
case State.BeforeAttributeValue: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': break;
case '\u0022':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.startingValueAt(this.currentPosition);
this.state = State.AttributeValueDouble;
break;
case '\u0027':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.startingValueAt(this.currentPosition);
this.state = State.AttributeValueSingle;
break;
case '\u003E':
this.parseError('missing-attribute-value');
this.state = State.Data;
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingNameAt(this.currentPosition);
this.emitCurrentOfEitherType(StartTagToken, EndTagToken);
break;
default:
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.startingValueAt(this.currentPosition);
this.reconsumeIn(State.AttributeValueUnquoted);
}
break;
}
case State.AttributeValueDouble: {
switch (this.consumeNext()) {
case '\u0022':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingValueAt(this.currentPosition);
this.state = State.AfterAttributeValue;
break;
case '\u0026': this.returnState = State.AttributeValueDouble; this.state = State.CharacterReference; break;
case '\u0000':
this.parseError('unexpected-null-character');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
break;
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
}
break;
}
case State.AttributeValueSingle: {
switch (this.consumeNext()) {
case '\u0027':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingValueAt(this.currentPosition);
this.state = State.AfterAttributeValue;
break;
case '\u0026': this.returnState = State.AttributeValueSingle; this.state = State.CharacterReference; break;
case '\u0000':
this.parseError('unexpected-null-character');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
break;
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
}
break;
}
case State.AttributeValueUnquoted: {
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.setUnquoted();
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': this.state = State.BeforeAttributeName; break;
case '\u0026': this.returnState = State.AttributeValueUnquoted; this.state = State.CharacterReference; break;
case '\u003E':
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.endingValueAt(this.currentPosition);
this.state = State.Data;
this.emitCurrentOfEitherType(StartTagToken, EndTagToken);
break;
case '\u0000':
this.parseError('unexpected-null-character');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendReplacementCharacterToValue();
break;
case '\u0022':
case '\u0027':
case '\u003C':
case '\u003D':
case '\u0060':
this.parseError('unexpected-character-in-unquoted-attribute-value');
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
break;
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
default: this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
}
break;
}
case State.AfterAttributeValue: {
switch (this.consumeNext()) {
case '\u0009':
case '\u000A':
case '\u000C':
case '\u0020': this.state = State.BeforeAttributeName; break;
case '\u002F': this.state = State.SelfClosingStartTag; break;
case '\u003E': this.state = State.Data; this.emitCurrentOfEitherType(StartTagToken, EndTagToken); break;
case undefined: this.parseError('eof-in-tag'); this.emit(EndOfFileToken.create()); break;
default: this.parseError('missing-whitespace-between-attributes'); this.reconsumeIn(State.BeforeAttributeName);
}
break;
}
case State.CommentStart: {
switch (this.consumeNext()) {
case '\u002D': this.state = State.CommentStartDash; break;
case '\u003E': this.parseError('abrupt-closing-of-empty-comment'); this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
default: this.reconsumeIn(State.Comment);
}
break;
}
// FIXME: Possible improvement to https://html.spec.whatwg.org/multipage/parsing.html#comment-state (adding **current** in some places)
case State.Comment: {
switch (this.consumeNext()) {
case '\u003C': this.currentOfType(CommentToken).append(this.currentInputCharacter); this.state = State.CommentLessThanSign; break;
case '\u002D': this.state = State.CommentEndDash; break;
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
}
break;
}
case State.CommentEndDash: {
switch (this.consumeNext()) {
case '\u002D': this.state = State.CommentEnd; break;
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
default: this.currentOfType(CommentToken).append('\u002D'); this.reconsumeIn(State.Comment);
}
break;
}
// Same as above fixme https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
case State.CommentEnd: {
switch (this.consumeNext()) {
case '\u003E': this.state = State.Data; this.emit(this.currentOfType(CommentToken).endingAt(this.currentPosition.copy().increment(1))); break;
case '\u0021': this.state = State.CommentEndBang; break;
case '\u002D': this.currentOfType(CommentToken).append('\u002D'); break;
case undefined: this.parseError('eof-in-comment'); this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
default: this.currentOfType(CommentToken).append('\u002D\u002D'); this.reconsumeIn(State.Comment);
}
break;
}
// Same as above https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
case State.BogusComment: {
switch (this.consumeNext()) {
case '\u003E': this.state = State.Data; this.emitCurrentOfType(CommentToken); break;
case undefined: this.emitCurrentOfType(CommentToken); this.emit(EndOfFileToken.create()); break;
case '\u0000': this.parseError('unexpected-null-character'); this.currentOfType(CommentToken).appendReplacementCharacter(); break;
default: this.currentOfType(CommentToken).append(this.currentInputCharacter);
}
break;
}
case State.CharacterReference: {
this.temporaryBuffer = '';
this.temporaryBuffer += '\u0026';
switch (this.consumeNext()) {
case '\u0023': this.temporaryBuffer += this.currentInputCharacter; this.state = State.NumericCharacterReference; break;
default: {
if (this.asciiAlphanumeric(this.currentInputCharacter)) {
this.reconsumeIn(State.NamedCharacterReference);
break;
}
this.flushCodePointsConsumedAsCharacterReference();
this.reconsumeIn(this.returnState);
}
}
break;
}
case State.NamedCharacterReference: {
let match = false;
for (const entry in entities) {
if (this.matchNextFew(entry)) {
match = true;
this.consumeNextFew(entry);
this.temporaryBuffer += entry;
if (this.consumedAsPartOfAnAttribute() && entry[entry.length - 1] !== '\u003B' && (this.next() === '\u003D' || this.asciiAlphanumeric(this.next() ?? ''))) {
this.flushCodePointsConsumedAsCharacterReference();
this.state = this.returnState;
break;
}
if (entry[entry.length - 1] !== '\u003B')
this.parseError('missing-semicolon-after-character-reference');
this.temporaryBuffer = '';
this.temporaryBuffer += entities[entry].characters;
this.flushCodePointsConsumedAsCharacterReference();
this.state = this.returnState;
break;
}
}
if (!match) {
this.flushCodePointsConsumedAsCharacterReference();
this.state = State.AmbiguousAmpersand;
}
break;
}
case State.AmbiguousAmpersand: {
switch (this.consumeNext()) {
case '\u003B': this.parseError('unknown-named-character-reference'); this.reconsumeIn(this.returnState); break;
default: {
if (this.asciiAlphanumeric(this.currentInputCharacter)) {
if (this.consumedAsPartOfAnAttribute()) {
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.currentInputCharacter);
} else {
this.emit(CharacterToken.createWith(this.currentInputCharacter));
}
break;
}
this.reconsumeIn(this.returnState);
}
}
break;
}
default: TODO(`Unimplemented state '${this.state}'`);
}
}
/**
 * Flushes the temporary buffer to wherever the character reference was found.
 *
 * When the reference was consumed as part of an attribute, the whole buffer is
 * appended to the current attribute's value; otherwise every code point of the
 * buffer is emitted as its own character token.
 */
private flushCodePointsConsumedAsCharacterReference(): void {
    if (!this.consumedAsPartOfAnAttribute()) {
        // Iterating a string with for...of yields code points, not UTF-16 code units.
        for (const codePoint of this.temporaryBuffer)
            this.emit(CharacterToken.createWith(codePoint));

        return;
    }

    this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToValue(this.temporaryBuffer);
}
/**
 * Reports whether the return state is one of the three attribute value states
 * (double-quoted, single-quoted or unquoted).
 */
private consumedAsPartOfAnAttribute(): boolean {
    switch (this.returnState) {
        case State.AttributeValueDouble:
        case State.AttributeValueSingle:
        case State.AttributeValueUnquoted:
            return true;
        default:
            return false;
    }
}
/**
 * Reports whether `input` is matched by either the ASCII alpha check or the
 * ASCII digit check.
 */
private asciiAlphanumeric(input: string): boolean {
    if (this.asciiAlpha(input))
        return true;

    return this.asciiDigit(input);
}
/**
 * Reports whether `input` is matched by either the ASCII upper alpha check or
 * the ASCII lower alpha check.
 */
private asciiAlpha(input: string): boolean {
    if (this.asciiUpperAlpha(input))
        return true;

    return this.asciiLowerAlpha(input);
}
/**
 * Reports whether `input` contains a character in the range U+0041 (A) to
 * U+005A (Z), inclusive. The pattern is unanchored, so a match anywhere in
 * `input` counts.
 */
private asciiUpperAlpha(input: string): boolean {
    const upperAlpha = /[\u0041-\u005A]/;

    return upperAlpha.test(input);
}
/**
 * Reports whether `input` contains a character in the range U+0061 (a) to
 * U+007A (z), inclusive. The pattern is unanchored, so a match anywhere in
 * `input` counts.
 */
private asciiLowerAlpha(input: string): boolean {
    const lowerAlpha = /[\u0061-\u007A]/;

    return lowerAlpha.test(input);
}
/**
 * Reports whether `input` contains an ASCII digit, i.e. a character in the
 * range U+0030 (0) to U+0039 (9), inclusive. The pattern is unanchored, so a
 * match anywhere in `input` counts.
 *
 * @param input The text to test (callers in this class pass a single character).
 * @returns `true` when at least one ASCII digit is present.
 */
private asciiDigit(input: string): boolean {
    // The upper bound used to be U+0030, which reduced the class to the single
    // character "0" and made "1" through "9" fail the digit check.
    return /[\u0030-\u0039]/.test(input);
}
/**
 * Steps the input pointer and the current position back by one, switches to
 * `state`, and immediately runs `spin()` again so the same input is handled
 * by the new state.
 *
 * NOTE(review): `consumeNext()` bumps the line on '\n', while
 * `Position.decrement()` only adjusts index and column, so the line counter is
 * not rewound here - confirm whether a newline can ever be reconsumed.
 */
private reconsumeIn(state: State): void {
    this.currentPosition.decrement();
    this.pointer -= 1;

    this.state = state;
    this.spin();
}
/**
 * Reports a parse error by writing it to `console.error`; tokenization is not
 * interrupted.
 */
private parseError(error: ParseError): void {
    console.error(`Parse error: ${error}`);
}
/**
 * Consumes one character of input: records it as the current input character,
 * advances the pointer and the current position, and starts a new line in the
 * position when the character is '\n'.
 *
 * @returns The consumed character, or `undefined` when the pointer is past the
 *          end of the input (pointer and position are advanced regardless).
 */
private consumeNext(): string | undefined {
    const character = this.input[this.pointer];

    this.currentInputCharacter = character;
    this.pointer += 1;
    this.currentPosition.increment();

    if (character === '\n')
        this.currentPosition.incrementLine();

    return character;
}
/**
 * Peeks at the character under the input pointer without consuming it.
 *
 * @returns The upcoming character, or `undefined` past the end of the input.
 */
private next(): string | undefined {
    const upcoming = this.input[this.pointer];

    return upcoming;
}
/**
 * Reports whether the unconsumed input begins with `input` (case-sensitive).
 * Nothing is consumed.
 *
 * @param input The exact text expected at the current pointer.
 * @returns `true` when the input at the pointer starts with `input`.
 */
private matchNextFew(input: string): boolean {
    // startsWith() compares in place; the previous substr() call is a deprecated
    // API and allocated a fresh substring per candidate, which adds up when the
    // named character reference state probes every entity name.
    return this.input.startsWith(input, this.pointer);
}
/**
 * Reports whether the unconsumed input begins with `input`, comparing both
 * sides after `toLowerCase()`. Nothing is consumed.
 *
 * @param input The text expected at the current pointer, in any letter case.
 * @returns `true` when the next `input.length` characters equal `input`
 *          ignoring case.
 */
private matchNextFewCaseInsensitive(input: string): boolean {
    // slice() replaces the deprecated substr(); for a non-negative pointer both
    // select the same `input.length` characters starting at the pointer.
    const upcoming = this.input.slice(this.pointer, this.pointer + input.length);

    return upcoming.toLowerCase() === input.toLowerCase();
}
/**
 * Consumes `input.length` characters and verifies that each one equals the
 * character of `input` at the same offset.
 */
private consumeNextFew(input: string): void {
    let offset = 0;

    while (offset < input.length) {
        const actual = this.consumeNext();

        VERIFY(actual === input[offset], `Expected '${input[offset]}' (${input} at ${offset}), got ${actual} instead`);
        offset++;
    }
}
/**
 * Consumes `input.length` characters and verifies that each one, lowercased,
 * equals the lowercased character of `input` at the same offset.
 */
private consumeNextFewCaseInsensitive(input: string): void {
    let offset = 0;

    while (offset < input.length) {
        const actual = this.consumeNext()?.toLowerCase();
        const expected = input[offset].toLowerCase();

        VERIFY(actual === expected, `Expected '${expected}' (${input.toLowerCase()} at ${offset}), got ${actual} instead`);
        offset++;
    }
}
/**
 * Emits `token`: fills in whatever part of its range is still missing from the
 * current position, then appends it to the token list.
 */
private emit(token: Token): void {
    const emitted = token;

    this.populateRangeOnEmit(emitted);
    this.tokens.push(emitted);
}
/**
 * Verifies that the current token is an instance of `type`, then emits it
 * (range population followed by a push onto the token list).
 */
private emitCurrentOfType(type: Constructor<Token>): void {
    const token = this.currentToken;

    VERIFY(token instanceof type, `Expected '${type.name}', got '${token.constructor.name}' instead`);

    this.emit(token);
}
/**
 * Verifies that the current token is an instance of `a` or of `b`, then emits
 * it (range population followed by a push onto the token list).
 */
private emitCurrentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): void {
    const token = this.currentToken;

    VERIFY(token instanceof a || token instanceof b, `Expected '${a.name}' or '${b.name}', got '${token.constructor.name}' instead`);

    this.emit(token);
}
/**
 * Returns the current token after verifying that it is an instance of `type`.
 */
private currentOfType<T extends Token>(type: Constructor<T>): T {
    const token = this.currentToken;

    VERIFY(token instanceof type, `Expected '${type.name}', got '${token.constructor.name}' instead`);

    return token;
}
/**
 * Returns the current token after verifying that it is an instance of `a` or
 * of `b`.
 */
private currentOfEitherType<T extends Token, U extends Token>(a: Constructor<T>, b: Constructor<U>): T | U {
    const token = this.currentToken;

    VERIFY(token instanceof a || token instanceof b, `Expected '${a.name}' or '${b.name}', got '${token.constructor.name}' instead`);

    return token;
}
/**
 * Completes a token's range from the current position just before it is
 * emitted:
 *  - neither start nor end set: both are set to the current position;
 *  - only start set: the end is set to the current position;
 *  - only end set: treated as unreachable;
 *  - both set: left untouched.
 */
private populateRangeOnEmit(token: Token): void {
    const hasStart = token.range.start !== undefined;
    const hasEnd = token.range.end !== undefined;

    if (!hasStart && !hasEnd) {
        token.at(this.currentPosition);
    } else if (hasStart && !hasEnd) {
        token.endingAt(this.currentPosition);
    } else if (!hasStart && hasEnd) {
        VERIFY_NOT_REACHED();
    }
}
/**
 * Makes `token` the current token, first stamping its range start with the
 * current position when no start has been recorded yet.
 *
 * @returns The same token that was passed in.
 */
private create(token: Token): Token {
    const needsStart = token.range.start === undefined;

    if (needsStart)
        token.startingAt(this.currentPosition);

    this.currentToken = token;

    return token;
}
}

View file

@ -0,0 +1,117 @@
import { VERIFY } from '../../util/assertions.js';
import { Inspectable } from '../highlighter/inspectable.js';
import { Position } from './position.js';
import { Range } from './range.js';
import { REPLACEMENT_CHARACTER } from './token.js';
/**
 * One attribute of a tag token: its name and value text plus the source
 * positions of the name, the equals sign and the value.
 */
export class Attribute extends Inspectable {
    /** Attribute name text. */
    public name: string;
    /** Attribute value text. */
    public value: string;
    /** Source range of the name; created empty and filled in by the name setters. */
    public nameRange!: Range;
    /** Source range of the value; absent until a value position is recorded. */
    public valueRange?: Range;
    /** Position recorded by `equalsAt`, if any. */
    public equalsPosition?: Position;
    /** Starts out `true`; `setUnquoted()` switches it to `false`. */
    public quoted: boolean;

    public constructor(name: string, value: string) {
        super();

        this.name = name;
        this.value = value;
        this.quoted = true;

        // @ts-expect-error -- the range is created empty; start and end are assigned later
        this.nameRange = {};
    }

    /** Marks the attribute value as unquoted. */
    public setUnquoted(): void {
        this.quoted = false;
    }

    /** Appends `characters` to the attribute name. */
    public appendToName(characters: string): void {
        this.name += characters;
    }

    /** Appends the replacement character to the attribute name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }

    /** Appends `characters` to the attribute value. */
    public appendToValue(characters: string): void {
        this.value += characters;
    }

    /** Appends the replacement character to the attribute value. */
    public appendReplacementCharacterToValue(): void {
        this.appendToValue(REPLACEMENT_CHARACTER);
    }

    /** Stores a copy of `position` as the start of the name range. */
    public startingNameAt(position: Position): this {
        this.nameRange.start = position.copy();

        return this;
    }

    /** Stores a copy of `position` as the end of the name range. */
    public endingNameAt(position: Position): this {
        this.nameRange.end = position.copy();

        return this;
    }

    /** Stores a copy of `position` as the location of the equals sign. */
    public equalsAt(position: Position): this {
        this.equalsPosition = position.copy();

        return this;
    }

    /** Stores a copy of `position` as the start of the value range, creating the range on first use. */
    public startingValueAt(position: Position): this {
        if (this.valueRange === undefined) {
            // @ts-expect-error -- the range is created empty; start and end are assigned later
            this.valueRange = {};
        }

        VERIFY(this.valueRange !== undefined);
        this.valueRange.start = position.copy();

        return this;
    }

    /** Stores a copy of `position` as the end of the value range, creating the range on first use. */
    public endingValueAt(position: Position): this {
        if (this.valueRange === undefined) {
            // @ts-expect-error -- the range is created empty; start and end are assigned later
            this.valueRange = {};
        }

        VERIFY(this.valueRange !== undefined);
        this.valueRange.end = position.copy();

        return this;
    }

    /** Creates an attribute whose name and value are both empty strings. */
    public static createWithEmptyNameAndValue(): Attribute {
        return new Attribute('', '');
    }

    /** Creates an attribute with the given name and an empty value. */
    public static createWithEmptyValue(name: string): Attribute {
        return new Attribute(name, '');
    }

    /** Renders the name and value as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { name, value } = this;

        return `Attribute { name: '${name}', value: '${value}' }`;
    }
}
/**
 * An ordered, append-only collection of attributes that exposes the most
 * recently appended one as `current`.
 */
export class AttributeList {
    private attributes: Array<Attribute> = [];

    /** The attribute appended last; `undefined` at runtime while the list is empty. */
    public get current(): Attribute {
        const { length } = this.attributes;

        return this.attributes[length - 1];
    }

    /** The backing array itself (not a copy), in insertion order. */
    public get list(): Array<Attribute> {
        return this.attributes;
    }

    /** Reports whether at least one attribute has been appended. */
    public nonEmpty(): boolean {
        return this.attributes.length > 0;
    }

    /** Adds `attribute` to the end of the list, making it the current one. */
    public append(attribute: Attribute): void {
        this.attributes.push(attribute);
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,60 @@
import { Inspectable } from '../highlighter/inspectable.js';
/**
 * A mutable cursor into the source text, tracking line, column and absolute
 * index. Instances are obtained through `createStarting()` or `copy()`.
 */
export class Position extends Inspectable {
    #line: number;
    #column: number;
    #index: number;

    private constructor(line: number, column: number, index: number) {
        super();

        this.#line = line;
        this.#column = column;
        this.#index = index;
    }

    public get line(): number {
        return this.#line;
    }

    public get column(): number {
        return this.#column;
    }

    public get index(): number {
        return this.#index;
    }

    /** Moves index and column forward by `by` (default 1); the line is untouched. */
    public increment(by = 1): this {
        this.#column += by;
        this.#index += by;

        return this;
    }

    /** Moves index and column backward by `by` (default 1); the line is untouched. */
    public decrement(by = 1): this {
        return this.increment(-by);
    }

    /** Advances to the next line and resets the column to 0; the index is untouched. */
    public incrementLine(): this {
        this.#line += 1;
        this.#column = 0;

        return this;
    }

    /** Creates an independent position with the same line, column and index. */
    public copy(): Position {
        return new Position(this.#line, this.#column, this.#index);
    }

    /** Creates the initial position: line 0, column 0, index -1. */
    public static createStarting(): Position {
        return new Position(0, 0, -1);
    }

    /** Renders line, column and index as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { line, column, index } = this;

        return `Position { line: ${line}, column: ${column}, index: ${index} }`;
    }
}

View file

@ -0,0 +1,6 @@
import { Position } from './position.js';
/** A span of source text delimited by two positions. */
export type Range = {
    /** Where the span begins. */
    start: Position;
    /** Where the span ends. */
    end: Position;
};

View file

@ -79,4 +79,4 @@ export const enum State {
HexadecimalCharacterReference = 'Hexadecimal character reference',
DecimalCharacterReference = 'Decimal character reference',
NumericCharacterReferenceEnd = 'Numeric character reference end'
}
}

View file

@ -0,0 +1,55 @@
import { Inspectable } from '../highlighter/inspectable.js';
import { Position } from './position.js';
import { Range } from './range.js';
/**
 * Discriminator for the kinds of token. Each member's value is a
 * human-readable label; the concrete token classes pass their member to the
 * `Token` constructor.
 */
export const enum Type {
DOCTYPE = 'DOCTYPE',
StartTag = 'start tag',
EndTag = 'end tag',
Comment = 'comment',
Character = 'character',
EndOfFile = 'end-of-file'
}
/** U+FFFD REPLACEMENT CHARACTER, appended by the `appendReplacementCharacter*` helpers. */
export const REPLACEMENT_CHARACTER = '\uFFFD';
/**
 * Base class of all tokens: carries the token's type and the source range it
 * covers. The range is created empty and filled in through `startingAt`,
 * `endingAt` or `at`.
 */
export abstract class Token extends Inspectable {
    #type: Type;
    #range!: Range;

    protected constructor(type: Type) {
        super();

        // @ts-expect-error -- the range is created empty; start and end are assigned later
        this.#range = {};
        this.#type = type;
    }

    /** Stores a copy of `position` as the start of the range. */
    public startingAt(position: Position): this {
        this.#range.start = position.copy();

        return this;
    }

    /** Stores a copy of `position` as the end of the range. */
    public endingAt(position: Position): this {
        this.#range.end = position.copy();

        return this;
    }

    /** Stores separate copies of `position` as both the start and the end of the range. */
    public at(position: Position): this {
        return this.startingAt(position).endingAt(position);
    }

    public get range(): Range {
        return this.#range;
    }

    public get type(): Type {
        return this.#type;
    }
}

View file

@ -0,0 +1,23 @@
import { REPLACEMENT_CHARACTER, Token, Type } from '../token.js';
/** Token of type `Type.Character` carrying immutable text in `data`. */
export class CharacterToken extends Token {
    public constructor(public readonly data: NonNullable<string>) {
        super(Type.Character);
    }

    /** Creates a character token holding `data`. */
    public static createWith(data: NonNullable<string>): CharacterToken {
        return new CharacterToken(data);
    }

    /** Creates a character token holding the replacement character. */
    public static createReplacementCharacter(): CharacterToken {
        return CharacterToken.createWith(REPLACEMENT_CHARACTER);
    }

    /** Renders the token data as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { data } = this;

        return `CharacterToken { '${data}' }`;
    }
}

View file

@ -0,0 +1,31 @@
import { Token, Type, REPLACEMENT_CHARACTER } from '../token.js';
/** Token of type `Type.Comment` whose text is accumulated in `data`. */
export class CommentToken extends Token {
    public constructor(public data: NonNullable<string>) {
        super(Type.Comment);
    }

    /** Appends `characters` to the comment text. */
    public append(characters: string): void {
        this.data += characters;
    }

    /** Appends the replacement character to the comment text. */
    public appendReplacementCharacter(): void {
        this.append(REPLACEMENT_CHARACTER);
    }

    /** Creates a comment token with empty text. */
    public static createEmpty(): CommentToken {
        return CommentToken.createWith('');
    }

    /** Creates a comment token holding `data`. */
    public static createWith(data: string): CommentToken {
        return new CommentToken(data);
    }

    /** Renders the comment text as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { data } = this;

        return `CommentToken { '${data}' }`;
    }
}

View file

@ -0,0 +1,40 @@
import { VERIFY } from '../../../util/assertions.js';
import { Token, Type, REPLACEMENT_CHARACTER } from '../token.js';
/**
 * Token of type `Type.DOCTYPE`. Every field is optional; an omitted field is
 * `undefined`.
 */
export class DOCTYPEToken extends Token {
    public constructor(
        public name?: string,
        public publicIdentifier?: string,
        public systemIdentifier?: string,
        public forceQuirks?: true
    ) {
        super(Type.DOCTYPE);
    }

    /** Appends `characters` to the name; verifies that a name has been set first. */
    public appendToName(characters: string): void {
        VERIFY(this.name !== undefined);

        this.name += characters;
    }

    /** Appends the replacement character to the name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }

    /** Creates a token with only `forceQuirks` set. */
    public static createWithForcedQuirks(): DOCTYPEToken {
        return new DOCTYPEToken(undefined, undefined, undefined, true);
    }

    /** Creates a token with only `name` set. */
    public static createWithName(name: string): DOCTYPEToken {
        return new DOCTYPEToken(name);
    }

    /** Renders the name as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { name } = this;

        return `DOCTYPEToken { '${name}' }`;
    }
}

View file

@ -0,0 +1,15 @@
import { Token, Type } from '../token.js';
/** Token of type `Type.EndOfFile`; it carries no data of its own. */
export class EndOfFileToken extends Token {
    public constructor() {
        super(Type.EndOfFile);
    }

    /** Creates a new end-of-file token. */
    public static create(): EndOfFileToken {
        const token = new EndOfFileToken();

        return token;
    }

    /** Returns the fixed description of this token; `indent` is not used. */
    public override inspect(indent: number): string {
        return 'EndOfFileToken';
    }
}

View file

@ -0,0 +1,30 @@
import { AttributeList } from '../attribute.js';
import { Token, Type, REPLACEMENT_CHARACTER } from '../token.js';
/** Token of type `Type.EndTag` with a tag name and an attribute list. */
export class EndTagToken extends Token {
    public constructor(public name: NonNullable<string>, public readonly attributes: AttributeList) {
        super(Type.EndTag);
    }

    /** Appends `characters` to the tag name. */
    public appendToName(characters: string): void {
        this.name += characters;
    }

    /** Appends the replacement character to the tag name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }

    /** Creates an end tag token with an empty name and a fresh, empty attribute list. */
    public static createEmpty(): EndTagToken {
        const attributes = new AttributeList();

        return new EndTagToken('', attributes);
    }

    /** Renders the tag name as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { name } = this;

        return `EndTagToken { '${name}' }`;
    }
}

View file

@ -0,0 +1,30 @@
import { AttributeList } from '../attribute.js';
import { Token, Type, REPLACEMENT_CHARACTER } from '../token.js';
/** Token of type `Type.StartTag` with a tag name and an attribute list. */
export class StartTagToken extends Token {
    public constructor(public name: NonNullable<string>, public readonly attributes: AttributeList) {
        super(Type.StartTag);
    }

    /** Appends `characters` to the tag name. */
    public appendToName(characters: string): void {
        this.name += characters;
    }

    /** Appends the replacement character to the tag name. */
    public appendReplacementCharacterToName(): void {
        this.appendToName(REPLACEMENT_CHARACTER);
    }

    /** Creates a start tag token with an empty name and a fresh, empty attribute list. */
    public static createEmpty(): StartTagToken {
        const attributes = new AttributeList();

        return new StartTagToken('', attributes);
    }

    /** Renders the tag name as a one-line description; `indent` is not used. */
    public override inspect(indent: number): string {
        const { name } = this;

        return `StartTagToken { '${name}' }`;
    }
}

89
src/view.ts Normal file
View file

@ -0,0 +1,89 @@
import { Color } from './html/highlighter/properties/color.js';
import { Property } from './html/highlighter/property.js';
import { Span } from './html/highlighter/span.js';
import { Inspector } from './html/inspector.js';
/**
 * Reports whether two property lists are equivalent: both are defined, have
 * the same length, and every property of `a` has an equal counterpart
 * (according to `equals`) somewhere in `b`. Order does not matter.
 *
 * @param a First property list; `undefined` never compares equal.
 * @param b Second property list; `undefined` never compares equal.
 * @returns `true` when the lists are equivalent as described above.
 */
const sameProperties = (a: Array<Property> | undefined, b: Array<Property> | undefined): boolean => {
    if (a === undefined || b === undefined) return false;
    if (a.length !== b.length) return false;

    // some() stops at the first counterpart instead of scanning the rest of `b`;
    // this comparison runs once per rendered character, so the saving adds up.
    return a.every(property => b.some(otherProperty => property.equals(otherProperty)));
};
/**
 * Applies each property to `element`, in list order, by calling the
 * property's `apply` method.
 */
const applyProperties = (element: HTMLSpanElement, properties: Array<Property>): void => {
    for (let position = 0; position < properties.length; position++)
        properties[position].apply(element);
};
/**
 * Renders `text` into a new `<pre>` appended to the document body, splitting
 * it into elements according to the spans covering each character.
 *
 * All existing body children are hidden via `display: none` (their
 * `ariaHidden` is set to 'false'), while the new container gets
 * `ariaHidden = 'true'`. For each character, the last span in `spans` that
 * contains its index decides the tag name and properties; consecutive
 * characters with the same tag name and equivalent properties share one
 * element. Every touched element is passed to `inspector.instrument` together
 * with all spans matching that character. The whole call is timed under the
 * console timer label 'render'.
 */
export function render(text: string, spans: Array<Span>, inspector: Inspector): void {
    console.time('render');

    const container = document.createElement('pre');
    container.ariaHidden = 'true';

    for (const child of document.body.children) {
        (child as HTMLElement).style.display = 'none';
        child.ariaHidden = 'false';
    }

    document.body.appendChild(container);

    const defaultProperties: Array<Property> = [ Color.Plain ];
    const defaultTagName: keyof HTMLElementTagNameMap = 'span';

    // The run currently being extended; it starts as an empty default element.
    let lastProperties: Array<Property> = defaultProperties;
    let lastTagName: keyof HTMLElementTagNameMap = defaultTagName;
    let lastElement: HTMLSpanElement = document.createElement(lastTagName);

    applyProperties(lastElement, lastProperties);
    container.appendChild(lastElement);

    for (let characterIndex = 0; characterIndex < text.length; characterIndex++) {
        const character = text[characterIndex];

        // Later spans win: the last matching span decides the appearance.
        const matchingSpans = spans.filter(span => span.contains(characterIndex));
        const topMostSpan = matchingSpans[matchingSpans.length - 1];

        const topMostProperties: Array<Property> = topMostSpan === undefined ? defaultProperties : topMostSpan.properties;
        const topMostTagName: keyof HTMLElementTagNameMap = topMostSpan === undefined ? defaultTagName : topMostSpan.tagName;

        const extendsCurrentRun = sameProperties(lastProperties, topMostProperties) && topMostTagName === lastTagName;

        if (!extendsCurrentRun) {
            lastElement = document.createElement(topMostTagName);
            lastElement.textContent = character;
            inspector.instrument(lastElement, matchingSpans);
            applyProperties(lastElement, topMostProperties);

            lastProperties = topMostProperties;
            lastTagName = topMostTagName;

            container.appendChild(lastElement);
            continue;
        }

        lastElement.textContent += character;
        inspector.instrument(lastElement, matchingSpans);
    }

    console.timeEnd('render');
}

View file

@ -4,7 +4,8 @@
"module": "esnext",
"target": "esnext",
"sourceMap": true,
"rootDir": ".",
"rootDir": "src",
"outDir": "public/script",
"strict": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,

16
view.ts
View file

@ -1,16 +0,0 @@
import { Node } from "./html/highlighter/node";
/**
 * Builds a detached `<pre>` element containing one `<span>` per node, with the
 * span's text taken from `node.content` and its text colour from `node.color`.
 *
 * @returns The populated `<pre>`; it is not attached to the document here.
 */
export function render(nodes: Array<Node>): HTMLElement {
    const container = document.createElement("pre");

    for (const node of nodes) {
        const element = document.createElement("span");

        element.innerText = node.content;
        element.style.color = node.color;

        container.appendChild(element);
    }

    return container;
}