Reference: Added tokenizer code from temporary project

This commit is contained in:
networkException 2021-04-04 16:53:23 +02:00
parent e08394bd16
commit f0ee842a1b
8 changed files with 395 additions and 0 deletions

59
.ref/tokenizer.ts Normal file
View file

@ -0,0 +1,59 @@
import { Parser } from './tokenizer/parser.class.js';
import { Token } from './tokenizer/token.interfance.js';
// Syntax-highlights every element that carries the `tokenized` class: the
// element's text is tokenized, then rebuilt as one coloured <span> per token.
// Highlighting runs once immediately and again whenever the element fires
// `focusout`.
Array.from(document.getElementsByClassName('tokenized'))
    .map(tokenized => tokenized as HTMLDivElement)
    .forEach(tokenized => {
        const run = (): void => {
            const source: string = tokenized.innerText;

            console.time('tokenized');
            const tokens: Array<Token> = new Parser(source).tokenize();
            console.timeEnd('tokenized');

            // Drop the previous rendering before rebuilding it.
            tokenized.innerHTML = '';

            const lines: Array<string> = source.split('\n');
            for (let line = 0; line < lines.length; line++) {
                const lineValue: string = lines[line];

                for (let column = 0; column < lineValue.length; column++) {
                    // A column is "covered" when a token starts on it (render the
                    // token) or when it lies inside a token that was already rendered.
                    let covered = false;
                    for (const token of tokens) {
                        if (token.position.starts(line, column)) {
                            covered = true;
                            const element: HTMLSpanElement = document.createElement('span');
                            // Token types are colour strings, so they double as the style.
                            element.style.color = token.type;
                            element.innerText = lineValue.substring(
                                token.position.column,
                                token.position.length + token.position.column
                            );
                            tokenized.appendChild(element);
                            break;
                        }
                        if (token.position.intersects(line, column)) {
                            covered = true;
                            break;
                        }
                    }
                    if (covered) continue;

                    // Characters outside every token are emitted verbatim. A text node
                    // is used instead of `innerHTML +=` so the character is never parsed
                    // as markup and the existing children are not re-serialised and
                    // re-created for every single character.
                    tokenized.appendChild(document.createTextNode(lineValue[column]));
                }

                // Terminate each rendered line with a line-break span.
                const lineBreak: HTMLSpanElement = document.createElement('span');
                lineBreak.innerText = '\n';
                tokenized.appendChild(lineBreak);
            }

            // The loop appends one line break too many; remove the trailing one.
            const trailing = tokenized.lastChild;
            if (trailing !== null) trailing.remove();
        };

        tokenized.addEventListener('focusout', run);
        run();
    });

View file

@ -0,0 +1,230 @@
import { Position } from './position.class.js';
import { IdentifierState } from './state/identifierState.class.js';
import { NumberState } from './state/numberState.class.js';
import { StringState } from './state/stringState.class.js';
import { Token } from './token.interfance.js';
import { Type } from './type.enum.js';
/**
 * Single-pass, character-by-character tokenizer.
 *
 * The parser eats `source` from the front while tracking the current
 * line/column in `position`. At most one of the three accumulator states
 * (string, identifier, number) is active at a time; a state is turned into a
 * token ("committed") as soon as a character that cannot belong to it is seen.
 *
 * NOTE: escape sequences are not interpreted, so a string ends at the first
 * character equal to its opening quote, even if it is preceded by a backslash.
 */
export class Parser {
    /** Characters that start or continue a numeric literal. */
    private static readonly digits: ReadonlyArray<string> = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
    /** Single-character punctuation tokens. */
    private static readonly punctuation: ReadonlyArray<string> = ['.', ',', ':', ';', '+', '-', '*', '/', '=', '<', '>', '|'];
    /** Single-character bracket tokens. */
    private static readonly nesting: ReadonlyArray<string> = ['(', ')', '[', ']', '{', '}'];
    /** A minus sign directly followed by a digit starts a negative number. */
    private static readonly negativeNumberStart: RegExp = /-\d/;
    /** Identifiers that are re-typed as keywords by `mark`. */
    private static readonly keywords: ReadonlySet<string> = new Set<string>([
        'let',
        'const',
        'var',
        'class',
        'interface',
        'enum',
        'if',
        'else',
        'return',
        'break',
        'continue',
        'try',
        'catch',
        'finally',
        'for',
        'while',
        'do',
        'of',
        'in',
        'as',
        'new',
        'private',
        'public',
        'readonly',
        'static'
    ]);
    /** Identifiers that are re-typed as literals by `mark`. */
    private static readonly literalWords: ReadonlySet<string> = new Set<string>(['true', 'false', 'null', 'undefined']);

    private string: StringState;
    private identifier: IdentifierState;
    private number: NumberState;
    private position: Position;

    /**
     * @param source Text to tokenize. It is consumed by `tokenize`, so a parser
     * instance yields tokens only once.
     */
    public constructor(private source: string) {
        this.string = StringState.none.duplicate();
        this.identifier = IdentifierState.none.duplicate();
        this.number = NumberState.none.duplicate();
        this.position = Position.none.duplicate();
    }

    /**
     * Splits the remaining source into tokens.
     *
     * Spaces and line feeds separate tokens and are not emitted themselves.
     * Any character without a dedicated rule becomes part of an identifier.
     *
     * @returns The tokens in source order, after keyword/literal marking.
     */
    public tokenize(): Array<Token> {
        const tokens: Array<Token> = [];

        // Each commit helper is a no-op unless its state is active. Resets use a
        // copy of the `none` sentinel (as the constructor does) so the shared
        // static instance is never aliased by a parser.
        const commitIdentifier = (): void => {
            if (!this.identifier.active) return;
            tokens.push({
                type: Type.Identifier,
                value: this.identifier.value,
                position: this.identifier.start.withLength(this.identifier.value.length)
            });
            this.identifier = IdentifierState.none.duplicate();
        };

        // `terminated` tells whether the closing quote was actually consumed; an
        // unterminated string at the end of the input only has its opening quote.
        const commitString = (terminated: boolean): void => {
            if (!this.string.active) return;
            const value: string = this.string.value;
            const length: number = value.length + (terminated ? 2 : 1);
            tokens.push({
                type: Type.Literal,
                value,
                position: this.string.start.withLength(length)
            });
            // The position is not advanced while string characters are consumed,
            // so catch up here in one step.
            const lastBreak: number = value.lastIndexOf('\n');
            if (lastBreak === -1) {
                this.position.addColumn(length);
            } else {
                // The string spans several lines: move down by the number of line
                // feeds and continue after the text that follows the last one.
                const breaks: number = value.split('\n').length - 1;
                const tail: number = value.length - lastBreak - 1;
                this.position.addLine(breaks).setColumn(tail + (terminated ? 1 : 0));
            }
            this.string = StringState.none.duplicate();
        };

        const commitNumber = (): void => {
            if (!this.number.active) return;
            tokens.push({
                type: Type.Literal,
                value: this.number.value,
                position: this.number.start.withLength(this.number.value.length)
            });
            this.number = NumberState.none.duplicate();
        };

        while (this.source.length > 0) {
            if (this.string.active) {
                if (this.matches(this.string.quote)) {
                    this.consume();
                    commitString(true);
                } else {
                    this.string.value += this.consume();
                }
            } else if (this.matches('"', '\'', '`')) {
                commitIdentifier();
                commitNumber();
                this.string = StringState.start(this.consume(), this.position);
            } else if (this.number.active) {
                if (this.matches('.')) {
                    if (this.number.decimal) {
                        // Second dot: end the number here; the dot is left in the
                        // source and handled as punctuation on the next pass.
                        commitNumber();
                    } else {
                        this.number.decimal = true;
                        this.number.value += this.consume();
                        this.position.addColumn(1);
                    }
                } else if (this.matches(...Parser.digits)) {
                    this.number.value += this.consume();
                    this.position.addColumn(1);
                } else {
                    // Not part of the number: commit without consuming so the
                    // character is re-examined by the rules below.
                    commitNumber();
                }
            } else if (this.identifier.active && this.matches(...Parser.digits)) {
                // A digit inside an identifier (e.g. `x1`) belongs to it rather
                // than starting a separate number.
                this.identifier.value += this.consume();
                this.position.addColumn(1);
            } else if (this.matches(...Parser.digits)) {
                this.number = NumberState.start(this.position);
                this.number.value += this.consume();
                this.position.addColumn(1);
            } else if (Parser.negativeNumberStart.test(this.peek(2))) {
                commitIdentifier();
                this.number = NumberState.start(this.position);
                this.number.value += this.consume(2);
                this.position.addColumn(2);
            } else if (this.matches(...Parser.punctuation)) {
                commitIdentifier();
                commitNumber();
                tokens.push({
                    type: Type.Punctuation,
                    value: this.consume(),
                    position: this.position.withLength(1)
                });
                this.position.addColumn(1);
            } else if (this.matches(...Parser.nesting)) {
                commitIdentifier();
                commitNumber();
                tokens.push({
                    type: Type.Nesting,
                    value: this.consume(),
                    position: this.position.withLength(1)
                });
                this.position.addColumn(1);
            } else if (this.matches(' ')) {
                commitIdentifier();
                commitNumber();
                this.consume();
                this.position.addColumn(1);
            } else if (this.matches('\n')) {
                commitIdentifier();
                commitNumber();
                this.consume();
                this.position.addLine(1).setColumn(0);
            } else {
                if (!this.identifier.active) {
                    this.identifier = IdentifierState.start(this.position);
                }
                this.identifier.value += this.consume();
                this.position.addColumn(1);
            }
        }

        // Flush whatever is still being accumulated at the end of the input.
        commitIdentifier();
        commitString(false);
        commitNumber();
        return this.mark(tokens);
    }

    /** Removes and returns the first `length` characters of the source. */
    private consume(length: number = 1): string {
        const consumed: string = this.peek(length);
        this.source = this.source.substring(length);
        return consumed;
    }

    /** Returns the first `length` characters of the source without removing them. */
    private peek(length: number = 1): string {
        return this.source.substring(0, length);
    }

    /** Tells whether the source starts with any of the given values. */
    private matches(...values: Array<string>): boolean {
        return values.some(value => this.source.startsWith(value));
    }

    /**
     * Re-types identifier tokens that are keywords or the literal words
     * `true`, `false`, `null` and `undefined`. Tokens are updated in place.
     *
     * @returns A new array holding the same token objects in the same order.
     */
    private mark(tokens: Array<Token>): Array<Token> {
        const marked: Array<Token> = [];
        for (const token of tokens) {
            // Only identifiers are re-typed; the check is explicit so that
            // `&&` / `||` precedence cannot widen it to other token types.
            if (token.type === Type.Identifier) {
                if (Parser.keywords.has(token.value)) {
                    token.type = Type.Keyword;
                } else if (Parser.literalWords.has(token.value)) {
                    token.type = Type.Literal;
                }
            }
            marked.push(token);
        }
        console.log('Marked Tokens:', marked);
        return marked;
    }
}

View file

@ -0,0 +1,44 @@
/**
 * A span inside a text: a zero-based line, a start column on that line and a
 * length in characters. The mutating methods return the instance itself so
 * calls can be chained.
 */
export class Position {
    /**
     * Zero position (line 0, column 0, length 0). The instance is shared and
     * mutable, so take a copy with `duplicate()` before changing it.
     */
    public static none: Position = new Position(0, 0, 0);

    public constructor(public line: number, public column: number, public length: number) {}

    /** True when the span begins exactly at the given line and column. */
    public starts(line: number, column: number): boolean {
        if (this.line !== line) return false;
        return this.column === column;
    }

    /**
     * True when the given column lies inside the span but after its first
     * column (the first column is reported by `starts` instead).
     */
    public intersects(line: number, column: number): boolean {
        if (this.line !== line) return false;
        const end: number = this.column + this.length;
        return this.column < column && column < end;
    }

    /** Returns an independent copy of this position. */
    public duplicate(): Position {
        const { line, column, length } = this;
        return new Position(line, column, length);
    }

    /** Returns a copy with the given length; this instance is left untouched. */
    public withLength(length: number): Position {
        return Object.assign(this.duplicate(), { length });
    }

    /** Moves to an absolute column. */
    public setColumn(column: number): Position {
        this.column = column;
        return this;
    }

    /** Moves by a relative number of columns. */
    public addColumn(column: number): Position {
        this.column = this.column + column;
        return this;
    }

    /** Moves by a relative number of lines; the column is not changed. */
    public addLine(line: number): Position {
        this.line = this.line + line;
        return this;
    }
}

View file

@ -0,0 +1,16 @@
import { Position } from '../position.class.js';
/**
 * Accumulator for an identifier that is being read: the text collected so
 * far, whether an identifier is currently in progress, and where it started.
 * Instances are created through `start` or by copying `none`.
 */
export class IdentifierState {
    private constructor(public value: string, public active: boolean, public start: Position) {}

    /** Returns an independent copy, including a copy of the start position. */
    public duplicate(): IdentifierState {
        return new IdentifierState(this.value, this.active, this.start.duplicate());
    }

    /** Creates an active, empty state that begins at a copy of `position`. */
    public static start(position: Position): IdentifierState {
        return new IdentifierState('', true, position.duplicate());
    }

    /**
     * Inactive sentinel. Its value is an empty string rather than `null` so the
     * declared `string` type holds under `strictNullChecks`.
     */
    public static none: IdentifierState = new IdentifierState('', false, Position.none.duplicate());
}

View file

@ -0,0 +1,16 @@
import { Position } from '../position.class.js';
/**
 * Accumulator for a numeric literal that is being read: the text collected so
 * far, whether a decimal point has already been seen, whether a number is
 * currently in progress, and where it started.
 * Instances are created through `start` or by copying `none`.
 */
export class NumberState {
    private constructor(public value: string, public decimal: boolean, public active: boolean, public start: Position) {}

    /** Returns an independent copy, including a copy of the start position. */
    public duplicate(): NumberState {
        return new NumberState(this.value, this.decimal, this.active, this.start.duplicate());
    }

    /** Creates an active, empty state without a decimal point that begins at a copy of `position`. */
    public static start(position: Position): NumberState {
        return new NumberState('', false, true, position.duplicate());
    }

    /**
     * Inactive sentinel. Its value is an empty string rather than `null` so the
     * declared `string` type holds under `strictNullChecks`.
     */
    public static none: NumberState = new NumberState('', false, false, Position.none.duplicate());
}

View file

@ -0,0 +1,16 @@
import { Position } from '../position.class.js';
/**
 * Accumulator for a string literal that is being read: the text collected so
 * far, the quote character that opened it, whether a string is currently in
 * progress, and where it started.
 * Instances are created through `start` or by copying `none`.
 */
export class StringState {
    private constructor(public value: string, public quote: string, public active: boolean, public start: Position) {}

    /** Returns an independent copy, including a copy of the start position. */
    public duplicate(): StringState {
        return new StringState(this.value, this.quote, this.active, this.start.duplicate());
    }

    /** Creates an active, empty state opened by `quote` that begins at a copy of `position`. */
    public static start(quote: string, position: Position): StringState {
        return new StringState('', quote, true, position.duplicate());
    }

    /**
     * Inactive sentinel. Value and quote are empty strings rather than `null` so
     * the declared `string` types hold under `strictNullChecks`; `quote` is only
     * meaningful while `active` is true.
     */
    public static none: StringState = new StringState('', '', false, Position.none.duplicate());
}

View file

@ -0,0 +1,7 @@
import { Position } from './position.class.js';
/**
 * One lexical unit produced by the tokenizer.
 */
export interface Token {
// Category of the token. The parser fills this with a member of the `Type`
// enum, and the highlighter uses the same string as the CSS colour.
type: string;
// Text of the token. For string literals the surrounding quotes are not included.
value: string;
// Line, start column and length of the token in the source. For string
// literals the length also covers the quotes.
position: Position;
}

View file

@ -0,0 +1,7 @@
/**
 * Token categories. Each member's value is a hex colour string; the
 * highlighter assigns it directly to `style.color` of the rendered token.
 */
export enum Type {
// Any name that is not recognised as a keyword or literal.
Identifier = '#606872',
// Identifiers found in the parser's keyword list.
Keyword = '#494f56',
// Single-character operators and separators.
Punctuation = '#72777c',
// Brackets: parentheses, square brackets and braces.
Nesting = '#78818d',
// Strings, numbers and the words true, false, null and undefined.
Literal = '#8f969e'
}