Reference: Added tokenizer code from temporary project
This commit is contained in:
parent
e08394bd16
commit
f0ee842a1b
8 changed files with 395 additions and 0 deletions
59
.ref/tokenizer.ts
Normal file
59
.ref/tokenizer.ts
Normal file
|
@ -0,0 +1,59 @@
|
|||
import { Parser } from './tokenizer/parser.class.js';
|
||||
import { Token } from './tokenizer/token.interfance.js';
|
||||
|
||||
/**
 * Highlights every element carrying the `tokenized` class.
 *
 * The element's text is tokenized and the element's children are replaced by
 * one coloured `<span>` per token, plain text for everything that is not part
 * of a token, and one `<span>` holding a line break between source lines.
 * The highlight is applied once immediately and again on every `focusout`.
 */
Array.from(document.getElementsByClassName('tokenized')).map(tokenized => tokenized as HTMLDivElement).forEach(tokenized => {
  /**
   * Returns the first token (in tokenizer order) that starts at, or covers,
   * the given cell, or `undefined` when the cell belongs to no token.
   */
  const tokenAt = (candidates: Array<Token>, line: number, column: number): Token | undefined => {
    for (const candidate of candidates) {
      if (candidate.position.starts(line, column) || candidate.position.intersects(line, column)) {
        return candidate;
      }
    }

    return undefined;
  };

  /** Re-tokenizes the element's current text and rebuilds its children. */
  const run = (): void => {
    const source: string = tokenized.innerText;

    console.time('tokenized');
    const tokens: Array<Token> = new Parser(source).tokenize();
    console.timeEnd('tokenized');

    // Both `starts` and `intersects` require the token to be on the queried
    // line, so bucketing by line lets each line scan only its own tokens.
    const tokensByLine = new Map<number, Array<Token>>();

    for (const token of tokens) {
      const bucket = tokensByLine.get(token.position.line);

      if (bucket === undefined) tokensByLine.set(token.position.line, [token]);
      else bucket.push(token);
    }

    // Build the new content off-document and attach it once. Plain characters
    // are added as text nodes so source text is never parsed as HTML.
    const output: DocumentFragment = document.createDocumentFragment();
    const lines: Array<string> = source.split('\n');

    lines.forEach((lineValue, line) => {
      const lineTokens: Array<Token> = tokensByLine.get(line) || [];
      let plain = '';

      for (let column = 0; column < lineValue.length; column++) {
        const token = tokenAt(lineTokens, line, column);

        if (token === undefined) {
          plain += lineValue.charAt(column);
          continue;
        }

        // Inside a token whose span was already emitted at its start column.
        if (!token.position.starts(line, column)) continue;

        if (plain.length > 0) {
          output.appendChild(document.createTextNode(plain));
          plain = '';
        }

        const element: HTMLSpanElement = document.createElement('span');

        element.style.color = token.type;
        element.innerText = lineValue.substring(token.position.column, token.position.length + token.position.column);

        output.appendChild(element);
      }

      if (plain.length > 0) output.appendChild(document.createTextNode(plain));

      // Line breaks go between lines only, so nothing has to be removed afterwards.
      if (line < lines.length - 1) {
        const lineBreak: HTMLSpanElement = document.createElement('span');

        lineBreak.innerText = '\n';

        output.appendChild(lineBreak);
      }
    });

    tokenized.innerHTML = '';
    tokenized.appendChild(output);
  };

  tokenized.addEventListener('focusout', run);

  run();
});
|
230
.ref/tokenizer/parser.class.ts
Normal file
230
.ref/tokenizer/parser.class.ts
Normal file
|
@ -0,0 +1,230 @@
|
|||
import { Position } from './position.class.js';
|
||||
import { IdentifierState } from './state/identifierState.class.js';
|
||||
import { NumberState } from './state/numberState.class.js';
|
||||
import { StringState } from './state/stringState.class.js';
|
||||
import { Token } from './token.interfance.js';
|
||||
import { Type } from './type.enum.js';
|
||||
|
||||
/**
 * Single-pass tokenizer for TypeScript-like source text.
 *
 * The source is consumed from the front one or two characters at a time while
 * a running line/column position is maintained. Strings, numbers and
 * identifiers are accumulated in dedicated state objects and turned into
 * tokens when they end; punctuation and brackets become one-character tokens.
 * Whitespace produces no token.
 */
export class Parser {
  /** Identifier texts that `mark` re-tags as keywords. */
  private static readonly keywords: ReadonlySet<string> = new Set<string>([
    'let', 'const', 'var',
    'class', 'interface', 'enum',
    'if', 'else',
    'return', 'break', 'continue',
    'try', 'catch', 'finally',
    'for', 'while', 'do',
    'of', 'in', 'as', 'new',
    'private', 'public', 'readonly', 'static'
  ]);

  /** Identifier texts that `mark` re-tags as literals. */
  private static readonly literalWords: ReadonlySet<string> = new Set<string>([
    'true', 'false', 'null', 'undefined'
  ]);

  /** Source starts with a decimal digit. */
  private static readonly digit: RegExp = /^\d/;

  /** Source starts with a minus sign directly followed by a digit. */
  private static readonly negativeNumber: RegExp = /^-\d/;

  /** Source starts with a whitespace character other than a line feed. */
  private static readonly blank: RegExp = /^[^\S\n]/;

  private string: StringState;
  private identifier: IdentifierState;
  private number: NumberState;
  private position: Position;

  /**
   * @param source Text to tokenize. It is consumed while tokenizing, so one
   *               parser instance tokenizes its source only once.
   */
  public constructor(private source: string) {
    this.string = StringState.none.duplicate();
    this.identifier = IdentifierState.none.duplicate();
    this.number = NumberState.none.duplicate();
    this.position = Position.none.duplicate();
  }

  /**
   * Consumes the whole source and returns its tokens in source order, with
   * keywords and literal words already re-tagged.
   *
   * @returns The tokens found in the source.
   */
  public tokenize(): Array<Token> {
    const tokens: Array<Token> = [];

    while (this.source.length > 0) {
      if (this.string.active) {
        this.scanString(tokens);
      } else if (this.matches('"', '\'', '`')) {
        this.commitIdentifier(tokens);
        this.commitNumber(tokens);

        // The position is not advanced while inside a string; commitString
        // moves it past the whole literal in one step.
        this.string = StringState.start(this.consume(), this.position);
      } else if (this.number.active) {
        this.scanNumber(tokens);
      } else if (Parser.digit.test(this.source)) {
        if (this.identifier.active) {
          // A digit inside a running identifier (e.g. `item2`) belongs to it.
          this.identifier.value += this.consume();
        } else {
          this.number = NumberState.start(this.position);
          this.number.value += this.consume();
        }

        this.position.addColumn(1);
      } else if (Parser.negativeNumber.test(this.source)) {
        this.commitIdentifier(tokens);

        this.number = NumberState.start(this.position);
        this.number.value += this.consume(2);
        this.position.addColumn(2);
      } else if (this.matches('.', ',', ':', ';', '+', '-', '*', '/', '=', '<', '>', '|')) {
        this.commitSymbol(tokens, Type.Punctuation);
      } else if (this.matches('(', ')', '[', ']', '{', '}')) {
        this.commitSymbol(tokens, Type.Nesting);
      } else if (Parser.blank.test(this.source)) {
        // Any whitespace except a line feed (space, tab, carriage return,
        // no-break space, ...) separates tokens and occupies one column.
        this.commitIdentifier(tokens);
        this.commitNumber(tokens);

        this.consume();
        this.position.addColumn(1);
      } else if (this.matches('\n')) {
        this.commitIdentifier(tokens);
        this.commitNumber(tokens);

        this.consume();
        this.position.addLine(1).setColumn(0);
      } else {
        if (!this.identifier.active) {
          this.identifier = IdentifierState.start(this.position);
        }

        this.identifier.value += this.consume();
        this.position.addColumn(1);
      }
    }

    // Flush whatever was still being accumulated when the source ran out.
    this.commitIdentifier(tokens);
    this.commitString(tokens, false);
    this.commitNumber(tokens);

    return this.mark(tokens);
  }

  /** Handles one step while inside a string literal. */
  private scanString(tokens: Array<Token>): void {
    if (this.matches(this.string.quote)) {
      this.consume();
      this.commitString(tokens, true);
    } else if (this.matches('\\')) {
      // Keep the backslash together with the character it escapes so that an
      // escaped quote does not terminate the literal.
      this.string.value += this.consume(2);
    } else {
      this.string.value += this.consume();
    }
  }

  /** Handles one step while inside a number literal. */
  private scanNumber(tokens: Array<Token>): void {
    if (this.matches('.')) {
      if (this.number.decimal) {
        // A second dot ends the number; the dot itself is handled on the next step.
        this.commitNumber(tokens);
        return;
      }

      this.number.decimal = true;
    } else if (!Parser.digit.test(this.source)) {
      // Anything else ends the number without being consumed here.
      this.commitNumber(tokens);
      return;
    }

    this.number.value += this.consume();
    this.position.addColumn(1);
  }

  /** Emits the running identifier, if any, and resets the identifier state. */
  private commitIdentifier(tokens: Array<Token>): void {
    if (!this.identifier.active) return;

    tokens.push({
      type: Type.Identifier,
      value: this.identifier.value,
      position: this.identifier.start.withLength(this.identifier.value.length)
    });

    this.identifier = IdentifierState.none;
  }

  /**
   * Emits the running string literal, if any, and moves the position past it.
   *
   * @param terminated Whether a closing quote was consumed. The token length
   *                   covers the opening quote, the content and, when
   *                   terminated, the closing quote.
   */
  private commitString(tokens: Array<Token>, terminated: boolean): void {
    if (!this.string.active) return;

    const value: string = this.string.value;
    const closing: number = terminated ? 1 : 0;
    const span: number = value.length + 1 + closing;

    tokens.push({
      type: Type.Literal,
      value,
      position: this.string.start.withLength(span)
    });

    const lastBreak: number = value.lastIndexOf('\n');

    if (lastBreak === -1) {
      this.position.addColumn(span);
    } else {
      // The literal spans several lines: continue on the line where it ends,
      // right after the text that follows its last line break.
      const breaks: number = value.split('\n').length - 1;

      this.position.addLine(breaks).setColumn(value.length - lastBreak - 1 + closing);
    }

    this.string = StringState.none;
  }

  /** Emits the running number literal, if any, and resets the number state. */
  private commitNumber(tokens: Array<Token>): void {
    if (!this.number.active) return;

    tokens.push({
      type: Type.Literal,
      value: this.number.value,
      position: this.number.start.withLength(this.number.value.length)
    });

    this.number = NumberState.none;
  }

  /** Emits the next character as a one-character token of the given type. */
  private commitSymbol(tokens: Array<Token>, type: Type): void {
    this.commitIdentifier(tokens);
    this.commitNumber(tokens);

    tokens.push({
      type,
      value: this.consume(),
      position: this.position.withLength(1)
    });

    this.position.addColumn(1);
  }

  /** Removes up to `length` characters from the front of the source and returns them. */
  private consume(length: number = 1): string {
    const consumed: string = this.peek(length);

    this.source = this.source.substring(length);

    return consumed;
  }

  /** Returns up to `length` characters from the front of the source without removing them. */
  private peek(length: number = 1): string {
    return this.source.substring(0, length);
  }

  /** Tells whether the source starts with any of the given values. */
  private matches(...values: Array<string>): boolean {
    for (const value of values) {
      if (this.source.startsWith(value)) return true;
    }

    return false;
  }

  /**
   * Re-tags identifier tokens whose text is a keyword or a literal word
   * (`true`, `false`, `null`, `undefined`). Only identifier tokens are
   * re-tagged. The tokens are modified in place and returned in a new array.
   */
  private mark(tokens: Array<Token>): Array<Token> {
    const marked: Array<Token> = [];

    for (const token of tokens) {
      if (token.type === Type.Identifier) {
        if (Parser.keywords.has(token.value)) {
          token.type = Type.Keyword;
        } else if (Parser.literalWords.has(token.value)) {
          token.type = Type.Literal;
        }
      }

      marked.push(token);
    }

    console.log('Marked Tokens:', marked);

    return marked;
  }
}
|
44
.ref/tokenizer/position.class.ts
Normal file
44
.ref/tokenizer/position.class.ts
Normal file
|
@ -0,0 +1,44 @@
|
|||
/**
 * A mutable line/column location with an attached length, used both as the
 * tokenizer's running cursor and as the span of an emitted token.
 */
export class Position {
  /** Shared origin value (line 0, column 0, length 0). */
  public static none: Position = new Position(0, 0, 0);

  public line: number;
  public column: number;
  public length: number;

  /**
   * @param line   Line number of the location.
   * @param column Column number of the location.
   * @param length Number of columns covered from `column` onwards.
   */
  public constructor(line: number, column: number, length: number) {
    this.line = line;
    this.column = column;
    this.length = length;
  }

  /** True when the given cell is exactly this position's first cell. */
  public starts(line: number, column: number): boolean {
    if (line !== this.line) return false;

    return column === this.column;
  }

  /**
   * True when the given cell lies on this position's line, after its first
   * cell and before the end of its length. The first cell itself is excluded.
   */
  public intersects(line: number, column: number): boolean {
    if (line !== this.line) return false;

    const end: number = this.column + this.length;

    return this.column < column && column < end;
  }

  /** Returns an independent copy of this position. */
  public duplicate(): Position {
    const { line, column, length } = this;

    return new Position(line, column, length);
  }

  /** Returns a copy of this position with the given length; this instance is left untouched. */
  public withLength(length: number): Position {
    return new Position(this.line, this.column, length);
  }

  /** Sets the column in place and returns this instance for chaining. */
  public setColumn(column: number): Position {
    this.column = column;
    return this;
  }

  /** Adds to the column in place and returns this instance for chaining. */
  public addColumn(column: number): Position {
    this.column = this.column + column;
    return this;
  }

  /** Adds to the line in place and returns this instance for chaining. */
  public addLine(line: number): Position {
    this.line = this.line + line;
    return this;
  }
}
|
16
.ref/tokenizer/state/identifierState.class.ts
Normal file
16
.ref/tokenizer/state/identifierState.class.ts
Normal file
|
@ -0,0 +1,16 @@
|
|||
import { Position } from '../position.class.js';
|
||||
|
||||
/**
 * Accumulator for an identifier that is currently being read.
 *
 * Instances are obtained through `start` (an active, empty accumulator) or
 * from `none` (the inactive placeholder); the constructor is private.
 */
export class IdentifierState {
  /**
   * @param value  Characters collected so far.
   * @param active Whether an identifier is currently being read.
   * @param start  Position at which the identifier began.
   */
  private constructor(public value: string, public active: boolean, public start: Position) {
  }

  /** Returns an independent copy, including a copy of the start position. */
  public duplicate(): IdentifierState {
    return new IdentifierState(this.value, this.active, this.start.duplicate());
  }

  /** Creates an active, empty state that begins at a copy of the given position. */
  public static start(position: Position): IdentifierState {
    return new IdentifierState('', true, position.duplicate());
  }

  /**
   * Inactive placeholder state. Its value is the empty string rather than
   * `null` so that it satisfies the declared `string` type under
   * `strictNullChecks`.
   */
  public static none: IdentifierState = new IdentifierState('', false, Position.none.duplicate());
}
|
16
.ref/tokenizer/state/numberState.class.ts
Normal file
16
.ref/tokenizer/state/numberState.class.ts
Normal file
|
@ -0,0 +1,16 @@
|
|||
import { Position } from '../position.class.js';
|
||||
|
||||
/**
 * Accumulator for a number literal that is currently being read.
 *
 * Instances are obtained through `start` (an active, empty accumulator) or
 * from `none` (the inactive placeholder); the constructor is private.
 */
export class NumberState {
  /**
   * @param value   Characters collected so far.
   * @param decimal Whether a decimal point has already been collected.
   * @param active  Whether a number is currently being read.
   * @param start   Position at which the number began.
   */
  private constructor(public value: string, public decimal: boolean, public active: boolean, public start: Position) {
  }

  /** Returns an independent copy, including a copy of the start position. */
  public duplicate(): NumberState {
    return new NumberState(this.value, this.decimal, this.active, this.start.duplicate());
  }

  /** Creates an active, empty, non-decimal state that begins at a copy of the given position. */
  public static start(position: Position): NumberState {
    return new NumberState('', false, true, position.duplicate());
  }

  /**
   * Inactive placeholder state. Its value is the empty string rather than
   * `null` so that it satisfies the declared `string` type under
   * `strictNullChecks`.
   */
  public static none: NumberState = new NumberState('', false, false, Position.none.duplicate());
}
|
16
.ref/tokenizer/state/stringState.class.ts
Normal file
16
.ref/tokenizer/state/stringState.class.ts
Normal file
|
@ -0,0 +1,16 @@
|
|||
import { Position } from '../position.class.js';
|
||||
|
||||
/**
 * Accumulator for a string literal that is currently being read.
 *
 * Instances are obtained through `start` (an active, empty accumulator) or
 * from `none` (the inactive placeholder); the constructor is private.
 */
export class StringState {
  /**
   * @param value  Characters collected so far, without the surrounding quotes.
   * @param quote  Quote character that opened the literal.
   * @param active Whether a string is currently being read.
   * @param start  Position at which the literal began.
   */
  private constructor(public value: string, public quote: string, public active: boolean, public start: Position) {
  }

  /** Returns an independent copy, including a copy of the start position. */
  public duplicate(): StringState {
    return new StringState(this.value, this.quote, this.active, this.start.duplicate());
  }

  /** Creates an active, empty state for the given quote that begins at a copy of the given position. */
  public static start(quote: string, position: Position): StringState {
    return new StringState('', quote, true, position.duplicate());
  }

  /**
   * Inactive placeholder state. Value and quote are empty strings rather than
   * `null` so that they satisfy the declared `string` types under
   * `strictNullChecks`.
   */
  public static none: StringState = new StringState('', '', false, Position.none.duplicate());
}
|
7
.ref/tokenizer/token.interfance.ts
Normal file
7
.ref/tokenizer/token.interfance.ts
Normal file
|
@ -0,0 +1,7 @@
|
|||
import { Position } from './position.class.js';
|
||||
|
||||
/** A single lexical unit produced by the tokenizer. */
export interface Token {
  /** Category tag of the token, held as a plain string. */
  type: string;

  /** Text associated with the token. */
  value: string;

  /** Line, column and length of the token within the source. */
  position: Position;
}
|
7
.ref/tokenizer/type.enum.ts
Normal file
7
.ref/tokenizer/type.enum.ts
Normal file
|
@ -0,0 +1,7 @@
|
|||
/**
 * Token categories. Each member's value is a hexadecimal colour string, so a
 * member can be used directly as a CSS colour.
 */
export enum Type {
  /** Names that are neither keywords nor literals. */
  Identifier = '#606872',

  /** Reserved words. */
  Keyword = '#494f56',

  /** Operators and separators. */
  Punctuation = '#72777c',

  /** Brackets, braces and parentheses. */
  Nesting = '#78818d',

  /** String, number and word literals. */
  Literal = '#8f969e'
}
|
Loading…
Reference in a new issue