/*
 * Reference: Added tokenizer code from temporary project
 *
 * Source commit f0ee842a1bf3c3b015bcc839f2796d5e8ed4a2ad by networkException,
 * Sun, 4 Apr 2021 16:53:23 +0200. The commit added eight files below `.ref/`
 * (tokenizer.ts, tokenizer/parser.class.ts, tokenizer/position.class.ts,
 * tokenizer/state/identifierState.class.ts, tokenizer/state/numberState.class.ts,
 * tokenizer/state/stringState.class.ts, tokenizer/token.interfance.ts and
 * tokenizer/type.enum.ts). They are kept here as one module, each section
 * labelled with its original path. Declarations come before their first use so
 * the static `none` instances and the page bootstrap at the bottom can be
 * evaluated top to bottom.
 */

// ---------------------------------------------------------------------------
// .ref/tokenizer/type.enum.ts
// ---------------------------------------------------------------------------

/**
 * Token categories. Each member's value is the colour string that the page
 * bootstrap assigns to `style.color` of the span rendered for the token.
 */
export enum Type {
    Identifier = '#606872',
    Keyword = '#494f56',
    Punctuation = '#72777c',
    Nesting = '#78818d',
    Literal = '#8f969e'
}

// ---------------------------------------------------------------------------
// .ref/tokenizer/position.class.ts
// ---------------------------------------------------------------------------

/**
 * A zero-based line/column location plus the number of characters covered.
 * `setColumn`, `addColumn` and `addLine` mutate the instance and return it for
 * chaining; `duplicate` and `withLength` return independent copies.
 */
export class Position {
    public constructor(public line: number, public column: number, public length: number) {
    }

    /** True when `(line, column)` is exactly where this position begins. */
    public starts(line: number, column: number): boolean {
        return line === this.line && column === this.column;
    }

    /**
     * True when `(line, column)` lies on this position's line strictly after
     * its first column and before `column + length`.
     */
    public intersects(line: number, column: number): boolean {
        return line === this.line && column > this.column && column < this.column + this.length;
    }

    /** Returns a copy with the same line, column and length. */
    public duplicate(): Position {
        return new Position(this.line, this.column, this.length);
    }

    /** Returns a copy whose length is replaced by `length`. */
    public withLength(length: number): Position {
        const duplicate: Position = this.duplicate();

        duplicate.length = length;

        return duplicate;
    }

    /** Sets the column in place. */
    public setColumn(column: number): Position {
        this.column = column;

        return this;
    }

    /** Adds `column` to the current column in place. */
    public addColumn(column: number): Position {
        this.column += column;

        return this;
    }

    /** Adds `line` to the current line in place. */
    public addLine(line: number): Position {
        this.line += line;

        return this;
    }

    /** Line 0, column 0, length 0. Shared instance: duplicate before mutating. */
    public static none: Position = new Position(0, 0, 0);
}

// ---------------------------------------------------------------------------
// .ref/tokenizer/token.interfance.ts
// ---------------------------------------------------------------------------

/** A single lexical unit produced by `Parser.tokenize`. */
export interface Token {
    /** One of the `Type` colour strings. */
    type: string;
    /** Source text of the token; for string literals, the text between the quotes. */
    value: string;
    /** Where the token starts and how many source characters it covers. */
    position: Position;
}

// ---------------------------------------------------------------------------
// .ref/tokenizer/state/identifierState.class.ts
// ---------------------------------------------------------------------------

/** Accumulator for the identifier currently being read by the parser. */
export class IdentifierState {
    private constructor(public value: string, public active: boolean, public start: Position) {
    }

    /** Returns an independent copy, including a copy of the start position. */
    public duplicate(): IdentifierState {
        return new IdentifierState(this.value, this.active, this.start.duplicate());
    }

    /** Creates an active, empty state that begins at a copy of `position`. */
    public static start(position: Position): IdentifierState {
        return new IdentifierState('', true, position.duplicate());
    }

    /** Inactive placeholder. Shared instance: duplicate before storing it. */
    public static none: IdentifierState = new IdentifierState('', false, Position.none.duplicate());
}

// ---------------------------------------------------------------------------
// .ref/tokenizer/state/numberState.class.ts
// ---------------------------------------------------------------------------

/** Accumulator for the numeric literal currently being read by the parser. */
export class NumberState {
    private constructor(public value: string, public decimal: boolean, public active: boolean, public start: Position) {
    }

    /** Returns an independent copy, including a copy of the start position. */
    public duplicate(): NumberState {
        return new NumberState(this.value, this.decimal, this.active, this.start.duplicate());
    }

    /** Creates an active, empty state (no decimal point seen) at a copy of `position`. */
    public static start(position: Position): NumberState {
        return new NumberState('', false, true, position.duplicate());
    }

    /** Inactive placeholder. Shared instance: duplicate before storing it. */
    public static none: NumberState = new NumberState('', false, false, Position.none.duplicate());
}

// ---------------------------------------------------------------------------
// .ref/tokenizer/state/stringState.class.ts
// ---------------------------------------------------------------------------

/** Accumulator for the quoted literal currently being read by the parser. */
export class StringState {
    private constructor(public value: string, public quote: string, public active: boolean, public start: Position) {
    }

    /** Returns an independent copy, including a copy of the start position. */
    public duplicate(): StringState {
        return new StringState(this.value, this.quote, this.active, this.start.duplicate());
    }

    /** Creates an active, empty state opened by `quote` at a copy of `position`. */
    public static start(quote: string, position: Position): StringState {
        return new StringState('', quote, true, position.duplicate());
    }

    /** Inactive placeholder. Shared instance: duplicate before storing it. */
    public static none: StringState = new StringState('', '', false, Position.none.duplicate());
}

// ---------------------------------------------------------------------------
// .ref/tokenizer/parser.class.ts
// ---------------------------------------------------------------------------

const DIGITS: ReadonlyArray<string> = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
const QUOTES: ReadonlyArray<string> = ['"', '\'', '`'];
const PUNCTUATION: ReadonlyArray<string> = ['.', ',', ':', ';', '+', '-', '*', '/', '=', '<', '>', '|'];
const NESTING: ReadonlyArray<string> = ['(', ')', '[', ']', '{', '}'];
const WHITESPACE: ReadonlyArray<string> = [' ', '\t', '\r', '\n'];

/** Identifier spellings that `Parser` re-labels as `Type.Keyword`. */
const KEYWORDS: ReadonlySet<string> = new Set<string>([
    'let',
    'const',
    'var',

    'class',
    'interface',
    'enum',

    'if',
    'else',

    'return',
    'break',
    'continue',

    'try',
    'catch',
    'finally',

    'for',
    'while',
    'do',

    'of',
    'in',
    'as',
    'new',

    'private',
    'public',
    'readonly',
    'static'
]);

/** Identifier spellings that `Parser` re-labels as `Type.Literal`. */
const LITERAL_WORDS: ReadonlySet<string> = new Set<string>(['true', 'false', 'null', 'undefined']);

/**
 * Single-pass tokenizer. The source text is consumed from the front while the
 * current line/column is tracked, so a `Parser` instance can be tokenized once.
 */
export class Parser {
    private string: StringState;
    private identifier: IdentifierState;
    private number: NumberState;
    private position: Position;

    /**
     * @param source Text to tokenize.
     */
    public constructor(private source: string) {
        this.string = StringState.none.duplicate();
        this.identifier = IdentifierState.none.duplicate();
        this.number = NumberState.none.duplicate();
        this.position = Position.none.duplicate();
    }

    /**
     * Splits the remaining source into tokens.
     *
     * Whitespace (space, tab, carriage return, newline) only separates tokens.
     * Quoted literals keep the raw text between the quotes as their value; a
     * backslash keeps the following character inside the literal. Positions
     * are zero-based and counted in UTF-16 code units.
     *
     * @returns The tokens in source order, with keywords and word literals re-labelled.
     */
    public tokenize(): Array<Token> {
        const tokens: Array<Token> = [];

        const commitIdentifier = (): void => {
            if (!this.identifier.active) return;

            tokens.push({
                type: Type.Identifier,
                value: this.identifier.value,
                position: this.identifier.start.withLength(this.identifier.value.length)
            });

            // Install a copy so the shared placeholder can never be mutated through this parser.
            this.identifier = IdentifierState.none.duplicate();
        };

        const commitString = (terminated: boolean): void => {
            if (!this.string.active) return;

            tokens.push({
                type: Type.Literal,
                value: this.string.value,
                // Opening quote, plus the closing quote only when one was actually read.
                position: this.string.start.withLength(this.string.value.length + (terminated ? 2 : 1))
            });

            this.string = StringState.none.duplicate();
        };

        const commitNumber = (): void => {
            if (!this.number.active) return;

            tokens.push({
                type: Type.Literal,
                value: this.number.value,
                position: this.number.start.withLength(this.number.value.length)
            });

            this.number = NumberState.none.duplicate();
        };

        const pushSingle = (type: Type): void => {
            // The position copy is taken before the character is consumed.
            tokens.push({ type, value: this.peek(), position: this.position.withLength(1) });
            this.take();
        };

        while (this.source.length > 0) {
            if (this.string.active) {
                if (this.matches(this.string.quote)) {
                    this.take();

                    commitString(true);
                } else if (this.matches('\\')) {
                    // Keep the escape pair together so an escaped quote does not end the literal.
                    this.string.value += this.take(2);
                } else {
                    this.string.value += this.take();
                }
            } else if (this.matches(...QUOTES)) {
                commitIdentifier();
                commitNumber();

                this.string = StringState.start(this.peek(), this.position);
                this.take();
            } else if (this.number.active) {
                if (this.matches('.')) {
                    if (this.number.decimal) {
                        // Second decimal point: end the number, the dot is re-examined next round.
                        commitNumber();
                    } else {
                        this.number.decimal = true;
                        this.number.value += this.take();
                    }
                } else if (this.matches(...DIGITS)) {
                    this.number.value += this.take();
                } else {
                    // Not consumed here; the character is re-examined next round.
                    commitNumber();
                }
            } else if (this.matches(...DIGITS)) {
                commitIdentifier();

                this.number = NumberState.start(this.position);
                this.number.value += this.take();
            } else if (/^-\d/.test(this.peek(2))) {
                commitIdentifier();

                this.number = NumberState.start(this.position);
                this.number.value += this.take(2);
            } else if (this.matches(...PUNCTUATION)) {
                commitIdentifier();

                pushSingle(Type.Punctuation);
            } else if (this.matches(...NESTING)) {
                commitIdentifier();

                pushSingle(Type.Nesting);
            } else if (this.matches(...WHITESPACE)) {
                commitIdentifier();

                this.take();
            } else {
                if (!this.identifier.active) {
                    this.identifier = IdentifierState.start(this.position);
                }

                this.identifier.value += this.take();
            }
        }

        commitIdentifier();
        commitString(false);
        commitNumber();

        return this.mark(tokens);
    }

    /**
     * Consumes up to `length` characters and advances the tracked position
     * over them, moving to column 0 of the next line at every newline.
     */
    private take(length: number = 1): string {
        const consumed: string = this.consume(length);

        // Indexed by code unit to stay consistent with how the renderer indexes lines.
        for (let index = 0; index < consumed.length; index++) {
            if (consumed[index] === '\n') {
                this.position.addLine(1).setColumn(0);
            } else {
                this.position.addColumn(1);
            }
        }

        return consumed;
    }

    /** Removes and returns up to `length` characters from the front of the source. */
    private consume(length: number = 1): string {
        const consumed: string = this.peek(length);

        this.source = this.source.substring(length);

        return consumed;
    }

    /** Returns up to `length` characters from the front of the source without removing them. */
    private peek(length: number = 1): string {
        return this.source.substring(0, length);
    }

    /** True when the remaining source starts with any of `values`. */
    private matches(...values: Array<string>): boolean {
        for (const value of values) {
            if (this.source.startsWith(value)) return true;
        }

        return false;
    }

    /**
     * Re-labels identifier tokens in place: keyword spellings become
     * `Type.Keyword`; `true`, `false`, `null` and `undefined` become
     * `Type.Literal`. Tokens of any other type are left untouched.
     */
    private mark(tokens: Array<Token>): Array<Token> {
        const marked: Array<Token> = [];

        for (const token of tokens) {
            if (token.type === Type.Identifier) {
                if (KEYWORDS.has(token.value)) {
                    token.type = Type.Keyword;
                } else if (LITERAL_WORDS.has(token.value)) {
                    token.type = Type.Literal;
                }
            }

            marked.push(token);
        }

        console.log('Marked Tokens:', marked);

        return marked;
    }
}

// ---------------------------------------------------------------------------
// .ref/tokenizer.ts
// ---------------------------------------------------------------------------

// Page bootstrap: colourises every element with class `tokenized` once on load
// and again whenever it loses focus. Skipped where no DOM is available.
if (typeof document !== 'undefined') {
    Array.from(document.getElementsByClassName('tokenized'))
        .map(tokenized => tokenized as HTMLDivElement)
        .forEach(tokenized => {
            const run = (): void => {
                const source: string = tokenized.innerText;

                console.time('tokenized');
                const tokens: Array<Token> = new Parser(source).tokenize();
                console.timeEnd('tokenized');

                tokenized.innerHTML = '';

                const lines: Array<string> = source.split('\n');

                for (let line = 0; line < lines.length; line++) {
                    const lineValue: string = lines[line];

                    for (let column = 0; column < lineValue.length; column++) {
                        let found: boolean = false;

                        for (const token of tokens) {
                            if (token.position.starts(line, column)) {
                                found = true;

                                const element: HTMLSpanElement = document.createElement('span');

                                element.style.color = token.type;
                                element.innerText = lineValue.substring(token.position.column, token.position.length + token.position.column);

                                tokenized.appendChild(element);

                                break;
                            }

                            // Columns inside a token were already rendered with its span.
                            if (token.position.intersects(line, column)) {
                                found = true;
                                break;
                            }
                        }

                        if (found) continue;

                        // A text node avoids re-parsing the whole container for a single character.
                        tokenized.appendChild(document.createTextNode(lineValue[column]));
                    }

                    const element: HTMLSpanElement = document.createElement('span');
                    element.innerText = '\n';
                    tokenized.appendChild(element);
                }

                // Drop the line break appended after the final line.
                tokenized.lastChild?.remove();
            };

            tokenized.addEventListener('focusout', run);

            run();
        });
}