nwex.de/.ref/tokenizer/parser.class.ts

230 lines
No EOL
6.9 KiB
TypeScript

import { Position } from './position.class.js';
import { IdentifierState } from './state/identifierState.class.js';
import { NumberState } from './state/numberState.class.js';
import { StringState } from './state/stringState.class.js';
import { Token } from './token.interfance.js';
import { Type } from './type.enum.js';
/**
 * Character-level tokenizer that splits a source string into identifier,
 * keyword, literal, punctuation and nesting tokens.
 *
 * The source string is consumed destructively while tokenizing (see
 * `consume`), so a second call to `tokenize()` on the same instance finds
 * nothing left to read and returns an empty array.
 */
export class Parser {
    /** Characters that start or continue a numeric literal. */
    private static readonly digits: ReadonlyArray<string> = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
    /** Single characters emitted as `Type.Punctuation` tokens. */
    private static readonly punctuation: ReadonlyArray<string> = ['.', ',', ':', ';', '+', '-', '*', '/', '=', '<', '>', '|'];
    /** Single characters emitted as `Type.Nesting` tokens. */
    private static readonly nesting: ReadonlyArray<string> = ['(', ')', '[', ']', '{', '}'];
    /** Characters that open a string literal; the same character closes it. */
    private static readonly quotes: ReadonlyArray<string> = ['"', '\'', '`'];
    /** Whitespace that separates tokens without starting a new line. */
    private static readonly inlineWhitespace: ReadonlyArray<string> = [' ', '\t', '\r'];
    /** A minus sign directly followed by a digit starts a negative number. */
    private static readonly signedNumberStart: RegExp = /^-\d/;
    /** Identifiers that are re-typed as `Type.Keyword` by `mark`. */
    private static readonly keywords: ReadonlySet<string> = new Set<string>([
        'let',
        'const',
        'var',
        'class',
        'interface',
        'enum',
        'if',
        'else',
        'return',
        'break',
        'continue',
        'try',
        'catch',
        'finally',
        'for',
        'while',
        'do',
        'of',
        'in',
        'as',
        'new',
        'private',
        'public',
        'readonly',
        'static'
    ]);
    /** Identifiers that are re-typed as `Type.Literal` by `mark`. */
    private static readonly literalWords: ReadonlySet<string> = new Set<string>(['true', 'false', 'null', 'undefined']);

    private string: StringState;
    private identifier: IdentifierState;
    private number: NumberState;
    private position: Position;

    /**
     * @param source Text to tokenize. It is shortened from the front as
     * characters are consumed.
     */
    public constructor(private source: string) {
        this.string = StringState.none.duplicate();
        this.identifier = IdentifierState.none.duplicate();
        this.number = NumberState.none.duplicate();
        this.position = Position.none.duplicate();
    }

    /**
     * Consumes the whole source and returns its tokens in source order.
     *
     * At most one of the string / number / identifier states is active at a
     * time; every branch that starts or emits something else first commits
     * whichever state is still pending.
     *
     * @returns The tokens, with keywords and literal words already re-typed
     * by `mark`.
     */
    public tokenize(): Array<Token> {
        const tokens: Array<Token> = [];

        const commitIdentifier = (): void => {
            if (!this.identifier.active) return;
            tokens.push({
                type: Type.Identifier,
                value: this.identifier.value,
                position: this.identifier.start.withLength(this.identifier.value.length)
            });
            // Fresh copy, consistent with the constructor, so the shared
            // `none` instance is never the live state.
            this.identifier = IdentifierState.none.duplicate();
        };

        const commitString = (terminated: boolean): void => {
            if (!this.string.active) return;
            const value = this.string.value;
            // Opening quote plus, only when actually present, the closing one.
            const length = value.length + (terminated ? 2 : 1);
            tokens.push({
                type: Type.Literal,
                value,
                position: this.string.start.withLength(length)
            });
            // The cursor is not moved while inside a string; catch up here.
            const lineBreaks = value.split('\n').length - 1;
            if (lineBreaks > 0) {
                // Multi-line literal: continue after the text that follows
                // the last line break (plus the closing quote, if any).
                const tail = value.length - value.lastIndexOf('\n') - 1;
                this.position.addLine(lineBreaks).setColumn(tail + (terminated ? 1 : 0));
            } else {
                this.position.addColumn(length);
            }
            this.string = StringState.none.duplicate();
        };

        const commitNumber = (): void => {
            if (!this.number.active) return;
            tokens.push({
                type: Type.Literal,
                value: this.number.value,
                position: this.number.start.withLength(this.number.value.length)
            });
            this.number = NumberState.none.duplicate();
        };

        while (this.source.length > 0) {
            if (this.string.active) {
                if (this.matches('\\') && this.source.length > 1) {
                    // Escape sequence: keep the backslash and the escaped
                    // character verbatim so an escaped quote does not end
                    // the literal.
                    this.string.value += this.consume(2);
                } else if (this.matches(this.string.quote)) {
                    this.consume();
                    commitString(true);
                } else {
                    this.string.value += this.consume();
                }
            } else if (this.matches(...Parser.quotes)) {
                commitIdentifier();
                commitNumber();
                // `this.position` is mutated in place as the cursor moves, so
                // states receive a snapshot of it.
                this.string = StringState.start(this.consume(), this.position.duplicate());
            } else if (this.number.active) {
                if (this.matches('.') && !this.number.decimal) {
                    this.number.decimal = true;
                    this.number.value += this.consume();
                    this.position.addColumn(1);
                } else if (this.matches(...Parser.digits)) {
                    this.number.value += this.consume();
                    this.position.addColumn(1);
                } else {
                    // Anything else (including a second '.') ends the number;
                    // the character itself is handled on the next iteration.
                    commitNumber();
                }
            } else if (!this.identifier.active && this.matches(...Parser.digits)) {
                // A digit only starts a number when no identifier is being
                // read; otherwise it belongs to the identifier (`abc123`).
                this.number = NumberState.start(this.position.duplicate());
                this.number.value += this.consume();
                this.position.addColumn(1);
            } else if (Parser.signedNumberStart.test(this.peek(2))) {
                commitIdentifier();
                this.number = NumberState.start(this.position.duplicate());
                this.number.value += this.consume(2);
                this.position.addColumn(2);
            } else if (this.matches(...Parser.punctuation)) {
                commitIdentifier();
                commitNumber();
                tokens.push({
                    type: Type.Punctuation,
                    value: this.consume(),
                    position: this.position.withLength(1)
                });
                this.position.addColumn(1);
            } else if (this.matches(...Parser.nesting)) {
                commitIdentifier();
                commitNumber();
                tokens.push({
                    type: Type.Nesting,
                    value: this.consume(),
                    position: this.position.withLength(1)
                });
                this.position.addColumn(1);
            } else if (this.matches(...Parser.inlineWhitespace)) {
                // Tabs and carriage returns separate tokens just like spaces
                // instead of leaking into identifiers.
                commitIdentifier();
                commitNumber();
                this.consume();
                this.position.addColumn(1);
            } else if (this.matches('\n')) {
                commitIdentifier();
                commitNumber();
                this.consume();
                this.position.addLine(1).setColumn(0);
            } else {
                if (!this.identifier.active) {
                    this.identifier = IdentifierState.start(this.position.duplicate());
                }
                this.identifier.value += this.consume();
                this.position.addColumn(1);
            }
        }

        // Flush whatever was still being read when the source ran out. A
        // string that is still open here never saw its closing quote.
        commitIdentifier();
        commitString(false);
        commitNumber();
        return this.mark(tokens);
    }

    /**
     * Removes and returns the next `length` characters of the source.
     *
     * @param length Number of characters to take (fewer are returned when
     * the source is shorter).
     */
    private consume(length: number = 1): string {
        const consumed: string = this.peek(length);
        this.source = this.source.substring(length);
        return consumed;
    }

    /**
     * Returns the next `length` characters of the source without removing
     * them.
     */
    private peek(length: number = 1): string {
        return this.source.substring(0, length);
    }

    /**
     * @returns `true` when the remaining source starts with any of `values`.
     */
    private matches(...values: Array<string>): boolean {
        return values.some(value => this.source.startsWith(value));
    }

    /**
     * Re-types identifier tokens that are keywords or literal words.
     *
     * Tokens are updated in place; only tokens whose type is
     * `Type.Identifier` are ever changed.
     *
     * @param tokens Tokens produced by the scanning loop.
     * @returns A new array holding the same (updated) token objects.
     */
    private mark(tokens: Array<Token>): Array<Token> {
        for (const token of tokens) {
            if (token.type !== Type.Identifier) continue;
            if (Parser.keywords.has(token.value)) {
                token.type = Type.Keyword;
            } else if (Parser.literalWords.has(token.value)) {
                token.type = Type.Literal;
            }
        }
        return [...tokens];
    }
}