37 lines
1.2 KiB
TypeScript
37 lines
1.2 KiB
TypeScript
import { Highlighter } from './html/highlighter.js';
|
|
import { Span } from './html/highlighter/span.js';
|
|
import { Tokenizer } from './html/tokenizer.js';
|
|
import { Token, Type } from './html/tokenizer/token.js';
|
|
|
|
/**
 * Converts every CRLF pair and every lone CR in `input` into a single LF.
 *
 * @param input - text whose line endings should be unified
 * @returns a copy of `input` that contains no U+000D characters
 */
export function normalizeNewlines(input: string): string {
  const lineFeed = '\u000A';

  // One pass: a CR swallows an immediately following LF (the CRLF case),
  // otherwise the CR is replaced on its own.
  return input.replace(/\u000D\u000A?/g, lineFeed);
}
|
|
|
|
/**
 * Drives a `Tokenizer` over `input` until the last token it has produced is
 * an end-of-file token.
 *
 * The run is timed with `console.time`/`console.timeEnd` under the label
 * `'html tokenizer'`.
 *
 * @param input - text handed to the `Tokenizer` constructor
 * @returns the tokenizer's own `tokens` array (not a copy)
 */
export function tokenize(input: string): Array<Token> {
  console.time('html tokenizer');

  try {
    const tokenizer = new Tokenizer(input);

    // FIXME: This completely ignores any state changes set by the tree construction
    //        stage - as well, there is no tree construction stage.
    //        See https://html.spec.whatwg.org/multipage/parsing.html#tokenization
    while (tokenizer.tokens[tokenizer.tokens.length - 1]?.type !== Type.EndOfFile) {
      tokenizer.spin();
    }

    return tokenizer.tokens;
  } finally {
    // Stop the timer even when construction or spin() throws, so the label
    // is not left running for the next call.
    console.timeEnd('html tokenizer');
  }
}
|
|
|
|
/**
 * Drives a `Highlighter` over `tokens` until it reports `finished`.
 *
 * Only the spin loop is timed (label `'html highlighter'`); the highlighter
 * is constructed before the timer starts.
 *
 * @param tokens - tokens handed to the `Highlighter` constructor
 * @returns the highlighter's own `spans` array (not a copy)
 */
export function highlight(tokens: Array<Token>): Array<Span> {
  const highlighter = new Highlighter(tokens);

  console.time('html highlighter');

  try {
    while (!highlighter.finished) {
      highlighter.spin();
    }
  } finally {
    // Stop the timer even when spin() throws, so the label is not left
    // running for the next call.
    console.timeEnd('html highlighter');
  }

  return highlighter.spans;
}
|