HTML+Util: Move ASCII code point functions into separate class
This commit is contained in:
parent
b128e00ad4
commit
25145df31d
2 changed files with 55 additions and 29 deletions
|
@ -1,4 +1,5 @@
|
|||
import { TODO, VERIFY, VERIFY_NOT_REACHED } from '../util/assertions.js';
|
||||
import { CodePoints } from '../util/codePoints.js';
|
||||
import { Constructor } from '../util/guards.js';
|
||||
import { ParseError } from './errors.js';
|
||||
import { Attribute } from './tokenizer/attribute.js';
|
||||
|
@ -124,7 +125,7 @@ export class Tokenizer {
|
|||
break;
|
||||
default: {
|
||||
// ASCII alpha
|
||||
if (this.asciiAlpha(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIiAlpha(this.currentInputCharacter)) {
|
||||
// Create a new start tag token, set its tag name to the empty string.
|
||||
this.create(StartTagToken.createEmpty().startingAt(this.currentPosition));
|
||||
|
||||
|
@ -175,7 +176,7 @@ export class Tokenizer {
|
|||
break;
|
||||
default: {
|
||||
// ASCII alpha
|
||||
if (this.asciiAlpha(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIiAlpha(this.currentInputCharacter)) {
|
||||
// Create a new end tag token, set its tag name to the empty string.
|
||||
this.create(EndTagToken.createEmpty().startingAt(this.currentPosition));
|
||||
|
||||
|
@ -341,7 +342,7 @@ export class Tokenizer {
|
|||
break;
|
||||
default: {
|
||||
// ASCII upper alpha
|
||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIUpperAlpha(this.currentInputCharacter)) {
|
||||
// Create a new DOCTYPE token. Set the token's name to the lowercase version of the current
|
||||
// input character (add 0x0020 to the character's code point).
|
||||
this.create(DOCTYPEToken.createWithName(this.currentInputCharacter.toLowerCase()).startingAt(this.currentPosition));
|
||||
|
@ -409,7 +410,7 @@ export class Tokenizer {
|
|||
break;
|
||||
default: {
|
||||
// ASCII upper alpha
|
||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIUpperAlpha(this.currentInputCharacter)) {
|
||||
// Append the lowercase version of the current input character (add 0x0020 to the character's
|
||||
// code point) to the current DOCTYPE token's name.
|
||||
this.currentOfType(DOCTYPEToken).appendToName(this.currentInputCharacter.toLowerCase());
|
||||
|
@ -465,7 +466,7 @@ export class Tokenizer {
|
|||
this.emit(EndOfFileToken.create()); break;
|
||||
default: {
|
||||
// ASCII upper alpha
|
||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIUpperAlpha(this.currentInputCharacter)) {
|
||||
// Append the lowercase version of the current input character (add 0x0020 to the character's
|
||||
// code point) to the current tag token's tag name.
|
||||
this.currentOfEitherType(StartTagToken, EndTagToken).appendToName(this.currentInputCharacter.toLowerCase());
|
||||
|
@ -579,7 +580,7 @@ export class Tokenizer {
|
|||
break;
|
||||
default: {
|
||||
// ASCII upper alpha
|
||||
if (this.asciiUpperAlpha(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIUpperAlpha(this.currentInputCharacter)) {
|
||||
// Append the lowercase version of the current input character (add 0x0020 to the character's
|
||||
// code point) to the current attribute's name.
|
||||
this.currentOfEitherType(StartTagToken, EndTagToken).attributes.current.appendToName(this.currentInputCharacter.toLowerCase());
|
||||
|
@ -1086,7 +1087,7 @@ export class Tokenizer {
|
|||
break;
|
||||
default: {
|
||||
// ASCII alphanumeric
|
||||
if (this.asciiAlphanumeric(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIAlphanumeric(this.currentInputCharacter)) {
|
||||
// Reconsume in the named character reference state.
|
||||
this.reconsumeIn(State.NamedCharacterReference);
|
||||
break;
|
||||
|
@ -1123,7 +1124,7 @@ export class Tokenizer {
|
|||
// If the character reference was consumed as part of an attribute, and the last character matched
|
||||
// is not a U+003B SEMICOLON character (;), and the next input character is either a U+003D
|
||||
// EQUALS SIGN character (=) or an ASCII alphanumeric, then, for historical reasons,
|
||||
if (this.consumedAsPartOfAnAttribute() && entry[entry.length - 1] !== '\u003B' && (this.next() === '\u003D' || this.asciiAlphanumeric(this.next() ?? ''))) {
|
||||
if (this.consumedAsPartOfAnAttribute() && entry[entry.length - 1] !== '\u003B' && (this.next() === '\u003D' || CodePoints.ASCIIAlphanumeric(this.next() ?? ''))) {
|
||||
// flush code points consumed as a character reference and
|
||||
this.flushCodePointsConsumedAsCharacterReference();
|
||||
|
||||
|
@ -1179,7 +1180,7 @@ export class Tokenizer {
|
|||
this.reconsumeIn(this.returnState); break;
|
||||
default: {
|
||||
// ASCII alphanumeric
|
||||
if (this.asciiAlphanumeric(this.currentInputCharacter)) {
|
||||
if (CodePoints.ASCIIAlphanumeric(this.currentInputCharacter)) {
|
||||
// If the character reference was consumed as part of an attribute,
|
||||
if (this.consumedAsPartOfAnAttribute()) {
|
||||
// then append the current input character to the current attribute's value.
|
||||
|
@ -1221,26 +1222,6 @@ export class Tokenizer {
|
|||
return this.returnState === State.AttributeValueDouble || this.returnState === State.AttributeValueSingle || this.returnState === State.AttributeValueUnquoted;
|
||||
}
|
||||
|
||||
private asciiAlphanumeric(input: string): boolean {
|
||||
return this.asciiAlpha(input) || this.asciiDigit(input);
|
||||
}
|
||||
|
||||
private asciiAlpha(input: string): boolean {
|
||||
return this.asciiUpperAlpha(input) || this.asciiLowerAlpha(input);
|
||||
}
|
||||
|
||||
private asciiUpperAlpha(input: string): boolean {
|
||||
return /[\u0041-\u005A]/.test(input);
|
||||
}
|
||||
|
||||
private asciiLowerAlpha(input: string): boolean {
|
||||
return /[\u0061-\u007A]/.test(input);
|
||||
}
|
||||
|
||||
private asciiDigit(input: string): boolean {
|
||||
return /[\u0030-\u0030]/.test(input);
|
||||
}
|
||||
|
||||
private reconsumeIn(state: State): void {
|
||||
this.pointer--;
|
||||
|
||||
|
|
45
src/util/codePoints.ts
Normal file
45
src/util/codePoints.ts
Normal file
|
@ -0,0 +1,45 @@
|
|||
// 4.5. Code points https://infra.spec.whatwg.org/#code-points
|
||||
/**
 * Predicates for the ASCII code point classes defined in the WHATWG Infra
 * Standard, section 4.5 "Code points".
 *
 * Every predicate except {@link CodePoints.ASCIIString} answers a question
 * about a single code point, so `input` must be a string holding exactly one
 * character. Any other input (the empty string, or a longer string) yields
 * `false` rather than being searched for a matching character.
 *
 * The methods reference the class by name instead of `this`, so they can be
 * passed around as plain callbacks (e.g. `chars.every(CodePoints.ASCIIDigit)`).
 */
export class CodePoints {
  /**
   * Tests whether `input` is exactly one UTF-16 code unit whose value lies in
   * the inclusive range `first`..`last`.
   *
   * All ASCII code points occupy a single code unit, so a length check is
   * enough to reject empty and multi-character strings.
   */
  private static inRange(input: string, first: number, last: number): boolean {
    if (input.length !== 1) {
      return false;
    }
    const code = input.charCodeAt(0);
    return code >= first && code <= last;
  }

  /**
   * An ASCII code point is a code point in the range U+0000 NULL to
   * U+007F DELETE, inclusive.
   *
   * @see https://infra.spec.whatwg.org/#ascii-code-point
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one ASCII code point, otherwise `false`.
   */
  public static ASCIICodePoint(input: string): boolean {
    return CodePoints.inRange(input, 0x0000, 0x007f);
  }

  /**
   * An ASCII alphanumeric is an ASCII digit or ASCII alpha.
   *
   * @see https://infra.spec.whatwg.org/#ascii-alphanumeric
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one ASCII letter or digit, otherwise `false`.
   */
  public static ASCIIAlphanumeric(input: string): boolean {
    return CodePoints.ASCIIAlpha(input) || CodePoints.ASCIIDigit(input);
  }

  /**
   * An ASCII alpha is an ASCII upper alpha or ASCII lower alpha.
   *
   * @see https://infra.spec.whatwg.org/#ascii-alpha
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one ASCII letter, otherwise `false`.
   */
  public static ASCIIAlpha(input: string): boolean {
    return CodePoints.ASCIIUpperAlpha(input) || CodePoints.ASCIILowerAlpha(input);
  }

  /**
   * Misspelled alias of {@link CodePoints.ASCIIAlpha}, kept so existing
   * callers continue to compile.
   *
   * @deprecated Use {@link CodePoints.ASCIIAlpha} instead.
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one ASCII letter, otherwise `false`.
   */
  public static ASCIIiAlpha(input: string): boolean {
    return CodePoints.ASCIIAlpha(input);
  }

  /**
   * An ASCII upper alpha is a code point in the range U+0041 (A) to
   * U+005A (Z), inclusive.
   *
   * @see https://infra.spec.whatwg.org/#ascii-upper-alpha
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one uppercase ASCII letter, otherwise `false`.
   */
  public static ASCIIUpperAlpha(input: string): boolean {
    return CodePoints.inRange(input, 0x0041, 0x005a);
  }

  /**
   * An ASCII lower alpha is a code point in the range U+0061 (a) to
   * U+007A (z), inclusive.
   *
   * @see https://infra.spec.whatwg.org/#ascii-lower-alpha
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one lowercase ASCII letter, otherwise `false`.
   */
  public static ASCIILowerAlpha(input: string): boolean {
    return CodePoints.inRange(input, 0x0061, 0x007a);
  }

  /**
   * An ASCII digit is a code point in the range U+0030 (0) to U+0039 (9),
   * inclusive.
   *
   * @see https://infra.spec.whatwg.org/#ascii-digit
   * @param input A string expected to hold a single code point.
   * @returns `true` if `input` is one ASCII digit, otherwise `false`.
   */
  public static ASCIIDigit(input: string): boolean {
    return CodePoints.inRange(input, 0x0030, 0x0039);
  }

  /**
   * An ASCII string is a string whose code points are all ASCII code points.
   *
   * @see https://infra.spec.whatwg.org/#ascii-string
   * @param input The string to inspect; may be of any length.
   * @returns `true` if every character of `input` is ASCII. The empty string
   *          is vacuously an ASCII string.
   */
  public static ASCIIString(input: string): boolean {
    // Scanning code units is sufficient: any non-ASCII code point, including
    // each half of a surrogate pair, has a code unit value above U+007F.
    for (let index = 0; index < input.length; index++) {
      if (input.charCodeAt(index) > 0x007f) {
        return false;
      }
    }
    return true;
  }
}
|
Loading…
Reference in a new issue