Refactor routing in App component to enhance navigation and improve error handling by integrating dynamic routes and updating the NotFound route.
This commit is contained in:
81
node_modules/entities/src/decode-codepoint.ts
generated
vendored
Normal file
81
node_modules/entities/src/decode-codepoint.ts
generated
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
|
||||
|
||||
/**
 * Lookup table of numeric character references that are substituted with a
 * different code point: NUL becomes U+FFFD, and a subset of the code points
 * in the 0x80–0x9F range is redirected to the values listed here. Code points
 * without an entry are absent from the map.
 */
const decodeMap = new Map<number, number>([
    [0x00, 0xff_fd],
    // C1 Unicode control character reference replacements
    [0x80, 0x20_ac],
    [0x82, 0x20_1a],
    [0x83, 0x01_92],
    [0x84, 0x20_1e],
    [0x85, 0x20_26],
    [0x86, 0x20_20],
    [0x87, 0x20_21],
    [0x88, 0x02_c6],
    [0x89, 0x20_30],
    [0x8a, 0x01_60],
    [0x8b, 0x20_39],
    [0x8c, 0x01_52],
    [0x8e, 0x01_7d],
    [0x91, 0x20_18],
    [0x92, 0x20_19],
    [0x93, 0x20_1c],
    [0x94, 0x20_1d],
    [0x95, 0x20_22],
    [0x96, 0x20_13],
    [0x97, 0x20_14],
    [0x98, 0x02_dc],
    [0x99, 0x21_22],
    [0x9a, 0x01_61],
    [0x9b, 0x20_3a],
    [0x9c, 0x01_53],
    [0x9e, 0x01_7e],
    [0x9f, 0x01_78],
]);
|
||||
|
||||
/**
 * Fallback used when the runtime does not provide `String.fromCodePoint`.
 *
 * Every argument is converted in turn: code points above 0xFFFF are split
 * into a UTF-16 surrogate pair, all others are handed to
 * `String.fromCharCode` directly. Calling it without arguments yields the
 * empty string.
 *
 * @param codePoints The code points to convert.
 * @returns The concatenated string for all given code points.
 */
function fromCodePointFallback(...codePoints: number[]): string {
    let output = "";

    for (let codePoint of codePoints) {
        if (codePoint > 0xff_ff) {
            // Encode astral code points as a high/low surrogate pair.
            codePoint -= 0x1_00_00;
            output += String.fromCharCode(
                ((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
            );
            codePoint = 0xdc_00 | (codePoint & 0x3_ff);
        }

        output += String.fromCharCode(codePoint);
    }

    return output;
}

/**
 * Polyfill for `String.fromCodePoint`. It is used to create a string from
 * one or more Unicode code points.
 *
 * The native implementation is used when it exists; otherwise a variadic
 * fallback with the same call signature is used.
 */
export const fromCodePoint: (...codePoints: number[]) => string =
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
    String.fromCodePoint ?? fromCodePointFallback;
|
||||
|
||||
/**
 * Sanitise a code point taken from a numeric character reference.
 *
 * Surrogates (0xD800–0xDFFF) and values above 0x10FFFF become U+FFFD. Any
 * other value is looked up in `decodeMap`; when the map has no entry the
 * code point is returned as-is.
 *
 * @param codePoint The code point to check.
 * @returns The code point to use in its place.
 */
export function replaceCodePoint(codePoint: number): number {
    const isSurrogate = codePoint >= 0xd8_00 && codePoint <= 0xdf_ff;
    const isBeyondUnicode = codePoint > 0x10_ff_ff;

    if (isSurrogate || isBeyondUnicode) {
        return 0xff_fd;
    }

    const mapped = decodeMap.get(codePoint);
    return mapped === undefined ? codePoint : mapped;
}
|
||||
|
||||
/**
 * Run a code point through `replaceCodePoint` and turn the result into a
 * string with `fromCodePoint`.
 *
 * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
 * @param codePoint The code point to decode.
 * @returns The string for the (possibly replaced) code point.
 */
export function decodeCodePoint(codePoint: number): string {
    const sanitized = replaceCodePoint(codePoint);
    return fromCodePoint(sanitized);
}
|
320
node_modules/entities/src/decode.spec.ts
generated
vendored
Normal file
320
node_modules/entities/src/decode.spec.ts
generated
vendored
Normal file
@@ -0,0 +1,320 @@
|
||||
import { describe, it, expect, vitest } from "vitest";
|
||||
import * as entities from "./decode.js";
|
||||
|
||||
describe("Decode test", () => {
    /*
     * NOTE(review): the string literals in this suite were reconstructed.
     * The copy this was taken from had its entities already decoded (inputs
     * such as "7" for a test about numeric entities, and repeated identical
     * rows), which left most cases vacuous. Confirm the literals against the
     * upstream `entities` test suite.
     */
    const testcases = [
        // Double-encoded input must only be decoded by one level.
        { input: "&amp;amp;", output: "&amp;" },
        { input: "&amp;#38;", output: "&#38;" },
        { input: "&amp;#x26;", output: "&#x26;" },
        { input: "&amp;#X26;", output: "&#X26;" },
        { input: "&#38;#38;", output: "&#38;" },
        { input: "&#x26;#38;", output: "&#38;" },
        { input: "&#X26;#38;", output: "&#38;" },
        // Hex references, in every combination of upper and lower case.
        { input: "&#x3a;", output: ":" },
        { input: "&#x3A;", output: ":" },
        { input: "&#X3a;", output: ":" },
        { input: "&#X3A;", output: ":" },
        // Things that look like the start of an entity but are not.
        { input: "&#", output: "&#" },
        { input: "&>", output: "&>" },
        { input: "id=770&#anchor", output: "id=770&#anchor" },
    ];

    for (const { input, output } of testcases) {
        it(`should XML decode ${input}`, () =>
            expect(entities.decodeXML(input)).toBe(output));
        it(`should HTML decode ${input}`, () =>
            expect(entities.decodeHTML(input)).toBe(output));
    }

    it("should HTML decode partial legacy entity", () => {
        expect(entities.decodeHTMLStrict("&timesbar")).toBe("&timesbar");
        expect(entities.decodeHTML("&timesbar")).toBe("\u00D7bar");
    });

    it("should HTML decode legacy entities according to spec", () =>
        expect(entities.decodeHTML("?&image_uri=1&image;=2&image=3")).toBe(
            "?&image_uri=1&\u2111=2&image=3",
        ));

    it("should back out of legacy entities", () =>
        expect(entities.decodeHTML("&ampa")).toBe("&a"));

    it("should not parse numeric entities in strict mode", () =>
        expect(entities.decodeHTMLStrict("&#55")).toBe("&#55"));

    it("should parse &nbsp; followed by < (#852)", () =>
        expect(entities.decodeHTML("&nbsp;<")).toBe("\u00A0<"));

    it("should decode trailing legacy entities", () => {
        expect(entities.decodeHTML("&timesbar;&timesbar")).toBe(
            "\u2A31\u00D7bar",
        );
    });

    it("should decode multi-byte entities", () => {
        expect(entities.decodeHTML("&NotGreaterFullEqual;")).toBe(
            "\u2267\u0338",
        );
    });

    it("should not decode legacy entities followed by text in attribute mode", () => {
        // A legacy entity at the very end of an attribute is still decoded.
        expect(
            entities.decodeHTML("&not", entities.DecodingMode.Attribute),
        ).toBe("\u00AC");

        // Followed by an alphanumeric character or "=", it is left alone.
        expect(
            entities.decodeHTML("&noti", entities.DecodingMode.Attribute),
        ).toBe("&noti");

        expect(
            entities.decodeHTML("&not=", entities.DecodingMode.Attribute),
        ).toBe("&not=");

        expect(entities.decodeHTMLAttribute("&notp")).toBe("&notp");
        expect(entities.decodeHTMLAttribute("&notP")).toBe("&notP");
        expect(entities.decodeHTMLAttribute("&not3")).toBe("&not3");
    });
});
|
||||
|
||||
describe("EntityDecoder", () => {
    /*
     * NOTE(review): inputs that contain entities were restored to their
     * encoded spelling. The copy this was taken from had them decoded (for
     * example `write("", 1)` followed by `write("8;", 0)` expecting 5
     * consumed characters); the spellings used below are the ones implied by
     * the asserted code points and consumed counts.
     */
    it("should decode decimal entities", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        // The entity is split across two writes; the "&" is skipped via the offset.
        expect(decoder.write("&#5", 1)).toBe(-1);
        expect(decoder.write("8;", 0)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 5);
    });

    it("should decode hex entities", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        expect(decoder.write("&#x3a;", 1)).toBe(6);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
    });

    it("should decode named entities", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        expect(decoder.write("&amp;", 1)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
    });

    it("should decode legacy entities", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );
        decoder.startEntity(entities.DecodingMode.Legacy);

        // Without a semicolon the decoder has to wait for more input...
        expect(decoder.write("&amp", 1)).toBe(-1);

        expect(callback).toHaveBeenCalledTimes(0);

        // ...and only emits once the end of the input is signalled.
        expect(decoder.end()).toBe(4);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 4);
    });

    it("should decode named entity written character by character", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        for (const c of "amp") {
            expect(decoder.write(c, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
    });

    it("should decode numeric entity written character by character", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        for (const c of "#x3a") {
            expect(decoder.write(c, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(6);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
    });

    it("should decode hex entities across several chunks", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        for (const chunk of ["#x", "cf", "ff", "d"]) {
            expect(decoder.write(chunk, 0)).toBe(-1);
        }

        expect(decoder.write(";", 0)).toBe(9);
        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(0xc_ff_fd, 9);
    });

    it("should not fail if nothing is written", () => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
        );

        expect(decoder.end()).toBe(0);
        expect(callback).toHaveBeenCalledTimes(0);
    });

    describe("errors", () => {
        it("should produce an error for a named entity without a semicolon", () => {
            const errorHandlers = {
                missingSemicolonAfterCharacterReference: vitest.fn(),
                absenceOfDigitsInNumericCharacterReference: vitest.fn(),
                validateNumericCharacterReference: vitest.fn(),
            };
            const callback = vitest.fn();
            const decoder = new entities.EntityDecoder(
                entities.htmlDecodeTree,
                callback,
                errorHandlers,
            );

            // A properly terminated entity produces no error.
            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&amp;", 1)).toBe(5);
            expect(callback).toHaveBeenCalledTimes(1);
            expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);

            // The same entity without the semicolon does.
            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&amp", 1)).toBe(-1);
            expect(decoder.end()).toBe(4);

            expect(callback).toHaveBeenCalledTimes(2);
            expect(callback).toHaveBeenLastCalledWith("&".charCodeAt(0), 4);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(1);
        });

        it("should produce an error for a numeric entity without a semicolon", () => {
            const errorHandlers = {
                missingSemicolonAfterCharacterReference: vitest.fn(),
                absenceOfDigitsInNumericCharacterReference: vitest.fn(),
                validateNumericCharacterReference: vitest.fn(),
            };
            const callback = vitest.fn();
            const decoder = new entities.EntityDecoder(
                entities.htmlDecodeTree,
                callback,
                errorHandlers,
            );

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#x3a", 1)).toBe(-1);
            expect(decoder.end()).toBe(5);

            expect(callback).toHaveBeenCalledTimes(1);
            expect(callback).toHaveBeenCalledWith(0x3a, 5);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledWith(0x3a);
        });

        it("should produce an error for numeric entities without digits", () => {
            const errorHandlers = {
                missingSemicolonAfterCharacterReference: vitest.fn(),
                absenceOfDigitsInNumericCharacterReference: vitest.fn(),
                validateNumericCharacterReference: vitest.fn(),
            };
            const callback = vitest.fn();
            const decoder = new entities.EntityDecoder(
                entities.htmlDecodeTree,
                callback,
                errorHandlers,
            );

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#", 1)).toBe(-1);
            expect(decoder.end()).toBe(0);

            expect(callback).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledWith(2);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
        });

        it("should produce an error for hex entities without digits", () => {
            const errorHandlers = {
                missingSemicolonAfterCharacterReference: vitest.fn(),
                absenceOfDigitsInNumericCharacterReference: vitest.fn(),
                validateNumericCharacterReference: vitest.fn(),
            };
            const callback = vitest.fn();
            const decoder = new entities.EntityDecoder(
                entities.htmlDecodeTree,
                callback,
                errorHandlers,
            );

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#x", 1)).toBe(-1);
            expect(decoder.end()).toBe(0);

            expect(callback).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
        });
    });
});
|
620
node_modules/entities/src/decode.ts
generated
vendored
Normal file
620
node_modules/entities/src/decode.ts
generated
vendored
Normal file
@@ -0,0 +1,620 @@
|
||||
import { htmlDecodeTree } from "./generated/decode-data-html.js";
|
||||
import { xmlDecodeTree } from "./generated/decode-data-xml.js";
|
||||
import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
|
||||
|
||||
/** Character codes of the ASCII characters the decoder compares against. */
const enum CharCodes {
    // Punctuation
    NUM = 35, // "#"
    SEMI = 59, // ";"
    EQUALS = 61, // "="

    // Digits
    ZERO = 48, // "0"
    NINE = 57, // "9"

    // Upper case letters
    UPPER_A = 65, // "A"
    UPPER_F = 70, // "F"
    UPPER_Z = 90, // "Z"

    // Lower case letters
    LOWER_A = 97, // "a"
    LOWER_F = 102, // "f"
    LOWER_X = 120, // "x"
    LOWER_Z = 122, // "z"
}
|
||||
|
||||
/**
 * OR-ing an upper case ASCII letter with this bit yields its lower case
 * counterpart.
 */
const TO_LOWER_BIT = 0x20;
|
||||
|
||||
/** Bit masks used to split a 16-bit decode-tree node into its fields. */
export enum BinTrieFlags {
    /** The two highest bits. */
    VALUE_LENGTH = 0xc0_00,
    /** The seven bits below `VALUE_LENGTH`. */
    BRANCH_LENGTH = 0x3f_80,
    /** The seven lowest bits. */
    JUMP_TABLE = 0x00_7f,
}
|
||||
|
||||
/**
 * Tells whether a character code is an ASCII digit ("0"–"9").
 *
 * @param code The character code to test.
 * @returns `true` for codes between `CharCodes.ZERO` and `CharCodes.NINE`, inclusive.
 */
function isNumber(code: number): boolean {
    const atLeastZero = code >= CharCodes.ZERO;
    return atLeastZero && code <= CharCodes.NINE;
}
|
||||
|
||||
/**
 * Tells whether a character code is one of the letters used as hexadecimal
 * digits ("A"–"F" or "a"–"f"). Decimal digits are not covered here.
 *
 * @param code The character code to test.
 * @returns `true` for a hexadecimal letter in either case.
 */
function isHexadecimalCharacter(code: number): boolean {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) {
        return true;
    }

    return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
|
||||
|
||||
/**
 * Tells whether a character code is an ASCII letter (either case) or an
 * ASCII digit.
 *
 * @param code The character code to test.
 * @returns `true` for "A"–"Z", "a"–"z" and "0"–"9".
 */
function isAsciiAlphaNumeric(code: number): boolean {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) {
        return true;
    }

    if (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) {
        return true;
    }

    return isNumber(code);
}
|
||||
|
||||
/**
 * Tells whether a character may NOT directly follow an unterminated entity
 * inside an attribute value.
 *
 * Entities in attribute values that aren't terminated properly aren't
 * parsed, and shouldn't lead to a parser error. See the example in
 * https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 *
 * @param code The character code that follows the entity.
 * @returns `true` for "=" and for ASCII letters and digits.
 */
function isEntityInAttributeInvalidEnd(code: number): boolean {
    if (code === CharCodes.EQUALS) {
        return true;
    }

    return isAsciiAlphaNumeric(code);
}
|
||||
|
||||
/** The states the entity decoder's `state` field can take. */
const enum EntityDecoderState {
    /** Initial state, before any character of the entity was inspected. */
    EntityStart = 0,
    /** A "#" was read. */
    NumericStart = 1,
    /** Reading the digits of a decimal numeric entity. */
    NumericDecimal = 2,
    /** Reading the digits of a hexadecimal numeric entity. */
    NumericHex = 3,
    /** Reading a named entity. */
    NamedEntity = 4,
}
|
||||
|
||||
/** Selects how entities that lack a terminating semicolon are treated. */
export enum DecodingMode {
    /** Text-node behaviour: an entity may be ended by any character. */
    Legacy = 0,

    /** Only entities that end with a semicolon are decoded. */
    Strict = 1,

    /** Attribute behaviour: some characters may not follow an entity. */
    Attribute = 2,
}
|
||||
|
||||
/**
 * Callbacks through which the decoder reports character reference errors.
 * The method names follow the character reference errors defined in the HTML
 * spec.
 */
export interface EntityErrorProducer {
    /** Reports an entity that was decoded although it lacked its semicolon. */
    missingSemicolonAfterCharacterReference(): void;

    /**
     * Reports a numeric entity that has no digits.
     *
     * @param consumedCharacters The number of characters consumed so far.
     */
    absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;

    /**
     * Receives the value of every numeric entity that is emitted.
     *
     * @param code The parsed value, before any replacement is applied.
     */
    validateNumericCharacterReference(code: number): void;
}
|
||||
|
||||
/**
 * Token decoder with support of writing partial entities.
 *
 * Call `startEntity` to reset the instance, feed the text that follows the
 * "&" through one or more `write` calls, and call `end` when the input is
 * exhausted before an entity was completed.
 */
export class EntityDecoder {
    constructor(
        /** The tree used to decode entities. */
        private readonly decodeTree: Uint16Array,
        /**
         * The function that is called when a codepoint is decoded.
         *
         * For multi-byte named entities, this will be called multiple times,
         * with the second codepoint, and the same `consumed` value.
         *
         * @param codepoint The decoded codepoint.
         * @param consumed The number of characters consumed by the decoder.
         */
        private readonly emitCodePoint: (cp: number, consumed: number) => void,
        /** An object that is used to produce errors. */
        private readonly errors?: EntityErrorProducer | undefined,
    ) {}

    /** The current state of the decoder. */
    private state = EntityDecoderState.EntityStart;
    /**
     * Characters that were consumed while parsing an entity. Starts at 1, so
     * the reported totals include the leading "&".
     */
    private consumed = 1;
    /**
     * The result of the entity.
     *
     * Either the result index of a named entity, or the codepoint of a
     * numeric entity.
     */
    private result = 0;

    /** The current index in the decode tree. */
    private treeIndex = 0;
    /** The number of characters that were consumed in excess. */
    private excess = 1;
    /** The mode in which the decoder is operating. */
    private decodeMode = DecodingMode.Strict;

    /**
     * Resets the instance to make it reusable.
     *
     * @param decodeMode The mode to use for the next entity.
     */
    startEntity(decodeMode: DecodingMode): void {
        this.decodeMode = decodeMode;
        this.state = EntityDecoderState.EntityStart;
        this.result = 0;
        this.treeIndex = 0;
        this.excess = 1;
        this.consumed = 1;
    }

    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(input: string, offset: number): number {
        switch (this.state) {
            case EntityDecoderState.EntityStart: {
                // A "#" selects the numeric path; anything else is a named entity.
                if (input.charCodeAt(offset) === CharCodes.NUM) {
                    this.state = EntityDecoderState.NumericStart;
                    this.consumed += 1;
                    return this.stateNumericStart(input, offset + 1);
                }
                this.state = EntityDecoderState.NamedEntity;
                return this.stateNamedEntity(input, offset);
            }

            case EntityDecoderState.NumericStart: {
                return this.stateNumericStart(input, offset);
            }

            case EntityDecoderState.NumericDecimal: {
                return this.stateNumericDecimal(input, offset);
            }

            case EntityDecoderState.NumericHex: {
                return this.stateNumericHex(input, offset);
            }

            case EntityDecoderState.NamedEntity: {
                return this.stateNamedEntity(input, offset);
            }
        }
    }

    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericStart(input: string, offset: number): number {
        if (offset >= input.length) {
            return -1;
        }

        // Lower-casing the character lets "x" and "X" share one comparison.
        if ((input.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
            this.state = EntityDecoderState.NumericHex;
            this.consumed += 1;
            return this.stateNumericHex(input, offset + 1);
        }

        this.state = EntityDecoderState.NumericDecimal;
        return this.stateNumericDecimal(input, offset);
    }

    /**
     * Appends the digits in `input[start, end)` to the numeric result and
     * adds their count to the number of consumed characters. Does nothing
     * when the range is empty.
     *
     * @param input The string containing the digits.
     * @param start Index of the first digit.
     * @param end Index right after the last digit.
     * @param base The base of the digits (10 or 16 for the callers in this class).
     */
    private addToNumericResult(
        input: string,
        start: number,
        end: number,
        base: number,
    ): void {
        if (start !== end) {
            const digitCount = end - start;
            // Shift the digits gathered by earlier writes, then add the new ones.
            this.result =
                this.result * base ** digitCount +
                Number.parseInt(input.slice(start, end), base);
            this.consumed += digitCount;
        }
    }

    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericHex(input: string, offset: number): number {
        const startIndex = offset;

        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char) || isHexadecimalCharacter(char)) {
                offset += 1;
            } else {
                this.addToNumericResult(input, startIndex, offset, 16);
                return this.emitNumericEntity(char, 3);
            }
        }

        // Ran out of input: keep the digits read so far and wait for more.
        this.addToNumericResult(input, startIndex, offset, 16);

        return -1;
    }

    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericDecimal(input: string, offset: number): number {
        const startIndex = offset;

        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char)) {
                offset += 1;
            } else {
                this.addToNumericResult(input, startIndex, offset, 10);
                return this.emitNumericEntity(char, 2);
            }
        }

        // Ran out of input: keep the digits read so far and wait for more.
        this.addToNumericResult(input, startIndex, offset, 10);

        return -1;
    }

    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *               entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *                       consumed. Used to validate that at least one digit
     *                       was consumed.
     * @returns The number of characters that were consumed.
     */
    private emitNumericEntity(lastCp: number, expectedLength: number): number {
        // Ensure we consumed at least one digit.
        if (this.consumed <= expectedLength) {
            this.errors?.absenceOfDigitsInNumericCharacterReference(
                this.consumed,
            );
            return 0;
        }

        // Figure out if this is a legit end of the entity
        if (lastCp === CharCodes.SEMI) {
            this.consumed += 1;
        } else if (this.decodeMode === DecodingMode.Strict) {
            return 0;
        }

        this.emitCodePoint(replaceCodePoint(this.result), this.consumed);

        if (this.errors) {
            if (lastCp !== CharCodes.SEMI) {
                this.errors.missingSemicolonAfterCharacterReference();
            }

            // The validator receives the raw value, not the replaced one.
            this.errors.validateNumericCharacterReference(this.result);
        }

        return this.consumed;
    }

    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNamedEntity(input: string, offset: number): number {
        const { decodeTree } = this;
        let current = decodeTree[this.treeIndex];
        // The mask is the number of bytes of the value, including the current byte.
        let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;

        for (; offset < input.length; offset++, this.excess++) {
            const char = input.charCodeAt(offset);

            this.treeIndex = determineBranch(
                decodeTree,
                current,
                this.treeIndex + Math.max(1, valueLength),
                char,
            );

            if (this.treeIndex < 0) {
                return this.result === 0 ||
                    // If we are parsing an attribute
                    (this.decodeMode === DecodingMode.Attribute &&
                        // We shouldn't have consumed any characters after the entity,
                        (valueLength === 0 ||
                            // And there should be no invalid characters.
                            isEntityInAttributeInvalidEnd(char)))
                    ? 0
                    : this.emitNotTerminatedNamedEntity();
            }

            current = decodeTree[this.treeIndex];
            valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;

            // If the branch is a value, store it and continue
            if (valueLength !== 0) {
                // If the entity is terminated by a semicolon, we are done.
                if (char === CharCodes.SEMI) {
                    return this.emitNamedEntityData(
                        this.treeIndex,
                        valueLength,
                        this.consumed + this.excess,
                    );
                }

                // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
                if (this.decodeMode !== DecodingMode.Strict) {
                    this.result = this.treeIndex;
                    this.consumed += this.excess;
                    this.excess = 0;
                }
            }
        }

        return -1;
    }

    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    private emitNotTerminatedNamedEntity(): number {
        const { result, decodeTree } = this;

        const valueLength =
            (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;

        this.emitNamedEntityData(result, valueLength, this.consumed);
        this.errors?.missingSemicolonAfterCharacterReference();

        return this.consumed;
    }

    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    private emitNamedEntityData(
        result: number,
        valueLength: number,
        consumed: number,
    ): number {
        const { decodeTree } = this;

        this.emitCodePoint(
            valueLength === 1
                ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
                : decodeTree[result + 1],
            consumed,
        );
        if (valueLength === 3) {
            // For multi-byte values, we need to emit the second byte.
            this.emitCodePoint(decodeTree[result + 2], consumed);
        }

        return consumed;
    }

    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end(): number {
        switch (this.state) {
            case EntityDecoderState.NamedEntity: {
                // Emit a named entity if we have one.
                return this.result !== 0 &&
                    (this.decodeMode !== DecodingMode.Attribute ||
                        this.result === this.treeIndex)
                    ? this.emitNotTerminatedNamedEntity()
                    : 0;
            }
            // Otherwise, emit a numeric entity if we have one.
            case EntityDecoderState.NumericDecimal: {
                return this.emitNumericEntity(0, 2);
            }
            case EntityDecoderState.NumericHex: {
                return this.emitNumericEntity(0, 3);
            }
            case EntityDecoderState.NumericStart: {
                this.errors?.absenceOfDigitsInNumericCharacterReference(
                    this.consumed,
                );
                return 0;
            }
            case EntityDecoderState.EntityStart: {
                // Return 0 if we have no entity.
                return 0;
            }
        }
    }
}
|
||||
|
||||
/**
 * Builds a string decoder on top of an `EntityDecoder` for the given tree.
 *
 * The returned function copies the text between entities verbatim and lets
 * the decoder handle everything that follows an "&". One decoder instance
 * and one output buffer are shared by all calls of the returned function.
 *
 * @param decodeTree The decode tree.
 * @returns A function that decodes entities in a string.
 */
function getDecoder(decodeTree: Uint16Array) {
    // Collects the output of the call that is currently in progress.
    let buffer = "";
    const decoder = new EntityDecoder(decodeTree, (codePoint) => {
        buffer += fromCodePoint(codePoint);
    });

    return function decodeWithTrie(
        input: string,
        decodeMode: DecodingMode,
    ): string {
        // Everything before this index has already been written to the buffer.
        let copiedUpTo = 0;
        let ampersandIndex = input.indexOf("&");

        while (ampersandIndex >= 0) {
            buffer += input.slice(copiedUpTo, ampersandIndex);

            decoder.startEntity(decodeMode);

            // The decoder expects the text after the "&".
            const consumed = decoder.write(input, ampersandIndex + 1);

            if (consumed < 0) {
                // The input ended inside an entity; flush what can be decoded.
                copiedUpTo = ampersandIndex + decoder.end();
                break;
            }

            copiedUpTo = ampersandIndex + consumed;

            // When nothing was consumed, resume the search after this "&".
            const searchFrom = consumed === 0 ? copiedUpTo + 1 : copiedUpTo;
            ampersandIndex = input.indexOf("&", searchFrom);
        }

        const decoded = buffer + input.slice(copiedUpTo);

        // Make sure we don't keep a reference to the final string.
        buffer = "";

        return decoded;
    };
}
|
||||
|
||||
/**
 * Looks up which branch of a trie node is taken for a character.
 *
 * The node word is split with `BinTrieFlags` into a branch count and a jump
 * offset, which select one of three layouts:
 *
 * 1. No branch count: at most one branch, whose character is the jump offset.
 * 2. Branch count and jump offset: a table starting at `nodeIndex`, indexed by
 *    `char - jumpOffset`, whose entries are stored incremented by one.
 * 3. Branch count without jump offset: `branchCount` sorted characters
 *    starting at `nodeIndex`, followed by the same number of destinations.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIndex The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
export function determineBranch(
    decodeTree: Uint16Array,
    current: number,
    nodeIndex: number,
    char: number,
): number {
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;

    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        const matchesSingleBranch = jumpOffset !== 0 && char === jumpOffset;
        if (matchesSingleBranch) {
            return nodeIndex;
        }
        return -1;
    }

    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        const slot = char - jumpOffset;
        if (slot < 0 || slot >= branchCount) {
            return -1;
        }
        return decodeTree[nodeIndex + slot] - 1;
    }

    // Case 3: Multiple branches encoded in dictionary; binary search the keys.
    let low = nodeIndex;
    let high = low + branchCount - 1;

    while (low <= high) {
        const middle = (low + high) >>> 1;
        const key = decodeTree[middle];

        if (key < char) {
            low = middle + 1;
            continue;
        }
        if (key > char) {
            high = middle - 1;
            continue;
        }

        // The destinations are stored right after the block of keys.
        return decodeTree[middle + branchCount];
    }

    return -1;
}
|
||||
|
||||
// Module-level decoders, one per decode tree, created once and reused by the
// exported `decode*` helpers.
const htmlDecoder = /* #__PURE__ */ getDecoder(htmlDecodeTree);
const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
|
||||
|
||||
/**
 * Decodes an HTML string.
 *
 * @param htmlString The string to decode.
 * @param mode The decoding mode. Defaults to `DecodingMode.Legacy`.
 * @returns The decoded string.
 */
export function decodeHTML(
    htmlString: string,
    mode: DecodingMode = DecodingMode.Legacy,
): string {
    return htmlDecoder(htmlString, mode);
}
|
||||
|
||||
/**
 * Decodes an HTML string in an attribute.
 *
 * Always decodes with `DecodingMode.Attribute`.
 *
 * @param htmlAttribute The string to decode.
 * @returns The decoded string.
 */
export function decodeHTMLAttribute(htmlAttribute: string): string {
    return htmlDecoder(htmlAttribute, DecodingMode.Attribute);
}
|
||||
|
||||
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semicolon.
 *
 * Always decodes with `DecodingMode.Strict`.
 *
 * @param htmlString The string to decode.
 * @returns The decoded string.
 */
export function decodeHTMLStrict(htmlString: string): string {
    return htmlDecoder(htmlString, DecodingMode.Strict);
}
|
||||
|
||||
/**
 * Decodes an XML string, requiring all entities to be terminated by a semicolon.
 *
 * Uses the XML decode tree and always decodes with `DecodingMode.Strict`.
 *
 * @param xmlString The string to decode.
 * @returns The decoded string.
 */
export function decodeXML(xmlString: string): string {
    return xmlDecoder(xmlString, DecodingMode.Strict);
}
|
||||
|
||||
// Re-export for use by eg. htmlparser2
|
||||
export { htmlDecodeTree } from "./generated/decode-data-html.js";
|
||||
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
|
||||
|
||||
export {
|
||||
decodeCodePoint,
|
||||
replaceCodePoint,
|
||||
fromCodePoint,
|
||||
} from "./decode-codepoint.js";
|
78
node_modules/entities/src/encode.spec.ts
generated
vendored
Normal file
78
node_modules/entities/src/encode.spec.ts
generated
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import * as entities from "./index.js";
|
||||
|
||||
describe("Encode->decode test", () => {
    // Each case lists a raw input and its expected XML and HTML encodings.
    const testcases = [
        {
            input: "asdf & ÿ ü '",
            xml: "asdf &amp; &#xff; &#xfc; &apos;",
            html: "asdf &amp; &yuml; &uuml; &apos;",
        },
        {
            input: "&#38;",
            xml: "&amp;#38;",
            html: "&amp;&num;38&semi;",
        },
    ];

    for (const { input, xml, html } of testcases) {
        const encodedXML = entities.encodeXML(input);
        it(`should XML encode ${input}`, () => expect(encodedXML).toBe(xml));
        it(`should default to XML encode ${input}`, () =>
            expect(entities.encode(input)).toBe(xml));
        it(`should XML decode ${encodedXML}`, () =>
            expect(entities.decodeXML(encodedXML)).toBe(input));
        it(`should default to XML encode ${encodedXML}`, () =>
            expect(entities.decode(encodedXML)).toBe(input));
        it(`should default strict to XML encode ${encodedXML}`, () =>
            expect(entities.decodeStrict(encodedXML)).toBe(input));

        const encodedHTML5 = entities.encodeHTML5(input);
        it(`should HTML5 encode ${input}`, () =>
            expect(encodedHTML5).toBe(html));
        it(`should HTML5 decode ${encodedHTML5}`, () =>
            expect(entities.decodeHTML(encodedHTML5)).toBe(input));
    }

    // Independent of the test cases, so registered once rather than per case.
    it("should encode emojis", () =>
        expect(entities.encodeHTML5("😄🍾🥳💥😇")).toBe(
            "&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;",
        ));

    it("should encode data URIs (issue #16)", () => {
        // NOTE(review): the original literal was lost (it read `""`); any
        // data URI exercises the round trip — confirm against upstream.
        const data = "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==";
        expect(entities.decode(entities.encode(data))).toBe(data);
    });

    it("should HTML encode all ASCII characters", () => {
        for (let index = 0; index < 128; index++) {
            const char = String.fromCharCode(index);
            const encoded = entities.encodeHTML(char);
            const decoded = entities.decodeHTML(encoded);
            expect(decoded).toBe(char);
        }
    });

    // A lone high surrogate has no entity and falls back to a numeric reference.
    it("should encode trailing parts of entities", () =>
        expect(entities.encodeHTML("\uD835")).toBe("&#xd835;"));

    it("should encode surrogate pair with first surrogate equivalent of entity, without corresponding entity", () =>
        expect(entities.encodeHTML("\u{1D4A4}")).toBe("&#x1d4a4;"));
});
|
||||
|
||||
describe("encodeNonAsciiHTML", () => {
    // `#` and `!` are valid in HTML and must be left untouched.
    it("should encode all non-ASCII characters", () =>
        expect(entities.encodeNonAsciiHTML("<test> #123! übermaßen")).toBe(
            "&lt;test&gt; #123! &uuml;berma&szlig;en",
        ));

    it("should encode emojis", () =>
        expect(entities.encodeNonAsciiHTML("😄🍾🥳💥😇")).toBe(
            "&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;",
        ));

    // Each sign is followed by the variation selector U+FE0F.
    it("should encode chars above surrogates", () =>
        expect(entities.encodeNonAsciiHTML("♒️♓️♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️")).toBe(
            "&#x2652;&#xfe0f;&#x2653;&#xfe0f;&#x2648;&#xfe0f;&#x2649;&#xfe0f;&#x264a;&#xfe0f;&#x264b;&#xfe0f;&#x264c;&#xfe0f;&#x264d;&#xfe0f;&#x264e;&#xfe0f;&#x264f;&#xfe0f;&#x2650;&#xfe0f;&#x2651;&#xfe0f;",
        ));
});
|
77
node_modules/entities/src/encode.ts
generated
vendored
Normal file
77
node_modules/entities/src/encode.ts
generated
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
import { htmlTrie } from "./generated/encode-html.js";
|
||||
import { xmlReplacer, getCodePoint } from "./escape.js";
|
||||
|
||||
// Characters replaced by `encodeHTML`: tab, newline, form feed, the ASCII
// punctuation ranges `!`-`,`, `.`, `/`, `:`-`@`, `[`-`` ` ``, `{`-`}`, and
// every UTF-16 code unit from U+0080 upward.
const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
|
||||
|
||||
/**
 * Encodes all characters in the input using HTML entities. This includes
 * characters that are valid ASCII characters in HTML documents, such as `#`.
 *
 * To get a more compact output, consider using the `encodeNonAsciiHTML`
 * function, which will only encode characters that are not valid in HTML
 * documents, as well as non-ASCII characters.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 *
 * @param input String to encode.
 * @returns The encoded string.
 */
export function encodeHTML(input: string): string {
    return encodeHTMLTrieRe(htmlReplacer, input);
}
|
||||
/**
 * Encodes all non-ASCII characters, as well as characters not valid in HTML
 * documents using HTML entities. This function will not encode characters that
 * are valid in HTML documents, such as `#`.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 *
 * @param input String to encode.
 * @returns The encoded string.
 */
export function encodeNonAsciiHTML(input: string): string {
    return encodeHTMLTrieRe(xmlReplacer, input);
}
|
||||
|
||||
/**
 * Replaces every character matched by `regExp` with an HTML entity looked up
 * in `htmlTrie`, falling back to a numeric hexadecimal reference.
 *
 * @param regExp Global regular expression selecting the characters to encode.
 *     Its `lastIndex` is advanced manually when two code units are consumed.
 * @param input String to encode.
 * @returns The encoded string.
 */
function encodeHTMLTrieRe(regExp: RegExp, input: string): string {
    let returnValue = "";
    // Index of the first input character not yet copied to the output.
    let lastIndex = 0;
    let match;

    while ((match = regExp.exec(input)) !== null) {
        const { index } = match;
        returnValue += input.substring(lastIndex, index);
        const char = input.charCodeAt(index);
        let next = htmlTrie.get(char);

        if (typeof next === "object") {
            // We are in a branch. Try to match the next char.
            if (index + 1 < input.length) {
                const nextChar = input.charCodeAt(index + 1);
                const value =
                    typeof next.n === "number"
                        ? next.n === nextChar
                            ? next.o
                            : undefined
                        : next.n.get(nextChar);

                if (value !== undefined) {
                    returnValue += value;
                    // Two code units were consumed; move the regex past the second.
                    lastIndex = regExp.lastIndex += 1;
                    continue;
                }
            }

            // No two-unit match: fall back to the node's own value, if any.
            next = next.v;
        }

        // We might have a tree node without a value; skip and use a numeric entity.
        if (next === undefined) {
            const cp = getCodePoint(input, index);
            returnValue += `&#x${cp.toString(16)};`;
            // Increase by 1 if we have a surrogate pair
            lastIndex = regExp.lastIndex += Number(cp !== char);
        } else {
            returnValue += next;
            lastIndex = index + 1;
        }
    }

    // `substring` instead of the deprecated `substr`; identical for a single
    // non-negative start index.
    return returnValue + input.substring(lastIndex);
}
|
14
node_modules/entities/src/escape.spec.ts
generated
vendored
Normal file
14
node_modules/entities/src/escape.spec.ts
generated
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import * as entities from "./index.js";
|
||||
|
||||
describe("escape HTML", () => {
    // In attribute values only `"`, `&` and U+00A0 are escaped; `<` and `>` stay.
    it("should escape HTML attribute values", () =>
        expect(entities.escapeAttribute('<a " attr > & value \u00A0!')).toBe(
            "<a &quot; attr > &amp; value &nbsp;!",
        ));

    // In text only `&`, `<`, `>` and U+00A0 are escaped; `"` stays.
    it("should escape HTML text", () =>
        expect(entities.escapeText('<a " text > & value \u00A0!')).toBe(
            '&lt;a " text &gt; &amp; value &nbsp;!',
        ));
});
|
148
node_modules/entities/src/escape.ts
generated
vendored
Normal file
148
node_modules/entities/src/escape.ts
generated
vendored
Normal file
@@ -0,0 +1,148 @@
|
||||
// Matches every character that is replaced when encoding XML: `"`, `$`, `&`,
// `'`, `<`, `>` and every UTF-16 code unit from U+0080 upward.
export const xmlReplacer: RegExp = /["$&'<>\u0080-\uFFFF]/g;
|
||||
|
||||
// Maps the char codes of the five XML special characters to their named entities.
const xmlCodeMap = new Map([
    [34, "&quot;"],
    [38, "&amp;"],
    [39, "&apos;"],
    [60, "&lt;"],
    [62, "&gt;"],
]);
|
||||
|
||||
/**
 * Returns the Unicode code point that starts at `index` in the string.
 *
 * Uses the native `String.prototype.codePointAt` when it exists. For
 * compatibility with node < 4, it otherwise combines a surrogate pair by hand.
 */
export const getCodePoint: (c: string, index: number) => number =
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    String.prototype.codePointAt == null
        ? // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          (c: string, index: number): number =>
              (c.charCodeAt(index) & 0xfc_00) === 0xd8_00
                  ? (c.charCodeAt(index) - 0xd8_00) * 0x4_00 +
                    c.charCodeAt(index + 1) -
                    0xdc_00 +
                    0x1_00_00
                  : c.charCodeAt(index)
        : (input: string, index: number): number => input.codePointAt(index)!;
|
||||
|
||||
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using XML entities.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
 *
 * @param input String to encode.
 * @returns The encoded string.
 */
export function encodeXML(input: string): string {
    let returnValue = "";
    // Index of the first input character not yet copied to the output.
    let lastIndex = 0;
    let match;

    while ((match = xmlReplacer.exec(input)) !== null) {
        const { index } = match;
        const char = input.charCodeAt(index);
        const next = xmlCodeMap.get(char);

        if (next === undefined) {
            returnValue += `${input.substring(lastIndex, index)}&#x${getCodePoint(
                input,
                index,
            ).toString(16)};`;
            // Increase by 1 if we have a surrogate pair
            lastIndex = xmlReplacer.lastIndex += Number(
                (char & 0xfc_00) === 0xd8_00,
            );
        } else {
            returnValue += input.substring(lastIndex, index) + next;
            lastIndex = index + 1;
        }
    }

    // `substring` instead of the deprecated `substr`; identical for a single
    // non-negative start index.
    return returnValue + input.substring(lastIndex);
}
|
||||
|
||||
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using numeric hexadecimal reference (eg. `&#xfc;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the expense
 * of reduced transportability.
 *
 * This is an alias of `encodeXML`.
 *
 * @param data String to escape.
 */
export const escape: typeof encodeXML = encodeXML;
|
||||
|
||||
/**
 * Creates a function that escapes all characters matched by the given regular
 * expression using the given map of characters to escape to their entities.
 *
 * @param regex Regular expression to match characters to escape. The loop
 *     below relies on `exec` advancing, so it must carry the `g` flag, and
 *     every match must be exactly one UTF-16 code unit long.
 * @param map Map of characters to escape to their entities, keyed by char
 *     code. It must contain an entry for every character `regex` can match.
 *
 * @returns Function that escapes all characters matched by the given regular
 * expression using the given map of characters to escape to their entities.
 */
function getEscaper(
    regex: RegExp,
    map: Map<number, string>,
): (data: string) => string {
    return function escape(data: string): string {
        let match;
        // Index of the first input character not yet copied to the output.
        let lastIndex = 0;
        let result = "";

        while ((match = regex.exec(data))) {
            if (lastIndex !== match.index) {
                result += data.substring(lastIndex, match.index);
            }

            // We know that this character will be in the map.
            result += map.get(match[0].charCodeAt(0))!;

            // Every match will be of length 1
            lastIndex = match.index + 1;
        }

        return result + data.substring(lastIndex);
    };
}
|
||||
|
||||
/**
 * Encodes all characters not valid in XML documents using XML entities.
 *
 * Only `"`, `&`, `'`, `<` and `>` are replaced; all other characters are
 * kept as-is. Note that the output will be character-set dependent.
 *
 * @param data String to escape.
 */
export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
    /["&'<>]/g,
    xmlCodeMap,
);
|
||||
|
||||
/**
 * Encodes all characters that have to be escaped in HTML attributes,
 * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
 *
 * Replaces `"`, `&` and the no-break space (U+00A0).
 *
 * @param data String to escape.
 */
export const escapeAttribute: (data: string) => string =
    /* #__PURE__ */ getEscaper(
        /["&\u00A0]/g,
        new Map([
            [34, "&quot;"],
            [38, "&amp;"],
            [160, "&nbsp;"],
        ]),
    );
|
||||
|
||||
/**
 * Encodes all characters that have to be escaped in HTML text,
 * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
 *
 * Replaces `&`, `<`, `>` and the no-break space (U+00A0).
 *
 * @param data String to escape.
 */
export const escapeText: (data: string) => string = /* #__PURE__ */ getEscaper(
    /[&<>\u00A0]/g,
    new Map([
        [38, "&amp;"],
        [60, "&lt;"],
        [62, "&gt;"],
        [160, "&nbsp;"],
    ]),
);
|
10
node_modules/entities/src/generated/.eslintrc.json
generated
vendored
Normal file
10
node_modules/entities/src/generated/.eslintrc.json
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
{
    "rules": {
        "multiline-comment-style": 0,
        "capitalized-comments": 0,
        "unicorn/escape-case": 0,
        "unicorn/no-hex-escape": 0,
        "unicorn/numeric-separators-style": 0,
        "unicorn/prefer-spread": 0
    }
}
|
8
node_modules/entities/src/generated/decode-data-html.ts
generated
vendored
Normal file
8
node_modules/entities/src/generated/decode-data-html.ts
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
8
node_modules/entities/src/generated/decode-data-xml.ts
generated
vendored
Normal file
8
node_modules/entities/src/generated/decode-data-xml.ts
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
// Generated using scripts/write-decode-map.ts
|
||||
|
||||
// The XML decode trie. It is stored as a string and expanded at load time
// into one 16-bit entry per UTF-16 code unit.
export const xmlDecodeTree: Uint16Array = /* #__PURE__ */ new Uint16Array(
    // prettier-ignore
    /* #__PURE__ */ "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
        .split("")
        .map((c) => c.charCodeAt(0)),
);
|
17
node_modules/entities/src/generated/encode-html.ts
generated
vendored
Normal file
17
node_modules/entities/src/generated/encode-html.ts
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
125
node_modules/entities/src/index.spec.ts
generated
vendored
Normal file
125
node_modules/entities/src/index.spec.ts
generated
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
import { readFileSync } from "node:fs";
|
||||
import { describe, it, expect } from "vitest";
|
||||
import * as entities from "./index.js";
|
||||
import legacy from "../maps/legacy.json" assert { type: "json" };
|
||||
|
||||
// Base names of the entity map files under `../maps`. The array index of each
// name is used as the level passed to `encode` / `decode` in the tests below.
const levels = ["xml", "entities"];
|
||||
|
||||
describe("Documents", () => {
    // Pairs of [level index, parsed entity map] for every entry in `levels`.
    const levelDocuments = levels
        .map((name) => new URL(`../maps/${name}.json`, import.meta.url))
        .map((url) => JSON.parse(readFileSync(url, "utf8")))
        .map((document, index) => [index, document]);

    for (const [level, document] of levelDocuments) {
        // Every entity of a level must also work at all higher levels.
        describe("Decode", () => {
            it(levels[level], () => {
                for (const entity of Object.keys(document)) {
                    for (let l = level; l < levels.length; l++) {
                        expect(entities.decode(`&${entity};`, l)).toBe(
                            document[entity],
                        );
                        expect(
                            entities.decode(`&${entity};`, { level: l }),
                        ).toBe(document[entity]);
                    }
                }
            });
        });

        describe("Decode strict", () => {
            it(levels[level], () => {
                for (const entity of Object.keys(document)) {
                    for (let l = level; l < levels.length; l++) {
                        expect(entities.decodeStrict(`&${entity};`, l)).toBe(
                            document[entity],
                        );
                        expect(
                            entities.decode(`&${entity};`, {
                                level: l,
                                mode: entities.DecodingMode.Strict,
                            }),
                        ).toBe(document[entity]);
                    }
                }
            });
        });

        describe("Encode", () => {
            it(levels[level], () => {
                for (const entity of Object.keys(document)) {
                    for (let l = level; l < levels.length; l++) {
                        const encoded = entities.encode(document[entity], l);
                        const decoded = entities.decode(encoded, l);
                        expect(decoded).toBe(document[entity]);
                    }
                }
            });

            // `#` is valid ASCII and stays; `'` and the emoji are encoded.
            it("should only encode non-ASCII values if asked", () =>
                expect(
                    entities.encode("Great #'s of 🎁", {
                        level,
                        mode: entities.EncodingMode.ASCII,
                    }),
                ).toBe("Great #&apos;s of &#x1f381;"));
        });
    }

    describe("Legacy", () => {
        const legacyMap: Record<string, string> = legacy;
        it("should decode", () => {
            for (const entity of Object.keys(legacyMap)) {
                // Legacy entities are written without a trailing semicolon.
                expect(entities.decodeHTML(`&${entity}`)).toBe(
                    legacyMap[entity],
                );
                expect(
                    entities.decodeStrict(`&${entity}`, {
                        level: entities.EntityLevel.HTML,
                        mode: entities.DecodingMode.Legacy,
                    }),
                ).toBe(legacyMap[entity]);
            }
        });
    });
});
|
||||
|
||||
// Pairs of [hex code point, expected string] for characters outside the BMP,
// written as surrogate pairs.
const astral = [
    ["1d306", "\uD834\uDF06"],
    ["1d11e", "\uD834\uDD1E"],
];
|
||||
|
||||
// Pairs of [hex code point, expected string] for numeric references that do
// not decode to their own code point.
const astralSpecial = [
    ["80", "\u20AC"],
    ["110000", "\uFFFD"],
];
|
||||
|
||||
describe("Astral entities", () => {
    // Astral characters round-trip through numeric hexadecimal references.
    for (const [c, value] of astral) {
        it(`should decode ${value}`, () =>
            expect(entities.decode(`&#x${c};`)).toBe(value));

        it(`should encode ${value}`, () =>
            expect(entities.encode(value)).toBe(`&#x${c};`));

        it(`should escape ${value}`, () =>
            expect(entities.escape(value)).toBe(`&#x${c};`));
    }

    // These references decode to a replacement, so only decoding is checked.
    for (const [c, value] of astralSpecial) {
        it(`should decode special \\u${c}`, () =>
            expect(entities.decode(`&#x${c};`)).toBe(value));
    }
});
|
||||
|
||||
describe("Escape", () => {
    it("should always decode ASCII chars", () => {
        for (let index = 0; index < 0x7f; index++) {
            const c = String.fromCharCode(index);
            expect(entities.decodeXML(entities.escape(c))).toBe(c);
        }
    });

    // `escapeUTF8` leaves non-ASCII characters alone and escapes only the
    // XML special characters.
    it("should keep UTF8 characters", () =>
        expect(entities.escapeUTF8('ß < "ü"')).toBe(`ß &lt; &quot;ü&quot;`));
});
|
188
node_modules/entities/src/index.ts
generated
vendored
Normal file
188
node_modules/entities/src/index.ts
generated
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
import { decodeXML, decodeHTML, DecodingMode } from "./decode.js";
|
||||
import { encodeHTML, encodeNonAsciiHTML } from "./encode.js";
|
||||
import {
|
||||
encodeXML,
|
||||
escapeUTF8,
|
||||
escapeAttribute,
|
||||
escapeText,
|
||||
} from "./escape.js";
|
||||
|
||||
/** The level of entities to support. */
export enum EntityLevel {
    /** Support only XML entities. */
    XML = 0,
    /** Support HTML entities, which are a superset of XML entities. */
    HTML = 1,
}
|
||||
|
||||
/** The output format produced by `encode`. */
export enum EncodingMode {
    /**
     * The output is UTF-8 encoded. Only characters that need escaping within
     * XML will be escaped.
     */
    UTF8,
    /**
     * The output consists only of ASCII characters. Characters that need
     * escaping within HTML, and characters that aren't ASCII characters will
     * be escaped.
     */
    ASCII,
    /**
     * Encode all characters that have an equivalent entity, as well as all
     * characters that are not ASCII characters.
     */
    Extensive,
    /**
     * Encode all characters that have to be escaped in HTML attributes,
     * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
     */
    Attribute,
    /**
     * Encode all characters that have to be escaped in HTML text,
     * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
     */
    Text,
}
|
||||
|
||||
/**
 * Options for `decode`.
 */
export interface DecodingOptions {
    /**
     * The level of entities to support.
     * @default {@link EntityLevel.XML}
     */
    level?: EntityLevel;
    /**
     * Decoding mode. If `Legacy`, will support legacy entities not terminated
     * with a semicolon (`;`).
     *
     * Always `Strict` for XML. For HTML, set this to
     * {@link DecodingMode.Attribute} if you are parsing an attribute value.
     *
     * The deprecated `decodeStrict` function defaults this to `Strict`.
     *
     * @default {@link DecodingMode.Legacy}
     */
    mode?: DecodingMode | undefined;
}
|
||||
|
||||
/**
 * Decodes a string with entities.
 *
 * @param input String to decode.
 * @param options Decoding options, or just the entity level. `mode` is only
 *     honoured for the HTML level; XML is always decoded with `decodeXML`.
 * @returns The decoded string.
 */
export function decode(
    input: string,
    options: DecodingOptions | EntityLevel = EntityLevel.XML,
): string {
    const level = typeof options === "number" ? options : options.level;

    if (level === EntityLevel.HTML) {
        // An undefined mode lets `decodeHTML` apply its own default.
        const mode = typeof options === "object" ? options.mode : undefined;
        return decodeHTML(input, mode);
    }

    return decodeXML(input);
}
|
||||
|
||||
/**
 * Decodes a string with entities. Does not allow missing trailing semicolons for entities.
 *
 * @param input String to decode.
 * @param options Decoding options, or just the entity level. An explicitly
 *     provided `mode` is respected; otherwise `Strict` is used. The passed
 *     object is never modified.
 * @returns The decoded string.
 * @deprecated Use `decode` with the `mode` set to `Strict`.
 */
export function decodeStrict(
    input: string,
    options: DecodingOptions | EntityLevel = EntityLevel.XML,
): string {
    // Copy object options so the default below never leaks into the caller's
    // object (which may be reused for later, non-strict calls).
    const normalizedOptions: DecodingOptions =
        typeof options === "number" ? { level: options } : { ...options };
    normalizedOptions.mode ??= DecodingMode.Strict;

    return decode(input, normalizedOptions);
}
|
||||
|
||||
/**
 * Options for `encode`.
 */
export interface EncodingOptions {
    /**
     * The level of entities to support.
     * @default {@link EntityLevel.XML}
     */
    level?: EntityLevel;
    /**
     * Output format.
     * @default {@link EncodingMode.Extensive}
     */
    mode?: EncodingMode;
}
|
||||
|
||||
/**
 * Encodes a string with entities.
 *
 * @param input String to encode.
 * @param options Encoding options, or just the entity level. `level` only
 *     affects the `ASCII` and `Extensive` modes; the `UTF8`, `Attribute` and
 *     `Text` modes ignore it.
 * @returns The encoded string.
 */
export function encode(
    input: string,
    options: EncodingOptions | EntityLevel = EntityLevel.XML,
): string {
    const { mode = EncodingMode.Extensive, level = EntityLevel.XML } =
        typeof options === "number" ? { level: options } : options;

    switch (mode) {
        case EncodingMode.UTF8: {
            return escapeUTF8(input);
        }
        case EncodingMode.Attribute: {
            return escapeAttribute(input);
        }
        case EncodingMode.Text: {
            return escapeText(input);
        }
        case EncodingMode.ASCII: {
            return level === EntityLevel.HTML
                ? encodeNonAsciiHTML(input)
                : encodeXML(input);
        }
        // eslint-disable-next-line unicorn/no-useless-switch-case
        case EncodingMode.Extensive:
        default: {
            return level === EntityLevel.HTML
                ? encodeHTML(input)
                : encodeXML(input);
        }
    }
}
|
||||
|
||||
export {
|
||||
encodeXML,
|
||||
escape,
|
||||
escapeUTF8,
|
||||
escapeAttribute,
|
||||
escapeText,
|
||||
} from "./escape.js";
|
||||
|
||||
export {
|
||||
encodeHTML,
|
||||
encodeNonAsciiHTML,
|
||||
// Legacy aliases (deprecated)
|
||||
encodeHTML as encodeHTML4,
|
||||
encodeHTML as encodeHTML5,
|
||||
} from "./encode.js";
|
||||
|
||||
export {
|
||||
EntityDecoder,
|
||||
DecodingMode,
|
||||
decodeXML,
|
||||
decodeHTML,
|
||||
decodeHTMLStrict,
|
||||
decodeHTMLAttribute,
|
||||
// Legacy aliases (deprecated)
|
||||
decodeHTML as decodeHTML4,
|
||||
decodeHTML as decodeHTML5,
|
||||
decodeHTMLStrict as decodeHTML4Strict,
|
||||
decodeHTMLStrict as decodeHTML5Strict,
|
||||
decodeXML as decodeXMLStrict,
|
||||
} from "./decode.js";
|
Reference in New Issue
Block a user