Refactor routing in App component to enhance navigation and improve error handling by integrating dynamic routes and updating the NotFound route.

This commit is contained in:
becarta
2025-05-23 12:43:00 +02:00
parent f40db0f5c9
commit a544759a3b
11127 changed files with 1647032 additions and 0 deletions

187
node_modules/regex/src/atomic.js generated vendored Normal file
View File

@@ -0,0 +1,187 @@
import {emulationGroupMarker} from './subclass.js';
import {noncapturingDelim, spliceStr} from './utils-internals.js';
import {Context, replaceUnescaped} from 'regex-utilities';
// Tokenizer used by `atomic`. Alternatives, in priority order: a group opening matched by the
// imported `noncapturingDelim` pattern (named group `noncapturingStart`), a capturing group
// opening `(` with an optional `?<name>` (named group `capturingStart`), or any single character
// optionally preceded by a backslash. The regex has flag g and is shared, so `atomic` assigns
// `lastIndex` before every scan.
const atomicPluginToken = new RegExp(String.raw`(?<noncapturingStart>${noncapturingDelim})|(?<capturingStart>\((?:\?<[^>]+>)?)|\\?.`, 'gsu');
/**
Apply transformations for atomic groups: `(?>…)`.
Each atomic group is rewritten as a noncapturing group that holds a lookahead wrapping a new
capturing group, followed by a backreference to that capture. Because this adds capturing groups,
existing numbered backreferences are renumbered in a second pass.
@param {string} expression
@param {import('./regex.js').PluginData} [data] If `data.useEmulationGroups` is truthy, the added
capturing group is prefixed with `emulationGroupMarker`.
@returns {string} The transformed expression; returned unchanged if it doesn't contain `(?>`.
@throws {Error} If a numbered backreference is greater than the number of captures recorded in
`captureNumMap`.
*/
function atomic(expression, data) {
// Fast path: nothing to do without an atomic group opening
if (!/\(\?>/.test(expression)) {
return expression;
}
const aGDelim = '(?>';
const emulatedAGDelim = `(?:(?=(${data?.useEmulationGroups ? emulationGroupMarker : ''}`;
// `captureNumMap[n]` is the new number for the capture originally numbered `n`; index 0 is a
// placeholder so that indices line up with capture numbers
const captureNumMap = [0];
let numCapturesBeforeAG = 0;
let numAGs = 0;
let aGPos = NaN;
let hasProcessedAG;
// Each iteration of this loop rewrites at most one atomic group, then rescans
do {
hasProcessedAG = false;
let numCharClassesOpen = 0;
let numGroupsOpenInAG = 0;
let inAG = false;
let match;
// First pass starts at 0; later passes resume just after the delimiter inserted for the
// previously processed group
atomicPluginToken.lastIndex = Number.isNaN(aGPos) ? 0 : aGPos + emulatedAGDelim.length;
while (match = atomicPluginToken.exec(expression)) {
const {0: m, index, groups: {capturingStart, noncapturingStart}} = match;
if (m === '[') {
numCharClassesOpen++;
} else if (!numCharClassesOpen) {
if (m === aGDelim && !inAG) {
aGPos = index;
inAG = true;
} else if (inAG && noncapturingStart) {
numGroupsOpenInAG++;
} else if (capturingStart) {
if (inAG) {
numGroupsOpenInAG++;
} else {
// Captures are only counted while outside the atomic group currently being scanned
numCapturesBeforeAG++;
captureNumMap.push(numCapturesBeforeAG + numAGs);
}
} else if (m === ')' && inAG) {
// A `)` with no nested groups open closes the atomic group itself
if (!numGroupsOpenInAG) {
numAGs++;
// Replace `expression` and use `<$$N>` as a temporary wrapper for the backref so it
// can avoid backref renumbering afterward. Need to wrap the whole substitution
// (including the lookahead and following backref) in a noncapturing group to handle
// following quantifiers and literal digits
expression = `${expression.slice(0, aGPos)}${emulatedAGDelim}${
expression.slice(aGPos + aGDelim.length, index)
}))<$$${numAGs + numCapturesBeforeAG}>)${expression.slice(index + 1)}`;
hasProcessedAG = true;
break;
}
numGroupsOpenInAG--;
}
} else if (m === ']') {
numCharClassesOpen--;
}
}
// Start over from the beginning of the last atomic group's contents, in case the processed group
// contains additional atomic groups
} while (hasProcessedAG);
// Second pass to adjust numbered backrefs
expression = replaceUnescaped(
expression,
String.raw`\\(?<backrefNum>[1-9]\d*)|<\$\$(?<wrappedBackrefNum>\d+)>`,
({0: m, groups: {backrefNum, wrappedBackrefNum}}) => {
if (backrefNum) {
const bNum = +backrefNum;
if (bNum > captureNumMap.length - 1) {
throw new Error(`Backref "${m}" greater than number of captures`);
}
return `\\${captureNumMap[bNum]}`;
}
// Temporary `<$$N>` wrappers already hold the final capture number
return `\\${wrappedBackrefNum}`;
},
Context.DEFAULT
);
return expression;
}
// Source for one quantifier without a lazy or possessive suffix: `?`, `*`, `+`, `{N}`, `{N,}`, or
// `{N,M}`
const baseQuantifier = String.raw`(?:[?*+]|\{\d+(?:,\d*)?\})`;
// Complete tokenizer for base syntax; doesn't (need to) know about character-class-only syntax.
// The template below is written with insignificant whitespace, which is stripped by
// `.replace(/\s+/g, '')` before compiling. For quantifier tokens, `qBase` is the base quantifier,
// `qMod` is an optional `?` or `+` suffix, and `invalidQ` is one further quantifier character
// directly following it (if any)
const possessivePluginToken = new RegExp(String.raw`
\\(?: \d+
| c[A-Za-z]
| [gk]<[^>]+>
| [pPu]\{[^\}]+\}
| u[A-Fa-f\d]{4}
| x[A-Fa-f\d]{2}
)
| \((?: \? (?: [:=!>]
| <(?:[=!]|[^>]+>)
| [A-Za-z\-]+:
| \(DEFINE\)
))?
| (?<qBase>${baseQuantifier})(?<qMod>[?+]?)(?<invalidQ>[?*+\{]?)
| \\?.
`.replace(/\s+/g, ''), 'gsu');
/**
Transform possessive quantifiers into atomic groups. The possessive quantifiers are:
`?+`, `*+`, `++`, `{N}+`, `{N,}+`, `{N,N}+`.
This follows Java, PCRE, Perl, and Python.
Possessive quantifiers in Oniguruma and Onigmo are only: `?+`, `*+`, `++`.
The quantified token (or the whole preceding group or character class) is wrapped together with
its base quantifier as `(?>…)`; fixed repetition like `{2}+` simply has its `+` removed.
@param {string} expression
@returns {string} The transformed expression; returned unchanged if no base quantifier is followed
by `+`.
@throws {Error} If a possessive quantifier is directly followed by another quantifier character,
or follows an unmatched `)` or `]`.
*/
function possessive(expression) {
// Fast path: no base quantifier followed by `+` anywhere in the expression
if (!(new RegExp(`${baseQuantifier}\\+`).test(expression))) {
return expression;
}
// Start indices of groups that are currently open (a stack)
const openGroupIndices = [];
// Start index of the most recently closed group, or `null` if its `)` was unmatched
let lastGroupIndex = null;
// Start index of the most recently opened outermost character class
let lastCharClassIndex = null;
let lastToken = '';
let numCharClassesOpen = 0;
let match;
// The tokenizer is a shared regex with flag g, so reset its position before use
possessivePluginToken.lastIndex = 0;
while (match = possessivePluginToken.exec(expression)) {
const {0: m, index, groups: {qBase, qMod, invalidQ}} = match;
if (m === '[') {
if (!numCharClassesOpen) {
lastCharClassIndex = index;
}
numCharClassesOpen++;
} else if (m === ']') {
if (numCharClassesOpen) {
numCharClassesOpen--;
// Unmatched `]`
} else {
lastCharClassIndex = null;
}
} else if (!numCharClassesOpen) {
if (qMod === '+' && lastToken && !lastToken.startsWith('(')) {
// Invalid following quantifier would become valid via the wrapping group
if (invalidQ) {
throw new Error(`Invalid quantifier "${m}"`);
}
let charsAdded = -1; // -1 for removed trailing `+`
// Possessivizing fixed repetition quantifiers like `{2}` doesn't change their behavior, so
// avoid doing so (convert them to greedy)
if (/^\{\d+\}$/.test(qBase)) {
expression = spliceStr(expression, index + qBase.length, qMod, '');
} else {
if (lastToken === ')' || lastToken === ']') {
const nodeIndex = lastToken === ')' ? lastGroupIndex : lastCharClassIndex;
// Unmatched `)` would break out of the wrapping group and mess with handling.
// Unmatched `]` wouldn't be a problem, but it's unnecessary to have dedicated support
// for unescaped `]++` since this won't work with flag u or v anyway
if (nodeIndex === null) {
throw new Error(`Invalid unmatched "${lastToken}"`);
}
expression = `${expression.slice(0, nodeIndex)}(?>${expression.slice(nodeIndex, index)}${qBase})${expression.slice(index + m.length)}`;
} else {
expression = `${expression.slice(0, index - lastToken.length)}(?>${lastToken}${qBase})${expression.slice(index + m.length)}`;
}
charsAdded += 4; // `(?>)`
}
// Keep the tokenizer position in sync with the edited expression
possessivePluginToken.lastIndex += charsAdded;
} else if (m[0] === '(') {
openGroupIndices.push(index);
} else if (m === ')') {
lastGroupIndex = openGroupIndices.length ? openGroupIndices.pop() : null;
}
}
lastToken = m;
}
return expression;
}
export {
atomic,
possessive,
};

59
node_modules/regex/src/backcompat.js generated vendored Normal file
View File

@@ -0,0 +1,59 @@
import {doublePunctuatorChars} from './utils.js';
// Characters that `backcompatPlugin` unescapes when they appear backslash-escaped inside a
// character class (escaping them is allowed there with flag v but not with flag u)
const incompatibleEscapeChars = '&!#%,:;<=>@`~';
// Tokenizer for `backcompatPlugin`. The template is written with insignificant whitespace, which
// is stripped by `.replace(/\s+/g, '')` before compiling. Alternatives, in priority order:
// character class opening (with optional `^` and `-`), character class closing (with up to two
// preceding hyphens), a doubled character from `doublePunctuatorChars` (group `dp`), `--`, an
// escaped char from `incompatibleEscapeChars` (group `vOnlyEscape`), `\p{…}`/`\P{…}`/`\u{…}`, or
// any single character optionally preceded by a backslash
const token = new RegExp(String.raw`
\[\^?-?
| --?\]
| (?<dp>[${doublePunctuatorChars}])\k<dp>
| --
| \\(?<vOnlyEscape>[${incompatibleEscapeChars}])
| \\[pPu]\{[^}]+\}
| \\?.
`.replace(/\s+/g, ''), 'gsu');
/**
Applies flag v rules when using flag u, for forward compatibility.
Assumes flag u and doesn't worry about syntax errors that are caught by it.
Inside character classes it rejects syntax that flag v reserves (nested classes, set operators,
double punctuators, unescaped hyphens at the class boundaries, and unescaped `(){}/|`), and removes
the backslash from escapes that are only valid with flag v. Everything else is copied through
unchanged.
@param {string} expression
@returns {string} The expression with flag-v-only escapes inside character classes unescaped.
@throws {Error} If the expression uses character class syntax that is invalid with flag v.
*/
function backcompatPlugin(expression) {
  const unescapedLiteralHyphenMsg = 'Invalid unescaped "-" in character class';
  let inCharClass = false;
  let result = '';
  for (const {0: m, groups: {dp, vOnlyEscape}} of expression.matchAll(token)) {
    if (m[0] === '[') {
      if (inCharClass) {
        throw new Error('Invalid nested character class when flag v not supported; possibly from interpolation');
      }
      if (m.endsWith('-')) {
        throw new Error(unescapedLiteralHyphenMsg);
      }
      inCharClass = true;
    } else if (m.endsWith(']') && m[0] !== '\\') {
      // Only `]`, `-]`, and `--]` close a class. The escaped token `\]` also ends with `]` but is a
      // literal, so it must not end class tracking; otherwise the rest of the class would skip
      // validation and unescaping
      if (m[0] === '-') {
        throw new Error(unescapedLiteralHyphenMsg);
      }
      inCharClass = false;
    } else if (inCharClass) {
      if (m === '&&' || m === '--') {
        throw new Error(`Invalid set operator "${m}" when flag v not supported`);
      } else if (dp) {
        throw new Error(`Invalid double punctuator "${m}", reserved by flag v`);
      } else if ('(){}/|'.includes(m)) {
        throw new Error(`Invalid unescaped "${m}" in character class`);
      } else if (vOnlyEscape) {
        // Remove the escaping backslash to emulate flag v rules, since this character is allowed
        // to be escaped within character classes with flag v but not with flag u
        result += vOnlyEscape;
        continue;
      }
    }
    result += m;
  }
  return result;
}
export {
backcompatPlugin,
};

46
node_modules/regex/src/flag-n.js generated vendored Normal file
View File

@@ -0,0 +1,46 @@
import {getEndContextForIncompleteExpression, RegexContext} from './utils.js';
import {noncapturingDelim} from './utils-internals.js';
// Tokenizer for `flagNPreprocessor`. The template is written with insignificant whitespace, which
// is stripped by `.replace(/\s+/g, '')` before compiling. Alternatives, in priority order: a group
// opening matched by the imported `noncapturingDelim` pattern, `(?<`, a numbered backreference
// (group `backrefNum`), or any single character optionally preceded by a backslash. A bare `(`
// therefore only arrives as its own token when it isn't part of a longer group opening
const token = new RegExp(String.raw`
${noncapturingDelim}
| \(\?<
| (?<backrefNum>\\[1-9]\d*)
| \\?.
`.replace(/\s+/g, ''), 'gsu');
/**
Apply transformations for flag n (named capture only).
Preprocessors are applied to the outer regex and interpolated patterns, but not interpolated
regexes or strings.
In the default regex context, a bare `(` is rewritten to `(?:` and numbered backreferences are
rejected; tokens in any other context are copied through unchanged.
@type {import('./utils.js').Preprocessor}
*/
function flagNPreprocessor(value, runningContext) {
  const source = String(value);
  const pieces = [];
  let context = runningContext;
  let consumed = '';
  for (const tokenMatch of source.matchAll(token)) {
    const piece = tokenMatch[0];
    // The context is always computed from everything consumed so far, including this token
    consumed += piece;
    context = getEndContextForIncompleteExpression(consumed, context);
    const inDefaultContext = context.regexContext === RegexContext.DEFAULT;
    if (inDefaultContext && piece !== '(' && tokenMatch.groups.backrefNum) {
      throw new Error(`Invalid decimal escape "${piece}" with implicit flag n; replace with named backreference`);
    }
    pieces.push(inDefaultContext && piece === '(' ? '(?:' : piece);
  }
  return {
    transformed: pieces.join(''),
    runningContext: context,
  };
}
export {
flagNPreprocessor,
};

195
node_modules/regex/src/flag-x.js generated vendored Normal file
View File

@@ -0,0 +1,195 @@
import {emulationGroupMarker} from './subclass.js';
import {CharClassContext, doublePunctuatorChars, getEndContextForIncompleteExpression, RegexContext, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {noncapturingDelim} from './utils-internals.js';
import {Context, replaceUnescaped} from 'regex-utilities';
// Single-token tests used by `flagXPreprocessor`:
// - `ws`: one whitespace character
// - `escapedWsOrHash`: a backslash followed by one whitespace character or `#`
// - `charClassWs`: one space or tab
// - `escapedCharClassWs`: a backslash followed by one space or tab
const ws = /^\s$/;
const escapedWsOrHash = /^\\[\s#]$/;
const charClassWs = /^[ \t]$/;
const escapedCharClassWs = /^\\[ \t]$/;
// Tokenizer for `flagXPreprocessor`. The template is written with insignificant whitespace, which
// is stripped by `.replace(/\s+/g, '')` before compiling. Multicharacter tokens (escape prefixes,
// `[^`, group openings, doubled characters from `doublePunctuatorChars`, and `--`) are matched in
// one step; anything else is a single character optionally preceded by a backslash
const token = new RegExp(String.raw`
\\(?: [gk]<
| [pPu]\{
| c[A-Za-z]
| u[A-Fa-f\d]{4}
| x[A-Fa-f\d]{2}
| 0\d+
)
| \[\^
| ${noncapturingDelim}
| \(\?<
| (?<dp>[${doublePunctuatorChars}])\k<dp>
| --
| \\?.
`.replace(/\s+/g, ''), 'gsu');
/**
Apply transformations for flag x (insignificant whitespace and line comments).
Preprocessors are applied to the outer regex and interpolated patterns, but not interpolated
regexes or strings.
Tokenizes `String(value)` and drops whitespace and `#` line comments found in the default regex
context, as well as spaces and tabs in some character class contexts. Where whitespace or a comment
was removed outside of character classes, a `(?:)` separator is inserted before the next token so
that neighboring tokens can't merge. The third argument is read for `options.flags` and
`options.unicodeSetsPlugin`. Returns `{transformed, runningContext}`, where `runningContext` is the
result of the last `getEndContextForIncompleteExpression` call (or the provided value if no token
was kept).
Throws an `Error` for an unescaped hyphen as the end value of a range, and for an incomplete token
in a character class.
@type {import('./utils.js').Preprocessor}
*/
function flagXPreprocessor(value, runningContext, options) {
value = String(value);
// Currently skipping a run of whitespace (outside of character classes)
let ignoringWs = false;
// Currently skipping a run of spaces/tabs inside a character class
let ignoringCharClassWs = false;
// Currently skipping a `#` comment, up to and including the next `\n`
let ignoringComment = false;
// Accumulates every kept token, and is only used for tracking the running context
let expression = '';
let transformed = '';
let lastSignificantToken = '';
let lastSignificantCharClassContext = '';
// Set when whitespace or a comment was just skipped; consumed by the next `update` call
let separatorNeeded = false;
// Returns `str` with `(?:)` added before it (if a separator is pending and `prefix` isn't turned
// off) and/or after it (if `postfix` is on), and clears the pending separator
const update = (str, options) => {
const opts = {
prefix: true,
postfix: false,
...options,
};
str = (separatorNeeded && opts.prefix ? '(?:)' : '') + str + (opts.postfix ? '(?:)' : '');
separatorNeeded = false;
return str;
};
for (const {0: m, index} of value.matchAll(token)) {
if (ignoringComment) {
if (m === '\n') {
ignoringComment = false;
separatorNeeded = true;
}
continue;
}
if (ignoringWs) {
if (ws.test(m)) {
continue;
}
ignoringWs = false;
separatorNeeded = true;
} else if (ignoringCharClassWs) {
if (charClassWs.test(m)) {
continue;
}
ignoringCharClassWs = false;
}
expression += m;
runningContext = getEndContextForIncompleteExpression(expression, runningContext);
const {regexContext, charClassContext} = runningContext;
if (
// `--` is matched in one step, so boundary chars aren't `-` unless separated by whitespace
m === '-' &&
regexContext === RegexContext.CHAR_CLASS &&
lastSignificantCharClassContext === CharClassContext.RANGE &&
(options.flags.includes('v') || options.unicodeSetsPlugin)
) {
// Need to handle this here since the main regex-parsing code would think the hyphen forms
// part of a subtraction operator since we've removed preceding whitespace
throw new Error('Invalid unescaped hyphen as the end value for a range');
}
if (
// `??` is matched in one step by the double punctuator token
(regexContext === RegexContext.DEFAULT && /^(?:[?*+]|\?\?)$/.test(m)) ||
(regexContext === RegexContext.INTERVAL_QUANTIFIER && m === '{')
) {
// Skip the separator prefix and connect the quantifier to the previous token. This also
// allows whitespace between a quantifier and the `?` that makes it lazy. Add a postfix
// separator if `m` is `?` and we're following token `(`, to sandbox the `?` from following
// tokens (since `?` can be a group-type marker). Ex: `( ?:)` becomes `(?(?:):)` and throws.
// The loop we're in matches valid group openings in one step, so we won't arrive here if
// matching e.g. `(?:`. Flag n could prevent the need for the postfix since bare `(` is
// converted to `(?:`, but flag x handling always comes first and flag n can be turned off
transformed += update(m, {prefix: false, postfix: lastSignificantToken === '(' && m === '?'});
} else if (regexContext === RegexContext.DEFAULT) {
if (ws.test(m)) {
ignoringWs = true;
} else if (m.startsWith('#')) {
ignoringComment = true;
} else if (escapedWsOrHash.test(m)) {
// Escaped whitespace or `#` is kept as a literal, without its backslash
transformed += update(m[1], {prefix: false});
} else {
transformed += update(m);
}
} else if (regexContext === RegexContext.CHAR_CLASS && m !== '[' && m !== '[^') {
if (
charClassWs.test(m) &&
( charClassContext === CharClassContext.DEFAULT ||
charClassContext === CharClassContext.ENCLOSED_Q ||
charClassContext === CharClassContext.RANGE
)
) {
ignoringCharClassWs = true;
} else if (charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN) {
// Need to handle this here since the main regex-parsing code wouldn't know where the token
// ends if we removed whitespace after an incomplete token that is followed by something
// that completes the token
throw new Error(`Invalid incomplete token in character class: "${m}"`);
} else if (
escapedCharClassWs.test(m) &&
(charClassContext === CharClassContext.DEFAULT || charClassContext === CharClassContext.ENCLOSED_Q)
) {
// Escaped space or tab is kept as a literal, without its backslash
transformed += update(m[1], {prefix: false});
} else if (charClassContext === CharClassContext.DEFAULT) {
const nextChar = value[index + 1] ?? '';
let updated = sandboxUnsafeNulls(m);
// Avoid escaping lone double punctuators unless required, since some of them are not
// allowed to be escaped with flag u (the `unicodeSetsPlugin` already unescapes them when
// using flag u, but it can be set to `null` via an option)
if (charClassWs.test(nextChar) || m === '^') {
updated = sandboxLoneDoublePunctuatorChar(updated);
}
transformed += update(updated);
} else {
transformed += update(m);
}
} else {
transformed += update(m);
}
// Tokens that start a skipped run (whitespace or a comment) aren't significant
if (!(ignoringWs || ignoringCharClassWs || ignoringComment)) {
lastSignificantToken = m;
lastSignificantCharClassContext = charClassContext;
}
}
return {
transformed,
runningContext,
};
}
/**
Remove `(?:)` token separators (most likely added by flag x) in cases where it's safe to do so.
Runs two replacements: first collapse runs of separators into one, then drop separators at
positions where removing them can't change the meaning of the neighboring tokens.
@param {string} expression
@returns {string}
*/
function clean(expression) {
  const sep = String.raw`\(\?:\)`;
  const marker = emulationGroupMarker.replace(/\$/g, '\\$');
  // A separator can go when it's followed by one of `)|.[$\\`, by `(` if that's not followed by
  // `DEFINE)`, or by the end of the expression.
  // - Technically we shouldn't remove `(?:)` if preceded by `(?(DEFINE` and followed by `)`, but
  //   in this case flag x injects a sandboxing `(?:)` after the preceding invalid `(?`, so we
  //   already get an error from that.
  const removableBefore = String.raw`${sep}(?=[)|.[$\\]|\((?!DEFINE)|$)`;
  // A separator can also go when it's preceded by one of `()|.]^>`, `\\[bBdDfnrsStvwW]`, `(?:`, a
  // lookaround opening, or the start of the expression, so long as it's not followed by a
  // quantifier.
  const removableAfter = String.raw`(?<=[()|.\]^>]|\\[bBdDfnrsStvwW]|\(\?(?:[:=!]|<[=!])|^)${sep}(?![?*+{])`;
  // Examples of things that are not safe to remove `(?:)` at the boundaries of:
  // - Anywhere: Letters, numbers, or any of `-=_,<?*+{}`.
  // - If followed by any of `:!>`.
  // - If preceded by any of `\\[cgkpPux]`.
  // - Anything inside character classes.
  // In either case, the separator must not be followed by an emulation group marker.
  const removable = `(?:${removableBefore}|${removableAfter})(?!${marker})`;

  // No need for repeated separators
  const collapsed = replaceUnescaped(expression, `(?:${sep}){2,}`, '(?:)', Context.DEFAULT);
  return replaceUnescaped(collapsed, removable, '', Context.DEFAULT);
}
export {
clean,
flagXPreprocessor,
};

2
node_modules/regex/src/internals.js generated vendored Normal file
View File

@@ -0,0 +1,2 @@
export {atomic, possessive} from './atomic.js';
export {emulationGroupMarker, RegExpSubclass} from './subclass.js';

45
node_modules/regex/src/pattern.js generated vendored Normal file
View File

@@ -0,0 +1,45 @@
/**
Wrapper around a value that is stored privately and exposed only through `toString`.
*/
class Pattern {
  #source;

  /** @param {string} value */
  constructor(value) {
    this.#source = value;
  }

  /** @returns {string} The wrapped value coerced to a string. */
  toString() {
    return String(this.#source);
  }
}
/**
Returns a value that can be interpolated into a `regex` template string without having its special
characters escaped.
Can be called as a function or template tag:
- `pattern(value)` - String or value coerced to string.
- `` pattern`…` `` - Same as ``pattern(String.raw`…`)``.
Calling it with no arguments (or with `undefined`) gives an empty pattern.
@overload
@param {string | number} value
@returns {Pattern}
@overload
@param {TemplateStringsArray} template
@param {...string} substitutions
@returns {Pattern}
@throws {Error} If extra arguments are provided when not called as a template tag.
*/
function pattern(first, ...substitutions) {
  if (Array.isArray(first?.raw)) {
    const {raw} = first;
    const finalIndex = raw.length - 1;
    const parts = [];
    // Intersperse raw template strings and substitutions
    raw.forEach((chunk, i) => {
      parts.push(chunk);
      if (i < finalIndex) {
        parts.push(substitutions[i]);
      }
    });
    return new Pattern(parts.join(''));
  }
  if (substitutions.length) {
    throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...substitutions])}`);
  }
  return new Pattern(first === undefined ? '' : first);
}
export {
Pattern,
pattern,
};

352
node_modules/regex/src/regex.js generated vendored Normal file
View File

@@ -0,0 +1,352 @@
import {atomic, possessive} from './atomic.js';
import {backcompatPlugin} from './backcompat.js';
import {flagNPreprocessor} from './flag-n.js';
import {clean, flagXPreprocessor} from './flag-x.js';
import {Pattern, pattern} from './pattern.js';
import {RegExpSubclass} from './subclass.js';
import {subroutines} from './subroutines.js';
import {adjustNumberedBackrefs, CharClassContext, containsCharClassUnion, countCaptures, enclosedTokenCharClassContexts, enclosedTokenRegexContexts, envSupportsFlagGroups, envSupportsFlagV, escapeV, getBreakoutChar, getEndContextForIncompleteExpression, preprocess, RegexContext, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities';
/**
@typedef {string | RegExp | Pattern | number} InterpolatedValue
@typedef {{
flags?: string;
useEmulationGroups?: boolean;
}} PluginData
@typedef {TemplateStringsArray | {raw: Array<string>}} RawTemplate
@typedef {{
flags?: string;
subclass?: boolean;
plugins?: Array<(expression: string, data: PluginData) => string>;
unicodeSetsPlugin?: ((expression: string, data: PluginData) => string) | null;
disable?: {
x?: boolean;
n?: boolean;
v?: boolean;
atomic?: boolean;
subroutines?: boolean;
};
force?: {
v?: boolean;
};
}} RegexTagOptions
*/
/**
@template T
@typedef RegexTag
@type {{
(template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>): T;
(flags?: string): RegexTag<T>;
(options: RegexTagOptions & {subclass?: false}): RegexTag<T>;
(options: RegexTagOptions & {subclass: true}): RegexTag<RegExpSubclass>;
}}
*/
/**
Template tag for constructing a regex with extended syntax and context-aware interpolation of
regexes, strings, and patterns.
Can be called in several ways:
1. `` regex`…` `` - Regex pattern as a raw string.
2. `` regex('gi')`…` `` - To specify flags.
3. `` regex({flags: 'gi'})`…` `` - With options.
Forms 2 and 3 return a tag function with the flags or options already applied. Any other
combination of arguments throws an `Error`.
@type {RegexTag<RegExp>}
*/
const regex = (first, ...substitutions) => {
  // Given a template
  if (Array.isArray(first?.raw)) {
    return regexFromTemplate({}, first, ...substitutions);
  }
  if (!substitutions.length) {
    // Given flags
    if (first === undefined || typeof first === 'string') {
      return regexFromTemplate.bind(null, {flags: first ?? ''});
    }
    // Given an options object
    if (Object.prototype.toString.call(first) === '[object Object]') {
      return regexFromTemplate.bind(null, first);
    }
  }
  throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...substitutions])}`);
};
/**
@template T
@typedef RegexFromTemplate
@type {{
(options: RegexTagOptions, template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>) : T;
}}
*/
/**
Returns a RegExp from a template and substitutions to fill the template holes.
Runs the preprocessors, joins the raw template strings with their interpolated substitutions
(tracking the syntax context and the number of preceding captures as it goes), runs the plugins,
and finally constructs the regex. If construction fails, the error message is rewritten to always
end with the generated source and flags.
@type {RegexFromTemplate<RegExp>}
*/
const regexFromTemplate = (options, template, ...substitutions) => {
  const opts = getOptions(options);
  const prepped = handlePreprocessors(template, substitutions, opts);
  const rawStrings = prepped.template.raw;
  const finalIndex = rawStrings.length - 1;
  let captureCount = 0;
  let source = '';
  let context;
  // Intersperse raw template strings and substitutions
  for (const [i, raw] of rawStrings.entries()) {
    const wrapEscapedStr = Boolean(raw || rawStrings[i + 1]);
    // Even with flag n enabled, we might have named captures
    captureCount += countCaptures(raw);
    // Sandbox `\0` in character classes. Not needed outside character classes because in other
    // cases a following interpolated value would always be atomized
    source += sandboxUnsafeNulls(raw, Context.CHAR_CLASS);
    context = getEndContextForIncompleteExpression(source, context);
    const {regexContext, charClassContext} = context;
    if (i >= finalIndex) {
      // The last raw string has no substitution after it
      continue;
    }
    const substitution = prepped.substitutions[i];
    source += interpolate(substitution, opts.flags, regexContext, charClassContext, wrapEscapedStr, captureCount);
    if (substitution instanceof RegExp) {
      captureCount += countCaptures(substitution.source);
    } else if (substitution instanceof Pattern) {
      captureCount += countCaptures(String(substitution));
    }
  }
  const expression = handlePlugins(source, opts);
  try {
    if (opts.subclass) {
      return new RegExpSubclass(expression, opts.flags, {useEmulationGroups: true});
    }
    return new RegExp(expression, opts.flags);
  } catch (err) {
    // Improve DX by always including the generated source in the error message. Some browsers
    // include it automatically, but not Firefox or Safari
    const stripped = err.message.replace(/ \/.+\/[a-z]*:/, '');
    err.message = `${stripped}: /${expression}/${opts.flags}`;
    throw err;
  }
};
/**
Returns the processed expression and flags as strings.
The expression goes through the same preprocessors and plugins as a template with no
substitutions.
@param {string} expression
@param {RegexTagOptions} [options]
@returns {{expression: string; flags: string;}}
@throws {Error} If option `subclass` is enabled.
*/
function rewrite(expression = '', options) {
  const opts = getOptions(options);
  if (opts.subclass) {
    // Don't allow including emulation group markers in output
    throw new Error('Cannot use option subclass');
  }
  const {template} = handlePreprocessors({raw: [expression]}, [], opts);
  const rewritten = handlePlugins(template.raw[0], opts);
  return {
    expression: rewritten,
    flags: opts.flags,
  };
}
/**
Returns a complete set of options, with default values set for options that weren't provided, and
some options augmented for use.
Options that are explicitly set to `undefined` or `null` (other than `unicodeSetsPlugin`, where
`null` is meaningful) are treated as not provided. Flag `v` or `u` is appended to `flags`, and
`unicodeSetsPlugin` is set to `null` when flag `v` is used.
@param {RegexTagOptions} [options]
@returns {Required<RegexTagOptions>}
@throws {Error} If `flags` includes any of the implicit flags `v`, `u`, `x`, or `n`.
*/
function getOptions(options) {
  const opts = {
    flags: '',
    subclass: false,
    plugins: [],
    unicodeSetsPlugin: backcompatPlugin,
    disable: {/* n, v, x, atomic, subroutines */},
    force: {/* v */},
    ...options,
  };
  // Object spread copies explicit `undefined`/`null` values over the defaults. Restore the
  // defaults in that case; otherwise e.g. `{flags: undefined}` is coerced to the string
  // "undefined" by the flags test below (which then throws), and `{disable: undefined}` throws a
  // `TypeError` on property access
  opts.flags ??= '';
  opts.subclass ??= false;
  opts.plugins ??= [];
  opts.disable ??= {};
  opts.force ??= {};
  if (/[nuvx]/.test(opts.flags)) {
    throw new Error('Implicit flags v/u/x/n cannot be explicitly added');
  }
  const useFlagV = opts.force.v || (opts.disable.v ? false : envSupportsFlagV);
  opts.flags += useFlagV ? 'v' : 'u';
  if (useFlagV) {
    // The plugin that emulates flag v rules isn't needed when flag v itself is used
    opts.unicodeSetsPlugin = null;
  }
  return opts;
}
/**
Runs the enabled preprocessors over the template and substitutions, feeding the output of each
into the next.
@param {RawTemplate} template
@param {ReadonlyArray<InterpolatedValue>} substitutions
@param {Required<RegexTagOptions>} options
@returns {{
  template: RawTemplate;
  substitutions: ReadonlyArray<InterpolatedValue>;
}}
*/
function handlePreprocessors(template, substitutions, options) {
  const {disable} = options;
  const pipeline = [
    // Implicit flag x is handled first because otherwise some regex syntax (if unescaped) within
    // comments could cause problems when parsing
    ...(disable.x ? [] : [flagXPreprocessor]),
    // Implicit flag n is a preprocessor because capturing groups affect backreference rewriting
    // in both interpolation and plugins
    ...(disable.n ? [] : [flagNPreprocessor]),
  ];
  return pipeline.reduce(
    (current, preprocessor) => {
      const next = preprocess(current.template, current.substitutions, preprocessor, options);
      return {
        template: next.template,
        substitutions: next.substitutions,
      };
    },
    {template, substitutions}
  );
}
/**
Runs the provided and built-in plugins over the expression in order, passing each one the flags
and whether emulation groups are in use.
@param {string} expression
@param {Required<RegexTagOptions>} options
@returns {string}
*/
function handlePlugins(expression, options) {
  const {flags, plugins, unicodeSetsPlugin, disable, subclass} = options;
  // Provided plugins run first, so they can output extended syntax
  const pipeline = [...plugins];
  if (!disable.subroutines) {
    pipeline.push(subroutines);
  }
  if (!disable.atomic) {
    pipeline.push(possessive, atomic);
  }
  if (!disable.x) {
    pipeline.push(clean);
  }
  // Run last, so it doesn't have to worry about parsing extended syntax
  if (unicodeSetsPlugin) {
    pipeline.push(unicodeSetsPlugin);
  }
  let result = expression;
  for (const plugin of pipeline) {
    result = plugin(result, {flags, useEmulationGroups: subclass});
  }
  return result;
}
/**
Returns the string to insert into the expression for an interpolated value, based on the syntax
context at the point of interpolation. Strings and numbers are escaped, patterns are inserted raw,
and regexes are adjusted for their local flags and numbered backreferences.
@param {InterpolatedValue} value
@param {string} flags
@param {string} regexContext
@param {string} charClassContext
@param {boolean} wrapEscapedStr
@param {number} precedingCaptures
@returns {string}
@throws {Error} If a regex is interpolated outside of the default context, if the interpolation
follows an invalid incomplete token, if the value contains a stray character that would break out
of its context, or if a pattern in a character class starts or ends with a range or set operator.
*/
function interpolate(value, flags, regexContext, charClassContext, wrapEscapedStr, precedingCaptures) {
  const isRegExp = value instanceof RegExp;
  if (isRegExp && regexContext !== RegexContext.DEFAULT) {
    throw new Error('Cannot interpolate a RegExp at this position because the syntax context does not match');
  }
  if (regexContext === RegexContext.INVALID_INCOMPLETE_TOKEN || charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN) {
    // Throw in all cases, but only *need* to handle a preceding unescaped backslash (which would
    // break sandboxing) since other errors would be handled by the invalid generated regex syntax
    throw new Error('Interpolation preceded by invalid incomplete token');
  }
  const inEnclosedU = regexContext === RegexContext.ENCLOSED_U || charClassContext === CharClassContext.ENCLOSED_U;
  if (typeof value === 'number' && inEnclosedU) {
    // Numbers within `\u{…}` are inserted as hexadecimal
    return value.toString(16);
  }

  const isPattern = value instanceof Pattern;
  const inCharClass = regexContext === RegexContext.CHAR_CLASS;
  let text = value;
  let escaped = '';
  if (!isRegExp) {
    text = String(value);
    if (!isPattern) {
      escaped = escapeV(text, inCharClass ? Context.CHAR_CLASS : Context.DEFAULT);
    }
    // Check the escaped value (not just patterns) since possible breakout char `>` isn't escaped
    const breakoutChar = getBreakoutChar(escaped || text, regexContext, charClassContext);
    if (breakoutChar) {
      throw new Error(`Unescaped stray "${breakoutChar}" in the interpolated value would have side effects outside it`);
    }
  }

  const insertedDirectly = (
    regexContext === RegexContext.INTERVAL_QUANTIFIER ||
    regexContext === RegexContext.GROUP_NAME ||
    enclosedTokenRegexContexts.has(regexContext) ||
    enclosedTokenCharClassContexts.has(charClassContext)
  );
  if (insertedDirectly) {
    return isPattern ? String(text) : escaped;
  }

  if (inCharClass) {
    if (!isPattern) {
      // Atomize via nested character class `[…]` if more than one node
      return containsCharClassUnion(escaped) ? `[${escaped}]` : escaped;
    }
    if (hasUnescaped(String(text), '^-|^&&|-$|&&$')) {
      // Sandboxing so we don't change the chars outside the pattern into being part of an
      // operation they didn't initiate. Same problem as starting a pattern with a quantifier
      throw new Error('Cannot use range or set operator at boundary of interpolated pattern; move the operation into the pattern or the operator outside of it');
    }
    const sandboxed = sandboxLoneCharClassCaret(sandboxLoneDoublePunctuatorChar(text));
    // Atomize via nested character class `[…]` if it contains implicit or explicit union (check
    // the unadjusted value)
    return containsCharClassUnion(text) ? `[${sandboxed}]` : sandboxUnsafeNulls(sandboxed);
  }

  // `RegexContext.DEFAULT`
  if (isRegExp) {
    const transformed = transformForLocalFlags(text, flags);
    const backrefsAdjusted = adjustNumberedBackrefs(transformed.value, precedingCaptures);
    // Sandbox and atomize; if we used a pattern modifier it has the same effect
    return transformed.usedModifier ? backrefsAdjusted : `(?:${backrefsAdjusted})`;
  }
  if (isPattern) {
    // Sandbox and atomize
    return `(?:${text})`;
  }
  // Sandbox and atomize
  return wrapEscapedStr ? `(?:${escaped})` : escaped;
}
/**
Adjusts the source of an interpolated regex so that it keeps the behavior of its own flags `i`,
`m`, and `s` when they differ from the flags of the outer regex. If flag groups are supported, the
source is wrapped in a pattern modifier like `(?i-s:…)`. Otherwise, differences in flags `s` and
`m` are emulated by rewriting `.`, `^`, and `$`, and a difference in flag `i` is an error.
@param {RegExp} re
@param {string} outerFlags
@returns {{value: string; usedModifier?: boolean;}}
@throws {Error} If flag `i` differs and flag groups aren't supported.
*/
function transformForLocalFlags(re, outerFlags) {
  const source = re.source;
  /** Local state of each modifiable flag, in the order used when building a modifier */
  const localFlags = [
    ['i', re.ignoreCase],
    ['m', re.multiline],
    ['s', re.dotAll],
  ];
  const mismatched = localFlags.filter(([flag, isOn]) => isOn !== outerFlags.includes(flag));

  if (envSupportsFlagGroups) {
    const flagsOn = mismatched.filter(([, isOn]) => isOn).map(([flag]) => flag).join('');
    const flagsOff = mismatched.filter(([, isOn]) => !isOn).map(([flag]) => flag).join('');
    const modifier = flagsOff ? `${flagsOn}-${flagsOff}` : flagsOn;
    if (!modifier) {
      return {value: source};
    }
    return {
      value: `(?${modifier}:${source})`,
      usedModifier: true,
    };
  }

  const differs = new Set(mismatched.map(([flag]) => flag));
  if (differs.has('i')) {
    throw new Error('Pattern modifiers not supported, so flag i on the outer and interpolated regex must match');
  }
  const newlines = '\\n\\r\\u2028\\u2029';
  let value = source;
  if (differs.has('s')) {
    value = replaceUnescaped(value, '\\.', (re.dotAll ? '[^]' : `[^${newlines}]`), Context.DEFAULT);
  }
  if (differs.has('m')) {
    value = replaceUnescaped(value, '\\^', (re.multiline ? `(?<=^|[${newlines}])` : '(?<![^])'), Context.DEFAULT);
    value = replaceUnescaped(value, '\\$', (re.multiline ? `(?=$|[${newlines}])` : '(?![^])'), Context.DEFAULT);
  }
  return {value};
}
export {
pattern,
regex,
rewrite,
};

147
node_modules/regex/src/subclass.js generated vendored Normal file
View File

@@ -0,0 +1,147 @@
import {Context, replaceUnescaped} from 'regex-utilities';
// This marker was chosen because it's impossible to match (so it's extremely unlikely to be used
// in a user-provided regex); it's not at risk of being optimized away, transformed, or flagged as
// an error by a plugin; and it ends with an unquantifiable token
const emulationGroupMarker = '$E$';
// Note: Emulation groups with transfer are also supported. They look like `($N$E$…)` where `N` is
// an integer 1 or greater. They're not used directly by Regex+ but can be used by plugins and
// libraries that use Regex+ internals. Emulation groups with transfer are not only excluded from
// match results, but additionally transfer their match to the group specified by `N`
/**
Works the same as JavaScript's native `RegExp` constructor in all contexts, but automatically
adjusts matches and subpattern indices (with flag `d`) to account for injected emulation groups.
*/
class RegExpSubclass extends RegExp {
  // Avoid `#private` to allow for subclassing
  /**
  Entry `i` describes capturing group `i` of the compiled regex: whether it's excluded from match
  results and, optionally, which group its match is transferred to.
  @private
  @type {Array<{
    exclude: boolean;
    transfer?: number;
  }> | undefined}
  */
  _captureMap;
  /**
  Group names keyed by group number.
  @private
  @type {Record<number, string> | undefined}
  */
  _namesByIndex;

  /**
  @param {string | RegExpSubclass} expression
  @param {string} [flags]
  @param {{useEmulationGroups: boolean;}} [options]
  */
  constructor(expression, flags, options) {
    if (expression instanceof RegExp && options) {
      throw new Error('Cannot provide options when copying a regexp');
    }
    const useEmulationGroups = Boolean(options?.useEmulationGroups);
    let unmarked = null;
    if (useEmulationGroups) {
      unmarked = unmarkEmulationGroups(expression);
    }
    super(unmarked?.expression || expression, flags);
    // The third argument `options` isn't provided when regexes are copied as part of the internal
    // handling of string methods `matchAll` and `split`, so in that case carry over the maps from
    // the regex being copied
    let mapSource = null;
    if (useEmulationGroups) {
      mapSource = unmarked;
    } else if (expression instanceof RegExpSubclass) {
      mapSource = expression;
    }
    if (mapSource) {
      this._captureMap = mapSource._captureMap;
      this._namesByIndex = mapSource._namesByIndex;
    }
  }

  /**
  Called internally by all String/RegExp methods that use regexes.
  @override
  @param {string} str
  @returns {RegExpExecArray | null}
  */
  exec(str) {
    const match = RegExp.prototype.exec.call(this, str);
    const captureMap = this._captureMap;
    if (!match || !captureMap) {
      return match;
    }
    const withIndices = this.hasIndices;
    const submatches = [...match];
    // Empty all but the first value of the array while preserving its other properties
    match.length = 1;
    let submatchIndices;
    if (withIndices) {
      submatchIndices = [...match.indices];
      match.indices.length = 1;
    }
    submatches.forEach((submatch, i) => {
      if (i === 0) {
        // The overall match was kept in place
        return;
      }
      const {exclude, transfer} = captureMap[i];
      if (!exclude) {
        match.push(submatch);
        if (withIndices) {
          match.indices.push(submatchIndices[i]);
        }
        return;
      }
      // Excluded groups are dropped, unless they transfer their match to a group that has
      // already been added to the result
      if (!transfer || match.length <= transfer) {
        return;
      }
      match[transfer] = submatch;
      if (withIndices) {
        match.indices[transfer] = submatchIndices[i];
      }
      const transferName = this._namesByIndex[transfer];
      if (transferName) {
        match.groups[transferName] = submatch;
        if (withIndices) {
          match.indices.groups[transferName] = submatchIndices[i];
        }
      }
    });
    return match;
  }
}
/**
Strips the emulation group markers from `expression` and, while doing so, records for every
capturing group whether its submatch must be hidden from results (and which group, if any, it
should transfer its match to), plus the names of the captures that remain visible.
@param {string} expression
@returns {{
  _captureMap: Array<{
    exclude: boolean;
    transfer?: number;
  }>;
  _namesByIndex: Record<number, string>;
  expression: string;
}}
*/
function unmarkEmulationGroups(expression) {
  const escapedMarker = emulationGroupMarker.replace(/\$/g, '\\$');
  // Start of any capturing group (unnamed or named), optionally followed by an emulation marker
  // that itself may be preceded by a `$N` transfer target
  const captureStart = String.raw`\((?:(?!\?)|\?<(?![=!])(?<name>[^>]+)>)(?<mark>(?:\$(?<transfer>[1-9]\d*))?${escapedMarker})?`;
  // Index 0 stands for the overall match
  const _captureMap = [{exclude: false}];
  const _namesByIndex = {0: ''};
  // Counts only the captures that will be visible in results
  let visibleCaptures = 0;
  const unmarkedExpression = replaceUnescaped(
    expression,
    captureStart,
    (found) => {
      const delim = found[0];
      const {name, mark, transfer} = found.groups;
      if (!mark) {
        visibleCaptures += 1;
        if (name) {
          _namesByIndex[visibleCaptures] = name;
        }
        _captureMap.push({exclude: false});
        return delim;
      }
      _captureMap.push({
        exclude: true,
        transfer: transfer ? +transfer : transfer,
      });
      // Keep the group's opening delimiter but drop the marker that trails it
      return delim.slice(0, delim.length - mark.length);
    },
    Context.DEFAULT
  );
  return {
    _captureMap,
    _namesByIndex,
    expression: unmarkedExpression,
  };
}
export {
emulationGroupMarker,
RegExpSubclass,
};

341
node_modules/regex/src/subroutines.js generated vendored Normal file
View File

@@ -0,0 +1,341 @@
import {emulationGroupMarker} from './subclass.js';
import {capturingDelim, countCaptures, namedCapturingDelim} from './utils.js';
import {spliceStr} from './utils-internals.js';
import {Context, execUnescaped, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';
/**
Plugin entry point: expands subroutines (`\g<name>`) and then strips a trailing definition group.
@param {string} expression
@param {import('./regex.js').PluginData} [data]
@returns {string}
*/
function subroutines(expression, data) {
  // NOTE: numbered backreferences and unnamed captures (coming from interpolated regexes or from
  // turning implicit flag n off) are fully supported by subroutines and definition groups,
  // including all of the forward and backward backreference renumbering this can require
  const namedGroups = getNamedCapturingGroups(expression, {includeContents: true});
  const useEmulationGroups = !!data?.useEmulationGroups;
  const expanded = processSubroutines(expression, namedGroups, useEmulationGroups);
  return processDefinitionGroup(expanded, namedGroups);
}
// Regex source for a subroutine call. Explicitly exclude `&` from subroutine name chars because
// it's used by extension `regex-recursion` for recursive subroutines via `\g<name&R=N>`
const subroutinePattern = String.raw`\\g<(?<subroutineName>[^>&]+)>`;
// Tokenizer used by `processSubroutines`. The template is written across multiple lines for
// readability; all whitespace is stripped before it's compiled. Alternatives, in order: a
// subroutine call, the start of a capturing group, a numbered backref, a named backref, and
// finally any other single character (optionally preceded by a backslash)
const token = new RegExp(String.raw`
${subroutinePattern}
| (?<capturingStart>${capturingDelim})
| \\(?<backrefNum>[1-9]\d*)
| \\k<(?<backrefName>[^>]+)>
| \\?.
`.replace(/\s+/g, ''), 'gsu');
/**
@typedef {
Map<string, {
isUnique: boolean;
contents?: string;
groupNum?: number;
numCaptures?: number;
}>} NamedCapturingGroupsMap
*/
/**
Apply transformations for subroutines: `\g<name>`.
Each subroutine call is replaced in place by the wrapped contents of the group it references, and
the expanded contents are then tokenized as well, so nested subroutine calls are expanded too. If
the expression contains any backrefs, a second pass renumbers them to account for the captures
that the expansion added.
@param {string} expression
@param {NamedCapturingGroupsMap} namedGroups
@param {boolean} useEmulationGroups
@returns {string}
@throws {Error} If a subroutine references an unknown group or is recursive, or if a numbered
backref is greater than the number of captures.
*/
function processSubroutines(expression, namedGroups, useEmulationGroups) {
if (!/\\g</.test(expression)) {
return expression;
}
// Can skip a lot of processing and avoid adding captures if there are no backrefs
const hasBackrefs = hasUnescaped(expression, '\\\\(?:[1-9]|k<[^>]+>)', Context.DEFAULT);
const subroutineWrapper = hasBackrefs ? `(${useEmulationGroups ? emulationGroupMarker : ''}` : '(?:';
// Subroutines whose expanded contents we're currently inside, keyed by name (used to detect
// recursion), plus the same names in nesting order
const openSubroutines = new Map();
const openSubroutinesStack = [];
// `captureNumMap[n]` is the capture number that original capture `n` has after expansion; it's
// used by the second pass to rewrite numbered backrefs. Index 0 is a placeholder
const captureNumMap = [0];
let numCapturesPassedOutsideSubroutines = 0;
let numCapturesPassedInsideSubroutines = 0;
let numCapturesPassedInsideThisSubroutine = 0;
let numSubroutineCapturesTrackedInRemap = 0;
let numCharClassesOpen = 0;
let result = expression;
let match;
// `token` is a shared regex with flag g, so reset its position before reusing it
token.lastIndex = 0;
while (match = token.exec(result)) {
const {0: m, index, groups: {subroutineName, capturingStart, backrefNum, backrefName}} = match;
if (m === '[') {
numCharClassesOpen++;
} else if (!numCharClassesOpen) {
if (subroutineName) {
if (!namedGroups.has(subroutineName)) {
throw new Error(`Invalid named capture referenced by subroutine ${m}`);
}
if (openSubroutines.has(subroutineName)) {
throw new Error(`Subroutine ${m} followed a recursive reference`);
}
const contents = namedGroups.get(subroutineName).contents;
// Wrap value in case it has top-level alternation or is followed by a quantifier. The
// wrapper also marks the end of the expanded contents, which we'll track using
// `unclosedGroupCount`. If there are any backrefs in the expression, wrap with `()`
// instead of `(?:)` in case there are backrefs inside the subroutine that refer to their
// containing capturing group
const subroutineValue = `${subroutineWrapper}${contents})`;
if (hasBackrefs) {
// The wrapper itself is a capture added by this subroutine
numCapturesPassedInsideThisSubroutine = 0;
numCapturesPassedInsideSubroutines++;
}
openSubroutines.set(subroutineName, {
// Incrementally decremented to track when we've left the group
unclosedGroupCount: countOpenParens(subroutineValue),
});
openSubroutinesStack.push(subroutineName);
// Expand the subroutine's contents into the pattern we're looping over
result = spliceStr(result, index, m, subroutineValue);
// Resume scanning right after the wrapper's opening delimiter, so that the expanded contents
// are tokenized too
token.lastIndex -= m.length - subroutineWrapper.length;
} else if (capturingStart) {
// Somewhere within an expanded subroutine
if (openSubroutines.size) {
if (hasBackrefs) {
numCapturesPassedInsideThisSubroutine++;
numCapturesPassedInsideSubroutines++;
}
// Named capturing group
if (m !== '(') {
// Replace named with unnamed capture. Subroutines ideally wouldn't create any new
// captures, but it can't be helped since we need any backrefs to this capture to work.
// Given that flag n prevents unnamed capture and thereby requires you to rely on named
// backrefs and `groups`, switching to unnamed essentially accomplishes not creating a
// capture. Can fully avoid capturing if there are no backrefs in the expression
result = spliceStr(result, index, m, subroutineWrapper);
// Resume scanning right after the replacement delimiter
token.lastIndex -= m.length - subroutineWrapper.length;
}
} else if (hasBackrefs) {
// A capture outside of any subroutine: its new number is the previous mapped number, plus
// one, plus the subroutine captures passed since the previous entry was recorded
captureNumMap.push(
lastOf(captureNumMap) + 1 +
numCapturesPassedInsideSubroutines -
numSubroutineCapturesTrackedInRemap
);
numSubroutineCapturesTrackedInRemap = numCapturesPassedInsideSubroutines;
numCapturesPassedOutsideSubroutines++;
}
} else if ((backrefNum || backrefName) && openSubroutines.size) {
// Unify handling for named and unnamed by always using the backref num
const num = backrefNum ? +backrefNum : namedGroups.get(backrefName)?.groupNum;
let isGroupFromThisSubroutine = false;
// Search for the group in the contents of the subroutine stack
for (const s of openSubroutinesStack) {
const group = namedGroups.get(s);
if (num >= group.groupNum && num <= (group.groupNum + group.numCaptures)) {
isGroupFromThisSubroutine = true;
break;
}
}
if (isGroupFromThisSubroutine) {
const group = namedGroups.get(lastOf(openSubroutinesStack));
// Replace the backref with metadata we'll need to rewrite it later, using
// `\k<$$bNsNrNcN>` as a temporary wrapper:
// - b: The unmodified matched backref num, or the corresponding num of a named backref
// - s: The capture num of the subroutine we're most deeply nested in, including captures
// added by expanding the contents of preceding subroutines
// - r: The original capture num of the group that the subroutine we're most deeply
// nested in references, not counting the effects of subroutines
// - c: The number of captures within `r`, not counting the effects of subroutines
const subroutineNum = numCapturesPassedOutsideSubroutines + numCapturesPassedInsideSubroutines - numCapturesPassedInsideThisSubroutine;
const metadata = `\\k<$$b${num}s${subroutineNum}r${group.groupNum}c${group.numCaptures}>`;
result = spliceStr(result, index, m, metadata);
// Resume scanning right after the inserted metadata
token.lastIndex += metadata.length - m.length;
}
} else if (m === ')') {
if (openSubroutines.size) {
const subroutine = openSubroutines.get(lastOf(openSubroutinesStack));
subroutine.unclosedGroupCount--;
// Reaching zero means this `)` closed the wrapper of the innermost open subroutine
if (!subroutine.unclosedGroupCount) {
openSubroutines.delete(openSubroutinesStack.pop());
}
}
}
} else if (m === ']') {
// Only reached while inside a character class
numCharClassesOpen--;
}
}
if (hasBackrefs) {
// Second pass to adjust backrefs
result = replaceUnescaped(
result,
String.raw`\\(?:(?<bNum>[1-9]\d*)|k<\$\$b(?<bNumSub>\d+)s(?<subNum>\d+)r(?<refNum>\d+)c(?<refCaps>\d+)>)`,
({0: m, groups: {bNum, bNumSub, subNum, refNum, refCaps}}) => {
// A plain numbered backref that wasn't replaced with metadata during the first pass
if (bNum) {
const backrefNum = +bNum;
if (backrefNum > captureNumMap.length - 1) {
throw new Error(`Backref "${m}" greater than number of captures`);
}
return `\\${captureNumMap[backrefNum]}`;
}
// A backref that was replaced with `\k<$$bNsNrNcN>` metadata during the first pass
const backrefNumInSubroutine = +bNumSub;
const subroutineGroupNum = +subNum;
const refGroupNum = +refNum;
const numCapturesInRef = +refCaps;
// The backref targets a group outside of the referenced group, so use the general remapping
if (backrefNumInSubroutine < refGroupNum || backrefNumInSubroutine > (refGroupNum + numCapturesInRef)) {
return `\\${captureNumMap[backrefNumInSubroutine]}`;
}
// Otherwise keep the backref's offset within the referenced group, relative to the capture
// num of the subroutine's wrapper
return `\\${subroutineGroupNum - refGroupNum + backrefNumInSubroutine}`;
},
Context.DEFAULT
);
}
return result;
}
// Tokenizer for the top level of a DEFINE group. An empty `(?:)` is tolerated there because flag
// x's preprocessing of whitespace and comments can leave one behind
const defineGroupToken = new RegExp(String.raw`${namedCapturingDelim}|\(\?:\)|(?<invalid>\\?.)`, 'gsu');
/**
Validates and strips a subroutine definition group: `(?(DEFINE)…)`.
@param {string} expression
@param {NamedCapturingGroupsMap} namedGroups
IMPORTANT: Don't rely on the `contents` property of `namedGroups` objects here; by this point
subroutines have already been substituted in the corresponding substrings of `expression`
@returns {string} The expression without its DEFINE group.
@throws {Error} If the DEFINE group isn't at the end of the regex, is unclosed, contains a group
name that's duplicated in the regex, or contains top-level syntax other than named groups.
*/
function processDefinitionGroup(expression, namedGroups) {
  const defineStart = execUnescaped(expression, String.raw`\(\?\(DEFINE\)`, 0, Context.DEFAULT);
  if (!defineStart) {
    return expression;
  }
  const {contents: definitions, afterPos: defineEnd} = getGroup(expression, defineStart);
  if (defineEnd < expression.length) {
    // Allowing DEFINE anywhere but the end would complicate backref handling.
    // NOTE: Trailing whitespace and comments are permitted by flag x's preprocessing
    throw new Error('DEFINE group allowed only at the end of a regex');
  }
  if (defineEnd > expression.length) {
    throw new Error('DEFINE group is unclosed');
  }
  // Returns the first of `names` that's used by more than one group in the regex, if any
  const firstDuplicate = (names) => {
    for (const name of names) {
      if (!namedGroups.get(name).isUnique) {
        return name;
      }
    }
    return undefined;
  };
  defineGroupToken.lastIndex = 0;
  for (;;) {
    const tokenMatch = defineGroupToken.exec(definitions);
    if (!tokenMatch) {
      break;
    }
    const {captureName, invalid} = tokenMatch.groups;
    if (captureName) {
      const definedGroup = getGroup(definitions, tokenMatch);
      // Check the group's own name first, and only then the names nested inside it
      const duplicateName = firstDuplicate([captureName]) ??
        firstDuplicate(getNamedCapturingGroups(definedGroup.contents, {includeContents: false}).keys());
      if (duplicateName) {
        throw new Error(`Duplicate group name "${duplicateName}" within DEFINE`);
      }
      // Skip over the group's contents; only top-level tokens are inspected
      defineGroupToken.lastIndex = definedGroup.afterPos;
    } else if (invalid) {
      // The DEFINE group gets stripped from the expression, so there's no easy way to tell
      // whether unreferenced top-level syntax inside it is valid. It would serve no purpose
      // anyway, so it's simplest to disallow it
      throw new Error(`DEFINE group includes unsupported syntax at top level`);
    }
  }
  return expression.slice(0, defineStart.index);
}
/**
Tallies every unescaped `(` found outside of character classes, whatever kind of group it opens.
@param {string} expression
@returns {number}
*/
function countOpenParens(expression) {
  let total = 0;
  const tally = () => {
    total += 1;
  };
  forEachUnescaped(expression, '\\(', tally, Context.DEFAULT);
  return total;
}
/**
Finds the capture number of the first capturing group named `groupName`, by counting capturing
groups from the start of `expression`. If no group has that name, the result is the total number
of capturing groups.
@param {string} expression
@param {string} groupName
@returns {number}
*/
function getCaptureNum(expression, groupName) {
  let capturesSeen = 0;
  let searchFrom = 0;
  for (;;) {
    const found = execUnescaped(expression, capturingDelim, searchFrom, Context.DEFAULT);
    if (!found) {
      return capturesSeen;
    }
    capturesSeen += 1;
    if (found.groups.captureName === groupName) {
      return capturesSeen;
    }
    searchFrom = found.index + found[0].length;
  }
}
/**
Given the match of a group's opening delimiter, returns the group's contents along with the
position just past the contents and one following character (the closing paren, when present).
@param {string} expression
@param {RegExpExecArray} delimMatch
@returns {{contents: string; afterPos: number}}
*/
function getGroup(expression, delimMatch) {
  const {0: delim, index: delimPos} = delimMatch;
  const bodyStart = delimPos + delim.length;
  const contents = getGroupContents(expression, bodyStart);
  return {
    contents,
    afterPos: bodyStart + contents.length + 1,
  };
}
/**
Collects the named capturing groups of `expression`, keyed by name. Each entry records whether the
name is unique and, when `includeContents` is set, the contents, capture number, and number of
nested captures of the first group with that name.
@param {string} expression
@param {{includeContents: boolean}} options
@returns {NamedCapturingGroupsMap}
*/
function getNamedCapturingGroups(expression, {includeContents}) {
  const groupsByName = new Map();
  const visitNamedGroup = (delimMatch) => {
    const {captureName} = delimMatch.groups;
    const known = groupsByName.get(captureName);
    if (known) {
      // Only the first instance of a duplicated name is described, since that's the one that
      // subroutines refer to (matching the behavior of PCRE and Perl)
      known.isUnique = false;
      return;
    }
    const entry = {isUnique: true};
    if (includeContents) {
      const contents = getGroupContents(expression, delimMatch.index + delimMatch[0].length);
      entry.contents = contents;
      entry.groupNum = getCaptureNum(expression, captureName);
      entry.numCaptures = countCaptures(contents);
    }
    groupsByName.set(captureName, entry);
  };
  forEachUnescaped(expression, namedCapturingDelim, visitNamedGroup, Context.DEFAULT);
  return groupsByName;
}
/**
Returns the final element of `arr` (`undefined` when it's empty).
@param {Array<any>} arr
@returns {any}
*/
function lastOf(arr) {
  // Can be replaced with ES2022 array method `at` once support for it (Node.js 16.6) is no longer
  // an issue: <https://caniuse.com/mdn-javascript_builtins_array_at>
  const finalIndex = arr.length - 1;
  return arr[finalIndex];
}
export {
subroutines,
};

19
node_modules/regex/src/utils-internals.js generated vendored Normal file
View File

@@ -0,0 +1,19 @@
// Separating some utils for improved tree shaking of the `./internals` export
// Regex source that matches the opening delimiter of groups that start with `(?` followed by one
// of: `:`, `=`, `!`, `>`, a letter or `-` (as in flag groups like `(?i:`), `<=`, `<!`, or
// `(DEFINE)`. Named capturing groups `(?<name>` are not matched
const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;
/**
Returns `str` with the `oldValue.length` characters starting at `pos` swapped out for `newValue`.
Only the length of `oldValue` is used; it isn't checked against the text actually found at `pos`.
@param {string} str
@param {number} pos
@param {string} oldValue
@param {string} newValue
@returns {string}
*/
function spliceStr(str, pos, oldValue, newValue) {
  const before = str.slice(0, pos);
  const after = str.slice(pos + oldValue.length);
  return before + newValue + after;
}
export {
noncapturingDelim,
spliceStr,
};

398
node_modules/regex/src/utils.js generated vendored Normal file
View File

@@ -0,0 +1,398 @@
import {Pattern, pattern} from './pattern.js';
import {Context, forEachUnescaped, replaceUnescaped} from 'regex-utilities';
// Contexts tracked for the regex as a whole by `getEndContextForIncompleteExpression`
const RegexContext = {
DEFAULT: 'DEFAULT',
CHAR_CLASS: 'CHAR_CLASS',
ENCLOSED_P: 'ENCLOSED_P',
ENCLOSED_U: 'ENCLOSED_U',
GROUP_NAME: 'GROUP_NAME',
INTERVAL_QUANTIFIER: 'INTERVAL_QUANTIFIER',
INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
};
// Contexts tracked within a character class by `getEndContextForIncompleteExpression`
const CharClassContext = {
DEFAULT: 'DEFAULT',
ENCLOSED_P: 'ENCLOSED_P',
ENCLOSED_Q: 'ENCLOSED_Q',
ENCLOSED_U: 'ENCLOSED_U',
INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
RANGE: 'RANGE',
};
// Regex contexts that are inside the braces of an enclosed token (`\p{…}`, `\P{…}`, or `\u{…}`)
const enclosedTokenRegexContexts = new Set([
RegexContext.ENCLOSED_P,
RegexContext.ENCLOSED_U,
]);
// Character class contexts that are inside the braces of an enclosed token (`\p{…}`, `\P{…}`,
// `\q{…}`, or `\u{…}`)
const enclosedTokenCharClassContexts = new Set([
CharClassContext.ENCLOSED_P,
CharClassContext.ENCLOSED_Q,
CharClassContext.ENCLOSED_U,
]);
// Feature detection, run once at load time: `true` if the environment can compile a pattern that
// uses a flag group like `(?i:)`
const envSupportsFlagGroups = (() => {
  try {
    new RegExp('(?i:)');
    return true;
  } catch {
    return false;
  }
})();
// Feature detection, run once at load time: `true` if the environment accepts regex flag `v`
const envSupportsFlagV = (() => {
  try {
    new RegExp('', 'v');
    return true;
  } catch {
    return false;
  }
})();
// Characters that `escapeV` and `sandboxLoneDoublePunctuatorChar` treat as double punctuators.
// The string is embedded as-is into regex character classes
const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~';
// Regex source for the opening delimiter of a named capturing group; the negative lookahead keeps
// it from matching lookbehind (`(?<=`, `(?<!`). The name is captured as `captureName`
const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
// Regex source for the opening delimiter of any capturing group: either a `(` that isn't followed
// by `?` and isn't the inner paren of `(?(DEFINE)`, or a named capturing group delimiter
const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`;
/**
Shifts every unescaped numbered backreference (`\1`, `\2`, …) outside of character classes
upward by `precedingCaptures`.
@param {string} expression
@param {number} precedingCaptures
@returns {string}
*/
function adjustNumberedBackrefs(expression, precedingCaptures) {
  const numberedBackref = String.raw`\\(?<num>[1-9]\d*)`;
  const shiftBackref = (found) => {
    const shifted = +found.groups.num + precedingCaptures;
    return `\\${shifted}`;
  };
  return replaceUnescaped(expression, numberedBackref, shiftBackref, Context.DEFAULT);
}
// Names of the properties of strings (as of ES2024), joined as regex alternatives
const stringPropertyNames = [
  'Basic_Emoji',
  'Emoji_Keycap_Sequence',
  'RGI_Emoji_Modifier_Sequence',
  'RGI_Emoji_Flag_Sequence',
  'RGI_Emoji_Tag_Sequence',
  'RGI_Emoji_ZWJ_Sequence',
  'RGI_Emoji',
].join('|');
// Tokenizer for the contents of a character class. Whitespace in the template is for readability
// only and is stripped before compiling
const charClassUnionToken = new RegExp(String.raw`
\\(?: c[A-Za-z]
| p\{(?<pStrProp>${stringPropertyNames})\}
| [pP]\{[^\}]+\}
| (?<qStrProp>q)
| u(?:[A-Fa-f\d]{4}|\{[A-Fa-f\d]+\})
| x[A-Fa-f\d]{2}
| .
)
| --
| &&
| .
`.replace(/\s+/g, ''), 'gsu');
/**
Checks whether the contents of a character class contain a union. Flag v syntax is assumed, and
no attempt is made to detect the syntax errors that flag v itself reports.

The result is `true` when the pattern contains any of:
- `\p` (lowercase only) with the name of a property of strings (case sensitive).
- `\q`.
- Two single-char-matching tokens in a row.
- A single-char-matching token directly followed by an unescaped `[`.
- A single-char-matching token directly preceded by an unescaped `]`.

A range (`-`), subtraction (`--`), or intersection (`&&`) combines its operands into a single
token. Any depth of nested classes is supported.
@param {string} charClassPattern
@returns {boolean}
*/
function containsCharClassUnion(charClassPattern) {
  // Whether a single-char-matching token is waiting to be combined by an operator
  let operandPending = false;
  let previousToken;
  for (const tokenMatch of charClassPattern.matchAll(charClassUnionToken)) {
    const current = tokenMatch[0];
    const {pStrProp, qStrProp} = tokenMatch.groups;
    if (pStrProp || qStrProp) {
      return true;
    }
    switch (current) {
      case '[':
        if (operandPending) {
          return true;
        }
        break;
      case ']':
        break;
      case '-':
      case '--':
      case '&&':
        operandPending = false;
        break;
      default:
        if (operandPending || previousToken === ']') {
          return true;
        }
        operandPending = true;
    }
    previousToken = current;
  }
  return false;
}
/**
Tallies the capturing groups (named and unnamed) that are opened in `expression`.
@param {string} expression
@returns {number}
*/
function countCaptures(expression) {
  let total = 0;
  const tally = () => {
    total += 1;
  };
  forEachUnescaped(expression, capturingDelim, tally, Context.DEFAULT);
  return total;
}
/**
Backslash-escapes the characters of `str` that are special in the given context, assuming flag v.
@param {string} str String to escape
@param {'DEFAULT' | 'CHAR_CLASS'} context `Context` option from lib `regex-utilities`
@returns {string} Escaped string
*/
function escapeV(str, context) {
  // Within a character class, every double punctuator char is escaped (`^` included, which also
  // has a special meaning of its own in the first position), since the char might end up next to
  // an identical char located in or outside of the escaped string
  const specialChars = context === Context.CHAR_CLASS ?
    new RegExp(String.raw`[()\[\]{}|\\/\-${doublePunctuatorChars}]`, 'g') :
    /[()\[\]{}|\\^$*+?.]/g;
  return str.replace(specialChars, '\\$&');
}
// Finds a character in an interpolated value that would alter how the tokens following the value
// are interpreted. Returns that character, or an empty string if there isn't one
function getBreakoutChar(expression, regexContext, charClassContext) {
  const stripped = expression.replace(/\\./gsu, '');
  // A backslash that's left over must be a trailing unescaped `\`; checking `.includes('\\')`
  // would work as well
  if (stripped.endsWith('\\')) {
    return '\\';
  }
  if (regexContext === RegexContext.DEFAULT) {
    // Unbalanced `[` or `]` are errors too, but they don't break out; the wrapper catches them
    return getUnbalancedChar(stripped, '(', ')');
  }
  const inEnclosedCharClassToken = enclosedTokenCharClassContexts.has(charClassContext);
  if (regexContext === RegexContext.CHAR_CLASS && !inEnclosedCharClassToken) {
    return getUnbalancedChar(stripped, '[', ']');
  }
  const isClosedByBrace =
    regexContext === RegexContext.INTERVAL_QUANTIFIER ||
    enclosedTokenRegexContexts.has(regexContext) ||
    inEnclosedCharClassToken;
  if (isClosedByBrace) {
    return stripped.includes('}') ? '}' : '';
  }
  if (regexContext === RegexContext.GROUP_NAME && stripped.includes('>')) {
    return '>';
  }
  return '';
}
// Tokenizer for `getEndContextForIncompleteExpression`. The template is written across multiple
// lines for readability; all whitespace is stripped before it's compiled. The named groups flag
// tokens that open a new context:
// - groupN: Start of a group name, via `(?<` (but not lookbehind), `\g<`, or `\k<`
// - enclosedPU: `\p{`, `\P{`, or `\u{`
// - enclosedQ: `\q{`
// - intervalQ: `{`
// - incompleteT: A backslash at the end of the input, or an unfinished `\c`, `\u`, or `\x` escape
// Anything else is consumed as `--` or as a single character (optionally preceded by a backslash)
const contextToken = new RegExp(String.raw`
(?<groupN>\(\?<(?![=!])|\\[gk]<)
| (?<enclosedPU>\\[pPu]\{)
| (?<enclosedQ>\\q\{)
| (?<intervalQ>\{)
| (?<incompleteT>\\(?: $
| c(?![A-Za-z])
| u(?![A-Fa-f\d]{4})[A-Fa-f\d]{0,3}
| x(?![A-Fa-f\d]{2})[A-Fa-f\d]?
)
)
| --
| \\?.
`.replace(/\s+/g, ''), 'gsu');
/**
@typedef {{
regexContext: string;
charClassContext: string;
charClassDepth: number;
lastPos: number;
}} RunningContext
*/
/**
Determines the context that applies at the end of an incomplete regex pattern.
Accepts and returns its full state so it doesn't have to reprocess parts that have already been
seen. Assumes flag v and doesn't worry about syntax errors that are caught by it.
@param {string} incompleteExpression
@param {Partial<RunningContext>} [runningContext] State returned by a previous call; any missing
property falls back to its default (default contexts, depth 0, position 0).
@returns {RunningContext} The updated state, with `lastPos` set to the length of
`incompleteExpression`.
*/
function getEndContextForIncompleteExpression(incompleteExpression, runningContext) {
let {regexContext, charClassContext, charClassDepth, lastPos} = {
regexContext: RegexContext.DEFAULT,
charClassContext: CharClassContext.DEFAULT,
charClassDepth: 0,
lastPos: 0,
...runningContext,
};
// Resume tokenizing from where the previous call left off
contextToken.lastIndex = lastPos;
let match;
while (match = contextToken.exec(incompleteExpression)) {
const {0: m, groups: {groupN, enclosedPU, enclosedQ, intervalQ, incompleteT}} = match;
if (m === '[') {
// Opening a character class (possibly nested within another one)
charClassDepth++;
regexContext = RegexContext.CHAR_CLASS;
charClassContext = CharClassContext.DEFAULT;
} else if (m === ']' && regexContext === RegexContext.CHAR_CLASS) {
if (charClassDepth) {
charClassDepth--;
}
// Leave the character class context only once the outermost class has closed
if (!charClassDepth) {
regexContext = RegexContext.DEFAULT;
}
charClassContext = CharClassContext.DEFAULT;
} else if (regexContext === RegexContext.CHAR_CLASS) {
// Any other token while within a character class
if (incompleteT) {
charClassContext = CharClassContext.INVALID_INCOMPLETE_TOKEN;
} else if (m === '-') {
charClassContext = CharClassContext.RANGE;
} else if (enclosedPU) {
// `m[1]` is the letter that follows the backslash
charClassContext = m[1] === 'u' ? CharClassContext.ENCLOSED_U : CharClassContext.ENCLOSED_P;
} else if (enclosedQ) {
charClassContext = CharClassContext.ENCLOSED_Q;
} else if (
(m === '}' && enclosedTokenCharClassContexts.has(charClassContext)) ||
// Don't continue in these contexts since we've advanced another token
charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN ||
charClassContext === CharClassContext.RANGE
) {
charClassContext = CharClassContext.DEFAULT;
}
} else {
// Any other token while outside of character classes
if (incompleteT) {
regexContext = RegexContext.INVALID_INCOMPLETE_TOKEN;
} else if (groupN) {
regexContext = RegexContext.GROUP_NAME;
} else if (enclosedPU) {
// `m[1]` is the letter that follows the backslash
regexContext = m[1] === 'u' ? RegexContext.ENCLOSED_U : RegexContext.ENCLOSED_P;
} else if (intervalQ) {
regexContext = RegexContext.INTERVAL_QUANTIFIER;
} else if (
(m === '>' && regexContext === RegexContext.GROUP_NAME) ||
(m === '}' && (regexContext === RegexContext.INTERVAL_QUANTIFIER || enclosedTokenRegexContexts.has(regexContext))) ||
// Don't continue in this context since we've advanced another token
regexContext === RegexContext.INVALID_INCOMPLETE_TOKEN
) {
regexContext = RegexContext.DEFAULT;
}
}
}
return {
regexContext,
charClassContext,
charClassDepth,
lastPos: incompleteExpression.length,
};
}
// Scans `expression` for the first delimiter that breaks the balance of `leftChar`/`rightChar`
// pairs: returns `rightChar` as soon as one appears without a matching opener, `leftChar` if any
// opener is left unclosed at the end, and an empty string if everything is balanced. Escaped
// versions of the characters get no special handling
function getUnbalancedChar(expression, leftChar, rightChar) {
  const delimiters = new RegExp(`[${escapeV(leftChar + rightChar, Context.CHAR_CLASS)}]`, 'g');
  let depth = 0;
  for (const found of expression.matchAll(delimiters)) {
    if (found[0] === leftChar) {
      depth += 1;
    } else {
      depth -= 1;
      if (depth < 0) {
        return rightChar;
      }
    }
  }
  return depth > 0 ? leftChar : '';
}
/**
@typedef {import('./regex.js').InterpolatedValue} InterpolatedValue
@typedef {import('./regex.js').RawTemplate} RawTemplate
@typedef {import('./regex.js').RegexTagOptions} RegexTagOptions
@typedef {(
value: InterpolatedValue,
runningContext: RunningContext,
options: Required<RegexTagOptions>
) => {
transformed: string;
runningContext: RunningContext;
}} Preprocessor
*/
/**
Runs `preprocessor` over each raw template string and over each substitution that is an instance
of `Pattern`, threading the running context from one call to the next in source order. All other
substitutions are passed through as they are.
@param {RawTemplate} template
@param {ReadonlyArray<InterpolatedValue>} substitutions
@param {Preprocessor} preprocessor
@param {Required<RegexTagOptions>} options
@returns {{template: RawTemplate; substitutions: ReadonlyArray<InterpolatedValue>;}}
*/
function preprocess(template, substitutions, preprocessor, options) {
  const processedRaw = [];
  const processedSubstitutions = [];
  let runningContext;
  // Transforms one value and carries the resulting context over to the next call. Every value
  // is processed from its own start, hence `lastPos` is always reset
  const transform = (value) => {
    const outcome = preprocessor(value, {...runningContext, lastPos: 0}, options);
    runningContext = outcome.runningContext;
    return outcome.transformed;
  };
  const rawStrings = template.raw;
  for (let i = 0; i < rawStrings.length; i++) {
    processedRaw.push(transform(rawStrings[i]));
    // The final raw string has no substitution after it
    if (i >= rawStrings.length - 1) {
      continue;
    }
    const substitution = substitutions[i];
    if (substitution instanceof Pattern) {
      processedSubstitutions.push(pattern(transform(substitution)));
    } else {
      processedSubstitutions.push(substitution);
    }
  }
  return {
    template: /** @type {RawTemplate} */ ({raw: processedRaw}),
    substitutions: processedSubstitutions,
  };
}
// If `str` starts with `^`, sandbox that caret so it can't change the meaning of the surrounding
// character class when the value lands in the first position. The details of the approach are
// described at `sandboxLoneDoublePunctuatorChar`
function sandboxLoneCharClassCaret(str) {
  const leadingCaret = /^\^/;
  const sandboxedCaret = '\\^^';
  return str.replace(leadingCaret, sandboxedCaret);
}
// Sandboxes a leading double punctuator char (one that isn't already doubled) without simply
// escaping it: the char is repeated and only the first copy is escaped. The second copy ensures
// that, if the value is followed by the same symbol, the resulting double punctuator still throws
// as expected. Details:
// - Checking only the first position is enough because, if the char is part of an implicit union,
//   interpolation handling wraps the value in nested `[…]`.
// - Wrapping in nested `[…]` here isn't an option, since the value might be used in a range.
// - When a lone symbol is the entire string, the second unescaped copy can't be added: the value
//   might be followed by the same unescaped symbol outside the interpolation, and since the value
//   won't be wrapped, the second copy wouldn't be sandboxed from the symbol that follows it.
function sandboxLoneDoublePunctuatorChar(str) {
  const leadingLoneChar = new RegExp(`^([${doublePunctuatorChars}])(?!\\1)`);
  return str.replace(leadingLoneChar, (punctuator, _group, offset) => {
    const isEntireString = offset + 1 === str.length;
    return isEntireString ? `\\${punctuator}` : `\\${punctuator}${punctuator}`;
  });
}
/**
Rewrites `\0` tokens (when not followed by a digit) as `\x00` in the given context.
@param {string} str
@param {'DEFAULT' | 'CHAR_CLASS'} [context] `Context` option from lib `regex-utilities`
@returns {string}
*/
function sandboxUnsafeNulls(str, context) {
  // Nested `[…]` sandboxing in character classes can't protect against regex`[\0${0}]` and
  // regex`[${pattern`\0`}0]` when the interpolated value doesn't contain union (it might sit on a
  // range boundary), so in character classes `\0` is written as `\x00` instead
  const nullEscape = String.raw`\\0(?!\d)`;
  const hexNull = '\\x00';
  return replaceUnescaped(str, nullEscape, hexNull, context);
}
export {
adjustNumberedBackrefs,
capturingDelim,
CharClassContext,
containsCharClassUnion,
countCaptures,
doublePunctuatorChars,
enclosedTokenCharClassContexts,
enclosedTokenRegexContexts,
envSupportsFlagGroups,
envSupportsFlagV,
escapeV,
getBreakoutChar,
getEndContextForIncompleteExpression,
namedCapturingDelim,
preprocess,
RegexContext,
sandboxLoneCharClassCaret,
sandboxLoneDoublePunctuatorChar,
sandboxUnsafeNulls,
};