Refactor routing in App component to enhance navigation and improve error handling by integrating dynamic routes and updating the NotFound route.
This commit is contained in:
187
node_modules/regex/src/atomic.js
generated
vendored
Normal file
187
node_modules/regex/src/atomic.js
generated
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
import {emulationGroupMarker} from './subclass.js';
|
||||
import {noncapturingDelim, spliceStr} from './utils-internals.js';
|
||||
import {Context, replaceUnescaped} from 'regex-utilities';
|
||||
|
||||
// Tokenizer used by `atomic`. Each match is one of: a match of the imported `noncapturingDelim`
// pattern (group `noncapturingStart`), an opening `(` optionally followed by `?<name>` (group
// `capturingStart`), or any single character with an optional leading backslash. The regex is
// global, so `atomic` sets `lastIndex` itself before each scan
const atomicPluginToken = new RegExp(String.raw`(?<noncapturingStart>${noncapturingDelim})|(?<capturingStart>\((?:\?<[^>]+>)?)|\\?.`, 'gsu');
|
||||
|
||||
/**
Apply transformations for atomic groups: `(?>…)`.

Each atomic group `(?>…)` is rewritten as `(?:(?=(…))\N)`, where `N` is the number of the capturing
group added inside the lookahead. Numbered backreferences that were already in the expression are
renumbered afterward to account for the added captures.
@param {string} expression
@param {import('./regex.js').PluginData} [data] If `data.useEmulationGroups` is truthy, the
  emulation group marker is inserted at the start of each added capturing group.
@returns {string} The expression with atomic groups emulated, or the unchanged input if it doesn't
  contain `(?>`.
@throws {Error} If a numbered backreference is greater than the number of captures counted.
*/
function atomic(expression, data) {
  // Fast path: nothing to do if there's no atomic group opening anywhere
  if (!/\(\?>/.test(expression)) {
    return expression;
  }
  const aGDelim = '(?>';
  const emulatedAGDelim = `(?:(?=(${data?.useEmulationGroups ? emulationGroupMarker : ''}`;
  // Index: capture number as counted during the first pass (index 0 is a placeholder). Value: that
  // number plus the count of atomic groups processed when the capture was seen
  const captureNumMap = [0];
  let numCapturesBeforeAG = 0;
  let numAGs = 0;
  let aGPos = NaN;
  let hasProcessedAG;
  // First pass: replace one atomic group per iteration of the outer loop
  do {
    hasProcessedAG = false;
    let numCharClassesOpen = 0;
    let numGroupsOpenInAG = 0;
    let inAG = false;
    let match;
    // On the first iteration scan from the start; afterward resume just inside the group that was
    // most recently replaced
    atomicPluginToken.lastIndex = Number.isNaN(aGPos) ? 0 : aGPos + emulatedAGDelim.length;
    while (match = atomicPluginToken.exec(expression)) {
      const {0: m, index, groups: {capturingStart, noncapturingStart}} = match;
      if (m === '[') {
        numCharClassesOpen++;
      } else if (!numCharClassesOpen) {

        if (m === aGDelim && !inAG) {
          aGPos = index;
          inAG = true;
        } else if (inAG && noncapturingStart) {
          numGroupsOpenInAG++;
        } else if (capturingStart) {
          if (inAG) {
            numGroupsOpenInAG++;
          } else {
            numCapturesBeforeAG++;
            captureNumMap.push(numCapturesBeforeAG + numAGs);
          }
        } else if (m === ')' && inAG) {
          // A `)` with no nested groups open closes the atomic group itself
          if (!numGroupsOpenInAG) {
            numAGs++;
            // Replace `expression` and use `<$$N>` as a temporary wrapper for the backref so it
            // can avoid backref renumbering afterward. Need to wrap the whole substitution
            // (including the lookahead and following backref) in a noncapturing group to handle
            // following quantifiers and literal digits
            expression = `${expression.slice(0, aGPos)}${emulatedAGDelim}${
              expression.slice(aGPos + aGDelim.length, index)
            }))<$$${numAGs + numCapturesBeforeAG}>)${expression.slice(index + 1)}`;
            hasProcessedAG = true;
            break;
          }
          numGroupsOpenInAG--;
        }

      } else if (m === ']') {
        numCharClassesOpen--;
      }
    }
  // Start over from the beginning of the last atomic group's contents, in case the processed group
  // contains additional atomic groups
  } while (hasProcessedAG);

  // Second pass to adjust numbered backrefs
  expression = replaceUnescaped(
    expression,
    String.raw`\\(?<backrefNum>[1-9]\d*)|<\$\$(?<wrappedBackrefNum>\d+)>`,
    ({0: m, groups: {backrefNum, wrappedBackrefNum}}) => {
      if (backrefNum) {
        const bNum = +backrefNum;
        if (bNum > captureNumMap.length - 1) {
          throw new Error(`Backref "${m}" greater than number of captures`);
        }
        return `\\${captureNumMap[bNum]}`;
      }
      // Temporary `<$$N>` wrappers already hold the final number; just unwrap them
      return `\\${wrappedBackrefNum}`;
    },
    Context.DEFAULT
  );
  return expression;
}
|
||||
|
||||
// Source for a single greedy quantifier: `?`, `*`, `+`, `{N}`, `{N,}`, or `{N,M}`
const baseQuantifier = String.raw`(?:[?*+]|\{\d+(?:,\d*)?\})`;
// Cheap pre-check for a base quantifier followed by `+`; built once rather than on every call
const hasPossessiveQuantifier = new RegExp(`${baseQuantifier}\\+`);
// Complete tokenizer for base syntax; doesn't (need to) know about character-class-only syntax
const possessivePluginToken = new RegExp(String.raw`
\\(?: \d+
  | c[A-Za-z]
  | [gk]<[^>]+>
  | [pPu]\{[^\}]+\}
  | u[A-Fa-f\d]{4}
  | x[A-Fa-f\d]{2}
  )
| \((?: \? (?: [:=!>]
  | <(?:[=!]|[^>]+>)
  | [A-Za-z\-]+:
  | \(DEFINE\)
  ))?
| (?<qBase>${baseQuantifier})(?<qMod>[?+]?)(?<invalidQ>[?*+\{]?)
| \\?.
`.replace(/\s+/g, ''), 'gsu');

/**
Transform possessive quantifiers into atomic groups. The possessive quantifiers are:
`?+`, `*+`, `++`, `{N}+`, `{N,}+`, `{N,N}+`.
This follows Java, PCRE, Perl, and Python.
Possessive quantifiers in Oniguruma and Onigmo are only: `?+`, `*+`, `++`.
@param {string} expression
@returns {string} The expression with each possessively quantified token wrapped as `(?>…)` with
  its greedy quantifier, or the unchanged input if no candidate quantifier is found.
@throws {Error} If a possessive quantifier is directly followed by another quantifier character, or
  if it follows an unmatched `)` or `]`.
*/
function possessive(expression) {
  if (!hasPossessiveQuantifier.test(expression)) {
    return expression;
  }
  const openGroupIndices = [];
  let lastGroupIndex = null;
  let lastCharClassIndex = null;
  let lastToken = '';
  let numCharClassesOpen = 0;
  let match;
  // The tokenizer is global and shared, so always start scanning from the beginning
  possessivePluginToken.lastIndex = 0;
  while (match = possessivePluginToken.exec(expression)) {
    const {0: m, index, groups: {qBase, qMod, invalidQ}} = match;
    if (m === '[') {
      // Remember only the start of the outermost class
      if (!numCharClassesOpen) {
        lastCharClassIndex = index;
      }
      numCharClassesOpen++;
    } else if (m === ']') {
      if (numCharClassesOpen) {
        numCharClassesOpen--;
      // Unmatched `]`
      } else {
        lastCharClassIndex = null;
      }
    } else if (!numCharClassesOpen) {

      if (qMod === '+' && lastToken && !lastToken.startsWith('(')) {
        // Invalid following quantifier would become valid via the wrapping group
        if (invalidQ) {
          throw new Error(`Invalid quantifier "${m}"`);
        }
        let charsAdded = -1; // -1 for removed trailing `+`
        // Possessivizing fixed repetition quantifiers like `{2}` doesn't change their behavior, so
        // avoid doing so (convert them to greedy)
        if (/^\{\d+\}$/.test(qBase)) {
          expression = spliceStr(expression, index + qBase.length, qMod, '');
        } else {
          if (lastToken === ')' || lastToken === ']') {
            const nodeIndex = lastToken === ')' ? lastGroupIndex : lastCharClassIndex;
            // Unmatched `)` would break out of the wrapping group and mess with handling.
            // Unmatched `]` wouldn't be a problem, but it's unnecessary to have dedicated support
            // for unescaped `]++` since this won't work with flag u or v anyway
            if (nodeIndex === null) {
              throw new Error(`Invalid unmatched "${lastToken}"`);
            }
            // Wrap the whole preceding group or character class
            expression = `${expression.slice(0, nodeIndex)}(?>${expression.slice(nodeIndex, index)}${qBase})${expression.slice(index + m.length)}`;
          } else {
            // Wrap just the preceding token
            expression = `${expression.slice(0, index - lastToken.length)}(?>${lastToken}${qBase})${expression.slice(index + m.length)}`;
          }
          charsAdded += 4; // `(?>)`
        }
        // Keep the scan position aligned with the rewritten expression
        possessivePluginToken.lastIndex += charsAdded;
      } else if (m[0] === '(') {
        openGroupIndices.push(index);
      } else if (m === ')') {
        lastGroupIndex = openGroupIndices.length ? openGroupIndices.pop() : null;
      }

    }
    lastToken = m;
  }
  return expression;
}
|
||||
|
||||
export {
|
||||
atomic,
|
||||
possessive,
|
||||
};
|
59
node_modules/regex/src/backcompat.js
generated
vendored
Normal file
59
node_modules/regex/src/backcompat.js
generated
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
import {doublePunctuatorChars} from './utils.js';
|
||||
|
||||
// Characters captured by `vOnlyEscape` when escaped; `backcompatPlugin` drops their backslash
// inside character classes
const incompatibleEscapeChars = '&!#%,:;<=>@`~';
// Tokenizer for `backcompatPlugin`. Whitespace in the template is stripped before compiling, so
// the layout below is for readability only
const token = new RegExp(String.raw`
\[\^?-?
| --?\]
| (?<dp>[${doublePunctuatorChars}])\k<dp>
| --
| \\(?<vOnlyEscape>[${incompatibleEscapeChars}])
| \\[pPu]\{[^}]+\}
| \\?.
`.replace(/\s+/g, ''), 'gsu');
|
||||
|
||||
/**
Applies flag v rules when using flag u, for forward compatibility.
Assumes flag u and doesn't worry about syntax errors that are caught by it.
@param {string} expression
@returns {string} The expression with escapes of v-only escapable characters unescaped inside
  character classes; all other tokens are passed through unchanged.
@throws {Error} On a nested character class, an unescaped `-` at a character class boundary, a set
  operator or doubled punctuator inside a character class, or an unescaped one of `(){}/|` inside
  a character class.
*/
function backcompatPlugin(expression) {
  const unescapedLiteralHyphenMsg = 'Invalid unescaped "-" in character class';
  let inCharClass = false;
  let result = '';
  for (const {0: m, groups: {dp, vOnlyEscape}} of expression.matchAll(token)) {
    if (m[0] === '[') {
      // Character class opening, possibly with `^` and/or a leading `-`
      if (inCharClass) {
        throw new Error('Invalid nested character class when flag v not supported; possibly from interpolation');
      }
      if (m.endsWith('-')) {
        throw new Error(unescapedLiteralHyphenMsg);
      }
      inCharClass = true;
    } else if (m.endsWith(']')) {
      // Character class closing, possibly with a trailing `-` before the `]`
      if (m[0] === '-') {
        throw new Error(unescapedLiteralHyphenMsg);
      }
      inCharClass = false;
    } else if (inCharClass) {
      if (m === '&&' || m === '--') {
        throw new Error(`Invalid set operator "${m}" when flag v not supported`);
      } else if (dp) {
        throw new Error(`Invalid double punctuator "${m}", reserved by flag v`);
      } else if ('(){}/|'.includes(m)) {
        throw new Error(`Invalid unescaped "${m}" in character class`);
      } else if (vOnlyEscape) {
        // Remove the escaping backslash to emulate flag v rules, since this character is allowed
        // to be escaped within character classes with flag v but not with flag u
        result += vOnlyEscape;
        continue;
      }
    }
    result += m;
  }
  return result;
}
|
||||
|
||||
export {
|
||||
backcompatPlugin,
|
||||
};
|
46
node_modules/regex/src/flag-n.js
generated
vendored
Normal file
46
node_modules/regex/src/flag-n.js
generated
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
import {getEndContextForIncompleteExpression, RegexContext} from './utils.js';
|
||||
import {noncapturingDelim} from './utils-internals.js';
|
||||
|
||||
// Tokenizer for `flagNPreprocessor`. Whitespace in the template is stripped before compiling. A
// bare `(` is only produced by the final alternative, since the group openings matched by
// `noncapturingDelim` and `(?<` are tried first
const token = new RegExp(String.raw`
${noncapturingDelim}
| \(\?<
| (?<backrefNum>\\[1-9]\d*)
| \\?.
`.replace(/\s+/g, ''), 'gsu');
|
||||
|
||||
/**
Apply transformations for flag n (named capture only).

Preprocessors are applied to the outer regex and interpolated patterns, but not interpolated
regexes or strings.

In the default regex context, a bare `(` is rewritten to `(?:` and a numbered backreference throws.
Tokens in any other context are copied through unchanged.
@type {import('./utils.js').Preprocessor}
*/
function flagNPreprocessor(value, runningContext) {
  value = String(value);
  // Untransformed text consumed so far; used only to compute the running context
  let expression = '';
  let transformed = '';
  for (const {0: m, groups: {backrefNum}} of value.matchAll(token)) {
    expression += m;
    runningContext = getEndContextForIncompleteExpression(expression, runningContext);
    const {regexContext} = runningContext;
    if (regexContext === RegexContext.DEFAULT) {
      if (m === '(') {
        transformed += '(?:';
      } else if (backrefNum) {
        throw new Error(`Invalid decimal escape "${m}" with implicit flag n; replace with named backreference`);
      } else {
        transformed += m;
      }
    } else {
      transformed += m;
    }
  }
  return {
    transformed,
    runningContext,
  };
}
|
||||
|
||||
export {
|
||||
flagNPreprocessor,
|
||||
};
|
195
node_modules/regex/src/flag-x.js
generated
vendored
Normal file
195
node_modules/regex/src/flag-x.js
generated
vendored
Normal file
@@ -0,0 +1,195 @@
|
||||
import {emulationGroupMarker} from './subclass.js';
|
||||
import {CharClassContext, doublePunctuatorChars, getEndContextForIncompleteExpression, RegexContext, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
|
||||
import {noncapturingDelim} from './utils-internals.js';
|
||||
import {Context, replaceUnescaped} from 'regex-utilities';
|
||||
|
||||
// Single-token tests used by `flagXPreprocessor`; each is anchored to match one whole token
// Any one whitespace character
const ws = /^\s$/;
// A backslash followed by one whitespace character or `#`
const escapedWsOrHash = /^\\[\s#]$/;
// One space or tab
const charClassWs = /^[ \t]$/;
// A backslash followed by one space or tab
const escapedCharClassWs = /^\\[ \t]$/;
|
||||
// Tokenizer for `flagXPreprocessor`. Whitespace in the template is stripped before compiling, so
// the layout below is for readability only
const token = new RegExp(String.raw`
\\(?: [gk]<
  | [pPu]\{
  | c[A-Za-z]
  | u[A-Fa-f\d]{4}
  | x[A-Fa-f\d]{2}
  | 0\d+
  )
| \[\^
| ${noncapturingDelim}
| \(\?<
| (?<dp>[${doublePunctuatorChars}])\k<dp>
| --
| \\?.
`.replace(/\s+/g, ''), 'gsu');
|
||||
|
||||
/**
Apply transformations for flag x (insignificant whitespace and line comments).

Preprocessors are applied to the outer regex and interpolated patterns, but not interpolated
regexes or strings.

Removed whitespace and comments are replaced by a `(?:)` separator before the next token where
needed, so that tokens on either side of the removed text don't merge.
@type {import('./utils.js').Preprocessor}
*/
function flagXPreprocessor(value, runningContext, options) {
  value = String(value);
  let ignoringWs = false;
  let ignoringCharClassWs = false;
  let ignoringComment = false;
  // Significant (non-skipped) tokens consumed so far; used only to compute the running context
  let expression = '';
  let transformed = '';
  let lastSignificantToken = '';
  let lastSignificantCharClassContext = '';
  let separatorNeeded = false;
  // Returns `str` with a `(?:)` separator prepended if one is pending and `prefix` is enabled, and
  // appended if `postfix` is enabled. Always clears the pending-separator state
  const update = (str, updateOptions) => {
    const opts = {
      prefix: true,
      postfix: false,
      ...updateOptions,
    };
    str = (separatorNeeded && opts.prefix ? '(?:)' : '') + str + (opts.postfix ? '(?:)' : '');
    separatorNeeded = false;
    return str;
  };
  for (const {0: m, index} of value.matchAll(token)) {
    // Inside a line comment: skip everything up to and including the newline
    if (ignoringComment) {
      if (m === '\n') {
        ignoringComment = false;
        separatorNeeded = true;
      }
      continue;
    }
    if (ignoringWs) {
      if (ws.test(m)) {
        continue;
      }
      ignoringWs = false;
      separatorNeeded = true;
    } else if (ignoringCharClassWs) {
      if (charClassWs.test(m)) {
        continue;
      }
      ignoringCharClassWs = false;
    }

    expression += m;
    runningContext = getEndContextForIncompleteExpression(expression, runningContext);
    const {regexContext, charClassContext} = runningContext;
    if (
      // `--` is matched in one step, so boundary chars aren't `-` unless separated by whitespace
      m === '-' &&
      regexContext === RegexContext.CHAR_CLASS &&
      lastSignificantCharClassContext === CharClassContext.RANGE &&
      (options.flags.includes('v') || options.unicodeSetsPlugin)
    ) {
      // Need to handle this here since the main regex-parsing code would think the hyphen forms
      // part of a subtraction operator since we've removed preceding whitespace
      throw new Error('Invalid unescaped hyphen as the end value for a range');
    }
    if (
      // `??` is matched in one step by the double punctuator token
      (regexContext === RegexContext.DEFAULT && /^(?:[?*+]|\?\?)$/.test(m)) ||
      (regexContext === RegexContext.INTERVAL_QUANTIFIER && m === '{')
    ) {
      // Skip the separator prefix and connect the quantifier to the previous token. This also
      // allows whitespace between a quantifier and the `?` that makes it lazy. Add a postfix
      // separator if `m` is `?` and we're following token `(`, to sandbox the `?` from following
      // tokens (since `?` can be a group-type marker). Ex: `( ?:)` becomes `(?(?:):)` and throws.
      // The loop we're in matches valid group openings in one step, so we won't arrive here if
      // matching e.g. `(?:`. Flag n could prevent the need for the postfix since bare `(` is
      // converted to `(?:`, but flag x handling always comes first and flag n can be turned off
      transformed += update(m, {prefix: false, postfix: lastSignificantToken === '(' && m === '?'});
    } else if (regexContext === RegexContext.DEFAULT) {
      if (ws.test(m)) {
        ignoringWs = true;
      } else if (m.startsWith('#')) {
        ignoringComment = true;
      } else if (escapedWsOrHash.test(m)) {
        // Escaped whitespace or `#` is significant; emit the character without its backslash
        transformed += update(m[1], {prefix: false});
      } else {
        transformed += update(m);
      }
    } else if (regexContext === RegexContext.CHAR_CLASS && m !== '[' && m !== '[^') {
      if (
        charClassWs.test(m) &&
        ( charClassContext === CharClassContext.DEFAULT ||
          charClassContext === CharClassContext.ENCLOSED_Q ||
          charClassContext === CharClassContext.RANGE
        )
      ) {
        ignoringCharClassWs = true;
      } else if (charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN) {
        // Need to handle this here since the main regex-parsing code wouldn't know where the token
        // ends if we removed whitespace after an incomplete token that is followed by something
        // that completes the token
        throw new Error(`Invalid incomplete token in character class: "${m}"`);
      } else if (
        escapedCharClassWs.test(m) &&
        (charClassContext === CharClassContext.DEFAULT || charClassContext === CharClassContext.ENCLOSED_Q)
      ) {
        // Escaped space or tab is significant; emit the character without its backslash
        transformed += update(m[1], {prefix: false});
      } else if (charClassContext === CharClassContext.DEFAULT) {
        const nextChar = value[index + 1] ?? '';
        let updated = sandboxUnsafeNulls(m);
        // Avoid escaping lone double punctuators unless required, since some of them are not
        // allowed to be escaped with flag u (the `unicodeSetsPlugin` already unescapes them when
        // using flag u, but it can be set to `null` via an option)
        if (charClassWs.test(nextChar) || m === '^') {
          updated = sandboxLoneDoublePunctuatorChar(updated);
        }
        transformed += update(updated);
      } else {
        transformed += update(m);
      }
    } else {
      transformed += update(m);
    }
    // Only tokens that weren't the start of skipped whitespace or a comment count as significant
    if (!(ignoringWs || ignoringCharClassWs || ignoringComment)) {
      lastSignificantToken = m;
      lastSignificantCharClassContext = charClassContext;
    }
  }
  return {
    transformed,
    runningContext,
  };
}
|
||||
|
||||
/**
Remove `(?:)` token separators (most likely added by flag x) in cases where it's safe to do so.
@param {string} expression
@returns {string} The expression with repeated separators collapsed to one and removable
  separators deleted.
*/
function clean(expression) {
  // Regex source that matches a literal `(?:)`
  const sep = String.raw`\(\?:\)`;
  // No need for repeated separators
  expression = replaceUnescaped(expression, `(?:${sep}){2,}`, '(?:)', Context.DEFAULT);
  // No need for separators at:
  // - The beginning, if not followed by a quantifier.
  // - The end.
  // - Outside of character classes:
  //   - If followed by one of `)|.[$\\`, or `(` if that's not followed by `DEFINE)`.
  //     - Technically we shouldn't remove `(?:)` if preceded by `(?(DEFINE` and followed by `)`,
  //       but in this case flag x injects a sandboxing `(?:)` after the preceding invalid `(?`,
  //       so we already get an error from that.
  //   - If preceded by one of `()|.]^>`, `\\[bBdDfnrsStvwW]`, `(?:`, or a lookaround opening.
  //     - So long as the separator is not followed by a quantifier.
  //   - And, not followed by an emulation group marker.
  // Examples of things that are not safe to remove `(?:)` at the boundaries of:
  // - Anywhere: Letters, numbers, or any of `-=_,<?*+{}`.
  // - If followed by any of `:!>`.
  // - If preceded by any of `\\[cgkpPux]`.
  // - Anything inside character classes.
  // Escape `$` so the marker can be embedded in the regex source below
  const marker = emulationGroupMarker.replace(/\$/g, '\\$');
  expression = replaceUnescaped(
    expression,
    String.raw`(?:${sep}(?=[)|.[$\\]|\((?!DEFINE)|$)|(?<=[()|.\]^>]|\\[bBdDfnrsStvwW]|\(\?(?:[:=!]|<[=!])|^)${sep}(?![?*+{]))(?!${marker})`,
    '',
    Context.DEFAULT
  );
  return expression;
}
|
||||
|
||||
export {
|
||||
clean,
|
||||
flagXPreprocessor,
|
||||
};
|
2
node_modules/regex/src/internals.js
generated
vendored
Normal file
2
node_modules/regex/src/internals.js
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export {atomic, possessive} from './atomic.js';
|
||||
export {emulationGroupMarker, RegExpSubclass} from './subclass.js';
|
45
node_modules/regex/src/pattern.js
generated
vendored
Normal file
45
node_modules/regex/src/pattern.js
generated
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
/**
Wrapper around a value, used to mark it as a pattern. The wrapped value is private and is only
exposed through `toString`.
*/
class Pattern {
  #value;
  /** @param {string} value */
  constructor(value) {
    this.#value = value;
  }
  /** @returns {string} The wrapped value coerced to a string. */
  toString() {
    return String(this.#value);
  }
}
|
||||
|
||||
/**
Returns a value that can be interpolated into a `regex` template string without having its special
characters escaped.

Can be called as a function or template tag:
- `pattern(value)` - String or value coerced to string.
- `` pattern`…` `` - Same as ``pattern(String.raw`…`)``.

@overload
@param {string | number} value
@returns {Pattern}

@overload
@param {TemplateStringsArray} template
@param {...string} substitutions
@returns {Pattern}

@throws {Error} If called with more than one argument when the first is not a template.
*/
function pattern(first, ...substitutions) {
  // Template tag call: the first argument has a `raw` array
  if (Array.isArray(first?.raw)) {
    return new Pattern(
      // Intersperse raw template strings and substitutions
      first.raw.flatMap((raw, i) => i < first.raw.length - 1 ? [raw, substitutions[i]] : raw).join('')
    );
  // Plain function call with zero or one argument
  } else if (!substitutions.length) {
    return new Pattern(first === undefined ? '' : first);
  }
  throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...substitutions])}`);
}
|
||||
|
||||
export {
|
||||
Pattern,
|
||||
pattern,
|
||||
};
|
352
node_modules/regex/src/regex.js
generated
vendored
Normal file
352
node_modules/regex/src/regex.js
generated
vendored
Normal file
@@ -0,0 +1,352 @@
|
||||
import {atomic, possessive} from './atomic.js';
|
||||
import {backcompatPlugin} from './backcompat.js';
|
||||
import {flagNPreprocessor} from './flag-n.js';
|
||||
import {clean, flagXPreprocessor} from './flag-x.js';
|
||||
import {Pattern, pattern} from './pattern.js';
|
||||
import {RegExpSubclass} from './subclass.js';
|
||||
import {subroutines} from './subroutines.js';
|
||||
import {adjustNumberedBackrefs, CharClassContext, containsCharClassUnion, countCaptures, enclosedTokenCharClassContexts, enclosedTokenRegexContexts, envSupportsFlagGroups, envSupportsFlagV, escapeV, getBreakoutChar, getEndContextForIncompleteExpression, preprocess, RegexContext, sandboxLoneCharClassCaret, sandboxLoneDoublePunctuatorChar, sandboxUnsafeNulls} from './utils.js';
|
||||
import {Context, hasUnescaped, replaceUnescaped} from 'regex-utilities';
|
||||
|
||||
/**
@typedef {string | RegExp | Pattern | number} InterpolatedValue
@typedef {{
  flags?: string;
  useEmulationGroups?: boolean;
}} PluginData
@typedef {TemplateStringsArray | {raw: Array<string>}} RawTemplate
@typedef {{
  flags?: string;
  subclass?: boolean;
  plugins?: Array<(expression: string, data: PluginData) => string>;
  unicodeSetsPlugin?: ((expression: string, data: PluginData) => string) | null;
  disable?: {
    x?: boolean;
    n?: boolean;
    v?: boolean;
    atomic?: boolean;
    subroutines?: boolean;
  };
  force?: {
    v?: boolean;
  };
}} RegexTagOptions
*/
|
||||
/**
@template T
@typedef RegexTag
@type {{
  (template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>): T;
  (flags?: string): RegexTag<T>;
  (options: RegexTagOptions & {subclass?: false}): RegexTag<T>;
  (options: RegexTagOptions & {subclass: true}): RegexTag<RegExpSubclass>;
}}
*/
|
||||
/**
Template tag for constructing a regex with extended syntax and context-aware interpolation of
regexes, strings, and patterns.

Can be called in several ways:
1. `` regex`…` `` - Regex pattern as a raw string.
2. `` regex('gi')`…` `` - To specify flags.
3. `` regex({flags: 'gi'})`…` `` - With options.

Forms 2 and 3 return a new tag function with the flags or options bound; form 1 builds the regex
directly using default options. Any other argument combination throws.
@type {RegexTag<RegExp>}
*/
const regex = (first, ...substitutions) => {
  // Given a template
  if (Array.isArray(first?.raw)) {
    return regexFromTemplate({}, first, ...substitutions);
  // Given flags
  } else if ((typeof first === 'string' || first === undefined) && !substitutions.length) {
    return regexFromTemplate.bind(null, {flags: first ?? ''});
  // Given an options object
  } else if ({}.toString.call(first) === '[object Object]' && !substitutions.length) {
    return regexFromTemplate.bind(null, first);
  }
  throw new Error(`Unexpected arguments: ${JSON.stringify([first, ...substitutions])}`);
};
|
||||
|
||||
/**
@template T
@typedef RegexFromTemplate
@type {{
  (options: RegexTagOptions, template: RawTemplate, ...substitutions: ReadonlyArray<InterpolatedValue>) : T;
}}
*/
|
||||
/**
Returns a RegExp from a template and substitutions to fill the template holes.

Runs the preprocessors over the template, interpolates each substitution according to the syntax
context at its position, runs the plugins over the combined expression, and then constructs the
regex. If construction fails, the error is rethrown with the generated source appended to its
message.
@type {RegexFromTemplate<RegExp>}
*/
const regexFromTemplate = (options, template, ...substitutions) => {
  const opts = getOptions(options);
  const prepped = handlePreprocessors(template, substitutions, opts);

  // Number of captures in the expression built so far; passed to `interpolate`
  let precedingCaptures = 0;
  let expression = '';
  let runningContext;
  // Intersperse raw template strings and substitutions
  prepped.template.raw.forEach((raw, i) => {
    // True if the raw string before or after this substitution is nonempty
    const wrapEscapedStr = !!(prepped.template.raw[i] || prepped.template.raw[i + 1]);
    // Even with flag n enabled, we might have named captures
    precedingCaptures += countCaptures(raw);
    // Sandbox `\0` in character classes. Not needed outside character classes because in other
    // cases a following interpolated value would always be atomized
    expression += sandboxUnsafeNulls(raw, Context.CHAR_CLASS);
    runningContext = getEndContextForIncompleteExpression(expression, runningContext);
    const {regexContext, charClassContext} = runningContext;
    // Every raw string except the last is followed by a substitution
    if (i < prepped.template.raw.length - 1) {
      const substitution = prepped.substitutions[i];
      expression += interpolate(substitution, opts.flags, regexContext, charClassContext, wrapEscapedStr, precedingCaptures);
      if (substitution instanceof RegExp) {
        precedingCaptures += countCaptures(substitution.source);
      } else if (substitution instanceof Pattern) {
        precedingCaptures += countCaptures(String(substitution));
      }
    }
  });

  expression = handlePlugins(expression, opts);
  try {
    return opts.subclass ?
      new RegExpSubclass(expression, opts.flags, {useEmulationGroups: true}) :
      new RegExp(expression, opts.flags);
  } catch (err) {
    // Improve DX by always including the generated source in the error message. Some browsers
    // include it automatically, but not Firefox or Safari
    const stripped = err.message.replace(/ \/.+\/[a-z]*:/, '');
    err.message = `${stripped}: /${expression}/${opts.flags}`;
    throw err;
  }
};
|
||||
|
||||
/**
Returns the processed expression and flags as strings.

The expression is run through the preprocessors and plugins as a template with a single raw
string and no substitutions.
@param {string} expression
@param {RegexTagOptions} [options]
@returns {{expression: string; flags: string;}}
@throws {Error} If option `subclass` is enabled.
*/
function rewrite(expression = '', options) {
  const opts = getOptions(options);
  if (opts.subclass) {
    // Don't allow including emulation group markers in output
    throw new Error('Cannot use option subclass');
  }
  return {
    expression: handlePlugins(
      handlePreprocessors({raw: [expression]}, [], opts).template.raw[0],
      opts
    ),
    flags: opts.flags,
  };
}
|
||||
|
||||
/**
Returns a complete set of options, with default values set for options that weren't provided, and
some options augmented for use.

Flag `v` or `u` is appended to `flags`, and `unicodeSetsPlugin` is set to `null` when flag `v` is
used. The provided `options` object itself is not modified.
@param {RegexTagOptions} [options]
@returns {Required<RegexTagOptions>}
@throws {Error} If `flags` includes any of the implicit flags `n`, `u`, `v`, or `x`.
*/
function getOptions(options) {
  const opts = {
    flags: '',
    subclass: false,
    plugins: [],
    unicodeSetsPlugin: backcompatPlugin,
    disable: {/* n, v, x, atomic, subroutines */},
    force: {/* v */},
    ...options,
  };
  if (/[nuvx]/.test(opts.flags)) {
    throw new Error('Implicit flags v/u/x/n cannot be explicitly added');
  }
  // `force.v` wins over `disable.v`; otherwise fall back to what the environment supports
  const useFlagV = opts.force.v || (opts.disable.v ? false : envSupportsFlagV);
  opts.flags += useFlagV ? 'v' : 'u';
  if (useFlagV) {
    opts.unicodeSetsPlugin = null;
  }
  return opts;
}
|
||||
|
||||
/**
Runs the enabled preprocessors (flag x, then flag n) over the template and substitutions, feeding
the output of each preprocessor into the next.
@param {RawTemplate} template
@param {ReadonlyArray<InterpolatedValue>} substitutions
@param {Required<RegexTagOptions>} options
@returns {{
  template: RawTemplate;
  substitutions: ReadonlyArray<InterpolatedValue>;
}}
*/
function handlePreprocessors(template, substitutions, options) {
  const preprocessors = [];
  // Implicit flag x is handled first because otherwise some regex syntax (if unescaped) within
  // comments could cause problems when parsing
  if (!options.disable.x) {
    preprocessors.push(flagXPreprocessor);
  }
  // Implicit flag n is a preprocessor because capturing groups affect backreference rewriting in
  // both interpolation and plugins
  if (!options.disable.n) {
    preprocessors.push(flagNPreprocessor);
  }
  for (const pp of preprocessors) {
    ({template, substitutions} = preprocess(template, substitutions, pp, options));
  }
  return {
    template,
    substitutions,
  };
}
|
||||
|
||||
/**
Runs the provided and built-in plugins over the expression in order, passing the output of each
plugin to the next. Every plugin is called with the expression and a data object holding `flags`
and `useEmulationGroups` (taken from option `subclass`).
@param {string} expression
@param {Required<RegexTagOptions>} options
@returns {string}
*/
function handlePlugins(expression, options) {
  const {flags, plugins, unicodeSetsPlugin, disable, subclass} = options;
  const pipeline = [
    ...plugins, // Run first, so provided plugins can output extended syntax
    ...(disable.subroutines ? [] : [subroutines]),
    ...(disable.atomic ? [] : [possessive, atomic]),
    ...(disable.x ? [] : [clean]),
    // Run last, so it doesn't have to worry about parsing extended syntax
    ...(!unicodeSetsPlugin ? [] : [unicodeSetsPlugin]),
  ];
  for (const plugin of pipeline) {
    expression = plugin(expression, {flags, useEmulationGroups: subclass});
  }
  return expression;
}
|
||||
|
||||
/**
Converts one interpolated value into regex source text suited to the syntax context it is embedded
in. Strings and other non-pattern values are escaped, patterns are inserted as regex syntax, and
`RegExp` instances are adapted to the outer flags and capture numbering.
@param {InterpolatedValue} value
@param {string} flags Flags of the outer regex.
@param {string} regexContext A `RegexContext` value for the interpolation position.
@param {string} charClassContext A `CharClassContext` value for the interpolation position.
@param {boolean} wrapEscapedStr Whether an escaped string in the default context is wrapped in
`(?:…)`.
@param {number} precedingCaptures Offset added to numbered backreferences of an interpolated
`RegExp`.
@returns {string}
@throws {Error} If a `RegExp` is interpolated outside the default context, if the interpolation
follows an incomplete token, if the value contains an unescaped breakout character, or if a pattern
in a character class starts or ends with a range or set operator.
*/
function interpolate(value, flags, regexContext, charClassContext, wrapEscapedStr, precedingCaptures) {
  if (value instanceof RegExp && regexContext !== RegexContext.DEFAULT) {
    throw new Error('Cannot interpolate a RegExp at this position because the syntax context does not match');
  }
  if (regexContext === RegexContext.INVALID_INCOMPLETE_TOKEN || charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN) {
    // Throw in all cases, but only *need* to handle a preceding unescaped backslash (which would
    // break sandboxing) since other errors would be handled by the invalid generated regex syntax
    throw new Error('Interpolation preceded by invalid incomplete token');
  }
  if (
    typeof value === 'number' &&
    (regexContext === RegexContext.ENCLOSED_U || charClassContext === CharClassContext.ENCLOSED_U)
  ) {
    // Numbers inside `\u{…}` are emitted as hexadecimal
    return value.toString(16);
  }
  const isPattern = value instanceof Pattern;
  let escapedValue = '';
  if (!(value instanceof RegExp)) {
    value = String(value);
    if (!isPattern) {
      escapedValue = escapeV(
        value,
        regexContext === RegexContext.CHAR_CLASS ? Context.CHAR_CLASS : Context.DEFAULT
      );
    }
    // Check `escapedValue` (not just patterns) since possible breakout char `>` isn't escaped
    const breakoutChar = getBreakoutChar(escapedValue || value, regexContext, charClassContext);
    if (breakoutChar) {
      throw new Error(`Unescaped stray "${breakoutChar}" in the interpolated value would have side effects outside it`);
    }
  }

  if (
    regexContext === RegexContext.INTERVAL_QUANTIFIER ||
    regexContext === RegexContext.GROUP_NAME ||
    enclosedTokenRegexContexts.has(regexContext) ||
    enclosedTokenCharClassContexts.has(charClassContext)
  ) {
    // Inside an enclosed token the value is inserted without any wrapper
    return isPattern ? String(value) : escapedValue;
  } else if (regexContext === RegexContext.CHAR_CLASS) {
    if (isPattern) {
      if (hasUnescaped(String(value), '^-|^&&|-$|&&$')) {
        // Sandboxing so we don't change the chars outside the pattern into being part of an
        // operation they didn't initiate. Same problem as starting a pattern with a quantifier
        throw new Error('Cannot use range or set operator at boundary of interpolated pattern; move the operation into the pattern or the operator outside of it');
      }
      const sandboxedValue = sandboxLoneCharClassCaret(sandboxLoneDoublePunctuatorChar(value));
      // Atomize via nested character class `[…]` if it contains implicit or explicit union (check
      // the unadjusted value)
      return containsCharClassUnion(value) ? `[${sandboxedValue}]` : sandboxUnsafeNulls(sandboxedValue);
    }
    // Atomize via nested character class `[…]` if more than one node
    return containsCharClassUnion(escapedValue) ? `[${escapedValue}]` : escapedValue;
  }
  // `RegexContext.DEFAULT`
  if (value instanceof RegExp) {
    const transformed = transformForLocalFlags(value, flags);
    const backrefsAdjusted = adjustNumberedBackrefs(transformed.value, precedingCaptures);
    // Sandbox and atomize; if we used a pattern modifier it has the same effect
    return transformed.usedModifier ? backrefsAdjusted : `(?:${backrefsAdjusted})`;
  }
  if (isPattern) {
    // Sandbox and atomize
    return `(?:${value})`;
  }
  // Sandbox and atomize
  return wrapEscapedStr ? `(?:${escapedValue})` : escapedValue;
}
|
||||
|
||||
/**
Rewrites the source of an interpolated regex so that its own `i`, `m`, and `s` flags keep applying
to it even when the outer regex uses different flags. When the environment supports flag groups,
the differences are expressed with a pattern modifier `(?ims-ims:…)`; otherwise `s` and `m` are
emulated by rewriting `.`, `^`, and `$`, and a mismatch of `i` is an error.
@param {RegExp} re
@param {string} outerFlags
@returns {{value: string; usedModifier?: boolean;}} `usedModifier` is only set (to `true`) when
`value` was wrapped in a pattern modifier group.
@throws {Error} If flag `i` differs and pattern modifiers are not supported.
*/
function transformForLocalFlags(re, outerFlags) {
  // `true`: flag must be turned on locally; `false`: turned off locally; `null`: no change needed
  /** @type {{i: boolean | null; m: boolean | null; s: boolean | null;}} */
  const modFlagsObj = {
    i: null,
    m: null,
    s: null,
  };
  const newlines = '\\n\\r\\u2028\\u2029';
  let value = re.source;
  if (re.ignoreCase !== outerFlags.includes('i')) {
    if (envSupportsFlagGroups) {
      modFlagsObj.i = re.ignoreCase;
    } else {
      throw new Error('Pattern modifiers not supported, so flag i on the outer and interpolated regex must match');
    }
  }
  if (re.dotAll !== outerFlags.includes('s')) {
    if (envSupportsFlagGroups) {
      modFlagsObj.s = re.dotAll;
    } else {
      // Emulate the interpolated regex's dot behavior with an explicit character class
      value = replaceUnescaped(value, '\\.', (re.dotAll ? '[^]' : `[^${newlines}]`), Context.DEFAULT);
    }
  }
  if (re.multiline !== outerFlags.includes('m')) {
    if (envSupportsFlagGroups) {
      modFlagsObj.m = re.multiline;
    } else {
      // Emulate the interpolated regex's anchor behavior with lookarounds
      value = replaceUnescaped(value, '\\^', (re.multiline ? `(?<=^|[${newlines}])` : '(?<![^])'), Context.DEFAULT);
      value = replaceUnescaped(value, '\\$', (re.multiline ? `(?=$|[${newlines}])` : '(?![^])'), Context.DEFAULT);
    }
  }
  if (envSupportsFlagGroups) {
    const keys = Object.keys(modFlagsObj);
    let modifier = keys.filter(k => modFlagsObj[k] === true).join('');
    const modOff = keys.filter(k => modFlagsObj[k] === false).join('');
    if (modOff) {
      modifier += `-${modOff}`;
    }
    if (modifier) {
      return {
        value: `(?${modifier}:${value})`,
        usedModifier: true,
      };
    }
  }
  return {value};
}
|
||||
|
||||
export {
|
||||
pattern,
|
||||
regex,
|
||||
rewrite,
|
||||
};
|
147
node_modules/regex/src/subclass.js
generated
vendored
Normal file
147
node_modules/regex/src/subclass.js
generated
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
import {Context, replaceUnescaped} from 'regex-utilities';
|
||||
|
||||
// This marker was chosen because it's impossible to match (so it's extremely unlikely to be used in
|
||||
// a user-provided regex); it's not at risk of being optimized away, transformed, or flagged as an
|
||||
// error by a plugin; and it ends with an unquantifiable token
|
||||
const emulationGroupMarker = '$E$';
|
||||
// Note: Emulation groups with transfer are also supported. They look like `($N$E$…)` where `N` is
|
||||
// an integer 1 or greater. They're not used directly by Regex+ but can be used by plugins and
|
||||
// libraries that use Regex+ internals. Emulation groups with transfer are not only excluded from
|
||||
// match results, but additionally transfer their match to the group specified by `N`
|
||||
|
||||
/**
Works the same as JavaScript's native `RegExp` constructor in all contexts, but automatically
adjusts matches and subpattern indices (with flag `d`) to account for injected emulation groups.
*/
class RegExpSubclass extends RegExp {
  // Avoid `#private` to allow for subclassing
  /**
  One entry per capturing group of the compiled regex (index 0 is the full match). Entries with
  `exclude` are dropped from match results; `transfer` names the result index that receives the
  excluded group's value.
  @private
  @type {Array<{
    exclude: boolean;
    transfer?: number;
  }> | undefined}
  */
  _captureMap;
  /**
  Group names keyed by their capture number in the adjusted results.
  @private
  @type {Record<number, string> | undefined}
  */
  _namesByIndex;
  /**
  @param {string | RegExpSubclass} expression
  @param {string} [flags]
  @param {{useEmulationGroups: boolean;}} [options]
  @throws {Error} If `options` is provided together with a regex to copy.
  */
  constructor(expression, flags, options) {
    if (expression instanceof RegExp && options) {
      throw new Error('Cannot provide options when copying a regexp');
    }
    const useEmulationGroups = !!options?.useEmulationGroups;
    const unmarked = useEmulationGroups ? unmarkEmulationGroups(expression) : null;
    super(unmarked?.expression || expression, flags);
    // The third argument `options` isn't provided when regexes are copied as part of the internal
    // handling of string methods `matchAll` and `split`
    const src = useEmulationGroups ? unmarked : (expression instanceof RegExpSubclass ? expression : null);
    if (src) {
      this._captureMap = src._captureMap;
      this._namesByIndex = src._namesByIndex;
    }
  }
  /**
  Called internally by all String/RegExp methods that use regexes. Runs the native `exec` and, when
  a capture map is present, rebuilds the result so that excluded groups are removed (or transferred
  to another group) in both the match array and, with flag `d`, its `indices`.
  @override
  @param {string} str
  @returns {RegExpExecArray | null}
  */
  exec(str) {
    const match = RegExp.prototype.exec.call(this, str);
    if (!match || !this._captureMap) {
      return match;
    }
    const matchCopy = [...match];
    // Empty all but the first value of the array while preserving its other properties
    match.length = 1;
    let indicesCopy;
    if (this.hasIndices) {
      indicesCopy = [...match.indices];
      match.indices.length = 1;
    }
    for (let i = 1; i < matchCopy.length; i++) {
      if (this._captureMap[i].exclude) {
        const transfer = this._captureMap[i].transfer;
        // Only transfer to a group that has already been added to the rebuilt result
        if (transfer && match.length > transfer) {
          match[transfer] = matchCopy[i];
          const transferName = this._namesByIndex[transfer];
          if (transferName) {
            match.groups[transferName] = matchCopy[i];
            if (this.hasIndices) {
              match.indices.groups[transferName] = indicesCopy[i];
            }
          }
          if (this.hasIndices) {
            match.indices[transfer] = indicesCopy[i];
          }
        }
      } else {
        match.push(matchCopy[i]);
        if (this.hasIndices) {
          match.indices.push(indicesCopy[i]);
        }
      }
    }
    return match;
  }
}
|
||||
|
||||
/**
Build the capturing group map (with emulation groups marked to indicate their submatches shouldn't
appear in results), and remove the markers for captures that were added to emulate extended syntax.
@param {string} expression
@returns {{
  _captureMap: Array<{
    exclude: boolean;
    transfer?: number;
  }>;
  _namesByIndex: Record<number, string>;
  expression: string;
}}
*/
function unmarkEmulationGroups(expression) {
  // Escape the `$` chars of the marker so it can be embedded in a regex pattern
  const marker = emulationGroupMarker.replace(/\$/g, '\\$');
  const _captureMap = [{exclude: false}];
  const _namesByIndex = {0: ''};
  // Counts only captures that are not emulation groups
  let realCaptureNum = 0;
  expression = replaceUnescaped(
    expression,
    String.raw`\((?:(?!\?)|\?<(?![=!])(?<name>[^>]+)>)(?<mark>(?:\$(?<transfer>[1-9]\d*))?${marker})?`,
    ({0: m, groups: {name, mark, transfer}}) => {
      if (mark) {
        _captureMap.push({
          exclude: true,
          transfer: transfer && +transfer,
        });
        // Strip the marker (and any transfer prefix) from the group opening
        return m.slice(0, -mark.length);
      }
      realCaptureNum++;
      if (name) {
        _namesByIndex[realCaptureNum] = name;
      }
      _captureMap.push({
        exclude: false,
      });
      return m;
    },
    Context.DEFAULT
  );
  return {
    _captureMap,
    _namesByIndex,
    expression,
  };
}
|
||||
|
||||
export {
|
||||
emulationGroupMarker,
|
||||
RegExpSubclass,
|
||||
};
|
341
node_modules/regex/src/subroutines.js
generated
vendored
Normal file
341
node_modules/regex/src/subroutines.js
generated
vendored
Normal file
@@ -0,0 +1,341 @@
|
||||
import {emulationGroupMarker} from './subclass.js';
|
||||
import {capturingDelim, countCaptures, namedCapturingDelim} from './utils.js';
|
||||
import {spliceStr} from './utils-internals.js';
|
||||
import {Context, execUnescaped, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';
|
||||
|
||||
/**
Plugin entry point: expands subroutines `\g<name>` and then strips a trailing definition group
`(?(DEFINE)…)` from the expression.
@param {string} expression
@param {import('./regex.js').PluginData} [data]
@returns {string}
*/
function subroutines(expression, data) {
  // NOTE: subroutines and definition groups fully support numbered backreferences and unnamed
  // captures (from interpolated regexes or from turning implicit flag n off), and all of the
  // complex forward and backward backreference adjustments that can result
  const namedGroups = getNamedCapturingGroups(expression, {includeContents: true});
  const transformed = processSubroutines(expression, namedGroups, !!data?.useEmulationGroups);
  return processDefinitionGroup(transformed, namedGroups);
}
|
||||
|
||||
// Explicitly exclude `&` from subroutine name chars because it's used by extension
// `regex-recursion` for recursive subroutines via `\g<name&R=N>`
const subroutinePattern = String.raw`\\g<(?<subroutineName>[^>&]+)>`;
// Tokenizer used while expanding subroutines. Whitespace in the template is only for readability
// and is removed before compiling. Alternatives, in order: a subroutine call, the start of a
// capturing group, a numbered backreference, a named backreference, and any other (optionally
// escaped) single character
const token = new RegExp(String.raw`
${subroutinePattern}
| (?<capturingStart>${capturingDelim})
| \\(?<backrefNum>[1-9]\d*)
| \\k<(?<backrefName>[^>]+)>
| \\?.
`.replace(/\s+/g, ''), 'gsu');
|
||||
|
||||
/**
@typedef {
  Map<string, {
    isUnique: boolean;
    contents?: string;
    groupNum?: number;
    numCaptures?: number;
  }>} NamedCapturingGroupsMap
*/
/**
Apply transformations for subroutines: `\g<name>`. Each subroutine call is replaced in place by the
wrapped contents of the named group it references, and the expanded text is then scanned as part of
the same loop. When the expression contains backreferences, a second pass renumbers them to account
for the captures added by expansion.
@param {string} expression
@param {NamedCapturingGroupsMap} namedGroups
@param {boolean} useEmulationGroups
@returns {string}
@throws {Error} If a subroutine references an unknown group or is reached while the same subroutine
is still open, or if a numbered backreference exceeds the number of captures.
*/
function processSubroutines(expression, namedGroups, useEmulationGroups) {
  if (!/\\g</.test(expression)) {
    return expression;
  }
  // Can skip a lot of processing and avoid adding captures if there are no backrefs
  const hasBackrefs = hasUnescaped(expression, '\\\\(?:[1-9]|k<[^>]+>)', Context.DEFAULT);
  const subroutineWrapper = hasBackrefs ? `(${useEmulationGroups ? emulationGroupMarker : ''}` : '(?:';
  const openSubroutines = new Map();
  const openSubroutinesStack = [];
  // Maps original capture numbers (outside subroutines) to their numbers after expansion
  const captureNumMap = [0];
  let numCapturesPassedOutsideSubroutines = 0;
  let numCapturesPassedInsideSubroutines = 0;
  let numCapturesPassedInsideThisSubroutine = 0;
  let numSubroutineCapturesTrackedInRemap = 0;
  let numCharClassesOpen = 0;
  let result = expression;
  let match;
  // `token` is a shared regex with flag g, so its position must be reset before each use
  token.lastIndex = 0;
  while (match = token.exec(result)) {
    const {0: m, index, groups: {subroutineName, capturingStart, backrefNum, backrefName}} = match;
    if (m === '[') {
      numCharClassesOpen++;
    } else if (!numCharClassesOpen) {

      if (subroutineName) {
        if (!namedGroups.has(subroutineName)) {
          throw new Error(`Invalid named capture referenced by subroutine ${m}`);
        }
        if (openSubroutines.has(subroutineName)) {
          throw new Error(`Subroutine ${m} followed a recursive reference`);
        }
        const contents = namedGroups.get(subroutineName).contents;
        // Wrap value in case it has top-level alternation or is followed by a quantifier. The
        // wrapper also marks the end of the expanded contents, which we'll track using
        // `unclosedGroupCount`. If there are any backrefs in the expression, wrap with `()`
        // instead of `(?:)` in case there are backrefs inside the subroutine that refer to their
        // containing capturing group
        const subroutineValue = `${subroutineWrapper}${contents})`;
        if (hasBackrefs) {
          numCapturesPassedInsideThisSubroutine = 0;
          numCapturesPassedInsideSubroutines++;
        }
        openSubroutines.set(subroutineName, {
          // Incrementally decremented to track when we've left the group
          unclosedGroupCount: countOpenParens(subroutineValue),
        });
        openSubroutinesStack.push(subroutineName);
        // Expand the subroutine's contents into the pattern we're looping over
        result = spliceStr(result, index, m, subroutineValue);
        // Resume scanning right after the inserted wrapper so the expanded contents are tokenized
        token.lastIndex -= m.length - subroutineWrapper.length;
      } else if (capturingStart) {
        // Somewhere within an expanded subroutine
        if (openSubroutines.size) {
          if (hasBackrefs) {
            numCapturesPassedInsideThisSubroutine++;
            numCapturesPassedInsideSubroutines++;
          }
          // Named capturing group
          if (m !== '(') {
            // Replace named with unnamed capture. Subroutines ideally wouldn't create any new
            // captures, but it can't be helped since we need any backrefs to this capture to work.
            // Given that flag n prevents unnamed capture and thereby requires you to rely on named
            // backrefs and `groups`, switching to unnamed essentially accomplishes not creating a
            // capture. Can fully avoid capturing if there are no backrefs in the expression
            result = spliceStr(result, index, m, subroutineWrapper);
            token.lastIndex -= m.length - subroutineWrapper.length;
          }
        } else if (hasBackrefs) {
          captureNumMap.push(
            lastOf(captureNumMap) + 1 +
            numCapturesPassedInsideSubroutines -
            numSubroutineCapturesTrackedInRemap
          );
          numSubroutineCapturesTrackedInRemap = numCapturesPassedInsideSubroutines;
          numCapturesPassedOutsideSubroutines++;
        }
      } else if ((backrefNum || backrefName) && openSubroutines.size) {
        // Unify handling for named and unnamed by always using the backref num
        const num = backrefNum ? +backrefNum : namedGroups.get(backrefName)?.groupNum;
        // Search for the group in the contents of the subroutine stack
        const isGroupFromThisSubroutine = openSubroutinesStack.some(s => {
          const group = namedGroups.get(s);
          return num >= group.groupNum && num <= (group.groupNum + group.numCaptures);
        });
        if (isGroupFromThisSubroutine) {
          const group = namedGroups.get(lastOf(openSubroutinesStack));
          // Replace the backref with metadata we'll need to rewrite it later, using
          // `\k<$$bNsNrNcN>` as a temporary wrapper:
          // - b: The unmodified matched backref num, or the corresponding num of a named backref
          // - s: The capture num of the subroutine we're most deeply nested in, including captures
          //   added by expanding the contents of preceding subroutines
          // - r: The original capture num of the group that the subroutine we're most deeply
          //   nested in references, not counting the effects of subroutines
          // - c: The number of captures within `r`, not counting the effects of subroutines
          const subroutineNum = numCapturesPassedOutsideSubroutines + numCapturesPassedInsideSubroutines - numCapturesPassedInsideThisSubroutine;
          const metadata = `\\k<$$b${num}s${subroutineNum}r${group.groupNum}c${group.numCaptures}>`;
          result = spliceStr(result, index, m, metadata);
          token.lastIndex += metadata.length - m.length;
        }
      } else if (m === ')') {
        if (openSubroutines.size) {
          const subroutine = openSubroutines.get(lastOf(openSubroutinesStack));
          subroutine.unclosedGroupCount--;
          if (!subroutine.unclosedGroupCount) {
            openSubroutines.delete(openSubroutinesStack.pop());
          }
        }
      }

    } else if (m === ']') {
      numCharClassesOpen--;
    }
  }

  if (hasBackrefs) {
    // Second pass to adjust backrefs
    result = replaceUnescaped(
      result,
      String.raw`\\(?:(?<bNum>[1-9]\d*)|k<\$\$b(?<bNumSub>\d+)s(?<subNum>\d+)r(?<refNum>\d+)c(?<refCaps>\d+)>)`,
      ({0: m, groups: {bNum, bNumSub, subNum, refNum, refCaps}}) => {
        if (bNum) {
          // Plain numbered backref outside of any expanded subroutine
          const backrefNum = +bNum;
          if (backrefNum > captureNumMap.length - 1) {
            throw new Error(`Backref "${m}" greater than number of captures`);
          }
          return `\\${captureNumMap[backrefNum]}`;
        }
        const backrefNumInSubroutine = +bNumSub;
        const subroutineGroupNum = +subNum;
        const refGroupNum = +refNum;
        const numCapturesInRef = +refCaps;
        if (backrefNumInSubroutine < refGroupNum || backrefNumInSubroutine > (refGroupNum + numCapturesInRef)) {
          // Refers to a group outside the innermost referenced group; use the general remap
          return `\\${captureNumMap[backrefNumInSubroutine]}`;
        }
        // Refers to a group within the referenced group; offset relative to the expanded copy
        return `\\${subroutineGroupNum - refGroupNum + backrefNumInSubroutine}`;
      },
      Context.DEFAULT
    );
  }

  return result;
}
|
||||
|
||||
// `(?:)` allowed because it can be added by flag x's preprocessing of whitespace and comments
const defineGroupToken = new RegExp(String.raw`${namedCapturingDelim}|\(\?:\)|(?<invalid>\\?.)`, 'gsu');

/**
Remove valid subroutine definition groups: `(?(DEFINE)…)`.
@param {string} expression
@param {NamedCapturingGroupsMap} namedGroups
IMPORTANT: Avoid using the `contents` property of `namedGroups` objects, because at this point
subroutine substitution has been performed on the corresponding substrings in `expression`
@returns {string} The expression up to (not including) the definition group, or the unchanged
expression if it has no definition group.
@throws {Error} If the definition group is not at the end of the expression, is unclosed, contains
a group name that is not unique, or has top-level content other than named groups and `(?:)`.
*/
function processDefinitionGroup(expression, namedGroups) {
  const defineMatch = execUnescaped(expression, String.raw`\(\?\(DEFINE\)`, 0, Context.DEFAULT);
  if (!defineMatch) {
    return expression;
  }
  const defineGroup = getGroup(expression, defineMatch);
  if (defineGroup.afterPos < expression.length) {
    // Supporting DEFINE at positions other than the end would complicate backref handling.
    // NOTE: Flag x's preprocessing permits trailing whitespace and comments
    throw new Error('DEFINE group allowed only at the end of a regex');
  } else if (defineGroup.afterPos > expression.length) {
    throw new Error('DEFINE group is unclosed');
  }
  let match;
  // `defineGroupToken` is a shared regex with flag g, so reset its position before use
  defineGroupToken.lastIndex = 0;
  while (match = defineGroupToken.exec(defineGroup.contents)) {
    const {captureName, invalid} = match.groups;
    if (captureName) {
      const group = getGroup(defineGroup.contents, match);
      let duplicateName;
      if (!namedGroups.get(captureName).isUnique) {
        duplicateName = captureName;
      } else {
        const nestedNamedGroups = getNamedCapturingGroups(group.contents, {includeContents: false});
        for (const name of nestedNamedGroups.keys()) {
          if (!namedGroups.get(name).isUnique) {
            duplicateName = name;
            break;
          }
        }
      }
      if (duplicateName) {
        throw new Error(`Duplicate group name "${duplicateName}" within DEFINE`);
      }
      // Skip over the whole named group; only top-level tokens are validated
      defineGroupToken.lastIndex = group.afterPos;
    } else if (invalid) {
      // Since a DEFINE group is stripped from its expression, we can't easily determine whether
      // unreferenced top-level syntax within it is valid. Such syntax serves no purpose, so it's
      // easiest to not allow it
      throw new Error('DEFINE group includes unsupported syntax at top level');
    }
  }
  return expression.slice(0, defineMatch.index);
}
|
||||
|
||||
/**
Counts unescaped open parens outside of character classes, regardless of group type
@param {string} expression
@returns {number}
*/
function countOpenParens(expression) {
  let num = 0;
  forEachUnescaped(expression, '\\(', () => num++, Context.DEFAULT);
  return num;
}
|
||||
|
||||
/**
Returns the capture number of the first capturing group named `groupName`, counting every capturing
group start (named or unnamed) from the beginning of the expression. If no group has that name, the
result is the total number of capturing groups found.
@param {string} expression
@param {string} groupName
@returns {number}
*/
function getCaptureNum(expression, groupName) {
  let num = 0;
  let pos = 0;
  let match;
  while (match = execUnescaped(expression, capturingDelim, pos, Context.DEFAULT)) {
    const {0: m, index, groups: {captureName}} = match;
    num++;
    if (captureName === groupName) {
      break;
    }
    // Continue searching after this group's opening delimiter
    pos = index + m.length;
  }
  return num;
}
|
||||
|
||||
/**
Gets the contents of the group opened by `delimMatch`, and the position just past the group.
@param {string} expression
@param {RegExpExecArray} delimMatch Match of the group's opening delimiter within `expression`.
@returns {{contents: string; afterPos: number}}
*/
function getGroup(expression, delimMatch) {
  const contentsStart = delimMatch.index + delimMatch[0].length;
  const contents = getGroupContents(expression, contentsStart);
  // `+ 1` accounts for the group's closing paren
  const afterPos = contentsStart + contents.length + 1;
  return {
    contents,
    afterPos,
  };
}
|
||||
|
||||
/**
Collects the named capturing groups of an expression, keyed by name. Every entry records whether
the name is unique; with `includeContents`, the first group of each name also gets its contents,
capture number, and number of nested captures.
@param {string} expression
@param {{includeContents: boolean}} options
@returns {NamedCapturingGroupsMap}
*/
function getNamedCapturingGroups(expression, {includeContents}) {
  const namedGroups = new Map();
  forEachUnescaped(
    expression,
    namedCapturingDelim,
    ({0: m, index, groups: {captureName}}) => {
      // If there are duplicate capture names, subroutines refer to the first instance of the given
      // group (matching the behavior of PCRE and Perl)
      if (namedGroups.has(captureName)) {
        namedGroups.get(captureName).isUnique = false;
      } else {
        const group = {isUnique: true};
        if (includeContents) {
          const contents = getGroupContents(expression, index + m.length);
          Object.assign(group, {
            contents,
            groupNum: getCaptureNum(expression, captureName),
            numCaptures: countCaptures(contents),
          });
        }
        namedGroups.set(captureName, group);
      }
    },
    Context.DEFAULT
  );
  return namedGroups;
}
|
||||
|
||||
/**
Returns the last element of an array, or `undefined` if the array is empty.
@param {Array<any>} arr
@returns {any}
*/
function lastOf(arr) {
  // Remove when support for ES2022 array method `at` (Node.js 16.6) is no longer an issue:
  // <https://caniuse.com/mdn-javascript_builtins_array_at>
  return arr[arr.length - 1];
}
|
||||
|
||||
export {
|
||||
subroutines,
|
||||
};
|
19
node_modules/regex/src/utils-internals.js
generated
vendored
Normal file
19
node_modules/regex/src/utils-internals.js
generated
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
// Separating some utils for improved tree shaking of the `./internals` export
|
||||
|
||||
const noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;
|
||||
|
||||
/**
Replaces the substring of `str` that starts at `pos` and has the length of `oldValue` with
`newValue`. Only the length of `oldValue` is used; its text is not compared against `str`.
@param {string} str
@param {number} pos
@param {string} oldValue
@param {string} newValue
@returns {string}
*/
function spliceStr(str, pos, oldValue, newValue) {
  return str.slice(0, pos) + newValue + str.slice(pos + oldValue.length);
}
|
||||
|
||||
export {
|
||||
noncapturingDelim,
|
||||
spliceStr,
|
||||
};
|
398
node_modules/regex/src/utils.js
generated
vendored
Normal file
398
node_modules/regex/src/utils.js
generated
vendored
Normal file
@@ -0,0 +1,398 @@
|
||||
import {Pattern, pattern} from './pattern.js';
|
||||
import {Context, forEachUnescaped, replaceUnescaped} from 'regex-utilities';
|
||||
|
||||
// Syntax contexts that an interpolation position can be in, outside of or as a character class
const RegexContext = {
  DEFAULT: 'DEFAULT',
  CHAR_CLASS: 'CHAR_CLASS',
  ENCLOSED_P: 'ENCLOSED_P',
  ENCLOSED_U: 'ENCLOSED_U',
  GROUP_NAME: 'GROUP_NAME',
  INTERVAL_QUANTIFIER: 'INTERVAL_QUANTIFIER',
  INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
};

// Syntax contexts that an interpolation position can be in, within a character class
const CharClassContext = {
  DEFAULT: 'DEFAULT',
  ENCLOSED_P: 'ENCLOSED_P',
  ENCLOSED_Q: 'ENCLOSED_Q',
  ENCLOSED_U: 'ENCLOSED_U',
  INVALID_INCOMPLETE_TOKEN: 'INVALID_INCOMPLETE_TOKEN',
  RANGE: 'RANGE',
};

// Regex contexts that are inside a token enclosed by `{…}`
const enclosedTokenRegexContexts = new Set([
  RegexContext.ENCLOSED_P,
  RegexContext.ENCLOSED_U,
]);

// Character class contexts that are inside a token enclosed by `{…}`
const enclosedTokenCharClassContexts = new Set([
  CharClassContext.ENCLOSED_P,
  CharClassContext.ENCLOSED_Q,
  CharClassContext.ENCLOSED_U,
]);
|
||||
|
||||
// Whether the environment accepts pattern modifier syntax `(?i:)`; detected once at load time by
// trying to construct such a regex
const envSupportsFlagGroups = (() => {
  try {
    new RegExp('(?i:)');
  } catch {
    return false;
  }
  return true;
})();

// Whether the environment accepts regex flag `v`; detected once at load time by trying to
// construct a regex with it
const envSupportsFlagV = (() => {
  try {
    new RegExp('', 'v');
  } catch {
    return false;
  }
  return true;
})();
|
||||
|
||||
// Characters that `escapeV` additionally escapes within character classes
const doublePunctuatorChars = '&!#$%*+,.:;<=>?@^`~';
// Pattern for the start of a named capturing group; `(?![=!])` excludes lookbehind
const namedCapturingDelim = String.raw`\(\?<(?![=!])(?<captureName>[^>]+)>`;
// Pattern for the start of any capturing group: an unnamed `(` that isn't followed by `?` and
// isn't the condition paren of `(?(DEFINE)`, or a named capturing group
const capturingDelim = String.raw`\((?!\?)(?!(?<=\(\?\()DEFINE\))|${namedCapturingDelim}`;
|
||||
|
||||
/**
Adds `precedingCaptures` to the number of every numbered backreference (`\1`, `\2`, …) in the
expression.
@param {string} expression
@param {number} precedingCaptures
@returns {string}
*/
function adjustNumberedBackrefs(expression, precedingCaptures) {
  return replaceUnescaped(
    expression,
    String.raw`\\(?<num>[1-9]\d*)`,
    ({groups: {num}}) => `\\${+num + precedingCaptures}`,
    Context.DEFAULT
  );
}
|
||||
|
||||
// Properties of strings as of ES2024
const stringPropertyNames = [
  'Basic_Emoji',
  'Emoji_Keycap_Sequence',
  'RGI_Emoji_Modifier_Sequence',
  'RGI_Emoji_Flag_Sequence',
  'RGI_Emoji_Tag_Sequence',
  'RGI_Emoji_ZWJ_Sequence',
  'RGI_Emoji',
].join('|');
// Tokenizer for character class contents. Whitespace in the template is only for readability and
// is removed before compiling
const charClassUnionToken = new RegExp(String.raw`
\\(?: c[A-Za-z]
  | p\{(?<pStrProp>${stringPropertyNames})\}
  | [pP]\{[^\}]+\}
  | (?<qStrProp>q)
  | u(?:[A-Fa-f\d]{4}|\{[A-Fa-f\d]+\})
  | x[A-Fa-f\d]{2}
  | .
)
| --
| &&
| .
`.replace(/\s+/g, ''), 'gsu');

/**
Checks whether the contents of a character class include a union of more than one node.
Assumes flag v and doesn't worry about syntax errors that are caught by it.
@param {string} charClassPattern
@returns {boolean}
*/
function containsCharClassUnion(charClassPattern) {
  // Return `true` if it contains:
  // - `\p` (lowercase only) and the name is a property of strings (case sensitive).
  // - `\q`.
  // - Two single-char-matching tokens in sequence.
  // - One single-char-matching token followed immediately by unescaped `[`.
  // - One single-char-matching token preceded immediately by unescaped `]`.
  // Else, `false`.
  // Ranges with `-` create a single token.
  // Subtraction and intersection with `--` and `&&` create a single token.
  // Supports any number of nested classes
  let hasFirst = false;
  let lastM;
  for (const {0: m, groups} of charClassPattern.matchAll(charClassUnionToken)) {
    if (groups.pStrProp || groups.qStrProp) {
      return true;
    }
    if (m === '[' && hasFirst) {
      return true;
    }
    if (['-', '--', '&&'].includes(m)) {
      // The operator joins its operands into a single token, so start over
      hasFirst = false;
    } else if (m !== '[' && m !== ']') {
      if (hasFirst || lastM === ']') {
        return true;
      }
      hasFirst = true;
    }
    lastM = m;
  }
  return false;
}
|
||||
|
||||
/**
Counts the capturing groups (named and unnamed) in the expression.
@param {string} expression
@returns {number}
*/
function countCaptures(expression) {
  let num = 0;
  forEachUnescaped(expression, capturingDelim, () => num++, Context.DEFAULT);
  return num;
}
|
||||
|
||||
/**
Escape special characters for the given context, assuming flag v.
@param {string} str String to escape
@param {'DEFAULT' | 'CHAR_CLASS'} context `Context` option from lib `regex-utilities`
@returns {string} Escaped string
*/
function escapeV(str, context) {
  if (context === Context.CHAR_CLASS) {
    // Escape all double punctuators (including ^, which is special on its own in the first
    // position) in case they're bordered by the same character in or outside of the escaped string
    return str.replace(new RegExp(String.raw`[()\[\]{}|\\/\-${doublePunctuatorChars}]`, 'g'), '\\$&');
  }
  return str.replace(/[()\[\]{}|\\^$*+?.]/g, '\\$&');
}
|
||||
|
||||
/**
Look for characters that would change the meaning of subsequent tokens outside an interpolated
value.
@param {string} expression
@param {string} regexContext A `RegexContext` value.
@param {string} charClassContext A `CharClassContext` value.
@returns {string} The breakout character that was found, or an empty string if there is none.
*/
function getBreakoutChar(expression, regexContext, charClassContext) {
  const escapesRemoved = expression.replace(/\\./gsu, '');
  // Trailing unescaped `\`; checking `.includes('\\')` would also work
  if (escapesRemoved.endsWith('\\')) {
    return '\\';
  }
  if (regexContext === RegexContext.DEFAULT) {
    // Unbalanced `[` or `]` are also errors but don't breakout; they're caught by the wrapper
    return getUnbalancedChar(escapesRemoved, '(', ')');
  } else if (
    regexContext === RegexContext.CHAR_CLASS &&
    !enclosedTokenCharClassContexts.has(charClassContext)
  ) {
    return getUnbalancedChar(escapesRemoved, '[', ']');
  } else if (
    regexContext === RegexContext.INTERVAL_QUANTIFIER ||
    enclosedTokenRegexContexts.has(regexContext) ||
    enclosedTokenCharClassContexts.has(charClassContext)
  ) {
    // A `}` would close the enclosing `{…}` token early
    if (escapesRemoved.includes('}')) {
      return '}';
    }
  } else if (regexContext === RegexContext.GROUP_NAME) {
    // A `>` would close the enclosing `<…>` group name early
    if (escapesRemoved.includes('>')) {
      return '>';
    }
  }
  return '';
}
|
||||
|
||||
// Tokenizer used to track the syntax context at the end of an incomplete expression. Whitespace in
// the template is only for readability and is removed before compiling. Named groups flag, in
// order: the start of a group name (`(?<`, `\g<`, `\k<`), the start of `\p{`/`\P{`/`\u{`, the
// start of `\q{`, a `{` that isn't part of those tokens, and an incomplete escape (trailing `\`,
// or `\c`, `\u`, `\x` without their required chars)
const contextToken = new RegExp(String.raw`
  (?<groupN>\(\?<(?![=!])|\\[gk]<)
| (?<enclosedPU>\\[pPu]\{)
| (?<enclosedQ>\\q\{)
| (?<intervalQ>\{)
| (?<incompleteT>\\(?: $
  | c(?![A-Za-z])
  | u(?![A-Fa-f\d]{4})[A-Fa-f\d]{0,3}
  | x(?![A-Fa-f\d]{2})[A-Fa-f\d]?
  )
)
| --
| \\?.
`.replace(/\s+/g, ''), 'gsu');
|
||||
|
||||
/**
@typedef {{
  regexContext: string;
  charClassContext: string;
  charClassDepth: number;
  lastPos: number;
}} RunningContext
*/
/**
Accepts and returns its full state so it doesn't have to reprocess parts that have already been
seen. Assumes flag v and doesn't worry about syntax errors that are caught by it.
Scanning starts at `runningContext.lastPos` (default `0`); any state fields not provided fall
back to the default context with a character class depth of `0`.
@param {string} incompleteExpression
@param {Partial<RunningContext>} [runningContext]
@returns {RunningContext} The context at the end of the expression, with `lastPos` set to the
  expression's length.
*/
function getEndContextForIncompleteExpression(incompleteExpression, runningContext) {
  let {regexContext, charClassContext, charClassDepth, lastPos} = {
    regexContext: RegexContext.DEFAULT,
    charClassContext: CharClassContext.DEFAULT,
    charClassDepth: 0,
    lastPos: 0,
    ...runningContext,
  };
  // `contextToken` is a shared global regex, so position it explicitly before scanning
  contextToken.lastIndex = lastPos;
  let match;
  while ((match = contextToken.exec(incompleteExpression))) {
    const {0: m, groups: {groupN, enclosedPU, enclosedQ, intervalQ, incompleteT}} = match;
    if (m === '[') {
      charClassDepth++;
      regexContext = RegexContext.CHAR_CLASS;
      charClassContext = CharClassContext.DEFAULT;
    } else if (m === ']' && regexContext === RegexContext.CHAR_CLASS) {
      if (charClassDepth) {
        charClassDepth--;
      }
      // Only leave character class context once the outermost class is closed
      if (!charClassDepth) {
        regexContext = RegexContext.DEFAULT;
      }
      charClassContext = CharClassContext.DEFAULT;
    } else if (regexContext === RegexContext.CHAR_CLASS) {
      if (incompleteT) {
        charClassContext = CharClassContext.INVALID_INCOMPLETE_TOKEN;
      } else if (m === '-') {
        charClassContext = CharClassContext.RANGE;
      } else if (enclosedPU) {
        // `m` is `\p{`, `\P{`, or `\u{`; its second character tells them apart
        charClassContext = m[1] === 'u' ? CharClassContext.ENCLOSED_U : CharClassContext.ENCLOSED_P;
      } else if (enclosedQ) {
        charClassContext = CharClassContext.ENCLOSED_Q;
      } else if (
        (m === '}' && enclosedTokenCharClassContexts.has(charClassContext)) ||
        // Don't continue in these contexts since we've advanced another token
        charClassContext === CharClassContext.INVALID_INCOMPLETE_TOKEN ||
        charClassContext === CharClassContext.RANGE
      ) {
        charClassContext = CharClassContext.DEFAULT;
      }
    } else {
      if (incompleteT) {
        regexContext = RegexContext.INVALID_INCOMPLETE_TOKEN;
      } else if (groupN) {
        regexContext = RegexContext.GROUP_NAME;
      } else if (enclosedPU) {
        // `m` is `\p{`, `\P{`, or `\u{`; its second character tells them apart
        regexContext = m[1] === 'u' ? RegexContext.ENCLOSED_U : RegexContext.ENCLOSED_P;
      } else if (intervalQ) {
        regexContext = RegexContext.INTERVAL_QUANTIFIER;
      } else if (
        (m === '>' && regexContext === RegexContext.GROUP_NAME) ||
        (m === '}' && (regexContext === RegexContext.INTERVAL_QUANTIFIER || enclosedTokenRegexContexts.has(regexContext))) ||
        // Don't continue in this context since we've advanced another token
        regexContext === RegexContext.INVALID_INCOMPLETE_TOKEN
      ) {
        regexContext = RegexContext.DEFAULT;
      }
    }
  }
  return {
    regexContext,
    charClassContext,
    charClassDepth,
    lastPos: incompleteExpression.length,
  };
}
|
||||
|
||||
/**
Finds the first delimiter that leaves `leftChar`/`rightChar` pairs unbalanced. No special handling
for escaped versions of the characters; callers are expected to strip escapes beforehand.
@param {string} expression
@param {string} leftChar Opening delimiter (a single character).
@param {string} rightChar Closing delimiter (a single character).
@returns {string} `rightChar` if a closer appears without a matching opener, `leftChar` if any
  opener is left unclosed at the end, or an empty string if the delimiters are balanced.
*/
function getUnbalancedChar(expression, leftChar, rightChar) {
  let numOpen = 0;
  // Scan characters directly; no need to build a regex to find two literal delimiters
  for (const char of expression) {
    if (char === leftChar) {
      numOpen++;
    } else if (char === rightChar) {
      numOpen--;
      // A closer with nothing open can't be rebalanced by anything that follows
      if (numOpen < 0) {
        return rightChar;
      }
    }
  }
  return numOpen > 0 ? leftChar : '';
}
|
||||
|
||||
/**
@typedef {import('./regex.js').InterpolatedValue} InterpolatedValue
@typedef {import('./regex.js').RawTemplate} RawTemplate
@typedef {import('./regex.js').RegexTagOptions} RegexTagOptions
@typedef {(
  value: InterpolatedValue,
  runningContext: RunningContext,
  options: Required<RegexTagOptions>
) => {
  transformed: string;
  runningContext: RunningContext;
}} Preprocessor
*/
/**
Returns transformed versions of a template and substitutions, using the given preprocessor. Only
processes substitutions that are instanceof `Pattern`; other substitutions are passed through
unchanged. The running context returned by each preprocessor call is handed to the next call,
with `lastPos` reset to `0` since each call receives a new string.
@param {RawTemplate} template
@param {ReadonlyArray<InterpolatedValue>} substitutions
@param {Preprocessor} preprocessor
@param {Required<RegexTagOptions>} options
@returns {{template: RawTemplate; substitutions: ReadonlyArray<InterpolatedValue>;}}
*/
function preprocess(template, substitutions, preprocessor, options) {
  /** @type {RawTemplate} */
  const newTemplate = {raw: []};
  const newSubstitutions = [];
  let runningContext;
  template.raw.forEach((raw, i) => {
    const rawResult = preprocessor(raw, {...runningContext, lastPos: 0}, options);
    newTemplate.raw.push(rawResult.transformed);
    runningContext = rawResult.runningContext;
    // Every raw string except the last is followed by a substitution
    if (i < template.raw.length - 1) {
      const substitution = substitutions[i];
      if (substitution instanceof Pattern) {
        const substitutionResult = preprocessor(substitution, {...runningContext, lastPos: 0}, options);
        newSubstitutions.push(pattern(substitutionResult.transformed));
        runningContext = substitutionResult.runningContext;
      } else {
        newSubstitutions.push(substitution);
      }
    }
  });
  return {
    template: newTemplate,
    substitutions: newSubstitutions,
  };
}
|
||||
|
||||
/**
Sandbox `^` if relevant, done so it can't change the meaning of the surrounding character class
if we happen to be at the first position. A leading `^` is replaced with `\^^`; strings that
don't start with `^` are returned unchanged. See `sandboxLoneDoublePunctuatorChar` for more
details.
@param {string} str
@returns {string}
*/
function sandboxLoneCharClassCaret(str) {
  return str.replace(/^\^/, '\\^^');
}
|
||||
|
||||
/**
Sandbox without escaping by repeating the character and escaping only the first one. The second
one is so that, if followed by the same symbol, the resulting double punctuator will still throw
as expected. Details:
- Only need to check the first position because, if it's part of an implicit union,
  interpolation handling will wrap it in nested `[…]`.
- Can't just wrap in nested `[…]` here, since the value might be used in a range.
- Can't add a second unescaped symbol if a lone symbol is the entire string because it might be
  followed by the same unescaped symbol outside an interpolation, and since it won't be wrapped,
  the second symbol wouldn't be sandboxed from the one following it.
@param {string} str
@returns {string}
*/
function sandboxLoneDoublePunctuatorChar(str) {
  // Matches a leading double-punctuator character that isn't immediately repeated
  const loneLeadingPunctuator = new RegExp(`^([${doublePunctuatorChars}])(?!\\1)`);
  return str.replace(loneLeadingPunctuator, (m, _, pos) => {
    // The match is anchored at the start, so this is true only when the symbol is the whole string
    const isEntireString = pos + 1 === str.length;
    return `\\${m}${isEntireString ? '' : m}`;
  });
}
|
||||
|
||||
/**
Converts `\0` tokens to `\x00` in the given context.
@param {string} str
@param {'DEFAULT' | 'CHAR_CLASS'} [context] `Context` option from lib `regex-utilities`
@returns {string}
*/
function sandboxUnsafeNulls(str, context) {
  // regex`[\0${0}]` and regex`[${pattern`\0`}0]` can't be guarded against via nested `[…]`
  // sandboxing in character classes if the interpolated value doesn't contain union (since it
  // might be placed on a range boundary). So escape `\0` in character classes as `\x00`
  return replaceUnescaped(str, String.raw`\\0(?!\d)`, '\\x00', context);
}
|
||||
|
||||
export {
|
||||
adjustNumberedBackrefs,
|
||||
capturingDelim,
|
||||
CharClassContext,
|
||||
containsCharClassUnion,
|
||||
countCaptures,
|
||||
doublePunctuatorChars,
|
||||
enclosedTokenCharClassContexts,
|
||||
enclosedTokenRegexContexts,
|
||||
envSupportsFlagGroups,
|
||||
envSupportsFlagV,
|
||||
escapeV,
|
||||
getBreakoutChar,
|
||||
getEndContextForIncompleteExpression,
|
||||
namedCapturingDelim,
|
||||
preprocess,
|
||||
RegexContext,
|
||||
sandboxLoneCharClassCaret,
|
||||
sandboxLoneDoublePunctuatorChar,
|
||||
sandboxUnsafeNulls,
|
||||
};
|
Reference in New Issue
Block a user