full site update
This commit is contained in:
312
node_modules/regex-recursion/src/index.js
generated
vendored
312
node_modules/regex-recursion/src/index.js
generated
vendored
@@ -1,42 +1,58 @@
|
||||
import {Context, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';
|
||||
import {emulationGroupMarker} from 'regex/internals';
|
||||
|
||||
const r = String.raw;
|
||||
const gRToken = r`\\g<(?<gRNameOrNum>[^>&]+)&R=(?<gRDepth>[^>]+)>`;
|
||||
const recursiveToken = r`\(\?R=(?<rDepth>[^\)]+)\)|${gRToken}`;
|
||||
const namedCapturingDelim = r`\(\?<(?![=!])(?<captureName>[^>]+)>`;
|
||||
const token = new RegExp(r`${namedCapturingDelim}|${recursiveToken}|\(\?|\\?.`, 'gsu');
|
||||
const namedCaptureDelim = r`\(\?<(?![=!])(?<captureName>[^>]+)>`;
|
||||
const captureDelim = r`${namedCaptureDelim}|(?<unnamed>\()(?!\?)`;
|
||||
const token = new RegExp(r`${namedCaptureDelim}|${recursiveToken}|\(\?|\\?.`, 'gsu');
|
||||
const overlappingRecursionMsg = 'Cannot use multiple overlapping recursions';
|
||||
// Support emulation groups with transfer marker prefix
|
||||
const emulationGroupMarkerRe = new RegExp(r`(?:\$[1-9]\d*)?${emulationGroupMarker.replace(/\$/g, r`\$`)}`, 'y');
|
||||
|
||||
/**
|
||||
@param {string} expression
|
||||
@param {string} pattern
|
||||
@param {{
|
||||
flags?: string;
|
||||
useEmulationGroups?: boolean;
|
||||
captureTransfers?: Map<number, Array<number>>;
|
||||
hiddenCaptures?: Array<number>;
|
||||
mode?: 'plugin' | 'external';
|
||||
}} [data]
|
||||
@returns {string}
|
||||
@returns {{
|
||||
pattern: string;
|
||||
captureTransfers: Map<number, Array<number>>;
|
||||
hiddenCaptures: Array<number>;
|
||||
}}
|
||||
*/
|
||||
export function recursion(expression, data) {
|
||||
function recursion(pattern, data) {
|
||||
const {hiddenCaptures, mode} = {
|
||||
hiddenCaptures: [],
|
||||
mode: 'plugin',
|
||||
...data,
|
||||
};
|
||||
// Capture transfer is used by <github.com/slevithan/oniguruma-to-es>
|
||||
let captureTransfers = data?.captureTransfers ?? new Map();
|
||||
// Keep the initial fail-check (which avoids unneeded processing) as fast as possible by testing
|
||||
// without the accuracy improvement of using `hasUnescaped` with default `Context`
|
||||
if (!(new RegExp(recursiveToken, 'su').test(expression))) {
|
||||
return expression;
|
||||
// without the accuracy improvement of using `hasUnescaped` with `Context.DEFAULT`
|
||||
if (!(new RegExp(recursiveToken, 'su').test(pattern))) {
|
||||
return {
|
||||
pattern,
|
||||
captureTransfers,
|
||||
hiddenCaptures,
|
||||
};
|
||||
}
|
||||
if (hasUnescaped(expression, r`\(\?\(DEFINE\)`, Context.DEFAULT)) {
|
||||
if (mode === 'plugin' && hasUnescaped(pattern, r`\(\?\(DEFINE\)`, Context.DEFAULT)) {
|
||||
throw new Error('DEFINE groups cannot be used with recursion');
|
||||
}
|
||||
const useEmulationGroups = !!data?.useEmulationGroups;
|
||||
const hasNumberedBackref = hasUnescaped(expression, r`\\[1-9]`, Context.DEFAULT);
|
||||
|
||||
const addedHiddenCaptures = [];
|
||||
const hasNumberedBackref = hasUnescaped(pattern, r`\\[1-9]`, Context.DEFAULT);
|
||||
const groupContentsStartPos = new Map();
|
||||
const openGroups = [];
|
||||
let hasRecursed = false;
|
||||
let numCharClassesOpen = 0;
|
||||
let numCaptures = 0;
|
||||
let numCapturesPassed = 0;
|
||||
let match;
|
||||
token.lastIndex = 0;
|
||||
while ((match = token.exec(expression))) {
|
||||
while ((match = token.exec(pattern))) {
|
||||
const {0: m, groups: {captureName, rDepth, gRNameOrNum, gRDepth}} = match;
|
||||
if (m === '[') {
|
||||
numCharClassesOpen++;
|
||||
@@ -57,15 +73,37 @@ export function recursion(expression, data) {
|
||||
// Note that Regex+'s extended syntax (atomic groups and sometimes subroutines) can also
|
||||
// add numbered backrefs, but those work fine because external plugins like this one run
|
||||
// *before* the transformation of built-in syntax extensions
|
||||
throw new Error('Numbered backrefs cannot be used with global recursion');
|
||||
throw new Error(
|
||||
// When used in `external` mode by transpilers other than Regex+, backrefs might have
|
||||
// gone through conversion from named to numbered, so avoid a misleading error
|
||||
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with global recursion`
|
||||
);
|
||||
}
|
||||
const pre = expression.slice(0, match.index);
|
||||
const post = expression.slice(token.lastIndex);
|
||||
if (hasUnescaped(post, recursiveToken, Context.DEFAULT)) {
|
||||
const left = pattern.slice(0, match.index);
|
||||
const right = pattern.slice(token.lastIndex);
|
||||
if (hasUnescaped(right, recursiveToken, Context.DEFAULT)) {
|
||||
throw new Error(overlappingRecursionMsg);
|
||||
}
|
||||
const reps = +rDepth - 1;
|
||||
pattern = makeRecursive(
|
||||
left,
|
||||
right,
|
||||
reps,
|
||||
false,
|
||||
hiddenCaptures,
|
||||
addedHiddenCaptures,
|
||||
numCapturesPassed
|
||||
);
|
||||
captureTransfers = mapCaptureTransfers(
|
||||
captureTransfers,
|
||||
left,
|
||||
reps,
|
||||
addedHiddenCaptures.length,
|
||||
0,
|
||||
numCapturesPassed
|
||||
);
|
||||
// No need to parse further
|
||||
return makeRecursive(pre, post, +rDepth, false, useEmulationGroups);
|
||||
break;
|
||||
// `\g<name&R=N>`, `\g<number&R=N>`
|
||||
} else if (gRNameOrNum) {
|
||||
assertMaxInBounds(gRDepth);
|
||||
@@ -80,46 +118,66 @@ export function recursion(expression, data) {
|
||||
}
|
||||
}
|
||||
if (!isWithinReffedGroup) {
|
||||
throw new Error(r`Recursive \g cannot be used outside the referenced group "\g<${gRNameOrNum}&R=${gRDepth}>"`);
|
||||
throw new Error(r`Recursive \g cannot be used outside the referenced group "${
|
||||
mode === 'external' ? gRNameOrNum : r`\g<${gRNameOrNum}&R=${gRDepth}>`
|
||||
}"`);
|
||||
}
|
||||
const startPos = groupContentsStartPos.get(gRNameOrNum);
|
||||
const groupContents = getGroupContents(expression, startPos);
|
||||
const groupContents = getGroupContents(pattern, startPos);
|
||||
if (
|
||||
hasNumberedBackref &&
|
||||
hasUnescaped(groupContents, r`${namedCapturingDelim}|\((?!\?)`, Context.DEFAULT)
|
||||
hasUnescaped(groupContents, r`${namedCaptureDelim}|\((?!\?)`, Context.DEFAULT)
|
||||
) {
|
||||
throw new Error('Numbered backrefs cannot be used with recursion of capturing groups');
|
||||
throw new Error(
|
||||
// When used in `external` mode by transpilers other than Regex+, backrefs might have
|
||||
// gone through conversion from named to numbered, so avoid a misleading error
|
||||
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with recursion of capturing groups`
|
||||
);
|
||||
}
|
||||
const groupContentsPre = expression.slice(startPos, match.index);
|
||||
const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
|
||||
const expansion = makeRecursive(groupContentsPre, groupContentsPost, +gRDepth, true, useEmulationGroups);
|
||||
const pre = expression.slice(0, startPos);
|
||||
const post = expression.slice(startPos + groupContents.length);
|
||||
const groupContentsLeft = pattern.slice(startPos, match.index);
|
||||
const groupContentsRight = groupContents.slice(groupContentsLeft.length + m.length);
|
||||
const numAddedHiddenCapturesPreExpansion = addedHiddenCaptures.length;
|
||||
const reps = +gRDepth - 1;
|
||||
const expansion = makeRecursive(
|
||||
groupContentsLeft,
|
||||
groupContentsRight,
|
||||
reps,
|
||||
true,
|
||||
hiddenCaptures,
|
||||
addedHiddenCaptures,
|
||||
numCapturesPassed
|
||||
);
|
||||
captureTransfers = mapCaptureTransfers(
|
||||
captureTransfers,
|
||||
groupContentsLeft,
|
||||
reps,
|
||||
addedHiddenCaptures.length - numAddedHiddenCapturesPreExpansion,
|
||||
numAddedHiddenCapturesPreExpansion,
|
||||
numCapturesPassed
|
||||
);
|
||||
const pre = pattern.slice(0, startPos);
|
||||
const post = pattern.slice(startPos + groupContents.length);
|
||||
// Modify the string we're looping over
|
||||
expression = `${pre}${expansion}${post}`;
|
||||
pattern = `${pre}${expansion}${post}`;
|
||||
// Step forward for the next loop iteration
|
||||
token.lastIndex += expansion.length - m.length - groupContentsPre.length - groupContentsPost.length;
|
||||
token.lastIndex += expansion.length - m.length - groupContentsLeft.length - groupContentsRight.length;
|
||||
openGroups.forEach(g => g.hasRecursedWithin = true);
|
||||
hasRecursed = true;
|
||||
} else if (captureName) {
|
||||
numCaptures++;
|
||||
// NOTE: Not currently handling *named* emulation groups that already exist in the pattern
|
||||
groupContentsStartPos.set(String(numCaptures), token.lastIndex);
|
||||
numCapturesPassed++;
|
||||
groupContentsStartPos.set(String(numCapturesPassed), token.lastIndex);
|
||||
groupContentsStartPos.set(captureName, token.lastIndex);
|
||||
openGroups.push({
|
||||
num: numCaptures,
|
||||
num: numCapturesPassed,
|
||||
name: captureName,
|
||||
});
|
||||
} else if (m.startsWith('(')) {
|
||||
} else if (m[0] === '(') {
|
||||
const isUnnamedCapture = m === '(';
|
||||
if (isUnnamedCapture) {
|
||||
numCaptures++;
|
||||
groupContentsStartPos.set(
|
||||
String(numCaptures),
|
||||
token.lastIndex + (useEmulationGroups ? emulationGroupMarkerLength(expression, token.lastIndex) : 0)
|
||||
);
|
||||
numCapturesPassed++;
|
||||
groupContentsStartPos.set(String(numCapturesPassed), token.lastIndex);
|
||||
}
|
||||
openGroups.push(isUnnamedCapture ? {num: numCaptures} : {});
|
||||
openGroups.push(isUnnamedCapture ? {num: numCapturesPassed} : {});
|
||||
} else if (m === ')') {
|
||||
openGroups.pop();
|
||||
}
|
||||
@@ -129,7 +187,13 @@ export function recursion(expression, data) {
|
||||
}
|
||||
}
|
||||
|
||||
return expression;
|
||||
hiddenCaptures.push(...addedHiddenCaptures);
|
||||
|
||||
return {
|
||||
pattern,
|
||||
captureTransfers,
|
||||
hiddenCaptures,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -147,66 +211,88 @@ function assertMaxInBounds(max) {
|
||||
}
|
||||
|
||||
/**
Builds the expanded pattern for one recursion token by nesting copies of the text on either side
of the token inside non-capturing groups.

The nested copies are produced by `repeatWithDepth`, which receives `hiddenCaptures` and
`addedHiddenCaptures` and updates both arrays in place as it encounters capturing groups.

@param {string} left Pattern text to the left of the recursion token.
@param {string} right Pattern text to the right of the recursion token.
@param {number} reps Number of nested copies to add (callers pass the requested depth minus one).
@param {boolean} isSubpattern Whether a group is being recursed, rather than the whole pattern.
@param {Array<number>} hiddenCaptures
@param {Array<number>} addedHiddenCaptures
@param {number} numCapturesPassed
@returns {string}
*/
function makeRecursive(
  left,
  right,
  reps,
  isSubpattern,
  hiddenCaptures,
  addedHiddenCaptures,
  numCapturesPassed
) {
  const namesInRecursed = new Set();
  // Can skip this work if not needed
  if (isSubpattern) {
    forEachUnescaped(left + right, namedCaptureDelim, ({groups: {captureName}}) => {
      namesInRecursed.add(captureName);
    }, Context.DEFAULT);
  }
  // Arguments shared by the forward and backward expansions
  const rest = [
    reps,
    // `null` means no name filtering is applied to backrefs (used for global recursion)
    isSubpattern ? namesInRecursed : null,
    hiddenCaptures,
    addedHiddenCaptures,
    numCapturesPassed,
  ];
  // Depth 2: 'left(?:left(?:)right)right'
  // Depth 3: 'left(?:left(?:left(?:)right)right)right'
  // Empty group in the middle separates tokens and absorbs a following quantifier if present
  return `${left}${
    repeatWithDepth(`(?:${left}`, 'forward', ...rest)
  }(?:)${
    repeatWithDepth(`${right})`, 'backward', ...rest)
  }${right}`;
}
|
||||
|
||||
/**
|
||||
@param {string} expression
|
||||
@param {string} pattern
|
||||
@param {'forward' | 'backward'} direction
|
||||
@param {number} reps
|
||||
@param {Set<string> | null} namesInRecursed
|
||||
@param {'forward' | 'backward'} direction
|
||||
@param {boolean} useEmulationGroups
|
||||
@param {Array<number>} hiddenCaptures
|
||||
@param {Array<number>} addedHiddenCaptures
|
||||
@param {number} numCapturesPassed
|
||||
@returns {string}
|
||||
*/
|
||||
function repeatWithDepth(expression, reps, namesInRecursed, direction, useEmulationGroups) {
|
||||
function repeatWithDepth(
|
||||
pattern,
|
||||
direction,
|
||||
reps,
|
||||
namesInRecursed,
|
||||
hiddenCaptures,
|
||||
addedHiddenCaptures,
|
||||
numCapturesPassed
|
||||
) {
|
||||
const startNum = 2;
|
||||
const depthNum = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum;
|
||||
const getDepthNum = i => direction === 'forward' ? (i + startNum) : (reps - i + startNum - 1);
|
||||
let result = '';
|
||||
for (let i = 0; i < reps; i++) {
|
||||
const captureNum = depthNum(i);
|
||||
const depthNum = getDepthNum(i);
|
||||
result += replaceUnescaped(
|
||||
expression,
|
||||
// NOTE: Not currently handling *named* emulation groups that already exist in the pattern
|
||||
r`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>${
|
||||
useEmulationGroups ? r`|(?<unnamed>\()(?!\?)(?:${emulationGroupMarkerRe.source})?` : ''
|
||||
}`,
|
||||
({0: m, index, groups: {captureName, backref, unnamed}}) => {
|
||||
pattern,
|
||||
r`${captureDelim}|\\k<(?<backref>[^>]+)>`,
|
||||
({0: m, groups: {captureName, unnamed, backref}}) => {
|
||||
if (backref && namesInRecursed && !namesInRecursed.has(backref)) {
|
||||
// Don't alter backrefs to groups outside the recursed subpattern
|
||||
return m;
|
||||
}
|
||||
// Only matches unnamed capture delim if `useEmulationGroups`
|
||||
if (unnamed) {
|
||||
// Add an emulation group marker, possibly replacing an existing marker (removes any
|
||||
// transfer prefix)
|
||||
return `(${emulationGroupMarker}`;
|
||||
const suffix = `_$${depthNum}`;
|
||||
if (unnamed || captureName) {
|
||||
const addedCaptureNum = numCapturesPassed + addedHiddenCaptures.length + 1;
|
||||
addedHiddenCaptures.push(addedCaptureNum);
|
||||
incrementIfAtLeast(hiddenCaptures, addedCaptureNum);
|
||||
return unnamed ? m : `(?<${captureName}${suffix}>`;
|
||||
}
|
||||
const suffix = `_$${captureNum}`;
|
||||
return captureName ?
|
||||
`(?<${captureName}${suffix}>${useEmulationGroups ? emulationGroupMarker : ''}` :
|
||||
r`\k<${backref}${suffix}>`;
|
||||
return r`\k<${backref}${suffix}>`;
|
||||
},
|
||||
Context.DEFAULT
|
||||
);
|
||||
@@ -214,8 +300,66 @@ function repeatWithDepth(expression, reps, namesInRecursed, direction, useEmulat
|
||||
return result;
|
||||
}
|
||||
|
||||
function emulationGroupMarkerLength(expression, index) {
|
||||
emulationGroupMarkerRe.lastIndex = index;
|
||||
const match = emulationGroupMarkerRe.exec(expression);
|
||||
return match ? match[0].length : 0;
|
||||
/**
Updates the array in place by incrementing each value greater than or equal to the threshold.
Values below the threshold are left as they are. Nothing is returned.
@param {Array<number>} arr
@param {number} threshold
*/
function incrementIfAtLeast(arr, threshold) {
  for (let i = 0; i < arr.length; i++) {
    if (arr[i] >= threshold) {
      arr[i]++;
    }
  }
}
|
||||
|
||||
/**
Renumbers a capture transfer map after a recursion token has been expanded, so that its capture
numbers account for the captures the expansion added.

If the map is empty or the expansion added no captures, the given map is returned as is.
Otherwise a new map is returned and the given map is not modified.

@param {Map<number, Array<number>>} captureTransfers
@param {string} left Pattern text to the left of the recursion token within the recursed pattern.
@param {number} reps Number of nested copies that were added by the expansion.
@param {number} numCapturesAddedInExpansion
@param {number} numAddedHiddenCapturesPreExpansion
@param {number} numCapturesPassed
@returns {Map<number, Array<number>>}
*/
function mapCaptureTransfers(captureTransfers, left, reps, numCapturesAddedInExpansion, numAddedHiddenCapturesPreExpansion, numCapturesPassed) {
  if (!captureTransfers.size || !numCapturesAddedInExpansion) {
    return captureTransfers;
  }
  let numCapturesInLeft = 0;
  forEachUnescaped(left, captureDelim, () => numCapturesInLeft++, Context.DEFAULT);
  // Is 0 for global recursion
  const recursionDelimCaptureNum = numCapturesPassed - numCapturesInLeft + numAddedHiddenCapturesPreExpansion;
  // These values are the same for every entry, so compute them once
  const numCapturesAddedInLeft = numCapturesInLeft * reps;
  const numCapturesInRight = (numCapturesAddedInExpansion - numCapturesAddedInLeft) / reps;
  const lastCaptureNumInLeft = recursionDelimCaptureNum + numCapturesInLeft;
  const lastCaptureNumInRight = lastCaptureNumInLeft + numCapturesInRight;
  const newCaptureTransfers = new Map();
  captureTransfers.forEach((from, to) => {
    // Targets past the left side of the recursed subpattern shift by the number of added captures
    const newTo = to > lastCaptureNumInLeft ? to + numCapturesAddedInExpansion : to;
    const newFrom = [];
    for (const f of from) {
      if (f <= recursionDelimCaptureNum) {
        // Before the recursed subpattern
        newFrom.push(f);
      } else if (f > lastCaptureNumInRight) {
        // After the recursed subpattern
        newFrom.push(f + numCapturesAddedInExpansion);
      } else if (f <= lastCaptureNumInLeft) {
        // Within the recursed subpattern, on the left of the recursion token
        for (let i = 0; i <= reps; i++) {
          newFrom.push(f + (numCapturesInLeft * i));
        }
      } else {
        // Within the recursed subpattern, on the right of the recursion token
        for (let i = 0; i <= reps; i++) {
          newFrom.push(f + numCapturesAddedInLeft + (numCapturesInRight * i));
        }
      }
    }
    newCaptureTransfers.set(newTo, newFrom);
  });
  return newCaptureTransfers;
}
|
||||
|
||||
export {
|
||||
recursion,
|
||||
};
|
||||
|
Reference in New Issue
Block a user