full site update

This commit is contained in:
2025-07-24 18:46:24 +02:00
parent bfe2b90d8d
commit 37a6e0ab31
6912 changed files with 540482 additions and 361712 deletions

View File

@@ -1,42 +1,58 @@
import {Context, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';
import {emulationGroupMarker} from 'regex/internals';
const r = String.raw;
const gRToken = r`\\g<(?<gRNameOrNum>[^>&]+)&R=(?<gRDepth>[^>]+)>`;
const recursiveToken = r`\(\?R=(?<rDepth>[^\)]+)\)|${gRToken}`;
const namedCapturingDelim = r`\(\?<(?![=!])(?<captureName>[^>]+)>`;
const token = new RegExp(r`${namedCapturingDelim}|${recursiveToken}|\(\?|\\?.`, 'gsu');
const namedCaptureDelim = r`\(\?<(?![=!])(?<captureName>[^>]+)>`;
const captureDelim = r`${namedCaptureDelim}|(?<unnamed>\()(?!\?)`;
const token = new RegExp(r`${namedCaptureDelim}|${recursiveToken}|\(\?|\\?.`, 'gsu');
const overlappingRecursionMsg = 'Cannot use multiple overlapping recursions';
// Support emulation groups with transfer marker prefix
const emulationGroupMarkerRe = new RegExp(r`(?:\$[1-9]\d*)?${emulationGroupMarker.replace(/\$/g, r`\$`)}`, 'y');
/**
@param {string} expression
@param {string} pattern
@param {{
flags?: string;
useEmulationGroups?: boolean;
captureTransfers?: Map<number, Array<number>>;
hiddenCaptures?: Array<number>;
mode?: 'plugin' | 'external';
}} [data]
@returns {string}
@returns {{
pattern: string;
captureTransfers: Map<number, Array<number>>;
hiddenCaptures: Array<number>;
}}
*/
export function recursion(expression, data) {
function recursion(pattern, data) {
const {hiddenCaptures, mode} = {
hiddenCaptures: [],
mode: 'plugin',
...data,
};
// Capture transfer is used by <github.com/slevithan/oniguruma-to-es>
let captureTransfers = data?.captureTransfers ?? new Map();
// Keep the initial fail-check (which avoids unneeded processing) as fast as possible by testing
// without the accuracy improvement of using `hasUnescaped` with default `Context`
if (!(new RegExp(recursiveToken, 'su').test(expression))) {
return expression;
// without the accuracy improvement of using `hasUnescaped` with `Context.DEFAULT`
if (!(new RegExp(recursiveToken, 'su').test(pattern))) {
return {
pattern,
captureTransfers,
hiddenCaptures,
};
}
if (hasUnescaped(expression, r`\(\?\(DEFINE\)`, Context.DEFAULT)) {
if (mode === 'plugin' && hasUnescaped(pattern, r`\(\?\(DEFINE\)`, Context.DEFAULT)) {
throw new Error('DEFINE groups cannot be used with recursion');
}
const useEmulationGroups = !!data?.useEmulationGroups;
const hasNumberedBackref = hasUnescaped(expression, r`\\[1-9]`, Context.DEFAULT);
const addedHiddenCaptures = [];
const hasNumberedBackref = hasUnescaped(pattern, r`\\[1-9]`, Context.DEFAULT);
const groupContentsStartPos = new Map();
const openGroups = [];
let hasRecursed = false;
let numCharClassesOpen = 0;
let numCaptures = 0;
let numCapturesPassed = 0;
let match;
token.lastIndex = 0;
while ((match = token.exec(expression))) {
while ((match = token.exec(pattern))) {
const {0: m, groups: {captureName, rDepth, gRNameOrNum, gRDepth}} = match;
if (m === '[') {
numCharClassesOpen++;
@@ -57,15 +73,37 @@ export function recursion(expression, data) {
// Note that Regex+'s extended syntax (atomic groups and sometimes subroutines) can also
// add numbered backrefs, but those work fine because external plugins like this one run
// *before* the transformation of built-in syntax extensions
throw new Error('Numbered backrefs cannot be used with global recursion');
throw new Error(
// When used in `external` mode by transpilers other than Regex+, backrefs might have
// gone through conversion from named to numbered, so avoid a misleading error
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with global recursion`
);
}
const pre = expression.slice(0, match.index);
const post = expression.slice(token.lastIndex);
if (hasUnescaped(post, recursiveToken, Context.DEFAULT)) {
const left = pattern.slice(0, match.index);
const right = pattern.slice(token.lastIndex);
if (hasUnescaped(right, recursiveToken, Context.DEFAULT)) {
throw new Error(overlappingRecursionMsg);
}
const reps = +rDepth - 1;
pattern = makeRecursive(
left,
right,
reps,
false,
hiddenCaptures,
addedHiddenCaptures,
numCapturesPassed
);
captureTransfers = mapCaptureTransfers(
captureTransfers,
left,
reps,
addedHiddenCaptures.length,
0,
numCapturesPassed
);
// No need to parse further
return makeRecursive(pre, post, +rDepth, false, useEmulationGroups);
break;
// `\g<name&R=N>`, `\g<number&R=N>`
} else if (gRNameOrNum) {
assertMaxInBounds(gRDepth);
@@ -80,46 +118,66 @@ export function recursion(expression, data) {
}
}
if (!isWithinReffedGroup) {
throw new Error(r`Recursive \g cannot be used outside the referenced group "\g<${gRNameOrNum}&R=${gRDepth}>"`);
throw new Error(r`Recursive \g cannot be used outside the referenced group "${
mode === 'external' ? gRNameOrNum : r`\g<${gRNameOrNum}&R=${gRDepth}>`
}"`);
}
const startPos = groupContentsStartPos.get(gRNameOrNum);
const groupContents = getGroupContents(expression, startPos);
const groupContents = getGroupContents(pattern, startPos);
if (
hasNumberedBackref &&
hasUnescaped(groupContents, r`${namedCapturingDelim}|\((?!\?)`, Context.DEFAULT)
hasUnescaped(groupContents, r`${namedCaptureDelim}|\((?!\?)`, Context.DEFAULT)
) {
throw new Error('Numbered backrefs cannot be used with recursion of capturing groups');
throw new Error(
// When used in `external` mode by transpilers other than Regex+, backrefs might have
// gone through conversion from named to numbered, so avoid a misleading error
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with recursion of capturing groups`
);
}
const groupContentsPre = expression.slice(startPos, match.index);
const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
const expansion = makeRecursive(groupContentsPre, groupContentsPost, +gRDepth, true, useEmulationGroups);
const pre = expression.slice(0, startPos);
const post = expression.slice(startPos + groupContents.length);
const groupContentsLeft = pattern.slice(startPos, match.index);
const groupContentsRight = groupContents.slice(groupContentsLeft.length + m.length);
const numAddedHiddenCapturesPreExpansion = addedHiddenCaptures.length;
const reps = +gRDepth - 1;
const expansion = makeRecursive(
groupContentsLeft,
groupContentsRight,
reps,
true,
hiddenCaptures,
addedHiddenCaptures,
numCapturesPassed
);
captureTransfers = mapCaptureTransfers(
captureTransfers,
groupContentsLeft,
reps,
addedHiddenCaptures.length - numAddedHiddenCapturesPreExpansion,
numAddedHiddenCapturesPreExpansion,
numCapturesPassed
);
const pre = pattern.slice(0, startPos);
const post = pattern.slice(startPos + groupContents.length);
// Modify the string we're looping over
expression = `${pre}${expansion}${post}`;
pattern = `${pre}${expansion}${post}`;
// Step forward for the next loop iteration
token.lastIndex += expansion.length - m.length - groupContentsPre.length - groupContentsPost.length;
token.lastIndex += expansion.length - m.length - groupContentsLeft.length - groupContentsRight.length;
openGroups.forEach(g => g.hasRecursedWithin = true);
hasRecursed = true;
} else if (captureName) {
numCaptures++;
// NOTE: Not currently handling *named* emulation groups that already exist in the pattern
groupContentsStartPos.set(String(numCaptures), token.lastIndex);
numCapturesPassed++;
groupContentsStartPos.set(String(numCapturesPassed), token.lastIndex);
groupContentsStartPos.set(captureName, token.lastIndex);
openGroups.push({
num: numCaptures,
num: numCapturesPassed,
name: captureName,
});
} else if (m.startsWith('(')) {
} else if (m[0] === '(') {
const isUnnamedCapture = m === '(';
if (isUnnamedCapture) {
numCaptures++;
groupContentsStartPos.set(
String(numCaptures),
token.lastIndex + (useEmulationGroups ? emulationGroupMarkerLength(expression, token.lastIndex) : 0)
);
numCapturesPassed++;
groupContentsStartPos.set(String(numCapturesPassed), token.lastIndex);
}
openGroups.push(isUnnamedCapture ? {num: numCaptures} : {});
openGroups.push(isUnnamedCapture ? {num: numCapturesPassed} : {});
} else if (m === ')') {
openGroups.pop();
}
@@ -129,7 +187,13 @@ export function recursion(expression, data) {
}
}
return expression;
hiddenCaptures.push(...addedHiddenCaptures);
return {
pattern,
captureTransfers,
hiddenCaptures,
};
}
/**
@@ -147,66 +211,88 @@ function assertMaxInBounds(max) {
}
/**
@param {string} pre
@param {string} post
@param {number} maxDepth
@param {string} left
@param {string} right
@param {number} reps
@param {boolean} isSubpattern
@param {boolean} useEmulationGroups
@param {Array<number>} hiddenCaptures
@param {Array<number>} addedHiddenCaptures
@param {number} numCapturesPassed
@returns {string}
*/
function makeRecursive(pre, post, maxDepth, isSubpattern, useEmulationGroups) {
function makeRecursive(
left,
right,
reps,
isSubpattern,
hiddenCaptures,
addedHiddenCaptures,
numCapturesPassed
) {
const namesInRecursed = new Set();
// Avoid this work if not needed
// Can skip this work if not needed
if (isSubpattern) {
forEachUnescaped(pre + post, namedCapturingDelim, ({groups: {captureName}}) => {
forEachUnescaped(left + right, namedCaptureDelim, ({groups: {captureName}}) => {
namesInRecursed.add(captureName);
}, Context.DEFAULT);
}
const reps = maxDepth - 1;
// Depth 2: 'pre(?:pre(?:)post)post'
// Depth 3: 'pre(?:pre(?:pre(?:)post)post)post'
return `${pre}${
repeatWithDepth(`(?:${pre}`, reps, (isSubpattern ? namesInRecursed : null), 'forward', useEmulationGroups)
const rest = [
reps,
isSubpattern ? namesInRecursed : null,
hiddenCaptures,
addedHiddenCaptures,
numCapturesPassed,
];
// Depth 2: 'left(?:left(?:)right)right'
// Depth 3: 'left(?:left(?:left(?:)right)right)right'
// Empty group in the middle separates tokens and absorbs a following quantifier if present
return `${left}${
repeatWithDepth(`(?:${left}`, 'forward', ...rest)
}(?:)${
repeatWithDepth(`${post})`, reps, (isSubpattern ? namesInRecursed : null), 'backward', useEmulationGroups)
}${post}`;
repeatWithDepth(`${right})`, 'backward', ...rest)
}${right}`;
}
/**
@param {string} expression
@param {string} pattern
@param {'forward' | 'backward'} direction
@param {number} reps
@param {Set<string> | null} namesInRecursed
@param {'forward' | 'backward'} direction
@param {boolean} useEmulationGroups
@param {Array<number>} hiddenCaptures
@param {Array<number>} addedHiddenCaptures
@param {number} numCapturesPassed
@returns {string}
*/
function repeatWithDepth(expression, reps, namesInRecursed, direction, useEmulationGroups) {
function repeatWithDepth(
pattern,
direction,
reps,
namesInRecursed,
hiddenCaptures,
addedHiddenCaptures,
numCapturesPassed
) {
const startNum = 2;
const depthNum = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum;
const getDepthNum = i => direction === 'forward' ? (i + startNum) : (reps - i + startNum - 1);
let result = '';
for (let i = 0; i < reps; i++) {
const captureNum = depthNum(i);
const depthNum = getDepthNum(i);
result += replaceUnescaped(
expression,
// NOTE: Not currently handling *named* emulation groups that already exist in the pattern
r`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>${
useEmulationGroups ? r`|(?<unnamed>\()(?!\?)(?:${emulationGroupMarkerRe.source})?` : ''
}`,
({0: m, index, groups: {captureName, backref, unnamed}}) => {
pattern,
r`${captureDelim}|\\k<(?<backref>[^>]+)>`,
({0: m, groups: {captureName, unnamed, backref}}) => {
if (backref && namesInRecursed && !namesInRecursed.has(backref)) {
// Don't alter backrefs to groups outside the recursed subpattern
return m;
}
// Only matches unnamed capture delim if `useEmulationGroups`
if (unnamed) {
// Add an emulation group marker, possibly replacing an existing marker (removes any
// transfer prefix)
return `(${emulationGroupMarker}`;
const suffix = `_$${depthNum}`;
if (unnamed || captureName) {
const addedCaptureNum = numCapturesPassed + addedHiddenCaptures.length + 1;
addedHiddenCaptures.push(addedCaptureNum);
incrementIfAtLeast(hiddenCaptures, addedCaptureNum);
return unnamed ? m : `(?<${captureName}${suffix}>`;
}
const suffix = `_$${captureNum}`;
return captureName ?
`(?<${captureName}${suffix}>${useEmulationGroups ? emulationGroupMarker : ''}` :
r`\k<${backref}${suffix}>`;
return r`\k<${backref}${suffix}>`;
},
Context.DEFAULT
);
@@ -214,8 +300,66 @@ function repeatWithDepth(expression, reps, namesInRecursed, direction, useEmulat
return result;
}
function emulationGroupMarkerLength(expression, index) {
emulationGroupMarkerRe.lastIndex = index;
const match = emulationGroupMarkerRe.exec(expression);
return match ? match[0].length : 0;
/**
Bumps, in place, every entry of the array that is at or above the given threshold by one.
Entries below the threshold are left untouched. Nothing is returned.
@param {Array<number>} arr Values to adjust; mutated directly
@param {number} threshold Smallest value that gets incremented
*/
function incrementIfAtLeast(arr, threshold) {
  for (const [position, value] of arr.entries()) {
    if (value >= threshold) {
      arr[position] = value + 1;
    }
  }
}
/**
Renumbers a capture-transfer map after a recursion expansion has inserted extra capturing groups.
Returns the given map untouched when it is empty or when the expansion added no captures;
otherwise builds and returns a new map (the input map is not modified).
@param {Map<number, Array<number>>} captureTransfers Target capture number => source capture numbers
@param {string} left Pattern text to the left of the recursion token; its capture delimiters are counted
@param {number} reps Number of times the recursed content was repeated
@param {number} numCapturesAddedInExpansion Total captures added by this expansion
@param {number} numAddedHiddenCapturesPreExpansion Hidden captures added by earlier expansions
@param {number} numCapturesPassed Captures encountered so far when the recursion token was reached
@returns {Map<number, Array<number>>}
*/
function mapCaptureTransfers(captureTransfers, left, reps, numCapturesAddedInExpansion, numAddedHiddenCapturesPreExpansion, numCapturesPassed) {
  // Nothing to renumber unless transfers exist and this expansion actually added captures
  if (!captureTransfers.size || !numCapturesAddedInExpansion) {
    return captureTransfers;
  }

  let numCapturesInLeft = 0;
  forEachUnescaped(left, captureDelim, () => numCapturesInLeft++, Context.DEFAULT);

  // Is 0 for global recursion
  const recursionDelimCaptureNum = numCapturesPassed - numCapturesInLeft + numAddedHiddenCapturesPreExpansion;
  // These values are the same for every map entry, so compute them once up front
  const numCapturesAddedInLeft = numCapturesInLeft * reps;
  const numCapturesInRight = (numCapturesAddedInExpansion - numCapturesAddedInLeft) / reps;
  // Highest capture number on the left of the recursion token, and on its right
  const lastLeftCaptureNum = recursionDelimCaptureNum + numCapturesInLeft;
  const lastRightCaptureNum = lastLeftCaptureNum + numCapturesInRight;

  const remapped = new Map();
  for (const [to, from] of captureTransfers) {
    // Targets past the left-side captures shift by the total number of added captures
    const shiftedTo = to > lastLeftCaptureNum ? to + numCapturesAddedInExpansion : to;
    const shiftedFrom = [];
    for (const f of from) {
      // Before the recursed subpattern: number is unchanged
      if (f <= recursionDelimCaptureNum) {
        shiftedFrom.push(f);
        continue;
      }
      // After the recursed subpattern: shifted by everything that was added
      if (f > lastRightCaptureNum) {
        shiftedFrom.push(f + numCapturesAddedInExpansion);
        continue;
      }
      // Within the recursed subpattern: emit one source per depth level (original plus each
      // repetition). Captures left of the recursion token step by the left-side count; captures
      // right of it start after all added left-side copies and step by the right-side count
      const isLeftOfToken = f <= lastLeftCaptureNum;
      const firstNum = isLeftOfToken ? f : f + numCapturesAddedInLeft;
      const stride = isLeftOfToken ? numCapturesInLeft : numCapturesInRight;
      for (let depth = 0; depth <= reps; depth++) {
        shiftedFrom.push(firstNum + (stride * depth));
      }
    }
    remapped.set(shiftedTo, shiftedFrom);
  }
  return remapped;
}
export {
recursion,
};