3580 lines
114 KiB
JavaScript
3580 lines
114 KiB
JavaScript
// Bundler interop helpers: short aliases for the reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines every enumerable key of `all` on `target` as an enumerable getter, so the exported
// value is looked up lazily by calling `all[name]`.
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Copies own properties of `from` onto `to` as live getters, skipping keys that `to` already
// owns and the single key named by `except`. Non-object, non-function sources are ignored.
// `desc` is only used as scratch storage for the source property's descriptor.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Mirror the source's enumerability; treat a missing descriptor as enumerable
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a namespace object in a fresh object flagged with a non-enumerable `__esModule: true`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
||
|
||
// src/index.js
|
||
// Public API of the bundle. Each export is registered as a getter thunk, so the referenced
// binding is only read when the property is accessed.
var index_exports = {};
__export(index_exports, {
  EmulatedRegExp: () => EmulatedRegExp,
  toDetails: () => toDetails,
  toOnigurumaAst: () => toOnigurumaAst,
  toRegExp: () => toRegExp
});
// Publish the namespace as this module's CommonJS exports
module.exports = __toCommonJS(index_exports);
|
||
|
||
// src/utils.js
|
||
// Short aliases used throughout the file
var cp = String.fromCodePoint;
var r = String.raw;

// Whether this environment can compile a regex that uses a flag group (`(?i:)`)
var envSupportsFlagGroups = (() => {
  let supported = true;
  try {
    new RegExp("(?i:)");
  } catch {
    supported = false;
  }
  return supported;
})();

// Whether this environment accepts the regex flag `v`
var envSupportsFlagV = (() => {
  let supported = true;
  try {
    new RegExp("", "v");
  } catch {
    supported = false;
  }
  return supported;
})();
|
||
/**
 * Computes the `dotAll` and `ignoreCase` state after applying a flag change.
 * A flag listed in `disable` ends up off; otherwise it is on if `enable` lists it or it was
 * already on in `current`.
 * @param {{dotAll?: boolean, ignoreCase?: boolean}} current
 * @param {{enable?: object, disable?: object}} changes
 * @returns {{dotAll: boolean, ignoreCase: boolean}}
 */
function getNewCurrentFlags(current, { enable, disable }) {
  const resolve = (flag) => {
    if (disable?.[flag]) {
      return false;
    }
    return Boolean(enable?.[flag] || current[flag]);
  };
  return {
    dotAll: resolve("dotAll"),
    ignoreCase: resolve("ignoreCase")
  };
}
|
||
/**
 * Returns the value stored under `key`, first storing `defaultValue` if the key is absent.
 * @param {Map} map
 * @param {*} key
 * @param {*} defaultValue
 * @returns {*} The value held by the map for `key` after the call
 */
function getOrCreate(map, key, defaultValue) {
  const isMissing = !map.has(key);
  if (isMissing) {
    map.set(key, defaultValue);
  }
  return map.get(key);
}
|
||
/**
 * Reports whether `target` is at least as new as `min`, comparing their `EsVersion` numbers.
 * @param {string} target Key of `EsVersion`
 * @param {string} min Key of `EsVersion`
 * @returns {boolean}
 */
function isMinTarget(target, min) {
  const actual = EsVersion[target];
  const required = EsVersion[min];
  return actual >= required;
}
|
||
/**
 * Passes a truthy value through unchanged; throws for a falsy one.
 * @param {*} value
 * @param {string} [msg] Error message; defaults to "Value expected" when nullish
 * @returns {*} `value`
 * @throws {Error} If `value` is falsy
 */
function throwIfNot(value, msg) {
  if (value) {
    return value;
  }
  throw new Error(msg ?? "Value expected");
}
|
||
|
||
// src/options.js
|
||
// Numeric year for each concrete target name, so targets can be compared with `>=`
var EsVersion = { ES2025: 2025, ES2024: 2024, ES2018: 2018 };
|
||
// Accepted values for the `target` option
var Target = (
  /** @type {const} */
  {
    auto: "auto",
    ES2025: "ES2025",
    ES2024: "ES2024",
    ES2018: "ES2018"
  }
);

/**
 * Returns a complete options object: defaults overlaid with the caller's `options`, with
 * `rules` merged key by key and `target: "auto"` resolved to a concrete target based on the
 * environment's regex feature support.
 * @param {object} [options] User-provided options; may be omitted
 * @returns {object} A new options object; `options` is not modified
 * @throws {Error} If `options.target` is provided and is not an own key of `Target`
 */
function getOptions(options) {
  const requestedTarget = options?.target;
  // Use an own-property check: a plain `Target[...]` lookup would treat inherited keys such as
  // "constructor" or "toString" as valid targets
  if (requestedTarget !== void 0 && !Object.hasOwn(Target, requestedTarget)) {
    throw new Error(`Unexpected target "${requestedTarget}"`);
  }
  const opts = {
    // Sets the level of emulation rigor/strictness.
    accuracy: "default",
    // Disables advanced emulation that relies on returning a `RegExp` subclass, resulting in
    // certain patterns not being emulatable.
    avoidSubclass: false,
    // Oniguruma flags; a string with `i`, `m`, `x`, `D`, `S`, `W` in any order (all optional).
    // Oniguruma's `m` is equivalent to JavaScript's `s` (`dotAll`).
    flags: "",
    // Include JavaScript flag `g` (`global`) in the result.
    global: false,
    // Include JavaScript flag `d` (`hasIndices`) in the result.
    hasIndices: false,
    // JavaScript version used for generated regexes. Using `auto` detects the best value based on
    // your environment. Later targets allow faster processing, simpler generated source, and
    // support for additional features.
    target: "auto",
    // Disables optimizations that simplify the pattern when it doesn't change the meaning.
    verbose: false,
    ...options,
    // Advanced options that override standard behavior, error checking, and flags when enabled.
    rules: {
      // Useful with TextMate grammars that merge backreferences across patterns.
      allowOrphanBackrefs: false,
      // Use ASCII-based `\b` and `\B`, which increases search performance of generated regexes.
      asciiWordBoundaries: false,
      // Allow unnamed captures and numbered calls (backreferences and subroutines) when using
      // named capture. This is Oniguruma option `ONIG_OPTION_CAPTURE_GROUP`; on by default in
      // `vscode-oniguruma`.
      captureGroup: false,
      // Change the recursion depth limit from Oniguruma's `20` to an integer `2`–`20`.
      recursionLimit: 20,
      // `^` as `\A`; `$` as `\Z`. Improves search performance of generated regexes without changing
      // meaning if searching line by line. This is Oniguruma option `ONIG_OPTION_SINGLELINE`.
      singleline: false,
      ...options?.rules
    }
  };
  // An explicit `target: undefined` overwrites the default via the spread above, so treat it
  // the same as "auto" rather than returning an unresolved target
  if (opts.target === void 0 || opts.target === "auto") {
    opts.target = envSupportsFlagGroups ? "ES2025" : envSupportsFlagV ? "ES2024" : "ES2018";
  }
  return opts;
}
|
||
|
||
// src/unicode.js
|
||
// Characters that `getIgnoreCaseMatchChars` returns as-is, with no case variants added
var CharsWithoutIgnoreCaseExpansion = /* @__PURE__ */ new Set([
  cp(0x130), // İ
  cp(0x131) // ı
]);
|
||
/**
 * Lists the case variants of `char`, without duplicates, in this order: uppercase (only when
 * it is a single code point), alternative uppercase, titlecase, lowercase, alternative
 * lowercase. Characters in `CharsWithoutIgnoreCaseExpansion` are returned alone.
 * @param {string} char
 * @returns {Array<string>}
 */
function getIgnoreCaseMatchChars(char) {
  if (CharsWithoutIgnoreCaseExpansion.has(char)) {
    return [char];
  }
  const lower = char.toLowerCase();
  const upper = lower.toUpperCase();
  const title = LowerToTitleCaseMap.get(lower);
  const altLower = LowerToAlternativeLowerCaseMap.get(lower);
  const altUpper = LowerToAlternativeUpperCaseMap.get(lower);
  const ordered = [];
  // Skip uppercase forms that expand to more than one code point
  if ([...upper].length === 1) {
    ordered.push(upper);
  }
  if (altUpper) {
    ordered.push(altUpper);
  }
  if (title) {
    ordered.push(title);
  }
  ordered.push(lower);
  if (altLower) {
    ordered.push(altLower);
  }
  // A Set removes duplicates while keeping first-seen order
  return [...new Set(ordered)];
}
|
||
// Unicode property names accepted in this table; each entry lists one property followed by
// its aliases, separated by single spaces. All names and aliases end up as set members.
var JsUnicodeProperties = new Set([
  // General categories
  "C Other",
  "Cc Control cntrl",
  "Cf Format",
  "Cn Unassigned",
  "Co Private_Use",
  "Cs Surrogate",
  "L Letter",
  "LC Cased_Letter",
  "Ll Lowercase_Letter",
  "Lm Modifier_Letter",
  "Lo Other_Letter",
  "Lt Titlecase_Letter",
  "Lu Uppercase_Letter",
  "M Mark Combining_Mark",
  "Mc Spacing_Mark",
  "Me Enclosing_Mark",
  "Mn Nonspacing_Mark",
  "N Number",
  "Nd Decimal_Number digit",
  "Nl Letter_Number",
  "No Other_Number",
  "P Punctuation punct",
  "Pc Connector_Punctuation",
  "Pd Dash_Punctuation",
  "Pe Close_Punctuation",
  "Pf Final_Punctuation",
  "Pi Initial_Punctuation",
  "Po Other_Punctuation",
  "Ps Open_Punctuation",
  "S Symbol",
  "Sc Currency_Symbol",
  "Sk Modifier_Symbol",
  "Sm Math_Symbol",
  "So Other_Symbol",
  "Z Separator",
  "Zl Line_Separator",
  "Zp Paragraph_Separator",
  "Zs Space_Separator",
  // Binary properties
  "ASCII",
  "ASCII_Hex_Digit AHex",
  "Alphabetic Alpha",
  "Any",
  "Assigned",
  "Bidi_Control Bidi_C",
  "Bidi_Mirrored Bidi_M",
  "Case_Ignorable CI",
  "Cased",
  "Changes_When_Casefolded CWCF",
  "Changes_When_Casemapped CWCM",
  "Changes_When_Lowercased CWL",
  "Changes_When_NFKC_Casefolded CWKCF",
  "Changes_When_Titlecased CWT",
  "Changes_When_Uppercased CWU",
  "Dash",
  "Default_Ignorable_Code_Point DI",
  "Deprecated Dep",
  "Diacritic Dia",
  "Emoji",
  "Emoji_Component EComp",
  "Emoji_Modifier EMod",
  "Emoji_Modifier_Base EBase",
  "Emoji_Presentation EPres",
  "Extended_Pictographic ExtPict",
  "Extender Ext",
  "Grapheme_Base Gr_Base",
  "Grapheme_Extend Gr_Ext",
  "Hex_Digit Hex",
  "IDS_Binary_Operator IDSB",
  "IDS_Trinary_Operator IDST",
  "ID_Continue IDC",
  "ID_Start IDS",
  "Ideographic Ideo",
  "Join_Control Join_C",
  "Logical_Order_Exception LOE",
  "Lowercase Lower",
  "Math",
  "Noncharacter_Code_Point NChar",
  "Pattern_Syntax Pat_Syn",
  "Pattern_White_Space Pat_WS",
  "Quotation_Mark QMark",
  "Radical",
  "Regional_Indicator RI",
  "Sentence_Terminal STerm",
  "Soft_Dotted SD",
  "Terminal_Punctuation Term",
  "Unified_Ideograph UIdeo",
  "Uppercase Upper",
  "Variation_Selector VS",
  "White_Space space",
  "XID_Continue XIDC",
  "XID_Start XIDS"
].flatMap((entry) => entry.split(" ")));
|
||
// Lookup from the slug of each name in `JsUnicodeProperties` to the name as listed there
var JsUnicodePropertiesMap = /* @__PURE__ */ new Map();
JsUnicodeProperties.forEach((propertyName) => {
  JsUnicodePropertiesMap.set(slug(propertyName), propertyName);
});
|
||
// ES2024 properties of strings; none are supported by Oniguruma
var JsUnicodePropertiesOfStrings = /* @__PURE__ */ new Set([
  "Basic_Emoji",
  "Emoji_Keycap_Sequence",
  "RGI_Emoji",
  "RGI_Emoji_Flag_Sequence",
  "RGI_Emoji_Modifier_Sequence",
  "RGI_Emoji_Tag_Sequence",
  "RGI_Emoji_ZWJ_Sequence"
]);
|
||
// Lookup from the slug of each property-of-strings name to the name as listed in
// `JsUnicodePropertiesOfStrings`
var JsUnicodePropertiesOfStringsMap = /* @__PURE__ */ new Map();
JsUnicodePropertiesOfStrings.forEach((propertyName) => {
  JsUnicodePropertiesOfStringsMap.set(slug(propertyName), propertyName);
});
|
||
// Lowercase char -> a second lowercase char treated as its case variant
var LowerToAlternativeLowerCaseMap = /* @__PURE__ */ new Map([
  ["s", cp(0x17F)], // s, ſ
  [cp(0x17F), "s"] // ſ, s
]);

// Lowercase char -> an additional uppercase char beyond what `toUpperCase` yields
var LowerToAlternativeUpperCaseMap = /* @__PURE__ */ new Map([
  [cp(0xDF), cp(0x1E9E)], // ß, ẞ
  [cp(0x6B), cp(0x212A)], // k, K (Kelvin)
  [cp(0xE5), cp(0x212B)], // å, Å (Angstrom)
  [cp(0x3C9), cp(0x2126)] // ω, Ω (Ohm)
]);

// Lowercase form -> the char at the listed code point (see `titleEntry`)
var LowerToTitleCaseMap = new Map([
  titleEntry(0x1C5),
  titleEntry(0x1C8),
  titleEntry(0x1CB),
  titleEntry(0x1F2),
  ...titleRange(0x1F88, 0x1F8F),
  ...titleRange(0x1F98, 0x1F9F),
  ...titleRange(0x1FA8, 0x1FAF),
  titleEntry(0x1FBC),
  titleEntry(0x1FCC),
  titleEntry(0x1FFC)
]);
|
||
// POSIX class name -> equivalent pattern source written with Unicode properties
var PosixClassesMap = /* @__PURE__ */ new Map([
  ["alnum", r`[\p{Alpha}\p{Nd}]`],
  ["alpha", r`\p{Alpha}`],
  ["ascii", r`\p{ASCII}`],
  ["blank", r`[\p{Zs}\t]`],
  ["cntrl", r`\p{cntrl}`],
  ["digit", r`\p{Nd}`],
  ["graph", r`[\P{space}&&\P{cntrl}&&\P{Cn}&&\P{Cs}]`],
  ["lower", r`\p{Lower}`],
  ["print", r`[[\P{space}&&\P{cntrl}&&\P{Cn}&&\P{Cs}]\p{Zs}]`],
  // New value from Oniguruma 6.9.9
  ["punct", r`[\p{P}\p{S}]`],
  ["space", r`\p{space}`],
  ["upper", r`\p{Upper}`],
  ["word", r`[\p{Alpha}\p{M}\p{Nd}\p{Pc}]`],
  ["xdigit", r`\p{AHex}`]
]);
|
||
// POSIX names that have no same-named JS Unicode property. The remaining POSIX names (alpha,
// ascii, cntrl, digit, lower, punct, space, upper) are available with the same name in JS
// (see `JsUnicodeProperties`), so can be handled as standard Unicode properties.
var PosixProperties = /* @__PURE__ */ new Set([
  "alnum",
  "blank",
  "graph",
  "print",
  "word",
  "xdigit"
]);
|
||
/**
 * Builds the ascending list `start, start + 1, ...` of values not exceeding `end`.
 * @param {number} start
 * @param {number} end Inclusive upper bound
 * @returns {Array<number>} Empty when `start` is greater than `end`
 */
function range(start, end) {
  const values = [];
  let current = start;
  while (current <= end) {
    values.push(current);
    current++;
  }
  return values;
}
|
||
/**
 * Normalizes a name for loose comparison: removes hyphens, spaces, and underscores, then
 * lowercases the rest.
 * @param {string} name
 * @returns {string}
 */
function slug(name) {
  const withoutSeparators = name.replace(/[- _]+/g, "");
  return withoutSeparators.toLowerCase();
}
|
||
/**
 * Builds a `[lowercase, char]` pair for the character at `codePoint`.
 * @param {number} codePoint
 * @returns {[string, string]}
 */
function titleEntry(codePoint) {
  const titleChar = cp(codePoint);
  const lowerChar = titleChar.toLowerCase();
  return [lowerChar, titleChar];
}
|
||
/**
 * Builds one `titleEntry` pair for every code point from `start` through `end`.
 * @param {number} start
 * @param {number} end Inclusive
 * @returns {Array<[string, string]>}
 */
function titleRange(start, end) {
  const entries = [];
  for (const codePoint of range(start, end)) {
    entries.push(titleEntry(codePoint));
  }
  return entries;
}
|
||
// Properties whose members all share one specific letter case.
// The `Changes_When_*` properties (and their aliases) could be included, but they're very rare.
// Some other properties include a handful of chars with specific cases only, but these chars are
// generally extreme edge cases and using such properties case insensitively generally produces
// undesired behavior anyway
var UnicodePropertiesWithSpecificCase = /* @__PURE__ */ new Set([
  "Lower",
  "Lowercase",
  "Upper",
  "Uppercase",
  "Ll",
  "Lowercase_Letter",
  "Lt",
  "Titlecase_Letter",
  "Lu",
  "Uppercase_Letter"
]);
|
||
|
||
// src/tokenize.js
|
||
// Token type names produced by the tokenizer; each key maps to the identical string
var TokenTypes = (
  /** @type {const} */
  {
    Alternator: "Alternator",
    Assertion: "Assertion",
    Backreference: "Backreference",
    Character: "Character",
    CharacterClassClose: "CharacterClassClose",
    CharacterClassHyphen: "CharacterClassHyphen",
    CharacterClassIntersector: "CharacterClassIntersector",
    CharacterClassOpen: "CharacterClassOpen",
    CharacterSet: "CharacterSet",
    Directive: "Directive",
    GroupClose: "GroupClose",
    GroupOpen: "GroupOpen",
    Subroutine: "Subroutine",
    Quantifier: "Quantifier",
    // These aren't allowed in char classes, so they aren't equivalent to JS `[\q{}]`
    VariableLengthCharacterSet: "VariableLengthCharacterSet",
    // Intermediate representation not included in results
    EscapedNumber: "EscapedNumber"
  }
);
|
||
// `kind` values for `CharacterSet` tokens
var TokenCharacterSetKinds = {
  any: "any",
  digit: "digit",
  dot: "dot",
  hex: "hex",
  non_newline: "non_newline",
  posix: "posix",
  property: "property",
  space: "space",
  word: "word"
};

// `kind` values for `Directive` tokens
var TokenDirectiveKinds = { flags: "flags", keep: "keep" };

// `kind` values for `GroupOpen` tokens
var TokenGroupKinds = {
  absent_repeater: "absent_repeater",
  atomic: "atomic",
  capturing: "capturing",
  group: "group",
  lookahead: "lookahead",
  lookbehind: "lookbehind"
};
|
||
// Letter following a backslash -> code point of the character the escape stands for
var EscapeCharCodes = /* @__PURE__ */ new Map([
  ["a", 7], // alert/bell (Not available in JS)
  ["b", 8], // backspace (only in char classes)
  ["e", 27], // escape (Not available in JS)
  ["f", 12], // form feed
  ["n", 10], // line feed
  ["r", 13], // carriage return
  ["t", 9], // horizontal tab
  ["v", 11] // vertical tab
]);
|
||
var charClassOpenPattern = r`\[\^?`;
|
||
var sharedEscapesPattern = `${// Control char
|
||
"c.? | C(?:-.?)?"}|${// Unicode property; Onig considers `\p` an identity escape, but e.g. `\p{`, `\p{ ^L}`, and
|
||
// `\p{gc=L}` are invalid
|
||
r`[pP]\{(?:\^?[-\x20_]*[A-Za-z][-\x20\w]*\})?`}|${// Hex encoded byte sequence; attempt match before other `\xNN` hex char
|
||
r`x[89A-Fa-f]\p{AHex}(?:\\x[89A-Fa-f]\p{AHex})*`}|${// Hex char
|
||
r`u(?:\p{AHex}{4})? | x\{[^\}]*\}? | x\p{AHex}{0,2}`}|${// Enclosed octal code point
|
||
r`o\{[^\}]*\}?`}|${// Escaped number
|
||
r`\d{1,3}`}`;
|
||
var quantifierRe = /[?*+][?+]?|\{(?:\d+(?:,\d*)?|,\d+)\}\??/;
|
||
var tokenRe = new RegExp(r`
|
||
\\ (?:
|
||
${sharedEscapesPattern}
|
||
| [gk]<[^>]*>?
|
||
| [gk]'[^']*'?
|
||
| .
|
||
)
|
||
| \( (?: \? (?:
|
||
[:=!>(]
|
||
| <[=!]
|
||
| <[^>]*>
|
||
| '[^']*'
|
||
| ~\|?
|
||
| # (?:[^)\\] | \\.?)*
|
||
| [imx\-]+[:)]
|
||
)?)?
|
||
| ${quantifierRe.source}
|
||
| ${charClassOpenPattern}
|
||
| .
|
||
`.replace(/\s+/g, ""), "gsu");
|
||
var charClassTokenRe = new RegExp(r`
|
||
\\ (?:
|
||
${sharedEscapesPattern}
|
||
| .
|
||
)
|
||
| \[:[^:]*:\]
|
||
| ${charClassOpenPattern}
|
||
| &&
|
||
| .
|
||
`.replace(/\s+/g, ""), "gsu");
|
||
/**
 * Splits an Oniguruma pattern into tokens.
 * @param {string} pattern
 * @param {string} [flags] Any combination of `i`, `m`, `x`, `D`, `S`, `W`
 * @param {{captureGroup?: boolean, singleline?: boolean}} [rules]
 * @returns {{tokens: Array<object>, flags: object, rules: object}}
 * @throws {Error} If `pattern` is not a string or `flags` contains any other character
 */
function tokenize(pattern, flags = "", rules) {
  const resolvedRules = {
    // `ONIG_OPTION_CAPTURE_GROUP`
    captureGroup: false,
    // `ONIG_OPTION_SINGLELINE`
    singleline: false,
    ...rules
  };
  if (typeof pattern !== "string") {
    throw new Error("String expected as pattern");
  }
  if (!/^[imxDSW]*$/.test(flags)) {
    throw new Error(`Flags "${flags}" includes unsupported value`);
  }
  const extended = flags.includes("x");
  // Stack of extended-mode (flag x) states; the last entry applies to the current position
  const xStack = [extended];
  const context = {
    captureGroup: resolvedRules.captureGroup,
    getCurrentModX: () => xStack.at(-1),
    numOpenGroups: 0,
    popModX() {
      xStack.pop();
    },
    pushModX(isXOn) {
      xStack.push(isXOn);
    },
    replaceCurrentModX(isXOn) {
      xStack[xStack.length - 1] = isXOn;
    },
    singleline: resolvedRules.singleline
  };

  // First pass: raw matches -> tokens. A result may reposition the scanner via `lastIndex`.
  let tokens = [];
  tokenRe.lastIndex = 0;
  for (let match = tokenRe.exec(pattern); match; match = tokenRe.exec(pattern)) {
    const result = getTokenWithDetails(context, pattern, match[0], tokenRe.lastIndex);
    if (result.tokens) {
      tokens.push(...result.tokens);
    } else if (result.token) {
      tokens.push(result.token);
    }
    if (result.lastIndex !== void 0) {
      tokenRe.lastIndex = result.lastIndex;
    }
  }

  // Second pass: number the capturing groups. Bare `(` groups become capturing only when no
  // group was already marked capturing during the first pass.
  const bareParenGroups = [];
  let captureCount = 0;
  for (const token of tokens) {
    if (token.type !== TokenTypes.GroupOpen) {
      continue;
    }
    if (token.kind === TokenGroupKinds.capturing) {
      token.number = ++captureCount;
    } else if (token.raw === "(") {
      bareParenGroups.push(token);
    }
  }
  if (!captureCount) {
    bareParenGroups.forEach((token, index) => {
      token.kind = TokenGroupKinds.capturing;
      token.number = index + 1;
    });
  }
  const numCaptures = captureCount || bareParenGroups.length;

  // Third pass: escaped numbers can only be resolved once the capture count is known
  tokens = tokens.flatMap(
    (token) => token.type === TokenTypes.EscapedNumber ? splitEscapedNumToken(token, numCaptures) : token
  );

  return {
    tokens,
    flags: {
      ignoreCase: flags.includes("i"),
      // Flag m is called `multiline` in Onig, but that has a different meaning in JS. Onig flag m
      // is equivalent to JS flag s
      dotAll: flags.includes("m"),
      // Flag x is fully handled during tokenization
      extended,
      // Flags D, S, W are currently only supported as top-level flags
      digitIsAscii: flags.includes("D"),
      spaceIsAscii: flags.includes("S"),
      wordIsAscii: flags.includes("W")
    },
    rules: resolvedRules
  };
}
|
||
/**
 * Turns one raw match from `tokenRe` into a result object.
 * @param {object} context Tokenizer state (`captureGroup`, `singleline`, `numOpenGroups`, and
 *   the extended-mode stack accessors)
 * @param {string} pattern The full pattern being tokenized
 * @param {string} m The raw matched text
 * @param {number} lastIndex Index in `pattern` just after `m`
 * @returns {{token?: object, tokens?: Array<object>, lastIndex?: number}} `token` or `tokens`
 *   to append, and/or a `lastIndex` to continue scanning from; a result with only `lastIndex`
 *   emits nothing
 * @throws {Error} For invalid or unsupported syntax
 */
function getTokenWithDetails(context, pattern, m, lastIndex) {
  const [m0, m1, m2] = m;
  const single = (type, data) => ({ token: createToken(type, m, data) });

  if (m0 === "[") {
    const charClass = getAllTokensForCharClass(pattern, m, lastIndex);
    return {
      // Array of all of the char class's tokens
      tokens: charClass.tokens,
      // Jump forward to the end of the char class
      lastIndex: charClass.lastIndex
    };
  }

  if (m0 === "\\") {
    if ("AbBGzZ".includes(m1)) {
      return single(TokenTypes.Assertion, { kind: m });
    }
    if (/^\\g[<']/.test(m)) {
      // Requires a closed, nonempty name
      if (!/^\\g(?:<[^>]+>|'[^']+')$/.test(m)) {
        throw new Error(`Invalid group name "${m}"`);
      }
      return single(TokenTypes.Subroutine);
    }
    if (/^\\k[<']/.test(m)) {
      if (!/^\\k(?:<[^>]+>|'[^']+')$/.test(m)) {
        throw new Error(`Invalid group name "${m}"`);
      }
      return single(TokenTypes.Backreference);
    }
    switch (m1) {
      case "K":
        return single(TokenTypes.Directive, { kind: TokenDirectiveKinds.keep });
      case "N":
        return single(TokenTypes.CharacterSet, { kind: TokenCharacterSetKinds.non_newline });
      case "O":
        return single(TokenTypes.CharacterSet, { kind: TokenCharacterSetKinds.any });
      case "R":
      case "X":
        return single(TokenTypes.VariableLengthCharacterSet, { kind: m });
      case "y":
      case "Y":
        throw new Error(`Unsupported grapheme boundary "${m}"`);
    }
    const escaped = createTokenForSharedEscape(m, { inCharClass: false });
    return Array.isArray(escaped) ? { tokens: escaped } : { token: escaped };
  }

  if (m0 === "(") {
    if (m2 === "#") {
      // The match stops before the closing paren, so it must be the next char
      if (pattern[lastIndex] !== ")") {
        throw new Error('Unclosed comment group "(?#"');
      }
      return { lastIndex: lastIndex + 1 };
    }
    if ("-imx".includes(m2)) {
      return { token: createTokenForFlagMod(m, context) };
    }
    // Every remaining group kind inherits the current extended-mode state
    context.pushModX(context.getCurrentModX());
    context.numOpenGroups++;
    const isBareParen = m === "(";
    if (
      // Unnamed capture if no named captures present and `captureGroup` not enabled, else
      // noncapturing group
      (isBareParen && !context.captureGroup) ||
      // Noncapturing group
      m === "(?:"
    ) {
      // For `(`, will later change to `capturing` and add `number` prop if no named captures
      return single(TokenTypes.GroupOpen, { kind: TokenGroupKinds.group });
    }
    if (m === "(?>") {
      return single(TokenTypes.GroupOpen, { kind: TokenGroupKinds.atomic });
    }
    if (m === "(?=" || m === "(?!" || m === "(?<=" || m === "(?<!") {
      return single(TokenTypes.GroupOpen, {
        kind: m2 === "<" ? TokenGroupKinds.lookbehind : TokenGroupKinds.lookahead,
        negate: m.endsWith("!")
      });
    }
    if (m2 === "<" || m2 === "'" || (isBareParen && context.captureGroup)) {
      // Will add `number` in a second pass
      const capture = createToken(TokenTypes.GroupOpen, m, {
        kind: TokenGroupKinds.capturing
      });
      if (!isBareParen) {
        // Strip the three-char opener and the closing delimiter
        capture.name = m.slice(3, -1);
      }
      return { token: capture };
    }
    if (m2 === "~") {
      if (m === "(?~|") {
        throw new Error(`Unsupported absent function kind "${m}"`);
      }
      return single(TokenTypes.GroupOpen, { kind: TokenGroupKinds.absent_repeater });
    }
    if (m2 === "(") {
      throw new Error(`Unsupported conditional "${m}"`);
    }
    if (m === "(?") {
      throw new Error("Invalid group");
    }
    throw new Error(`Unexpected group "${m}"`);
  }

  if (m === ")") {
    context.popModX();
    context.numOpenGroups--;
    if (context.numOpenGroups < 0) {
      throw new Error('Unmatched ")"');
    }
    return single(TokenTypes.GroupClose);
  }

  if (m === "#" && context.getCurrentModX()) {
    // Jump forward to the end of the comment
    const newlineIndex = pattern.indexOf("\n", lastIndex);
    return { lastIndex: newlineIndex === -1 ? pattern.length : newlineIndex };
  }

  if (/^\s$/.test(m) && context.getCurrentModX()) {
    // Jump forward to the end of the whitespace
    const whitespaceRun = /\s+/y;
    whitespaceRun.lastIndex = lastIndex;
    const rest = whitespaceRun.exec(pattern);
    return { lastIndex: rest ? whitespaceRun.lastIndex : lastIndex };
  }

  if (m === ".") {
    return single(TokenTypes.CharacterSet, { kind: TokenCharacterSetKinds.dot });
  }

  if (m === "^" || m === "$") {
    const kind = context.singleline ? {
      "^": r`\A`,
      "$": r`\Z`
    }[m] : m;
    return single(TokenTypes.Assertion, { kind });
  }

  if (m === "|") {
    return single(TokenTypes.Alternator);
  }

  if (quantifierRe.test(m)) {
    return { token: createTokenForQuantifier(m) };
  }

  // Anything else is a literal character
  assertSingleCodePoint(m);
  return single(TokenTypes.Character, { value: m.codePointAt(0) });
}
|
||
/**
 * Tokenizes a character class, including nested classes, starting just after its opener.
 * @param {string} pattern The full pattern
 * @param {string} opener The raw opening bracket, with `^` if negated
 * @param {number} lastIndex Index in `pattern` just after `opener`
 * @returns {{tokens: Array<object>, lastIndex: number}} The class's tokens (opener first) and
 *   the index to resume scanning from; `pattern.length` if the scanner's index is 0 on exit
 */
function getAllTokensForCharClass(pattern, opener, lastIndex) {
  const tokens = [
    createToken(TokenTypes.CharacterClassOpen, opener, { negate: opener[1] === "^" })
  ];
  let depth = 1;
  charClassTokenRe.lastIndex = lastIndex;
  for (
    let match = charClassTokenRe.exec(pattern);
    match;
    match = charClassTokenRe.exec(pattern)
  ) {
    const m = match[0];
    // An opening bracket not followed by a colon starts a nested class
    if (m[0] === "[" && m[1] !== ":") {
      depth++;
      tokens.push(createToken(TokenTypes.CharacterClassOpen, m, { negate: m[1] === "^" }));
      continue;
    }
    if (m !== "]") {
      const inner = createTokenForAnyTokenWithinCharClass(m);
      if (Array.isArray(inner)) {
        tokens.push(...inner);
      } else {
        tokens.push(inner);
      }
      continue;
    }
    // A closing bracket directly after an opener is a literal character
    if (tokens.at(-1).type === TokenTypes.CharacterClassOpen) {
      tokens.push(createToken(TokenTypes.Character, m, { value: 93 }));
      continue;
    }
    depth--;
    tokens.push(createToken(TokenTypes.CharacterClassClose, m));
    if (!depth) {
      break;
    }
  }
  return {
    tokens,
    lastIndex: charClassTokenRe.lastIndex || pattern.length
  };
}
|
||
/**
 * Creates the token(s) for one raw match found inside a character class, other than class
 * openers and a bare closing bracket.
 * @param {string} raw
 * @returns {object|Array<object>}
 * @throws {Error} If `raw` starts with an opening bracket but is not a known POSIX class, or
 *   if `raw` is otherwise not a single code point
 */
function createTokenForAnyTokenWithinCharClass(raw) {
  const first = raw[0];
  if (first === "\\") {
    return createTokenForSharedEscape(raw, { inCharClass: true });
  }
  if (first === "[") {
    const posix = /\[:(?<negate>\^?)(?<name>[a-z]+):\]/.exec(raw);
    if (!posix || !PosixClassesMap.get(posix.groups.name)) {
      throw new Error(`Invalid POSIX class "${raw}"`);
    }
    const { negate, name } = posix.groups;
    return createToken(TokenTypes.CharacterSet, raw, {
      kind: TokenCharacterSetKinds.posix,
      negate: !!negate,
      value: name
    });
  }
  switch (raw) {
    case "-":
      return createToken(TokenTypes.CharacterClassHyphen, raw);
    case "&&":
      return createToken(TokenTypes.CharacterClassIntersector, raw);
  }
  assertSingleCodePoint(raw);
  return createToken(TokenTypes.Character, raw, { value: raw.codePointAt(0) });
}
|
||
/**
 * Creates the token(s) for a backslash escape that is handled the same way inside and outside
 * character classes.
 * @param {string} raw The escape, starting with a backslash
 * @param {{inCharClass: boolean}} options
 * @returns {object|Array<object>} One token, or an array of `Character` tokens for an encoded
 *   byte sequence
 * @throws {Error} For incomplete, invalid, or unsupported escapes
 */
function createTokenForSharedEscape(raw, { inCharClass }) {
  const char1 = raw[1];
  const literal = (value) => createToken(TokenTypes.Character, raw, { value });

  if (char1 === "c" || char1 === "C") {
    return createTokenForControlChar(raw);
  }
  if ("dDhHsSwW".includes(char1)) {
    return createTokenForShorthandCharClass(raw);
  }
  if (raw.startsWith(r`\o{`)) {
    throw new Error(`Incomplete, invalid, or unsupported octal code point "${raw}"`);
  }
  if (/^\\[pP]\{/.test(raw)) {
    // Length 3 means nothing followed the opening brace
    if (raw.length === 3) {
      throw new Error(`Incomplete or invalid Unicode property "${raw}"`);
    }
    return createTokenForUnicodeProperty(raw);
  }
  // Hex encoded byte sequence starting with a byte of 0x80 or above: decode as UTF-8
  if (/^\\x[89A-Fa-f]\p{AHex}/u.test(raw)) {
    try {
      const bytes = raw.split(/\\x/).slice(1).map((hex) => parseInt(hex, 16));
      const decoder = new TextDecoder("utf-8", {
        ignoreBOM: true,
        fatal: true
      });
      const decoded = decoder.decode(new Uint8Array(bytes));
      const encoder = new TextEncoder();
      // One token per decoded code point, each with its own bytes re-encoded as `raw`
      return [...decoded].map((char) => {
        const charRaw = [...encoder.encode(char)].map((byte) => `\\x${byte.toString(16)}`).join("");
        return createToken(TokenTypes.Character, charRaw, {
          value: char.codePointAt(0)
        });
      });
    } catch {
      throw new Error(`Multibyte code "${raw}" incomplete or invalid in Oniguruma`);
    }
  }
  if (char1 === "u" || char1 === "x") {
    return literal(getValidatedHexCharCode(raw));
  }
  if (EscapeCharCodes.has(char1)) {
    return literal(EscapeCharCodes.get(char1));
  }
  if (/\d/.test(char1)) {
    // Resolved later by `splitEscapedNumToken`, once the number of captures is known
    return createToken(TokenTypes.EscapedNumber, raw, { inCharClass });
  }
  if (raw === "\\") {
    throw new Error(r`Incomplete escape "\"`);
  }
  if (char1 === "M") {
    throw new Error(`Unsupported meta "${raw}"`);
  }
  // Identity escape: a backslash followed by exactly one code point
  if ([...raw].length === 2) {
    return literal(raw.codePointAt(1));
  }
  throw new Error(`Unexpected escape "${raw}"`);
}
|
||
/**
 * Builds a token object with `type` and `raw` first, followed by the properties of `data`.
 * @param {string} type
 * @param {string} raw
 * @param {object} [data] Extra properties; may override `type` and `raw`
 * @returns {object}
 */
function createToken(type, raw, data) {
  const base = { type, raw };
  return { ...base, ...data };
}
|
||
/**
 * Creates a `Character` token for a control char escape (`\cx` or `\C-x`).
 * @param {string} raw
 * @returns {object} Token whose `value` is the uppercased letter's code point minus 64
 * @throws {Error} If the control letter is missing or not `A-Z`/`a-z`
 */
function createTokenForControlChar(raw) {
  // The letter sits at index 2 after `\c` and at index 3 otherwise
  const letterIndex = raw[1] === "c" ? 2 : 3;
  const char = raw[letterIndex];
  if (!char || !/[A-Za-z]/.test(char)) {
    throw new Error(`Unsupported control character "${raw}"`);
  }
  const value = char.toUpperCase().codePointAt(0) - 64;
  return createToken(TokenTypes.Character, raw, { value });
}
|
||
/**
 * Creates the token for a flag modifier: a `Directive` when `raw` ends with `)`, or a
 * `GroupOpen` when it ends with `:`. Also updates the extended-mode (flag x) state in
 * `context`, and increments `context.numOpenGroups` for the group form.
 * @param {string} raw
 * @param {object} context Tokenizer state
 * @returns {object}
 * @throws {Error} If `raw` ends with neither `)` nor `:`
 */
function createTokenForFlagMod(raw, context) {
  const { groups } = /^\(\?(?<on>[imx]*)(?:-(?<off>[imx\-]*))?/.exec(raw);
  const on = groups.on;
  const off = groups.off ?? "";
  const isXOn = (context.getCurrentModX() || on.includes("x")) && !off.includes("x");
  const enabledFlags = getFlagPropsForToken(on);
  const disabledFlags = getFlagPropsForToken(off);
  const flagChanges = {};
  if (enabledFlags) {
    flagChanges.enable = enabledFlags;
  }
  if (disabledFlags) {
    flagChanges.disable = disabledFlags;
  }

  // Directive form: changes the x state of the current group level in place
  if (raw.endsWith(")")) {
    context.replaceCurrentModX(isXOn);
    return createToken(TokenTypes.Directive, raw, {
      kind: TokenDirectiveKinds.flags,
      flags: flagChanges
    });
  }

  // Group form: opens a new level with its own x state
  if (raw.endsWith(":")) {
    context.pushModX(isXOn);
    context.numOpenGroups++;
    const groupToken = createToken(TokenTypes.GroupOpen, raw, {
      kind: TokenGroupKinds.group
    });
    if (enabledFlags || disabledFlags) {
      groupToken.flags = flagChanges;
    }
    return groupToken;
  }

  throw new Error(`Unexpected flag modifier "${raw}"`);
}
|
||
/**
 * Creates a `Quantifier` token with `min`, `max`, `greedy`, and `possessive`.
 * @param {string} raw A symbol quantifier (`?`, `*`, `+` with optional suffix) or an interval
 *   in braces
 * @returns {object}
 * @throws {Error} If an interval bound is above 100000
 */
function createTokenForQuantifier(raw) {
  if (raw[0] !== "{") {
    const symbol = raw[0];
    const suffix = raw[1];
    return createToken(TokenTypes.Quantifier, raw, {
      min: symbol === "+" ? 1 : 0,
      max: symbol === "?" ? 1 : Infinity,
      greedy: suffix !== "?",
      possessive: suffix === "+"
    });
  }
  const { min, max } = /^\{(?<min>\d*)(?:,(?<max>\d*))?/.exec(raw).groups;
  const limit = 1e5;
  if (+min > limit || +max > limit) {
    throw new Error("Quantifier value unsupported in Oniguruma");
  }
  let upper;
  if (max === void 0) {
    // No comma: exact count
    upper = +min;
  } else if (max === "") {
    // Comma with nothing after it: unbounded
    upper = Infinity;
  } else {
    upper = +max;
  }
  return createToken(TokenTypes.Quantifier, raw, {
    min: +min,
    max: upper,
    greedy: !raw.endsWith("?"),
    possessive: false
  });
}
|
||
/**
 * Creates a `CharacterSet` token for a shorthand class escape (`d`, `h`, `s`, `w`, or the
 * uppercase form of one of them, which sets `negate`).
 * @param {string} raw
 * @returns {object}
 */
function createTokenForShorthandCharClass(raw) {
  const letter = raw[1];
  const lower = letter.toLowerCase();
  const kindByLetter = {
    "d": TokenCharacterSetKinds.digit,
    // Not available in JS
    "h": TokenCharacterSetKinds.hex,
    // Different than JS
    "s": TokenCharacterSetKinds.space,
    "w": TokenCharacterSetKinds.word
  };
  return createToken(TokenTypes.CharacterSet, raw, {
    kind: kindByLetter[lower],
    negate: letter !== lower
  });
}
|
||
/**
 * Creates a `CharacterSet` token of kind `property` from a `\p{...}` or `\P{...}` escape.
 * The result is negated when exactly one of the two negation markers is present: uppercase
 * `P`, or a leading `^` inside the braces.
 * @param {string} raw
 * @returns {object}
 */
function createTokenForUnicodeProperty(raw) {
  const { p, neg, value } = /^\\(?<p>[pP])\{(?<neg>\^?)(?<value>[^}]+)/.exec(raw).groups;
  const hasCaret = Boolean(neg);
  const negate = (p === "P") !== hasCaret;
  return createToken(TokenTypes.CharacterSet, raw, {
    kind: TokenCharacterSetKinds.property,
    negate,
    value
  });
}
|
||
/**
 * Maps flag letters to named flag properties: `i` to `ignoreCase`, `m` to `dotAll`, and `x`
 * to `extended`.
 * @param {string} flags
 * @returns {?object} Object with a `true` property per flag present, or `null` if none are
 */
function getFlagPropsForToken(flags) {
  const propByLetter = [
    ["i", "ignoreCase"],
    ["m", "dotAll"],
    ["x", "extended"]
  ];
  const props = {};
  for (const [letter, prop] of propByLetter) {
    if (flags.includes(letter)) {
      props[prop] = true;
    }
  }
  return Object.keys(props).length ? props : null;
}
|
||
/**
 * Parses the numeric value of a `\uHHHH`, `\xH`, `\xHH`, or `\x{H...}` escape.
 * @param {string} raw
 * @returns {number}
 * @throws {Error} If `\u` is not followed by four hex digits, or `\x` is followed by neither
 *   one or two hex digits nor braces enclosing one to eight hex digits
 */
function getValidatedHexCharCode(raw) {
  const isInvalid = /^(?:\\u(?!\p{AHex}{4})|\\x(?!\p{AHex}{1,2}|\{\p{AHex}{1,8}\}))/u.test(raw);
  if (isInvalid) {
    throw new Error(`Incomplete or invalid escape "${raw}"`);
  }
  if (raw[2] === "{") {
    const { hex } = /^\\x\{\s*(?<hex>\p{AHex}+)/u.exec(raw).groups;
    return parseInt(hex, 16);
  }
  // Everything after the two-char prefix is the hex value
  return parseInt(raw.slice(2), 16);
}
|
||
/**
 * Resolves an `EscapedNumber` token into either a single `Backreference` token or a series of
 * `Character` tokens (a leading octal value followed by literal digits).
 * @param {{raw: string, inCharClass: boolean}} token2
 * @param {number} numCaptures Number of capturing groups in the pattern
 * @returns {Array<object>}
 * @throws {Error} If the leading octal value is above 127 (octal 177)
 */
function splitEscapedNumToken(token2, numCaptures) {
  const { raw, inCharClass } = token2;
  const value = raw.slice(1);
  // Single digit 1-9 outside a char class is always treated as a backref
  const isSingleNonZeroDigit = value !== "0" && value.length === 1;
  // Leading 0 makes it octal; backrefs can't include following literal digits
  const isExistingGroupNumber = value[0] !== "0" && +value <= numCaptures;
  if (!inCharClass && (isSingleNonZeroDigit || isExistingGroupNumber)) {
    return [createToken(TokenTypes.Backreference, raw)];
  }
  // First piece: leading run of octal digits (if any); every later piece is one digit
  const pieces = value.match(/^[0-7]+|\d/g);
  return pieces.map((piece, index) => {
    const isLeadingOctal = index === 0 && piece !== "8" && piece !== "9";
    let charCode;
    if (isLeadingOctal) {
      charCode = parseInt(piece, 8);
      if (charCode > 127) {
        throw new Error(r`Octal encoded byte above 177 unsupported "${raw}"`);
      }
    } else {
      charCode = piece.codePointAt(0);
    }
    // Only the first piece keeps the backslash in its raw text
    const pieceRaw = (index === 0 ? "\\" : "") + piece;
    return createToken(TokenTypes.Character, pieceRaw, { value: charCode });
  });
}
|
||
/**
 * Throws unless `raw` consists of exactly one code point.
 * @param {string} raw
 * @throws {Error}
 */
function assertSingleCodePoint(raw) {
  const codePointCount = [...raw].length;
  if (codePointCount === 1) {
    return;
  }
  throw new Error(`Expected "${raw}" to be a single code point`);
}
|
||
|
||
// src/utils-ast.js
|
||
/**
 * Checks that a node has exactly one alternative holding exactly one element, and, if `kidFn`
 * is given, returns what `kidFn` returns for that element.
 * @param {{alternatives: Array<{elements: Array<object>}>}} node
 * @param {function(object): *} [kidFn]
 * @returns {*} `false` when the shape doesn't match; else `true` or `kidFn`'s result
 */
function hasOnlyChild({ alternatives }, kidFn) {
  if (alternatives.length !== 1) {
    return false;
  }
  const { elements } = alternatives[0];
  if (elements.length !== 1) {
    return false;
  }
  return !kidFn || kidFn(elements[0]);
}
|
||
/**
 * Reports whether a node's type is `Assertion` or `Directive`.
 * @param {{type: string}} node
 * @returns {boolean}
 */
function isAlwaysZeroLength({ type }) {
  switch (type) {
    case AstTypes.Assertion:
    case AstTypes.Directive:
      return true;
    default:
      return false;
  }
}
|
||
/**
 * Reports whether a node is a `Character`, `CharacterClass`, or `CharacterSet`, or a
 * quantifier with a truthy `min` wrapping one of those.
 * @param {object} node
 * @returns {*} Truthy or falsy; for a quantifier with a falsy `min`, that `min` value itself
 */
function isAlwaysNonZeroLength(node) {
  const consumingTypes = [
    AstTypes.Character,
    AstTypes.CharacterClass,
    AstTypes.CharacterSet
  ];
  if (consumingTypes.includes(node.type)) {
    return true;
  }
  return node.type === AstTypes.Quantifier && node.min && consumingTypes.includes(node.element.type);
}
|
||
/**
 * Reports whether a node is a capturing group or a plain group.
 *
 * @param {{type: string}} node AST node.
 * @returns {boolean} `true` for `CapturingGroup` and `Group` nodes.
 */
function isConsumptiveGroup({ type }) {
  return type === AstTypes.CapturingGroup || type === AstTypes.Group;
}
/**
 * Reports whether a node is a lookahead or lookbehind assertion.
 *
 * @param {{type: string, kind?: string}} node AST node.
 * @returns {boolean} `true` for `Assertion` nodes whose kind is `lookahead` or `lookbehind`.
 */
function isLookaround({ type, kind }) {
  return (
    type === AstTypes.Assertion &&
    (kind === AstAssertionKinds.lookahead || kind === AstAssertionKinds.lookbehind)
  );
}
// src/traverse.js
/**
 * Walks an AST depth-first starting at `path.node`, calling visitor methods for each node.
 *
 * Visitor entries are looked up by the keys returned from `getAstTypeAliases(node)`; the first
 * key present on `visitor` wins. An entry is either a function (called on enter) or an object
 * with optional `enter` and `exit` functions. Each call receives a path object and `state`.
 *
 * The path object exposes `node`, `parent`, `key`, `container`, `ast` (the root found by
 * following `parent` links up from `path.node`), and the mutators `remove`, `removeAllNextSiblings`,
 * `removeAllPrevSiblings`, `replaceWith`, and `skip`.
 *
 * @param {{node: object, parent?: object, key?: (string|number), container?: Array<object>}} path
 *   Starting point.
 * @param {*} state Value passed through to every visitor call.
 * @param {object} visitor Map from node type or alias to handler.
 * @throws {Error} If a node with an unknown type is reached, or if a container-dependent mutator
 *   is used on a node without a container.
 */
function traverse(path, state, visitor) {
  let ast = path.node;
  while (ast.parent) {
    ast = ast.parent;
  }
  function traverseArray(array, parent) {
    for (let i = 0; i < array.length; i++) {
      const keyShift = traverseNode(array[i], parent, i, array);
      // Account for nodes removed from the container while visiting index `i`
      i = Math.max(-1, i + keyShift);
    }
  }
  function traverseNode(node, parent = null, key = null, container = null) {
    // Tracks how far this node's index moved due to removals made through its own path
    let keyShift = 0;
    let skipTraversingKidsOfPath = false;
    const path2 = {
      node,
      parent,
      key,
      container,
      ast,
      remove() {
        throwIfNot(container, "Container expected").splice(Math.max(0, key + keyShift), 1);
        keyShift -= 1;
      },
      removeAllNextSiblings() {
        return throwIfNot(container, "Container expected").splice(key + 1);
      },
      removeAllPrevSiblings() {
        const shifted = key + keyShift;
        keyShift -= shifted;
        return throwIfNot(container, "Container expected").splice(0, Math.max(0, shifted));
      },
      replaceWith(newNode) {
        setParent(newNode, parent);
        if (container) {
          container[Math.max(0, key + keyShift)] = newNode;
        } else {
          parent[key] = newNode;
        }
      },
      skip() {
        skipTraversingKidsOfPath = true;
      }
    };
    const visitorKey = getAstTypeAliases(node).find((key2) => !!visitor[key2]);
    const methods = visitorKey && visitor[visitorKey];
    const enterFn = typeof methods === "function" ? methods : methods?.enter;
    const exitFn = methods?.exit;
    enterFn?.(path2, state);
    if (!skipTraversingKidsOfPath) {
      // Note: children are chosen from the original `node`, even if `replaceWith` was called
      switch (node.type) {
        case AstTypes.Regex:
          traverseNode(node.pattern, node, "pattern");
          traverseNode(node.flags, node, "flags");
          break;
        case AstTypes.Alternative:
        case AstTypes.CharacterClass:
          traverseArray(node.elements, node);
          break;
        case AstTypes.Assertion:
          if (isLookaround(node)) {
            traverseArray(node.alternatives, node);
          }
          break;
        case AstTypes.Backreference:
        case AstTypes.Character:
        case AstTypes.CharacterSet:
        case AstTypes.Directive:
        case AstTypes.Flags:
        case AstTypes.Recursion:
        case AstTypes.Subroutine:
        case AstTypes.VariableLengthCharacterSet:
          // Leaf nodes
          break;
        case AstTypes.AbsentFunction:
        case AstTypes.CapturingGroup:
        case AstTypes.Group:
        case AstTypes.Pattern:
          traverseArray(node.alternatives, node);
          break;
        case AstTypes.CharacterClassIntersection:
          traverseArray(node.classes, node);
          break;
        case AstTypes.CharacterClassRange:
          traverseNode(node.min, node, "min");
          traverseNode(node.max, node, "max");
          break;
        case AstTypes.Quantifier:
          traverseNode(node.element, node, "element");
          break;
        default:
          throw new Error(`Unexpected node type "${node.type}"`);
      }
    }
    exitFn?.(path2, state);
    return keyShift;
  }
  traverseNode(path.node, path.parent, path.key, path.container);
}
// Visitor keys that match groups of node types rather than one specific type; see
// `getAstTypeAliases` for which nodes each alias applies to
var AstTypeAliases = {
  AnyGroup: "AnyGroup",
  AnyNode: "AnyNode"
};
/**
 * Lists the visitor keys that apply to a node, in lookup priority order: `AnyNode` first, then
 * `AnyGroup` for consumptive groups and lookarounds, then the node's own type.
 *
 * @param {{type: string, kind?: string}} node AST node.
 * @returns {Array<string>} Visitor keys for the node.
 */
function getAstTypeAliases(node) {
  const types = [AstTypeAliases.AnyNode];
  if (isConsumptiveGroup(node) || isLookaround(node)) {
    types.push(AstTypeAliases.AnyGroup);
  }
  types.push(node.type);
  return types;
}
/**
 * Sets `node.parent`, but only when `parent` itself has a `parent` property (own or inherited).
 *
 * @param {object} node Node to update.
 * @param {object} parent Candidate parent; must be an object because it is the right-hand operand
 *   of `in`.
 */
function setParent(node, parent) {
  if ("parent" in parent) {
    node.parent = parent;
  }
}
// src/parse.js
// Names of all AST node types; each value equals its key
var AstTypes = {
  AbsentFunction: "AbsentFunction",
  Alternative: "Alternative",
  Assertion: "Assertion",
  Backreference: "Backreference",
  CapturingGroup: "CapturingGroup",
  Character: "Character",
  CharacterClass: "CharacterClass",
  CharacterClassIntersection: "CharacterClassIntersection",
  CharacterClassRange: "CharacterClassRange",
  CharacterSet: "CharacterSet",
  Directive: "Directive",
  Flags: "Flags",
  Group: "Group",
  Pattern: "Pattern",
  Quantifier: "Quantifier",
  Regex: "Regex",
  Subroutine: "Subroutine",
  VariableLengthCharacterSet: "VariableLengthCharacterSet",
  // Used only by the transformer for Regex+ ASTs
  Recursion: "Recursion"
};
// Kinds of `AbsentFunction` nodes
var AstAbsentFunctionKinds = {
  // See <github.com/slevithan/oniguruma-to-es/issues/13>
  repeater: "repeater"
};
// Kinds of `Assertion` nodes; each value equals its key
var AstAssertionKinds = {
  line_end: "line_end",
  line_start: "line_start",
  lookahead: "lookahead",
  lookbehind: "lookbehind",
  search_start: "search_start",
  string_end: "string_end",
  string_end_newline: "string_end_newline",
  string_start: "string_start",
  word_boundary: "word_boundary"
};
// AST character set and directive kinds reuse the tokenizer's kind objects (same references)
var AstCharacterSetKinds = TokenCharacterSetKinds;
var AstDirectiveKinds = TokenDirectiveKinds;
// Kinds of `VariableLengthCharacterSet` nodes
var AstVariableLengthCharacterSetKinds = {
  grapheme: "grapheme",
  newline: "newline"
};
/**
 * Builds an AST from tokenizer output.
 *
 * @param {{tokens: Array<object>, flags: object, rules: object}} tokenized Tokens, flags, and
 *   rules; `rules.captureGroup` controls whether numbered refs may be mixed with named groups.
 * @param {object} [options]
 * @param {boolean} [options.skipBackrefValidation=false]
 * @param {boolean} [options.skipLookbehindValidation=false]
 * @param {boolean} [options.skipPropertyNameValidation=false]
 * @param {boolean} [options.verbose=false] When true, group and character class optimizations are
 *   not applied while parsing.
 * @returns {object} Root `Regex` node; every node below it gets a `parent` property.
 * @throws {Error} On an unexpected token type, on mixing numbered backrefs/subroutines with named
 *   groups when `rules.captureGroup` is falsy, on subroutines that reference an undefined or
 *   duplicate group, and on errors raised by the per-token parsers.
 */
function parse({ tokens, flags, rules }, options) {
  const opts = {
    skipBackrefValidation: false,
    skipLookbehindValidation: false,
    skipPropertyNameValidation: false,
    verbose: false,
    ...options
  };
  // Shared, mutable parser state handed to the per-token parse functions
  const context = {
    capturingGroups: [],
    current: 0,
    hasNumberedRef: false,
    namedGroupsByName: /* @__PURE__ */ new Map(),
    parent: null,
    skipBackrefValidation: opts.skipBackrefValidation,
    skipLookbehindValidation: opts.skipLookbehindValidation,
    skipPropertyNameValidation: opts.skipPropertyNameValidation,
    subroutines: [],
    token: null,
    tokens,
    verbose: opts.verbose,
    walk
  };
  // Consumes the token at `context.current` and returns the node it produces
  function walk(parent, state) {
    const token2 = tokens[context.current];
    context.parent = parent;
    context.token = token2;
    // Advance for the next call
    context.current++;
    switch (token2.type) {
      case TokenTypes.Alternator:
        return createAlternative();
      case TokenTypes.Assertion:
        return createAssertionFromToken(token2);
      case TokenTypes.Backreference:
        return parseBackreference(context);
      case TokenTypes.Character:
        return createCharacter(token2.value, { useLastValid: !!state.isCheckingRangeEnd });
      case TokenTypes.CharacterClassHyphen:
        return parseCharacterClassHyphen(context, state);
      case TokenTypes.CharacterClassOpen:
        return parseCharacterClassOpen(context, state);
      case TokenTypes.CharacterSet:
        return parseCharacterSet(context);
      case TokenTypes.Directive:
        return createDirectiveFromToken(token2);
      case TokenTypes.GroupOpen:
        return parseGroupOpen(context, state);
      case TokenTypes.Quantifier:
        return parseQuantifier(context);
      case TokenTypes.Subroutine:
        return parseSubroutine(context);
      case TokenTypes.VariableLengthCharacterSet:
        return createVariableLengthCharacterSet(token2.kind);
      default:
        throw new Error(`Unexpected token type "${token2.type}"`);
    }
  }
  const ast = createRegex(createPattern(), createFlags(flags));
  let top = ast.pattern.alternatives[0];
  while (context.current < tokens.length) {
    const node = walk(top, {});
    if (node.type === AstTypes.Alternative) {
      // Top-level alternator: start collecting into a new alternative
      ast.pattern.alternatives.push(node);
      top = node;
    } else {
      top.elements.push(node);
    }
  }
  const { capturingGroups, hasNumberedRef, namedGroupsByName, subroutines } = context;
  if (hasNumberedRef && namedGroupsByName.size && !rules.captureGroup) {
    throw new Error("Numbered backref/subroutine not allowed when using named capture");
  }
  // Subroutines are validated after the full parse, once all groups are known
  for (const { ref } of subroutines) {
    if (typeof ref === "number") {
      if (ref > capturingGroups.length) {
        throw new Error(`Subroutine uses a group number that's not defined`);
      }
    } else if (!namedGroupsByName.has(ref)) {
      throw new Error(r`Subroutine uses a group name that's not defined "\g<${ref}>"`);
    } else if (namedGroupsByName.get(ref).length > 1) {
      throw new Error(r`Subroutine uses a duplicate group name "\g<${ref}>"`);
    }
  }
  // Add `parent` links to all nodes
  traverse({ node: ast }, null, {
    AnyNode({ node, parent }) {
      node.parent = parent;
    }
  });
  return ast;
}
/**
 * Creates a `Backreference` node from the current token, which is either `\N` or a `\k<...>` /
 * `\k'...'` form holding a name, an absolute number, or a relative (`-N`) number.
 *
 * Sets `context.hasNumberedRef` for numbered references.
 *
 * @param {object} context Parser context; reads `token.raw`, `capturingGroups`,
 *   `namedGroupsByName`, and `skipBackrefValidation`.
 * @returns {object} Backreference node (flagged `orphan` when validation is skipped and the number
 *   exceeds the count of capturing groups seen so far).
 * @throws {Error} If the referenced group isn't defined to the left, or the name contains `-`/`+`.
 */
function parseBackreference(context) {
  const { raw } = context.token;
  const hasKWrapper = /^\\k[<']/.test(raw);
  // Strip `\k<` and `>` (or the quote equivalents), or else just the leading backslash
  const ref = hasKWrapper ? raw.slice(3, -1) : raw.slice(1);
  const fromNum = (num, isRelative = false) => {
    const numCapturesToLeft = context.capturingGroups.length;
    let orphan = false;
    if (num > numCapturesToLeft) {
      if (context.skipBackrefValidation) {
        orphan = true;
      } else {
        throw new Error(`Not enough capturing groups defined to the left "${raw}"`);
      }
    }
    context.hasNumberedRef = true;
    // Relative numbers count backward from the most recent capturing group
    return createBackreference(isRelative ? numCapturesToLeft + 1 - num : num, { orphan });
  };
  if (hasKWrapper) {
    const numberedRef = /^(?<sign>-?)0*(?<num>[1-9]\d*)$/.exec(ref);
    if (numberedRef) {
      return fromNum(+numberedRef.groups.num, !!numberedRef.groups.sign);
    }
    if (/[-+]/.test(ref)) {
      throw new Error(`Invalid backref name "${raw}"`);
    }
    if (!context.namedGroupsByName.has(ref)) {
      throw new Error(`Group name not defined to the left "${raw}"`);
    }
    return createBackreference(ref);
  }
  return fromNum(+ref);
}
/**
 * Handles a hyphen token inside a character class, producing either a range that joins the
 * previous sibling with the next node, or a literal hyphen character.
 *
 * @param {object} context Parser context; reads `parent`, `tokens`, `current`, and `walk`.
 * @param {object} state Walk state; `isCheckingRangeEnd` marks that a range end is being parsed.
 * @returns {object} `CharacterClassRange` node or a `Character` node for `-` (code point 45).
 * @throws {Error} If the hyphen is in range position but either side isn't a `Character`.
 */
function parseCharacterClassHyphen(context, state) {
  const { parent, tokens, walk } = context;
  const prevSiblingNode = parent.elements.at(-1);
  const nextToken = tokens[context.current];
  if (
    !state.isCheckingRangeEnd &&
    prevSiblingNode &&
    prevSiblingNode.type !== AstTypes.CharacterClass &&
    prevSiblingNode.type !== AstTypes.CharacterClassRange &&
    nextToken &&
    nextToken.type !== TokenTypes.CharacterClassOpen &&
    nextToken.type !== TokenTypes.CharacterClassClose &&
    nextToken.type !== TokenTypes.CharacterClassIntersector
  ) {
    const nextNode = walk(parent, {
      ...state,
      isCheckingRangeEnd: true
    });
    if (prevSiblingNode.type === AstTypes.Character && nextNode.type === AstTypes.Character) {
      // The range replaces the previous sibling, which becomes the range's `min`
      parent.elements.pop();
      return createCharacterClassRange(prevSiblingNode, nextNode);
    }
    throw new Error("Invalid character class range");
  }
  // Literal hyphen
  return createCharacter(45);
}
/**
 * Parses a character class, consuming tokens up to and including its closing token.
 *
 * The class is first built around a `CharacterClassIntersection`; each intersector token starts a
 * new class within it. If only one class results, that class is returned directly with its
 * negation combined with the outer class's.
 *
 * @param {object} context Parser context; reads `token`, `tokens`, `verbose`, and `walk`, and
 *   advances `current`.
 * @param {object} state Walk state, passed through to child walks.
 * @returns {object} `CharacterClass` node.
 * @throws {Error} If the class is not closed before the tokens run out.
 */
function parseCharacterClassOpen(context, state) {
  const { token: token2, tokens, verbose, walk } = context;
  const firstClassToken = tokens[context.current];
  let node = createCharacterClass({ negate: token2.negate });
  const intersection = node.elements[0];
  let nextToken = throwIfUnclosedCharacterClass(firstClassToken);
  while (nextToken.type !== TokenTypes.CharacterClassClose) {
    if (nextToken.type === TokenTypes.CharacterClassIntersector) {
      intersection.classes.push(createCharacterClass({ negate: false, baseOnly: true }));
      // Skip the intersector
      context.current++;
    } else {
      const cc = intersection.classes.at(-1);
      cc.elements.push(walk(cc, state));
    }
    nextToken = throwIfUnclosedCharacterClass(tokens[context.current], firstClassToken);
  }
  if (!verbose) {
    optimizeCharacterClassIntersection(intersection);
  }
  // Simplify tree if we don't need the intersection wrapper
  if (intersection.classes.length === 1) {
    const cc = intersection.classes[0];
    // Only negate if one of the two is negated, but not both
    cc.negate = node.negate !== cc.negate;
    node = cc;
  }
  // Skip the closing token
  context.current++;
  return node;
}
/**
 * Creates a `CharacterSet` node from the current token.
 *
 * Property tokens whose slugged name is in `PosixProperties` are converted to kind `posix`;
 * other property tokens become Unicode property nodes.
 *
 * @param {{token: {kind: string, negate: boolean, value: string}, skipPropertyNameValidation: boolean}} context
 *   Parser context.
 * @returns {object} `CharacterSet` node.
 */
function parseCharacterSet({ token: token2, skipPropertyNameValidation }) {
  let { kind, negate, value } = token2;
  if (kind === TokenCharacterSetKinds.property) {
    const normalized = slug(value);
    if (PosixProperties.has(normalized)) {
      kind = TokenCharacterSetKinds.posix;
      value = normalized;
    } else {
      return createUnicodeProperty(value, {
        negate,
        skipPropertyNameValidation
      });
    }
  }
  if (kind === TokenCharacterSetKinds.posix) {
    return {
      type: AstTypes.CharacterSet,
      kind: AstCharacterSetKinds.posix,
      negate,
      value
    };
  }
  return createCharacterSet(kind, { negate });
}
/**
 * Parses a group of any kind (absent function, atomic, capturing, plain, lookaround), consuming
 * tokens up to and including its closing token.
 *
 * Capturing groups are recorded in `context.capturingGroups` (and `namedGroupsByName` when named)
 * before their contents are parsed.
 *
 * @param {object} context Parser context; reads `token`, `tokens`, `capturingGroups`,
 *   `namedGroupsByName`, `skipLookbehindValidation`, `verbose`, and `walk`, and advances `current`.
 * @param {object} state Walk state; `isInAbsentFunction`, `isInLookbehind`, and
 *   `isInNegLookbehind` are propagated to child walks.
 * @returns {object} The group node (possibly replaced by `getOptimizedGroup` when not verbose).
 * @throws {Error} On nested absent functions, unclosed groups, and lookbehind contents that the
 *   validation below rejects.
 */
function parseGroupOpen(context, state) {
  const { token: token2, tokens, capturingGroups, namedGroupsByName, skipLookbehindValidation, verbose, walk } = context;
  let node = createByGroupKind(token2);
  const isAbsentFunction = node.type === AstTypes.AbsentFunction;
  const isLookbehind = node.kind === AstAssertionKinds.lookbehind;
  const isNegLookbehind = isLookbehind && node.negate;
  // Track capturing groups before parsing contents so nested groups come after their parent
  if (node.type === AstTypes.CapturingGroup) {
    capturingGroups.push(node);
    if (node.name) {
      getOrCreate(namedGroupsByName, node.name, []).push(node);
    }
  }
  if (isAbsentFunction && state.isInAbsentFunction) {
    throw new Error("Nested absent function not supported by Oniguruma");
  }
  let nextToken = throwIfUnclosedGroup(tokens[context.current]);
  while (nextToken.type !== TokenTypes.GroupClose) {
    if (nextToken.type === TokenTypes.Alternator) {
      node.alternatives.push(createAlternative());
      // Skip the alternator
      context.current++;
    } else {
      const alt = node.alternatives.at(-1);
      const child = walk(alt, {
        ...state,
        isInAbsentFunction: state.isInAbsentFunction || isAbsentFunction,
        isInLookbehind: state.isInLookbehind || isLookbehind,
        isInNegLookbehind: state.isInNegLookbehind || isNegLookbehind
      });
      alt.elements.push(child);
      if ((isLookbehind || state.isInLookbehind) && !skipLookbehindValidation) {
        const msg = "Lookbehind includes a pattern not allowed by Oniguruma";
        if (isNegLookbehind || state.isInNegLookbehind) {
          // Within negative lookbehind: no lookahead and no capturing groups
          if (child.kind === AstAssertionKinds.lookahead || child.type === AstTypes.CapturingGroup) {
            throw new Error(msg);
          }
        } else {
          // Within positive lookbehind: no lookahead and no negative lookbehind
          if (child.kind === AstAssertionKinds.lookahead || child.kind === AstAssertionKinds.lookbehind && child.negate) {
            throw new Error(msg);
          }
        }
      }
    }
    nextToken = throwIfUnclosedGroup(tokens[context.current]);
  }
  if (!verbose) {
    node = getOptimizedGroup(node);
  }
  // Skip the closing token
  context.current++;
  return node;
}
/**
 * Creates a `Quantifier` node that wraps the last element of the current parent, and removes that
 * element from the parent (the caller is expected to add the returned quantifier).
 *
 * @param {{token: {min: number, max: number, greedy: boolean, possessive: boolean}, parent: {elements: Array<object>}}} context
 *   Parser context.
 * @returns {object} Quantifier node.
 * @throws {Error} If there is no preceding element, or it is an assertion or directive.
 */
function parseQuantifier({ token: token2, parent }) {
  const { min, max, greedy, possessive: possessive2 } = token2;
  const quantifiedNode = parent.elements.at(-1);
  if (
    !quantifiedNode ||
    quantifiedNode.type === AstTypes.Assertion ||
    quantifiedNode.type === AstTypes.Directive
  ) {
    throw new Error(`Quantifier requires a repeatable token`);
  }
  const node = createQuantifier(quantifiedNode, min, max, greedy, possessive2);
  parent.elements.pop();
  return node;
}
/**
 * Creates a `Subroutine` node from the current token and records it in `context.subroutines`.
 *
 * The reference (the text between the 3-character prefix and the final character of `raw`) may be
 * a name, `0`, an absolute number, or a relative number with a `+` or `-` sign. Numbered
 * references set `context.hasNumberedRef`.
 *
 * @param {object} context Parser context; reads `token.raw`, `capturingGroups`, and `subroutines`.
 * @returns {object} Subroutine node whose `ref` is a group name or an absolute group number.
 * @throws {Error} If a relative reference resolves to a number below 1.
 */
function parseSubroutine(context) {
  const { token: token2, capturingGroups, subroutines } = context;
  let ref = token2.raw.slice(3, -1);
  const numberedRef = /^(?<sign>[-+]?)0*(?<num>[1-9]\d*)$/.exec(ref);
  if (numberedRef) {
    const num = +numberedRef.groups.num;
    const numCapturesToLeft = capturingGroups.length;
    context.hasNumberedRef = true;
    // Resolve to an absolute group number based on the sign
    ref = {
      "": num,
      "+": numCapturesToLeft + num,
      "-": numCapturesToLeft + 1 - num
    }[numberedRef.groups.sign];
    if (ref < 1) {
      throw new Error("Invalid subroutine number");
    }
  } else if (ref === "0") {
    // Not matched by the numbered pattern above, which requires a nonzero number
    ref = 0;
  }
  const node = createSubroutine(ref);
  subroutines.push(node);
  return node;
}
/**
 * Creates an `AbsentFunction` node with one empty alternative.
 *
 * @param {string} kind Must be `AstAbsentFunctionKinds.repeater`.
 * @returns {{type: string, kind: string, alternatives: Array<object>}} New node.
 * @throws {Error} If `kind` is anything other than the repeater kind.
 */
function createAbsentFunction(kind) {
  if (kind !== AstAbsentFunctionKinds.repeater) {
    throw new Error(`Unexpected absent function kind "${kind}"`);
  }
  return {
    type: AstTypes.AbsentFunction,
    kind,
    alternatives: [createAlternative()]
  };
}
/**
 * Creates an empty `Alternative` node.
 *
 * @returns {{type: string, elements: Array<object>}} New node with a fresh `elements` array.
 */
function createAlternative() {
  return {
    type: AstTypes.Alternative,
    elements: []
  };
}
/**
 * Creates an `Assertion` node without children. The `negate` property is only included for the
 * `word_boundary` kind.
 *
 * @param {string} kind Assertion kind.
 * @param {{negate?: boolean}} [options]
 * @returns {{type: string, kind: string, negate?: boolean}} New node.
 */
function createAssertion(kind, options) {
  const negate = !!options?.negate;
  return {
    type: AstTypes.Assertion,
    kind,
    // Spreading `false` adds nothing
    ...kind === AstAssertionKinds.word_boundary && { negate }
  };
}
/**
 * Creates an `Assertion` node from an assertion token, mapping the token's source text to an
 * assertion kind. `\B` maps to a negated word boundary.
 *
 * @param {{kind: string}} token Assertion token whose `kind` is one of
 *   `^`, `$`, `\A`, `\b`, `\B`, `\G`, `\z`, `\Z`.
 * @returns {object} Assertion node.
 * @throws {Error} If the token kind is not one of the above.
 */
function createAssertionFromToken({ kind }) {
  return createAssertion(
    throwIfNot({
      "^": AstAssertionKinds.line_start,
      "$": AstAssertionKinds.line_end,
      "\\A": AstAssertionKinds.string_start,
      "\\b": AstAssertionKinds.word_boundary,
      "\\B": AstAssertionKinds.word_boundary,
      "\\G": AstAssertionKinds.search_start,
      "\\z": AstAssertionKinds.string_end,
      "\\Z": AstAssertionKinds.string_end_newline
    }[kind], `Unexpected assertion kind "${kind}"`),
    { negate: kind === r`\B` }
  );
}
/**
 * Creates a `Backreference` node. The `orphan` property is only included when truthy.
 *
 * @param {string|number} ref Group name or number.
 * @param {{orphan?: boolean}} [options]
 * @returns {{type: string, orphan?: boolean, ref: (string|number)}} New node.
 */
function createBackreference(ref, options) {
  const orphan = !!options?.orphan;
  return {
    type: AstTypes.Backreference,
    ...orphan && { orphan },
    ref
  };
}
/**
 * Creates the node for a group-open token based on the token's kind.
 *
 * @param {{flags?: object, kind: string, name?: string, negate?: boolean, number?: number}} token
 *   Group-open token.
 * @returns {object} `AbsentFunction`, `Group`, `CapturingGroup`, or lookaround `Assertion` node.
 * @throws {Error} If the token's kind is not a known group kind.
 */
function createByGroupKind({ flags, kind, name, negate, number }) {
  switch (kind) {
    case TokenGroupKinds.absent_repeater:
      return createAbsentFunction(AstAbsentFunctionKinds.repeater);
    case TokenGroupKinds.atomic:
      return createGroup({ atomic: true });
    case TokenGroupKinds.capturing:
      return createCapturingGroup(number, name);
    case TokenGroupKinds.group:
      return createGroup({ flags });
    case TokenGroupKinds.lookahead:
    case TokenGroupKinds.lookbehind:
      return createLookaround({
        behind: kind === TokenGroupKinds.lookbehind,
        negate
      });
    default:
      throw new Error(`Unexpected group kind "${kind}"`);
  }
}
/**
 * Creates a `CapturingGroup` node with one empty alternative. The `name` property is only included
 * when a name is provided.
 *
 * @param {number} number Group number.
 * @param {string} [name] Group name.
 * @returns {{type: string, number: number, name?: string, alternatives: Array<object>}} New node.
 * @throws {Error} If `name` is provided and fails `isValidGroupNameOniguruma`.
 */
function createCapturingGroup(number, name) {
  const hasName = name !== void 0;
  if (hasName && !isValidGroupNameOniguruma(name)) {
    throw new Error(`Group name "${name}" invalid in Oniguruma`);
  }
  return {
    type: AstTypes.CapturingGroup,
    number,
    ...hasName && { name },
    alternatives: [createAlternative()]
  };
}
/**
 * Creates a `Character` node for a code point.
 *
 * @param {number} charCode Code point value.
 * @param {{useLastValid?: boolean}} [options] When `useLastValid` is true, values above U+10FFFF
 *   are clamped to U+10FFFF instead of throwing.
 * @returns {{type: string, value: number}} New node.
 * @throws {Error} If `charCode` is above U+10FFFF and `useLastValid` is not set. The message
 *   differs for values above 0x13FFFF versus values that are only out of range "in JS".
 */
function createCharacter(charCode, options) {
  const opts = {
    useLastValid: false,
    ...options
  };
  // 1114111 is 0x10FFFF, the last Unicode code point
  if (charCode > 1114111) {
    const hex = charCode.toString(16);
    if (opts.useLastValid) {
      charCode = 1114111;
    } else if (charCode > 1310719) {
      // 1310719 is 0x13FFFF
      throw new Error(`Invalid code point out of range "\\x{${hex}}"`);
    } else {
      throw new Error(`Invalid code point out of range in JS "\\x{${hex}}"`);
    }
  }
  return {
    type: AstTypes.Character,
    value: charCode
  };
}
/**
 * Creates a `CharacterClass` node.
 *
 * @param {{baseOnly?: boolean, negate?: boolean}} [options] With `baseOnly`, the class starts with
 *   no elements; otherwise it starts with a single `CharacterClassIntersection` element.
 * @returns {{type: string, negate: boolean, elements: Array<object>}} New node.
 */
function createCharacterClass(options) {
  const opts = {
    baseOnly: false,
    negate: false,
    ...options
  };
  return {
    type: AstTypes.CharacterClass,
    negate: opts.negate,
    elements: opts.baseOnly ? [] : [createCharacterClassIntersection()]
  };
}
/**
 * Creates a `CharacterClassIntersection` node that starts with one empty, non-negated base class.
 *
 * @returns {{type: string, classes: Array<object>}} New node.
 */
function createCharacterClassIntersection() {
  return {
    type: AstTypes.CharacterClassIntersection,
    classes: [createCharacterClass({ negate: false, baseOnly: true })]
  };
}
/**
 * Creates a `CharacterClassRange` node from two nodes that carry a numeric `value`.
 *
 * @param {{value: number}} min Start of the range.
 * @param {{value: number}} max End of the range.
 * @returns {{type: string, min: object, max: object}} New node.
 * @throws {Error} If `max.value` is less than `min.value`.
 */
function createCharacterClassRange(min, max) {
  if (max.value < min.value) {
    throw new Error("Character class range out of order");
  }
  return {
    type: AstTypes.CharacterClassRange,
    min,
    max
  };
}
/**
 * Creates a `CharacterSet` node for a known kind. The `negate` property is only set for the
 * `digit`, `hex`, `space`, and `word` kinds.
 *
 * @param {string} kind Key of `AstCharacterSetKinds`.
 * @param {{negate: boolean}} options
 * @returns {{type: string, kind: string, negate?: boolean}} New node.
 * @throws {Error} If `kind` is not found in `AstCharacterSetKinds`.
 */
function createCharacterSet(kind, { negate }) {
  const node = {
    type: AstTypes.CharacterSet,
    kind: throwIfNot(AstCharacterSetKinds[kind], `Unexpected character set kind "${kind}"`)
  };
  if (
    kind === TokenCharacterSetKinds.digit ||
    kind === TokenCharacterSetKinds.hex ||
    kind === TokenCharacterSetKinds.space ||
    kind === TokenCharacterSetKinds.word
  ) {
    node.negate = negate;
  }
  return node;
}
/**
 * Creates a `Directive` node from a directive token. The `flags` property is only set for the
 * `flags` kind.
 *
 * @param {{kind: string, flags?: object}} token Directive token.
 * @returns {{type: string, kind: string, flags?: object}} New node.
 * @throws {Error} If the token's kind is not found in `AstDirectiveKinds`.
 */
function createDirectiveFromToken({ kind, flags }) {
  const node = {
    type: AstTypes.Directive,
    kind: throwIfNot(AstDirectiveKinds[kind], `Unexpected directive kind "${kind}"`)
  };
  if (kind === TokenDirectiveKinds.flags) {
    node.flags = flags;
  }
  return node;
}
/**
 * Creates a `Flags` node, copying the six known flag properties from the given object.
 *
 * @param {{ignoreCase: boolean, dotAll: boolean, extended: boolean, digitIsAscii: boolean, spaceIsAscii: boolean, wordIsAscii: boolean}} flags
 *   Source flags; other properties are ignored.
 * @returns {object} New node.
 */
function createFlags({ ignoreCase, dotAll, extended, digitIsAscii, spaceIsAscii, wordIsAscii }) {
  return {
    type: AstTypes.Flags,
    ignoreCase,
    dotAll,
    extended,
    digitIsAscii,
    spaceIsAscii,
    wordIsAscii
  };
}
/**
 * Creates a noncapturing `Group` node with one empty alternative. The `atomic` and `flags`
 * properties are only included when truthy.
 *
 * @param {{atomic?: boolean, flags?: object}} [options]
 * @returns {{type: string, atomic?: boolean, flags?: object, alternatives: Array<object>}} New node.
 */
function createGroup(options) {
  const atomic2 = options?.atomic;
  const flags = options?.flags;
  return {
    type: AstTypes.Group,
    ...atomic2 && { atomic: atomic2 },
    ...flags && { flags },
    alternatives: [createAlternative()]
  };
}
/**
 * Creates a lookahead or lookbehind `Assertion` node with one empty alternative.
 *
 * @param {{behind?: boolean, negate?: boolean}} [options] `behind` selects lookbehind; both
 *   options default to `false`.
 * @returns {{type: string, kind: string, negate: boolean, alternatives: Array<object>}} New node.
 */
function createLookaround(options) {
  const opts = {
    behind: false,
    negate: false,
    ...options
  };
  return {
    type: AstTypes.Assertion,
    kind: opts.behind ? AstAssertionKinds.lookbehind : AstAssertionKinds.lookahead,
    negate: opts.negate,
    alternatives: [createAlternative()]
  };
}
/**
 * Creates a `Pattern` node with one empty alternative.
 *
 * @returns {{type: string, alternatives: Array<object>}} New node.
 */
function createPattern() {
  return {
    type: AstTypes.Pattern,
    alternatives: [createAlternative()]
  };
}
/**
 * Creates a `Quantifier` node. When `max` is less than `min`, the two are swapped and the
 * quantifier is made possessive.
 *
 * @param {object} element Node being quantified.
 * @param {number} min Minimum repetitions.
 * @param {number} max Maximum repetitions.
 * @param {boolean} [greedy=true]
 * @param {boolean} [possessive2=false]
 * @returns {{type: string, min: number, max: number, greedy: boolean, possessive: boolean, element: object}}
 *   New node.
 */
function createQuantifier(element, min, max, greedy = true, possessive2 = false) {
  const node = {
    type: AstTypes.Quantifier,
    min,
    max,
    greedy,
    possessive: possessive2,
    element
  };
  if (max < min) {
    // Reversed range: swap the bounds and force possessive
    return {
      ...node,
      min: max,
      max: min,
      possessive: true
    };
  }
  return node;
}
/**
 * Creates the root `Regex` node.
 *
 * @param {object} pattern `Pattern` node.
 * @param {object} flags `Flags` node.
 * @returns {{type: string, pattern: object, flags: object}} New node.
 */
function createRegex(pattern, flags) {
  return {
    type: AstTypes.Regex,
    pattern,
    flags
  };
}
/**
 * Creates a `Subroutine` node.
 *
 * @param {string|number} ref Group name or number.
 * @returns {{type: string, ref: (string|number)}} New node.
 */
function createSubroutine(ref) {
  return {
    type: AstTypes.Subroutine,
    ref
  };
}
/**
 * Creates a `CharacterSet` node of kind `property`.
 *
 * @param {string} value Property name.
 * @param {{negate?: boolean, skipPropertyNameValidation?: boolean}} [options] Unless
 *   `skipPropertyNameValidation` is set, the name is passed through `getJsUnicodePropertyName`.
 * @returns {{type: string, kind: string, value: string, negate: boolean}} New node.
 */
function createUnicodeProperty(value, options) {
  const opts = {
    negate: false,
    skipPropertyNameValidation: false,
    ...options
  };
  return {
    type: AstTypes.CharacterSet,
    kind: AstCharacterSetKinds.property,
    value: opts.skipPropertyNameValidation ? value : getJsUnicodePropertyName(value),
    negate: opts.negate
  };
}
/**
 * Creates a `VariableLengthCharacterSet` node from the token text `\R` (newline) or `\X`
 * (grapheme).
 *
 * @param {string} kind Token kind: `\R` or `\X`.
 * @returns {{type: string, kind: string}} New node.
 * @throws {Error} If `kind` is neither `\R` nor `\X`.
 */
function createVariableLengthCharacterSet(kind) {
  return {
    type: AstTypes.VariableLengthCharacterSet,
    kind: throwIfNot({
      "\\R": AstVariableLengthCharacterSetKinds.newline,
      "\\X": AstVariableLengthCharacterSetKinds.grapheme
    }[kind], `Unexpected varcharset kind "${kind}"`)
  };
}
/**
 * Converts a Unicode property name to the form used in JS.
 *
 * Names found (by slug) in `JsUnicodePropertiesMap` use the mapped name. Any other name is
 * normalized: trimmed, runs of hyphens/spaces/underscores collapsed to a single underscore, an
 * underscore inserted at camel-case boundaries, and each run of letters changed to title case.
 *
 * @param {string} value Property name as written in the pattern.
 * @returns {string} JS property name, or a best-guess normalized name.
 * @throws {Error} If the slugged name is in `JsUnicodePropertiesOfStringsMap`.
 */
function getJsUnicodePropertyName(value) {
  const slugged = slug(value);
  if (JsUnicodePropertiesOfStringsMap.has(slugged)) {
    throw new Error(r`Unicode property "\p{${value}}" unsupported in Oniguruma`);
  }
  const jsName = JsUnicodePropertiesMap.get(slugged);
  if (jsName) {
    return jsName;
  }
  return value
    .trim()
    .replace(/[- _]+/g, "_")
    // Add an underscore between a capitalized word and a following uppercase letter
    .replace(/[A-Z][a-z]+(?=[A-Z])/g, "$&_")
    .replace(/[A-Za-z]+/g, (m) => m[0].toUpperCase() + m.slice(1).toLowerCase());
}
/**
 * Unwraps a noncapturing group whose only child is another noncapturing group, when their
 * `atomic`/`flags` modifiers can be merged onto the child. The child is mutated to carry the
 * outer group's `atomic` or `flags` and is returned in place of the outer group.
 *
 * @param {object} node Group-like node with at least one alternative.
 * @returns {object} The inner group when the merge applies, otherwise `node` unchanged.
 */
function getOptimizedGroup(node) {
  const firstAltFirstEl = node.alternatives[0].elements[0];
  if (
    node.type === AstTypes.Group &&
    hasOnlyChild(node, (kid) => kid.type === AstTypes.Group) &&
    // Don't merge an atomic outer group with a flag inner group
    !(node.atomic && firstAltFirstEl.flags) &&
    // Don't merge a flag outer group with an atomic or flag inner group
    !(node.flags && (firstAltFirstEl.atomic || firstAltFirstEl.flags))
  ) {
    if (node.atomic) {
      firstAltFirstEl.atomic = true;
    } else if (node.flags) {
      firstAltFirstEl.flags = node.flags;
    }
    return firstAltFirstEl;
  }
  return node;
}
/**
 * Tests a group name against the rule used here for Oniguruma: the first character must be
 * alphabetic or connector punctuation (e.g. `_`), and no character may be `)`.
 *
 * @param {string} name Group name.
 * @returns {boolean} Whether the name passes.
 */
function isValidGroupNameOniguruma(name) {
  return /^[\p{Alpha}\p{Pc}][^)]*$/u.test(name);
}
/**
 * Simplifies an intersection in place: any class whose only element is itself a character class
 * is replaced by that nested class, with the two negations combined.
 *
 * @param {{classes: Array<{negate: boolean, elements: Array<object>}>}} intersection
 *   `CharacterClassIntersection` node; its `classes` array and the nested classes are mutated.
 */
function optimizeCharacterClassIntersection(intersection) {
  for (let i = 0; i < intersection.classes.length; i++) {
    const cc = intersection.classes[i];
    const firstChild = cc.elements[0];
    if (cc.elements.length === 1 && firstChild.type === AstTypes.CharacterClass) {
      intersection.classes[i] = firstChild;
      // Negated only when exactly one of the wrapper and the nested class was negated
      firstChild.negate = cc.negate !== firstChild.negate;
    }
  }
}
/**
 * Returns the token if it exists, and otherwise throws because the character class was not closed.
 *
 * @param {object} [token2] Next token, if any.
 * @param {{value?: number}} [firstClassToken] First token inside the class; when its value is 93
 *   (`]`), the error says "Empty" rather than "Unclosed".
 * @returns {object} `token2`.
 * @throws {Error} If `token2` is falsy.
 */
function throwIfUnclosedCharacterClass(token2, firstClassToken) {
  return throwIfNot(
    token2,
    // Easier to understand error when applicable
    `${firstClassToken?.value === 93 ? "Empty" : "Unclosed"} character class`
  );
}
/**
 * Returns the token if it exists, and otherwise throws because the group was not closed.
 *
 * @param {object} [token2] Next token, if any.
 * @returns {object} `token2`.
 * @throws {Error} If `token2` is falsy.
 */
function throwIfUnclosedGroup(token2) {
  return throwIfNot(token2, "Unclosed group");
}
// node_modules/.pnpm/emoji-regex-xs@1.0.0/node_modules/emoji-regex-xs/index.mjs
var r2 = String.raw;
// One emoji element: an emoji followed by VS16 (optionally with a combining enclosing keycap), a
// modifier base with an optional modifier, or a character with default emoji presentation
var seq = r2`(?:\p{Emoji}\uFE0F\u20E3?|\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation})`;
// Tag characters U+E0061-U+E007A, for use inside a character class
var sTags = r2`\u{E0061}-\u{E007A}`;
/**
 * Creates a new regex (flags `gu`) that matches, in order of alternation: a pair of regional
 * indicators, a tag sequence starting with U+1F3F4 and ending with U+E007F, or one or more emoji
 * elements joined by U+200D.
 *
 * @returns {RegExp} A fresh regex instance on every call.
 */
var emoji_regex_xs_default = () => new RegExp(r2`[\u{1F1E6}-\u{1F1FF}]{2}|\u{1F3F4}[${sTags}]{2}[\u{E0030}-\u{E0039}${sTags}]{1,3}\u{E007F}|${seq}(?:\u200D${seq})*`, "gu");
// src/transform.js
/**
 * Transforms an AST in place by running three traversal passes over it (`FirstPassVisitor`,
 * `SecondPassVisitor`, `ThirdPassVisitor`), sharing selected state between passes.
 *
 * @param {object} ast Root `Regex` node; its `flags` node must carry `digitIsAscii`,
 *   `spaceIsAscii`, `wordIsAscii`, `dotAll`, and `ignoreCase`.
 * @param {object} [options]
 * @param {string} [options.accuracy="default"]
 * @param {boolean} [options.asciiWordBoundaries=false]
 * @param {boolean} [options.avoidSubclass=false]
 * @param {string} [options.bestEffortTarget="ES2025"]
 * @returns {object} The same `ast` object, with `_strategy` set from the first pass.
 */
function transform(ast, options) {
  const opts = {
    // A couple edge cases exist where options `accuracy` and `bestEffortTarget` are used:
    // - `VariableLengthCharacterSet` kind `grapheme` (`\X`): An exact representation would require
    //   heavy Unicode data; a best-effort approximation requires knowing the target.
    // - `CharacterSet` kind `posix` with values `graph` and `print`: Their complex Unicode-based
    //   representations would be hard to change to ASCII-based after the fact in the generator
    //   based on `target`/`accuracy`, so produce the appropriate structure here.
    accuracy: "default",
    asciiWordBoundaries: false,
    avoidSubclass: false,
    bestEffortTarget: "ES2025",
    ...options
  };
  const firstPassState = {
    accuracy: opts.accuracy,
    asciiWordBoundaries: opts.asciiWordBoundaries,
    avoidSubclass: opts.avoidSubclass,
    flagDirectivesByAlt: /* @__PURE__ */ new Map(),
    jsGroupNameMap: /* @__PURE__ */ new Map(),
    minTargetEs2024: isMinTarget(opts.bestEffortTarget, "ES2024"),
    passedLookbehind: false,
    strategy: null,
    // Subroutines can appear before the groups they ref, so collect reffed nodes for a second pass
    subroutineRefMap: /* @__PURE__ */ new Map(),
    supportedGNodes: /* @__PURE__ */ new Set(),
    digitIsAscii: ast.flags.digitIsAscii,
    spaceIsAscii: ast.flags.spaceIsAscii,
    wordIsAscii: ast.flags.wordIsAscii
  };
  traverse({ node: ast }, firstPassState, FirstPassVisitor);
  // Read after the first pass has run over the flags node
  const globalFlags = {
    dotAll: ast.flags.dotAll,
    ignoreCase: ast.flags.ignoreCase
  };
  const secondPassState = {
    currentFlags: globalFlags,
    prevFlags: null,
    globalFlags,
    groupOriginByCopy: /* @__PURE__ */ new Map(),
    groupsByName: /* @__PURE__ */ new Map(),
    multiplexCapturesToLeftByRef: /* @__PURE__ */ new Map(),
    openRefs: /* @__PURE__ */ new Map(),
    reffedNodesByReferencer: /* @__PURE__ */ new Map(),
    subroutineRefMap: firstPassState.subroutineRefMap
  };
  traverse({ node: ast }, secondPassState, SecondPassVisitor);
  const thirdPassState = {
    groupsByName: secondPassState.groupsByName,
    highestOrphanBackref: 0,
    numCapturesToLeft: 0,
    reffedNodesByReferencer: secondPassState.reffedNodesByReferencer
  };
  traverse({ node: ast }, thirdPassState, ThirdPassVisitor);
  ast._strategy = firstPassState.strategy;
  return ast;
}
var FirstPassVisitor = {
|
||
AbsentFunction({ node, replaceWith }) {
|
||
const group = prepContainer(createGroup(), [
|
||
adoptAndSwapKids(createLookaround({ negate: true }), node.alternatives),
|
||
createUnicodeProperty("Any")
|
||
]);
|
||
const quantifier = createQuantifier(group, 0, Infinity);
|
||
group.parent = quantifier;
|
||
replaceWith(prepContainer(createGroup(), [quantifier]));
|
||
},
|
||
Alternative: {
|
||
enter({ node, parent, key }, { flagDirectivesByAlt }) {
|
||
const flagDirectives = node.elements.filter((el) => el.kind === AstDirectiveKinds.flags);
|
||
for (let i = key + 1; i < parent.alternatives.length; i++) {
|
||
const forwardSiblingAlt = parent.alternatives[i];
|
||
getOrCreate(flagDirectivesByAlt, forwardSiblingAlt, []).push(...flagDirectives);
|
||
}
|
||
},
|
||
exit({ node }, { flagDirectivesByAlt }) {
|
||
if (flagDirectivesByAlt.get(node)?.length) {
|
||
const flags = getCombinedFlagModsFromFlagNodes(flagDirectivesByAlt.get(node));
|
||
if (flags) {
|
||
const flagGroup = prepContainer(createGroup({ flags }), node.elements);
|
||
flagGroup.parent = node;
|
||
node.elements = [flagGroup];
|
||
}
|
||
}
|
||
}
|
||
},
|
||
Assertion({ node, key, container, ast, remove, replaceWith }, state) {
|
||
const { kind, negate } = node;
|
||
const { asciiWordBoundaries, avoidSubclass, supportedGNodes, wordIsAscii } = state;
|
||
if (kind === AstAssertionKinds.line_end) {
|
||
replaceWith(parseFragment(r`(?=\z|\n)`));
|
||
} else if (kind === AstAssertionKinds.line_start) {
|
||
replaceWith(parseFragment(r`(?<=\A|\n(?!\z))`, { skipLookbehindValidation: true }));
|
||
} else if (kind === AstAssertionKinds.lookbehind) {
|
||
state.passedLookbehind = true;
|
||
} else if (kind === AstAssertionKinds.search_start) {
|
||
if (supportedGNodes.has(node)) {
|
||
ast.flags.sticky = true;
|
||
remove();
|
||
} else {
|
||
const prev = container[key - 1];
|
||
if (prev && isAlwaysNonZeroLength(prev)) {
|
||
replaceWith(prepContainer(createLookaround({ negate: true })));
|
||
} else if (avoidSubclass) {
|
||
throw new Error(r`Uses "\G" in a way that requires a subclass`);
|
||
} else {
|
||
replaceWith(createAssertion(AstAssertionKinds.string_start));
|
||
state.strategy = "search_start_clip";
|
||
}
|
||
}
|
||
} else if (kind === AstAssertionKinds.string_end_newline) {
|
||
replaceWith(parseFragment(r`(?=\n?\z)`));
|
||
} else if (kind === AstAssertionKinds.word_boundary && !wordIsAscii && !asciiWordBoundaries) {
|
||
const b = `(?:(?<=${defaultWordChar})(?!${defaultWordChar})|(?<!${defaultWordChar})(?=${defaultWordChar}))`;
|
||
const B = `(?:(?<=${defaultWordChar})(?=${defaultWordChar})|(?<!${defaultWordChar})(?!${defaultWordChar}))`;
|
||
replaceWith(parseFragment(negate ? B : b));
|
||
}
|
||
},
|
||
Backreference({ node }, { jsGroupNameMap }) {
|
||
let { ref } = node;
|
||
if (typeof ref === "string" && !isValidGroupNameJs(ref)) {
|
||
ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
|
||
node.ref = ref;
|
||
}
|
||
},
|
||
CapturingGroup({ node }, { jsGroupNameMap, subroutineRefMap }) {
|
||
let { name } = node;
|
||
if (name && !isValidGroupNameJs(name)) {
|
||
name = getAndStoreJsGroupName(name, jsGroupNameMap);
|
||
node.name = name;
|
||
}
|
||
subroutineRefMap.set(node.number, node);
|
||
if (name) {
|
||
subroutineRefMap.set(name, node);
|
||
}
|
||
},
|
||
CharacterSet({ node, replaceWith }, { accuracy, minTargetEs2024, digitIsAscii, spaceIsAscii, wordIsAscii }) {
|
||
const { kind, negate, value } = node;
|
||
if (digitIsAscii && (kind === AstCharacterSetKinds.digit || value === "digit")) {
|
||
replaceWith(createCharacterSet(AstCharacterSetKinds.digit, { negate }));
|
||
return;
|
||
}
|
||
if (spaceIsAscii && (kind === AstCharacterSetKinds.space || value === "space")) {
|
||
replaceWith(setNegate(parseFragment(asciiSpaceChar), negate));
|
||
return;
|
||
}
|
||
if (wordIsAscii && (kind === AstCharacterSetKinds.word || value === "word")) {
|
||
replaceWith(createCharacterSet(AstCharacterSetKinds.word, { negate }));
|
||
return;
|
||
}
|
||
if (kind === AstCharacterSetKinds.any) {
|
||
replaceWith(createUnicodeProperty("Any"));
|
||
} else if (kind === AstCharacterSetKinds.digit) {
|
||
replaceWith(createUnicodeProperty("Nd", { negate }));
|
||
} else if (kind === AstCharacterSetKinds.hex) {
|
||
replaceWith(createUnicodeProperty("AHex", { negate }));
|
||
} else if (kind === AstCharacterSetKinds.non_newline) {
|
||
replaceWith(parseFragment(r`[^\n]`));
|
||
} else if (kind === AstCharacterSetKinds.space) {
|
||
replaceWith(createUnicodeProperty("space", { negate }));
|
||
} else if (kind === AstCharacterSetKinds.word) {
|
||
replaceWith(setNegate(parseFragment(defaultWordChar), negate));
|
||
} else if (kind === AstCharacterSetKinds.property) {
|
||
if (!JsUnicodeProperties.has(value)) {
|
||
node.key = "sc";
|
||
}
|
||
} else if (kind === AstCharacterSetKinds.posix) {
|
||
if (!minTargetEs2024 && (value === "graph" || value === "print")) {
|
||
if (accuracy === "strict") {
|
||
throw new Error(`POSIX class "${value}" requires min target ES2024 or non-strict accuracy`);
|
||
}
|
||
let ascii = {
|
||
graph: "!-~",
|
||
print: " -~"
|
||
}[value];
|
||
if (negate) {
|
||
ascii = `\0-${cp(ascii.codePointAt(0) - 1)}${cp(ascii.codePointAt(2) + 1)}-\u{10FFFF}`;
|
||
}
|
||
replaceWith(parseFragment(`[${ascii}]`));
|
||
} else {
|
||
replaceWith(setNegate(parseFragment(PosixClassesMap.get(value)), negate));
|
||
}
|
||
}
|
||
},
|
||
Directive(path, state) {
|
||
const { node, parent, ast, remove, replaceWith, removeAllPrevSiblings, removeAllNextSiblings } = path;
|
||
const { kind, flags } = node;
|
||
if (kind === AstDirectiveKinds.flags) {
|
||
if (!flags.enable && !flags.disable) {
|
||
remove();
|
||
} else {
|
||
const flagGroup = prepContainer(createGroup({ flags }), removeAllNextSiblings());
|
||
replaceWith(flagGroup);
|
||
traverseReplacement(flagGroup, path, state, FirstPassVisitor);
|
||
}
|
||
} else if (kind === AstDirectiveKinds.keep) {
|
||
const firstAltFirstEl = ast.pattern.alternatives[0].elements[0];
|
||
const hasWrapperGroup = (
|
||
// Not emulatable if within a `CapturingGroup`
|
||
hasOnlyChild(ast.pattern, (kid) => kid.type === AstTypes.Group) && firstAltFirstEl.alternatives.length === 1
|
||
);
|
||
const topLevel = hasWrapperGroup ? firstAltFirstEl : ast.pattern;
|
||
if (parent.parent !== topLevel || topLevel.alternatives.length > 1) {
|
||
throw new Error(r`Uses "\K" in a way that's unsupported`);
|
||
}
|
||
replaceWith(prepContainer(createLookaround({ behind: true }), removeAllPrevSiblings()));
|
||
}
|
||
},
|
||
Flags({ node, parent }) {
|
||
[
|
||
"digitIsAscii",
|
||
// Flag D
|
||
"extended",
|
||
// Flag x
|
||
"spaceIsAscii",
|
||
// Flag S
|
||
"wordIsAscii"
|
||
// Flag W
|
||
].forEach((f) => delete node[f]);
|
||
Object.assign(node, {
|
||
// JS flag g; no Onig equiv
|
||
global: false,
|
||
// JS flag d; no Onig equiv
|
||
hasIndices: false,
|
||
// JS flag m; no Onig equiv but its behavior is always on in Onig. Onig's only line break
|
||
// char is line feed, unlike JS, so this flag isn't used since it would produce inaccurate
|
||
// results (also allows `^` and `$` to be used in the generator for string start and end)
|
||
multiline: false,
|
||
// JS flag y; no Onig equiv, but used for `\G` emulation
|
||
sticky: node.sticky ?? false
|
||
// Note: Regex+ doesn't allow explicitly adding flags it handles implicitly, so leave out
|
||
// properties `unicode` (JS flag u) and `unicodeSets` (JS flag v). Keep the existing values
|
||
// for `ignoreCase` (flag i) and `dotAll` (JS flag s, but Onig flag m)
|
||
});
|
||
parent.options = {
|
||
disable: {
|
||
// Onig uses different rules for flag x than Regex+, so disable the implicit flag
|
||
x: true,
|
||
// Onig has no flag to control "named capture only" mode but contextually applies its
|
||
// behavior when named capturing is used, so disable Regex+'s implicit flag for it
|
||
n: true
|
||
},
|
||
force: {
|
||
// Always add flag v because we're generating an AST that relies on it (it enables JS
|
||
// support for Onig features nested classes, set intersection, Unicode properties, etc.).
|
||
// However, the generator might disable flag v based on its `target` option
|
||
v: true
|
||
}
|
||
};
|
||
},
|
||
Group({ node }) {
|
||
if (!node.flags) {
|
||
return;
|
||
}
|
||
const { enable, disable } = node.flags;
|
||
enable?.extended && delete enable.extended;
|
||
disable?.extended && delete disable.extended;
|
||
enable?.dotAll && disable?.dotAll && delete enable.dotAll;
|
||
enable?.ignoreCase && disable?.ignoreCase && delete enable.ignoreCase;
|
||
enable && !Object.keys(enable).length && delete node.flags.enable;
|
||
disable && !Object.keys(disable).length && delete node.flags.disable;
|
||
!node.flags.enable && !node.flags.disable && delete node.flags;
|
||
},
|
||
Pattern: {
|
||
enter({ node }, { supportedGNodes }) {
|
||
const leadingGs = [];
|
||
let hasAltWithLeadG = false;
|
||
let hasAltWithoutLeadG = false;
|
||
for (const alt of node.alternatives) {
|
||
if (alt.elements.length === 1 && alt.elements[0].kind === AstAssertionKinds.search_start) {
|
||
alt.elements.pop();
|
||
} else {
|
||
const leadingG = getLeadingG(alt.elements);
|
||
if (leadingG) {
|
||
hasAltWithLeadG = true;
|
||
Array.isArray(leadingG) ? leadingGs.push(...leadingG) : leadingGs.push(leadingG);
|
||
} else {
|
||
hasAltWithoutLeadG = true;
|
||
}
|
||
}
|
||
}
|
||
if (hasAltWithLeadG && !hasAltWithoutLeadG) {
|
||
leadingGs.forEach((g) => supportedGNodes.add(g));
|
||
}
|
||
},
|
||
exit(_, { accuracy, passedLookbehind, strategy }) {
|
||
if (accuracy === "strict" && passedLookbehind && strategy) {
|
||
throw new Error(r`Uses "\G" in a way that requires non-strict accuracy`);
|
||
}
|
||
}
|
||
},
|
||
Quantifier({ node }) {
|
||
if (node.element.type === AstTypes.Quantifier) {
|
||
const group = prepContainer(createGroup(), [node.element]);
|
||
group.parent = node;
|
||
node.element = group;
|
||
}
|
||
},
|
||
Subroutine({ node }, { jsGroupNameMap }) {
|
||
let { ref } = node;
|
||
if (typeof ref === "string" && !isValidGroupNameJs(ref)) {
|
||
ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
|
||
node.ref = ref;
|
||
}
|
||
},
|
||
VariableLengthCharacterSet({ node, replaceWith }, { accuracy, minTargetEs2024 }) {
|
||
const { kind } = node;
|
||
if (kind === AstVariableLengthCharacterSetKinds.newline) {
|
||
replaceWith(parseFragment("(?>\r\n?|[\n\v\f\x85\u2028\u2029])"));
|
||
} else if (kind === AstVariableLengthCharacterSetKinds.grapheme) {
|
||
if (accuracy === "strict") {
|
||
throw new Error(r`Use of "\X" requires non-strict accuracy`);
|
||
}
|
||
const emoji = minTargetEs2024 ? r`\p{RGI_Emoji}` : emoji_regex_xs_default().source.replace(/\\u\{/g, `\\x{`);
|
||
replaceWith(parseFragment(r`(?>\r\n|${emoji}|\P{M}\p{M}*)`, { skipPropertyNameValidation: true }));
|
||
} else {
|
||
throw new Error(`Unexpected varcharset kind "${kind}"`);
|
||
}
|
||
}
|
||
};
|
||
/**
 * Second-pass visitor: records which capturing groups each backreference/recursion
 * refers to, tracks duplicate group names, and expands subroutines into clones of
 * the groups they reference.
 */
var SecondPassVisitor = {
  /**
   * Stores, for a non-orphan backreference, the capture nodes currently registered
   * under its ref in `multiplexCapturesToLeftByRef`.
   */
  Backreference({ node }, { multiplexCapturesToLeftByRef, reffedNodesByReferencer }) {
    if (node.orphan) {
      return;
    }
    const captures = multiplexCapturesToLeftByRef.get(node.ref);
    reffedNodesByReferencer.set(node, [...captures.map((entry) => entry.node)]);
  },
  CapturingGroup: {
    enter({
      node,
      replaceWith,
      skip
    }, {
      groupOriginByCopy,
      groupsByName,
      multiplexCapturesToLeftByRef,
      openRefs,
      reffedNodesByReferencer
    }) {
      const origin = groupOriginByCopy.get(node);

      // A copied group whose number is still open is replaced by a recursion node
      if (origin && openRefs.has(node.number)) {
        const recursion = createRecursion(node.number);
        reffedNodesByReferencer.set(recursion, openRefs.get(node.number));
        replaceWith(recursion);
        skip();
        return;
      }

      openRefs.set(node.number, node);
      multiplexCapturesToLeftByRef.set(node.number, []);
      if (node.name) {
        getOrCreate(multiplexCapturesToLeftByRef, node.name, []);
      }

      const multiplexNodes = multiplexCapturesToLeftByRef.get(node.name ?? node.number);
      const supersededAt = multiplexNodes.findIndex((multiplex) => (
        // This group is from subroutine expansion, and there's a multiplex value from either the
        // origin node or a prior subroutine expansion group with the same origin
        origin === multiplex.node ||
        (origin && origin === multiplex.origin) ||
        // This group is not from subroutine expansion, and it comes after a subroutine expansion
        // group that refers to this group
        node === multiplex.origin
      ));
      if (supersededAt !== -1) {
        multiplexNodes.splice(supersededAt, 1);
      }

      multiplexCapturesToLeftByRef.get(node.number).push({ node, origin });
      if (node.name) {
        multiplexCapturesToLeftByRef.get(node.name).push({ node, origin });

        const groupsWithSameName = getOrCreate(groupsByName, node.name, /* @__PURE__ */ new Map());
        // Copies always lose their name; an original loses it when an earlier same-named
        // group is keeping its own
        let hasDuplicateNameToRemove = false;
        if (origin) {
          hasDuplicateNameToRemove = true;
        } else {
          hasDuplicateNameToRemove = [...groupsWithSameName.values()].some(
            (groupInfo) => !groupInfo.hasDuplicateNameToRemove
          );
        }
        groupsByName.get(node.name).set(node, { node, hasDuplicateNameToRemove });
      }

      if (origin) {
        node._originNumber = origin.number;
      }
    },
    exit({ node }, { openRefs }) {
      openRefs.delete(node.number);
    }
  },
  Group: {
    /** Saves the current flags and applies this group's flag modifiers, if any. */
    enter({ node }, state) {
      state.prevFlags = state.currentFlags;
      if (!node.flags) {
        return;
      }
      state.currentFlags = getNewCurrentFlags(state.currentFlags, node.flags);
    },
    /** Restores the flags saved on enter. */
    exit(_, state) {
      state.currentFlags = state.prevFlags;
    }
  },
  /**
   * Maps a recursion node to the nearest ancestor capturing group whose name or number
   * equals its ref (or to the falsy end of the parent chain if none matches).
   */
  Recursion({ node, parent }, { reffedNodesByReferencer }) {
    const { ref } = node;
    let reffed = parent.parent;
    for (; reffed; reffed = reffed.parent) {
      const isTarget = reffed.type === AstTypes.CapturingGroup && (reffed.name === ref || reffed.number === ref);
      if (isTarget) {
        break;
      }
    }
    reffedNodesByReferencer.set(node, reffed);
  },
  /**
   * Replaces a subroutine with a recursion node (ref `0`) or with a clone of the
   * referenced group, wrapped in a flag group when the referenced group's flags
   * differ from the flags in effect at the call site.
   */
  Subroutine(path, state) {
    const { node, replaceWith } = path;
    const { ref } = node;
    const reffedGroupNode = state.subroutineRefMap.get(ref);

    if (ref === 0) {
      replaceWith(createRecursion(0));
      return;
    }

    // The reffed group might itself contain subroutines, which are expanded during sub-traversal
    const expandedSubroutine = cloneCapturingGroup(reffedGroupNode, state.groupOriginByCopy, null);
    const flagGroupParents = getAllParents(reffedGroupNode, (ancestor) => {
      return ancestor.type === AstTypes.Group && !!ancestor.flags;
    });
    const reffedGroupFlagMods = getCombinedFlagModsFromFlagNodes(flagGroupParents);
    const reffedGroupFlags = reffedGroupFlagMods
      ? getNewCurrentFlags(state.globalFlags, reffedGroupFlagMods)
      : state.globalFlags;

    let replacement = expandedSubroutine;
    if (!areFlagsEqual(reffedGroupFlags, state.currentFlags)) {
      replacement = prepContainer(createGroup({
        flags: getFlagModsFromFlags(reffedGroupFlags)
      }), [expandedSubroutine]);
    }
    replaceWith(replacement);
    traverseReplacement(replacement, path, state, SecondPassVisitor);
  }
};
|
||
/**
 * Third-pass visitor: renumbers capturing groups, rewrites backreferences and
 * recursions to the new numbers, strips duplicate names, and appends empty captures
 * needed by orphan backreferences.
 */
var ThirdPassVisitor = {
  Backreference({ node, replaceWith }, state) {
    if (node.orphan) {
      state.highestOrphanBackref = Math.max(state.highestOrphanBackref, node.ref);
      return;
    }
    const candidates = state.reffedNodesByReferencer.get(node);
    const participants = candidates.filter((capture) => canParticipateWithNode(capture, node));

    if (participants.length === 1) {
      node.ref = participants[0].number;
      return;
    }
    if (!participants.length) {
      // No participating capture: replace with an empty negative lookaround
      replaceWith(prepContainer(createLookaround({ negate: true })));
      return;
    }
    // Several participating captures: one alternative per capture
    const alts = participants.map((capture) => adoptAndSwapKids(
      createAlternative(),
      [createBackreference(capture.number)]
    ));
    replaceWith(adoptAndSwapKids(createGroup(), alts));
  },
  CapturingGroup({ node }, state) {
    state.numCapturesToLeft += 1;
    node.number = state.numCapturesToLeft;
    if (!node.name) {
      return;
    }
    const groupInfo = state.groupsByName.get(node.name).get(node);
    if (groupInfo.hasDuplicateNameToRemove) {
      delete node.name;
    }
  },
  Recursion({ node }, state) {
    // Ref `0` is left as-is
    if (node.ref !== 0) {
      node.ref = state.reffedNodesByReferencer.get(node).number;
    }
  },
  Regex: {
    /** Appends empty capturing groups so every orphan backreference number exists. */
    exit({ node }, state) {
      let remaining = Math.max(state.highestOrphanBackref - state.numCapturesToLeft, 0);
      while (remaining-- > 0) {
        node.pattern.alternatives.at(-1).elements.push(createCapturingGroup());
      }
    }
  }
};
|
||
// Character class source for ASCII whitespace: tab through carriage return, plus space.
// The tab is written as an explicit `\t` escape so the range start can't be mistaken for
// (or silently converted to) a space, which would make the range out of order.
var asciiSpaceChar = "[\t-\r ]";
|
||
// Character class source used for the non-ASCII `word` character set: Unicode letters,
// marks, numbers, and connector punctuation
var defaultWordChar = r`[\p{L}\p{M}\p{N}\p{Pc}]`;
|
||
/**
 * Makes `parent` the parent of every kid and replaces the parent's container array
 * (see `getContainerAccessor`) with `kids`.
 *
 * @param {object} parent Container node.
 * @param {Array<object>} kids Nodes to adopt.
 * @returns {object} The same `parent`.
 */
function adoptAndSwapKids(parent, kids) {
  kids.forEach((child) => {
    child.parent = parent;
  });
  const accessor = getContainerAccessor(parent);
  parent[accessor] = kids;
  return parent;
}
|
||
/**
 * Compares two flag objects on `dotAll` and `ignoreCase` only.
 *
 * @param {{dotAll: *, ignoreCase: *}} a
 * @param {{dotAll: *, ignoreCase: *}} b
 * @returns {boolean} True when both properties are strictly equal.
 */
function areFlagsEqual(a, b) {
  if (a.dotAll !== b.dotAll) {
    return false;
  }
  return a.ignoreCase === b.ignoreCase;
}
|
||
/**
 * Reports whether `capture` lies to the left of `node` in a position it can
 * participate from: walking up from `node`, some earlier sibling at one of the levels
 * is `capture` or contains it.
 *
 * @param {object} capture Capturing group node.
 * @param {object} node Referencing node.
 * @returns {boolean} False when the walk reaches `capture` itself or the pattern node.
 * @throws {Error} If the parent chain ends before reaching a pattern node.
 */
function canParticipateWithNode(capture, node) {
  let current = node;
  do {
    if (current.type === AstTypes.Pattern) {
      return false;
    }
    // Alternatives are passed through; only their containing node's siblings matter
    if (current.type !== AstTypes.Alternative) {
      if (current === capture) {
        return false;
      }
      for (const sibling of getKids(current.parent)) {
        if (sibling === current) {
          // Only siblings to the left are considered
          break;
        }
        if (sibling === capture || hasDescendant(sibling, capture)) {
          return true;
        }
      }
    }
  } while (current = current.parent);
  throw new Error("Unexpected path");
}
|
||
/**
 * Recursively clones an object/array tree, rewiring `parent` properties to point at
 * the cloned ancestors and recording, for each cloned capturing group, the group it
 * ultimately originates from.
 *
 * @param {object|Array} obj Value to clone.
 * @param {Map<object, object>} originMap Receives `clone -> origin` for capturing groups.
 * @param {object|Array|null} up Clone of the value that contains `obj`.
 * @param {object|null} [up2] Clone of the container of `up`; used as the parent when
 *   `up` is an array.
 * @returns {object|Array} The clone.
 */
function cloneCapturingGroup(obj, originMap, up, up2) {
  const copy = Array.isArray(obj) ? [] : {};
  Object.entries(obj).forEach(([key, value]) => {
    if (key === "parent") {
      // Nodes held in an array are parented to the array's owner, not the array itself
      copy.parent = Array.isArray(up) ? up2 : up;
      return;
    }
    if (value && typeof value === "object") {
      copy[key] = cloneCapturingGroup(value, originMap, copy, up);
      return;
    }
    if (key === "type" && value === AstTypes.CapturingGroup) {
      // A copy of a copy keeps pointing at the first origin
      originMap.set(copy, originMap.get(obj) ?? obj);
    }
    copy[key] = value;
  });
  return copy;
}
|
||
/**
 * Creates a recursion node.
 *
 * @param {number|string} ref Reference stored on the node as `ref`.
 * @returns {{type: *, ref: *}} New node of type `AstTypes.Recursion`.
 */
function createRecursion(ref) {
  const recursionNode = { type: AstTypes.Recursion, ref };
  return recursionNode;
}
|
||
/**
 * Collects the ancestors of `node` by following `parent` links, nearest first.
 *
 * @param {object} node Starting node (not included in the result).
 * @param {(ancestor: object) => *} [filterFn] When given, only ancestors for which it
 *   returns a truthy value are kept.
 * @returns {Array<object>} Matching ancestors.
 */
function getAllParents(node, filterFn) {
  const ancestors = [];
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    const keep = !filterFn || filterFn(ancestor);
    if (keep) {
      ancestors.push(ancestor);
    }
  }
  return ancestors;
}
|
||
/**
 * Returns the JS-safe group name for `name`, creating and caching it on first use.
 * New names have the form `$<map size>_<name with invalid identifier chars as "_">`.
 *
 * @param {string} name Original group name.
 * @param {Map<string, string>} map Cache of original name -> JS name.
 * @returns {string} JS-safe name.
 */
function getAndStoreJsGroupName(name, map) {
  if (!map.has(name)) {
    const sanitized = name.replace(/^[^$_\p{IDS}]|[^$\u200C\u200D\p{IDC}]/ug, "_");
    map.set(name, `$${map.size}_${sanitized}`);
  }
  return map.get(name);
}
|
||
/**
 * Finds which property holds a node's children.
 *
 * @param {object} node
 * @returns {"alternatives"|"classes"|"elements"|null} First of these properties with a
 *   truthy value on `node`, checked in that order; `null` if none.
 */
function getContainerAccessor(node) {
  const candidates = ["alternatives", "classes", "elements"];
  const found = candidates.find((accessor) => node[accessor]);
  return found ?? null;
}
|
||
/**
 * Folds the `dotAll`/`ignoreCase` modifiers of several flag nodes, in array order,
 * into a single `{enable?, disable?}` object.
 * An enable entry clears an earlier disable entry for the same flag; a disable entry
 * does not clear an earlier enable entry.
 *
 * @param {Array<{flags: {enable?: object, disable?: object}}>} flagNodes
 * @returns {{enable?: object, disable?: object}|null} `null` when nothing is modified.
 */
function getCombinedFlagModsFromFlagNodes(flagNodes) {
  const enable = {};
  const disable = {};
  for (const { flags } of flagNodes) {
    for (const prop of ["dotAll", "ignoreCase"]) {
      if (flags.enable?.[prop]) {
        delete disable[prop];
        enable[prop] = true;
      }
      if (flags.disable?.[prop]) {
        disable[prop] = true;
      }
    }
  }

  const combined = {};
  if (Object.keys(enable).length) {
    combined.enable = enable;
  }
  if (Object.keys(disable).length) {
    combined.disable = disable;
  }
  return combined.enable || combined.disable ? combined : null;
}
|
||
/**
 * Converts absolute flag values into flag modifiers: each truthy flag is listed under
 * `enable`, each falsy flag under `disable`. A side with no entries is omitted.
 *
 * @param {{dotAll: *, ignoreCase: *}} flags
 * @returns {{enable?: object, disable?: object}}
 */
function getFlagModsFromFlags({ dotAll, ignoreCase }) {
  const enable = {};
  const disable = {};
  if (dotAll) {
    enable.dotAll = true;
  } else {
    disable.dotAll = true;
  }
  if (ignoreCase) {
    enable.ignoreCase = true;
  } else {
    disable.ignoreCase = true;
  }

  const mods = {};
  if (dotAll || ignoreCase) {
    mods.enable = enable;
  }
  if (!dotAll || !ignoreCase) {
    mods.disable = disable;
  }
  return mods;
}
|
||
/**
 * Returns the direct children of a node.
 *
 * @param {object} node
 * @returns {Array<object>|null} A one-element array with `node.element` for quantifiers;
 *   otherwise the node's container array, or `null` when it has none.
 * @throws {Error} If `node` is falsy.
 */
function getKids(node) {
  if (!node) {
    throw new Error("Node expected");
  }
  if (node.type === AstTypes.Quantifier) {
    return [node.element];
  }
  const accessor = getContainerAccessor(node);
  return accessor ? node[accessor] : accessor;
}
|
||
/**
 * Finds the search-start assertion(s) that lead a list of elements.
 * The first element that is a search-start assertion, a non-negated lookaround whose
 * only child is one, or anything not always zero-length decides the outcome.
 *
 * @param {Array<object>} els Elements of one alternative.
 * @returns {object|Array<object>|null} A single assertion node; an array of them when
 *   the deciding element is a consumptive group whose alternatives all lead with one;
 *   otherwise `null`.
 */
function getLeadingG(els) {
  const isSearchStart = (el) => el.kind === AstAssertionKinds.search_start;
  const candidate = els.find((el) => (
    isSearchStart(el) || isLoneGLookaround(el, { negate: false }) || !isAlwaysZeroLength(el)
  ));
  if (!candidate) {
    return null;
  }
  if (isSearchStart(candidate)) {
    return candidate;
  }
  if (isLookaround(candidate)) {
    return candidate.alternatives[0].elements[0];
  }
  if (!isConsumptiveGroup(candidate)) {
    return null;
  }

  // Every alternative of the group must itself lead with a search-start assertion
  const gNodesForGroup = [];
  for (const alt of candidate.alternatives) {
    const nested = getLeadingG(alt.elements);
    if (!nested) {
      return null;
    }
    if (Array.isArray(nested)) {
      gNodesForGroup.push(...nested);
    } else {
      gNodesForGroup.push(nested);
    }
  }
  return gNodesForGroup;
}
|
||
/**
 * Depth-first check for whether `descendant` appears anywhere below `node`.
 *
 * @param {object} node Root of the subtree to search (not itself compared).
 * @param {object} descendant Node to look for, by identity.
 * @returns {boolean}
 */
function hasDescendant(node, descendant) {
  const children = getKids(node) ?? [];
  return children.some((child) => child === descendant || hasDescendant(child, descendant));
}
|
||
/**
 * Tests whether `node` is a lookaround whose only child is a search-start assertion.
 *
 * @param {object} node
 * @param {{negate?: boolean|null}} [options] When `negate` is not `null`, the
 *   lookaround's `negate` must equal it. Defaults to `null` (either).
 * @returns {*} Truthy when all conditions hold.
 */
function isLoneGLookaround(node, options) {
  const { negate: requiredNegate } = { negate: null, ...options };
  return (
    isLookaround(node) &&
    (requiredNegate === null || node.negate === requiredNegate) &&
    hasOnlyChild(node, (child) => child.kind === AstAssertionKinds.search_start)
  );
}
|
||
/**
 * Checks a group name against the identifier pattern used here for JS: a first
 * character of `$`, `_`, or ID_Start, then any number of `$`, ZWNJ, ZWJ, or ID_Continue.
 *
 * @param {string} name
 * @returns {boolean}
 */
function isValidGroupNameJs(name) {
  const jsGroupName = /^[$_\p{IDS}][$\u200C\u200D\p{IDC}]*$/u;
  return jsGroupName.test(name);
}
|
||
/**
 * Parses a pattern string and returns it as a single node.
 *
 * @param {string} pattern Pattern source to tokenize and parse.
 * @param {object} [options] Passed through to `parse`.
 * @returns {object} The lone element when the pattern has one alternative with at most
 *   one element; otherwise a new group that adopts the parsed alternatives.
 */
function parseFragment(pattern, options) {
  const { alternatives } = parse(tokenize(pattern), options).pattern;
  const needsGroup = alternatives.length > 1 || alternatives[0].elements.length > 1;
  if (needsGroup) {
    return adoptAndSwapKids(createGroup(), alternatives);
  }
  return alternatives[0].elements[0];
}
|
||
/**
 * Finishes setting up a freshly created container node: parents its first child to it
 * and, when `kids` are given, moves them into that first child.
 *
 * @param {object} node Container node with at least one child in its container array.
 * @param {Array<object>} [kids] Nodes for the first child to adopt.
 * @returns {object} The same `node`.
 */
function prepContainer(node, kids) {
  const container = node[getContainerAccessor(node)];
  const firstChild = container[0];
  firstChild.parent = node;
  if (kids) {
    adoptAndSwapKids(firstChild, kids);
  }
  return node;
}
|
||
/**
 * Sets `node.negate` and returns the node, so the call can be nested in an expression.
 *
 * @param {object} node Node to mutate.
 * @param {*} negate Value assigned as-is.
 * @returns {object} The same `node`.
 */
function setNegate(node, negate) {
  return Object.assign(node, { negate });
}
|
||
/**
 * Traverses a node that has just replaced the current one, reusing the position
 * (`parent`, `key`, `container`) of the path it replaced.
 *
 * @param {object} replacement Node to traverse.
 * @param {{parent: *, key: *, container: *}} path Path of the replaced node.
 * @param {object} state Traversal state.
 * @param {object} visitor Visitor to apply.
 */
function traverseReplacement(replacement, { parent, key, container }, state, visitor) {
  // Don't use the `node` from `path`
  const replacementPath = { node: replacement, parent, key, container };
  traverse(replacementPath, state, visitor);
}
|
||
|
||
// node_modules/.pnpm/regex-utilities@2.3.0/node_modules/regex-utilities/src/index.js
|
||
// Frozen enum of regex syntax contexts; `CHAR_CLASS` means inside `[...]`
var Context = Object.freeze({ DEFAULT: "DEFAULT", CHAR_CLASS: "CHAR_CLASS" });
|
||
/**
 * Replaces unescaped matches of `needle` in a regex pattern string, optionally limited
 * to matches outside or inside character classes.
 *
 * @param {string} expression Pattern source to scan.
 * @param {string} needle Regex source to search for.
 * @param {string|Function} replacement Replacement text, or a function called with the
 *   match and `{context, negated}` whose result is appended.
 * @param {"DEFAULT"|"CHAR_CLASS"} [context] Restricts replacement to that context; all
 *   contexts when omitted.
 * @returns {string} The updated expression.
 */
function replaceUnescaped(expression, needle, replacement, context) {
  // Anything that isn't the needle is consumed by `$skip`: a class opener (`[` or `[^`)
  // or a single, possibly escaped, character
  const scanner = new RegExp(String.raw`${needle}|(?<$skip>\[\^?|\\?.)`, "gsu");
  // One entry per open class level, on top of a base entry for the default context
  const negated = [false];
  let classDepth = 0;
  let output = "";

  for (const match of expression.matchAll(scanner)) {
    const text = match[0];
    const skipped = match.groups.$skip;

    if (!skipped && (!context || context === Context.DEFAULT === !classDepth)) {
      if (replacement instanceof Function) {
        output += replacement(match, {
          context: classDepth ? Context.CHAR_CLASS : Context.DEFAULT,
          negated: negated[negated.length - 1]
        });
      } else {
        output += replacement;
      }
      continue;
    }

    if (text[0] === "[") {
      classDepth += 1;
      negated.push(text[1] === "^");
    } else if (text === "]" && classDepth) {
      classDepth -= 1;
      negated.pop();
    }
    output += text;
  }
  return output;
}
|
||
/**
 * Runs `callback` for each unescaped match of `needle`, using `replaceUnescaped` for
 * the scan and discarding the resulting string.
 *
 * @param {string} expression Pattern source to scan.
 * @param {string} needle Regex source to search for.
 * @param {Function} callback Called with the match and `{context, negated}`.
 * @param {"DEFAULT"|"CHAR_CLASS"} [context] Restricts matches to that context.
 * @returns {undefined}
 */
function forEachUnescaped(expression, needle, callback, context) {
  void replaceUnescaped(expression, needle, callback, context);
}
|
||
/**
 * Finds the first unescaped match of `needle` in a regex pattern string, starting at
 * `pos` and optionally limited to matches outside or inside character classes.
 *
 * @param {string} expression Pattern source to scan.
 * @param {string} needle Regex source to search for.
 * @param {number} [pos=0] Index to start scanning from.
 * @param {"DEFAULT"|"CHAR_CLASS"} [context] Restricts matches to that context; all
 *   contexts when omitted.
 * @returns {RegExpExecArray|null} The match, or `null` when there is none.
 */
function execUnescaped(expression, needle, pos = 0, context) {
  // Quick exit when the needle can't match anywhere, escaped or not
  if (!new RegExp(needle, "su").test(expression)) {
    return null;
  }
  // Anything that isn't the needle is consumed by `$skip` as one, possibly escaped, char
  const re = new RegExp(`${needle}|(?<$skip>\\\\?.)`, "gsu");
  re.lastIndex = pos;
  let numCharClassesOpen = 0;
  let match;
  while ((match = re.exec(expression))) {
    const { 0: m, groups: { $skip } } = match;
    if (!$skip && (!context || context === Context.DEFAULT === !numCharClassesOpen)) {
      return match;
    }
    if (m === "[") {
      numCharClassesOpen++;
    } else if (m === "]" && numCharClassesOpen) {
      numCharClassesOpen--;
    }
    // Step past a zero-length match so the loop always advances
    if (re.lastIndex === match.index) {
      re.lastIndex++;
    }
  }
  return null;
}
|
||
/**
 * Reports whether `needle` has any unescaped match in `expression`.
 *
 * @param {string} expression Pattern source to scan.
 * @param {string} needle Regex source to search for.
 * @param {"DEFAULT"|"CHAR_CLASS"} [context] Restricts matches to that context.
 * @returns {boolean}
 */
function hasUnescaped(expression, needle, context) {
  const firstMatch = execUnescaped(expression, needle, 0, context);
  return Boolean(firstMatch);
}
|
||
/**
 * Extracts the contents of a group from a regex pattern string, given the index just
 * after the group's opening delimiter. Escaped characters and parentheses inside
 * character classes don't affect nesting.
 *
 * @param {string} expression Pattern source.
 * @param {number} contentsStartPos Index where the group's contents begin.
 * @returns {string} Text up to (not including) the group's closing `)`, or to the end
 *   of `expression` when the group is never closed.
 */
function getGroupContents(expression, contentsStartPos) {
  const tokenRe = /\\?./gsu;
  tokenRe.lastIndex = contentsStartPos;
  let contentsEndPos = expression.length;
  let classDepth = 0;
  // The group whose contents are being read is already open
  let groupDepth = 1;

  for (let tok = tokenRe.exec(expression); tok; tok = tokenRe.exec(expression)) {
    const text = tok[0];
    if (text === "[") {
      classDepth += 1;
      continue;
    }
    if (classDepth) {
      if (text === "]") {
        classDepth -= 1;
      }
      continue;
    }
    if (text === "(") {
      groupDepth += 1;
    } else if (text === ")") {
      groupDepth -= 1;
      if (groupDepth === 0) {
        contentsEndPos = tok.index;
        break;
      }
    }
  }
  return expression.slice(contentsStartPos, contentsEndPos);
}
|
||
|
||
// node_modules/.pnpm/regex@5.1.1/node_modules/regex/src/subclass.js
|
||
// Marker text placed right after a capturing group's opening delimiter to flag it as an
// emulation group; `unmarkEmulationGroups` strips it and records the group as excluded
var emulationGroupMarker = "$E$";
|
||
/**
 * `RegExp` subclass whose `exec` results hide captures marked as excluded in
 * `_captureMap`, optionally transferring an excluded capture's value to another index.
 */
var RegExpSubclass = class _RegExpSubclass extends RegExp {
  // Avoid `#private` to allow for subclassing
  /**
  Per-capture handling, indexed by capture number in the underlying regex.
  @private
  @type {Array<{
    exclude: boolean;
    transfer?: number;
  }> | undefined}
  */
  _captureMap;
  /**
  Group names keyed by capture index in the adjusted (post-exclusion) result.
  @private
  @type {Record<number, string> | undefined}
  */
  _namesByIndex;
  /**
  @param {string | RegExpSubclass} expression
  @param {string} [flags]
  @param {{useEmulationGroups: boolean;}} [options]
  */
  constructor(expression, flags, options) {
    if (expression instanceof RegExp && options) {
      throw new Error("Cannot provide options when copying a regexp");
    }
    const useEmulationGroups = !!options?.useEmulationGroups;
    const unmarked = useEmulationGroups ? unmarkEmulationGroups(expression) : null;
    super(unmarked?.expression || expression, flags);

    // Capture data comes from the unmarked expression, or from the instance being copied
    let src = null;
    if (useEmulationGroups) {
      src = unmarked;
    } else if (expression instanceof _RegExpSubclass) {
      src = expression;
    }
    if (src) {
      this._captureMap = src._captureMap;
      this._namesByIndex = src._namesByIndex;
    }
  }
  /**
  Called internally by all String/RegExp methods that use regexes.
  @override
  @param {string} str
  @returns {RegExpExecArray | null}
  */
  exec(str) {
    const match = RegExp.prototype.exec.call(this, str);
    if (!match || !this._captureMap) {
      return match;
    }

    // Rebuild the capture list in place, keeping only the full match to start with
    const rawCaptures = [...match];
    match.length = 1;
    const withIndices = this.hasIndices;
    let rawIndices;
    if (withIndices) {
      rawIndices = [...match.indices];
      match.indices.length = 1;
    }

    for (let i = 1; i < rawCaptures.length; i++) {
      const captureInfo = this._captureMap[i];
      if (!captureInfo.exclude) {
        match.push(rawCaptures[i]);
        if (withIndices) {
          match.indices.push(rawIndices[i]);
        }
        continue;
      }

      // Excluded capture: optionally overwrite an already-emitted capture with its value
      const transfer = captureInfo.transfer;
      if (!(transfer && match.length > transfer)) {
        continue;
      }
      match[transfer] = rawCaptures[i];
      const transferName = this._namesByIndex[transfer];
      if (transferName) {
        match.groups[transferName] = rawCaptures[i];
        if (withIndices) {
          match.indices.groups[transferName] = rawIndices[i];
        }
      }
      if (withIndices) {
        match.indices[transfer] = rawIndices[i];
      }
    }
    return match;
  }
};
|
||
/**
 * Removes emulation-group markers from a pattern and builds the capture bookkeeping
 * that `RegExpSubclass` uses.
 *
 * @param {string} expression Pattern source, possibly containing marked groups.
 * @returns {{
 *   _captureMap: Array<{exclude: boolean, transfer?: number}>,
 *   _namesByIndex: Record<number, string>,
 *   expression: string
 * }} Capture data (index 0 is the full match) and the pattern without markers.
 */
function unmarkEmulationGroups(expression) {
  const marker = emulationGroupMarker.replace(/\$/g, "\\$");
  const _captureMap = [{ exclude: false }];
  const _namesByIndex = { 0: "" };
  // Counts only captures that aren't marked, i.e. those kept in results
  let realCaptureNum = 0;

  // A capturing group opener (unnamed or named), optionally followed by a marker that
  // may itself be preceded by `$<number>` naming a transfer target
  const captureStart = String.raw`\((?:(?!\?)|\?<(?![=!])(?<name>[^>]+)>)(?<mark>(?:\$(?<transfer>[1-9]\d*))?${marker})?`;
  const onCaptureStart = ({ 0: m, groups: { name, mark, transfer } }) => {
    if (mark) {
      _captureMap.push({
        exclude: true,
        transfer: transfer && +transfer
      });
      return m.slice(0, -mark.length);
    }
    realCaptureNum++;
    if (name) {
      _namesByIndex[realCaptureNum] = name;
    }
    _captureMap.push({
      exclude: false
    });
    return m;
  };

  const unmarked = replaceUnescaped(expression, captureStart, onCaptureStart, Context.DEFAULT);
  return {
    _captureMap,
    _namesByIndex,
    expression: unmarked
  };
}
|
||
|
||
// node_modules/.pnpm/regex@5.1.1/node_modules/regex/src/utils-internals.js
|
||
// Regex source matching the opening delimiter of a noncapturing construct: `(?` followed
// by one of `:=!>`, a letter or `-`, `<=`/`<!`, or `(DEFINE)`
var noncapturingDelim = String.raw`\(\?(?:[:=!>A-Za-z\-]|<[=!]|\(DEFINE\))`;
|
||
/**
 * Replaces the `oldValue.length` characters of `str` starting at `pos` with `newValue`.
 * Only the length of `oldValue` is used; its content is not compared against `str`.
 *
 * @param {string} str
 * @param {number} pos Index where the replaced span starts.
 * @param {string} oldValue Text whose length defines the replaced span.
 * @param {string} newValue Text to insert.
 * @returns {string}
 */
function spliceStr(str, pos, oldValue, newValue) {
  const head = str.slice(0, pos);
  const tail = str.slice(pos + oldValue.length);
  return head + newValue + tail;
}
|
||
|
||
// node_modules/.pnpm/regex@5.1.1/node_modules/regex/src/atomic.js
|
||
// Tokenizer for `atomic`: a noncapturing group opener, a capturing group opener (named or
// not), or any single, possibly escaped, character. Has flag g, so `lastIndex` is stateful
var atomicPluginToken = new RegExp(String.raw`(?<noncapturingStart>${noncapturingDelim})|(?<capturingStart>\((?:\?<[^>]+>)?)|\\?.`, "gsu");
|
||
/**
 * Emulates atomic groups `(?>...)` by rewriting each as a lookahead that captures,
 * followed by a backreference to that capture, and renumbers existing numbered
 * backreferences to account for the added captures.
 *
 * @param {string} expression Pattern source.
 * @param {{useEmulationGroups?: boolean}} [data] When `useEmulationGroups` is set, the
 *   added captures are tagged with `emulationGroupMarker`.
 * @returns {string} Pattern without atomic group syntax.
 * @throws {Error} If a numbered backreference exceeds the number of captures seen.
 */
function atomic(expression, data) {
  if (!/\(\?>/.test(expression)) {
    return expression;
  }
  const aGDelim = "(?>";
  const emulatedAGDelim = `(?:(?=(${data?.useEmulationGroups ? emulationGroupMarker : ""}`;
  // Maps an original capture number to its number after atomic captures are inserted
  const captureNumMap = [0];
  let numCapturesBeforeAG = 0;
  let numAGs = 0;
  let aGPos = NaN;
  let hasProcessedAG;

  // Each pass rewrites one atomic group, then rescans from just inside its replacement
  do {
    hasProcessedAG = false;
    let numCharClassesOpen = 0;
    let numGroupsOpenInAG = 0;
    let inAG = false;
    let token;
    atomicPluginToken.lastIndex = Number.isNaN(aGPos) ? 0 : aGPos + emulatedAGDelim.length;

    while ((token = atomicPluginToken.exec(expression))) {
      const { 0: m, index, groups: { capturingStart, noncapturingStart } } = token;
      if (m === "[") {
        numCharClassesOpen++;
        continue;
      }
      if (numCharClassesOpen) {
        if (m === "]") {
          numCharClassesOpen--;
        }
        continue;
      }
      if (m === aGDelim && !inAG) {
        aGPos = index;
        inAG = true;
        continue;
      }
      if (inAG && noncapturingStart) {
        numGroupsOpenInAG++;
        continue;
      }
      if (capturingStart) {
        if (inAG) {
          numGroupsOpenInAG++;
        } else {
          numCapturesBeforeAG++;
          captureNumMap.push(numCapturesBeforeAG + numAGs);
        }
        continue;
      }
      if (m !== ")" || !inAG) {
        continue;
      }
      if (numGroupsOpenInAG) {
        numGroupsOpenInAG--;
        continue;
      }
      // This `)` closes the atomic group; `<$$N>` is a placeholder backreference that is
      // converted to `\N` after all groups are processed
      numAGs++;
      expression = `${expression.slice(0, aGPos)}${emulatedAGDelim}${expression.slice(aGPos + aGDelim.length, index)}))<$$${numAGs + numCapturesBeforeAG}>)${expression.slice(index + 1)}`;
      hasProcessedAG = true;
      break;
    }
  } while (hasProcessedAG);

  const renumberBackref = ({ 0: m, groups: { backrefNum, wrappedBackrefNum } }) => {
    if (!backrefNum) {
      return `\\${wrappedBackrefNum}`;
    }
    const bNum = +backrefNum;
    if (bNum > captureNumMap.length - 1) {
      throw new Error(`Backref "${m}" greater than number of captures`);
    }
    return `\\${captureNumMap[bNum]}`;
  };
  expression = replaceUnescaped(
    expression,
    String.raw`\\(?<backrefNum>[1-9]\d*)|<\$\$(?<wrappedBackrefNum>\d+)>`,
    renumberBackref,
    Context.DEFAULT
  );
  return expression;
}
|
||
// Regex source matching a quantifier without any lazy/possessive suffix: `?`, `*`, `+`,
// `{n}`, `{n,}`, or `{n,m}`
var baseQuantifier = String.raw`(?:[?*+]|\{\d+(?:,\d*)?\})`;
|
||
// Tokenizer for `possessive`. The alternatives are built from explicit fragments joined
// with `|`, so stray or reformatted lines can't leak into the regex source (as they can
// when the pattern lives in a multi-line template that is whitespace-stripped).
// Has flag g, so `lastIndex` is stateful.
var possessivePluginToken = new RegExp([
  // Escapes that span multiple characters, consumed whole so their trailing characters
  // aren't read as separate tokens
  String.raw`\\(?:\d+|c[A-Za-z]|[gk]<[^>]+>|[pPu]\{[^\}]+\}|u[A-Fa-f\d]{4}|x[A-Fa-f\d]{2})`,
  // Group openers of any kind (capturing, named, noncapturing, lookaround, flag group, etc.)
  String.raw`\((?:\?(?:[:=!>]|<(?:[=!]|[^>]+>)|[A-Za-z\-]+:|\(DEFINE\)))?`,
  // A quantifier, its optional lazy (`?`) or possessive (`+`) suffix, and any invalid
  // additional quantifier character directly after
  String.raw`(?<qBase>${baseQuantifier})(?<qMod>[?+]?)(?<invalidQ>[?*+\{]?)`,
  // Any other single, possibly escaped, character
  String.raw`\\?.`
].join("|"), "gsu");
|
||
/**
 * Emulates possessive quantifiers (`a++`, `a*+`, `a{2,}+`, ...) by wrapping the
 * quantified token and its base quantifier in an atomic group `(?>...)`. For a fixed
 * count `{n}+`, the `+` is simply removed.
 *
 * @param {string} expression Pattern source.
 * @returns {string} Pattern without possessive quantifier syntax.
 * @throws {Error} On a possessive quantifier followed by another quantifier character,
 *   or one applied to a `)`/`]` with no recorded opener.
 */
function possessive(expression) {
  if (!new RegExp(`${baseQuantifier}\\+`).test(expression)) {
    return expression;
  }
  const openGroupIndices = [];
  // Start index of the most recently closed group / outermost character class
  let lastGroupIndex = null;
  let lastCharClassIndex = null;
  let lastToken = "";
  let numCharClassesOpen = 0;
  let token;
  possessivePluginToken.lastIndex = 0;

  while ((token = possessivePluginToken.exec(expression))) {
    const { 0: m, index, groups: { qBase, qMod, invalidQ } } = token;

    if (m === "[") {
      if (!numCharClassesOpen) {
        lastCharClassIndex = index;
      }
      numCharClassesOpen++;
    } else if (m === "]") {
      if (numCharClassesOpen) {
        numCharClassesOpen--;
      } else {
        lastCharClassIndex = null;
      }
    } else if (!numCharClassesOpen) {
      const isPossessive = qMod === "+" && lastToken && !lastToken.startsWith("(");
      if (isPossessive) {
        if (invalidQ) {
          throw new Error(`Invalid quantifier "${m}"`);
        }
        // Net change in length before the scan position: `+` removed (-1), and for the
        // wrapped forms `(?>` and `)` added (+4)
        let lastIndexShift = -1;
        if (/^\{\d+\}$/.test(qBase)) {
          expression = spliceStr(expression, index + qBase.length, qMod, "");
        } else {
          const after = expression.slice(index + m.length);
          if (lastToken === ")" || lastToken === "]") {
            const nodeIndex = lastToken === ")" ? lastGroupIndex : lastCharClassIndex;
            if (nodeIndex === null) {
              throw new Error(`Invalid unmatched "${lastToken}"`);
            }
            expression = `${expression.slice(0, nodeIndex)}(?>${expression.slice(nodeIndex, index)}${qBase})${after}`;
          } else {
            expression = `${expression.slice(0, index - lastToken.length)}(?>${lastToken}${qBase})${after}`;
          }
          lastIndexShift += 4;
        }
        possessivePluginToken.lastIndex += lastIndexShift;
      } else if (m[0] === "(") {
        openGroupIndices.push(index);
      } else if (m === ")") {
        lastGroupIndex = openGroupIndices.length ? openGroupIndices.pop() : null;
      }
    }
    lastToken = m;
  }
  return expression;
}
|
||
|
||
// src/generate.js
|
||
/**
 * Generates pattern, flags, and options from a transformed AST.
 *
 * @param {object} ast Root `Regex` node.
 * @param {object} [options] Generator options, normalized by `getOptions`.
 * @returns {{pattern: string, flags: *, options: object}} Generated pattern string, the
 *   result of generating the flags node, and a shallow copy of the root's options
 *   (adjusted to disable flag v when the target is below ES2024).
 * @throws {Error} If `rules.recursionLimit` is not an integer from 2 to 20, or a node
 *   can't be generated for the chosen options.
 */
function generate(ast, options) {
  const opts = getOptions(options);
  const minTargetEs2024 = isMinTarget(opts.target, "ES2024");
  const minTargetEs2025 = isMinTarget(opts.target, "ES2025");
  const { recursionLimit } = opts.rules;
  const isValidRecursionLimit = Number.isInteger(recursionLimit) && recursionLimit >= 2 && recursionLimit <= 20;
  if (!isValidRecursionLimit) {
    throw new Error("Invalid recursionLimit; use 2-20");
  }

  // Without ES2025 flag groups, find out up front whether cased content appears under
  // flag i, without flag i, or both
  let hasCaseInsensitiveNode = null;
  let hasCaseSensitiveNode = null;
  if (!minTargetEs2025) {
    const iStack = [ast.flags.ignoreCase];
    const flagModifierState = {
      getCurrentModI: () => iStack.at(-1),
      popModI() {
        iStack.pop();
      },
      pushModI(isIOn) {
        iStack.push(isIOn);
      },
      setHasCasedChar() {
        if (iStack.at(-1)) {
          hasCaseInsensitiveNode = true;
        } else {
          hasCaseSensitiveNode = true;
        }
      }
    };
    traverse({ node: ast }, flagModifierState, FlagModifierVisitor);
  }

  const appliedGlobalFlags = {
    dotAll: ast.flags.dotAll,
    // - Turn global flag i on if a case insensitive node was used and no case sensitive nodes were
    //   used (to avoid unnecessary node expansion).
    // - Turn global flag i off if a case sensitive node was used (since case sensitivity can't be
    //   forced without the use of ES2025 flag groups)
    ignoreCase: !!((ast.flags.ignoreCase || hasCaseInsensitiveNode) && !hasCaseSensitiveNode)
  };
  let lastNode = null;
  const state = {
    accuracy: opts.accuracy,
    appliedGlobalFlags,
    avoidSubclass: opts.avoidSubclass,
    captureMap: /* @__PURE__ */ new Map(),
    currentFlags: {
      dotAll: ast.flags.dotAll,
      ignoreCase: ast.flags.ignoreCase
    },
    inCharClass: false,
    lastNode,
    recursionLimit,
    useAppliedIgnoreCase: !!(!minTargetEs2025 && hasCaseInsensitiveNode && hasCaseSensitiveNode),
    useFlagMods: minTargetEs2025,
    useFlagV: minTargetEs2024,
    verbose: opts.verbose
  };

  // Recursive generator; `state.lastNode` always holds the node visited just before
  function gen(node) {
    state.lastNode = lastNode;
    lastNode = node;
    switch (node.type) {
      case AstTypes.Regex: {
        const pattern = gen(node.pattern);
        const flags = gen(node.flags);
        return { pattern, flags, options: { ...node.options } };
      }
      case AstTypes.Alternative:
        return node.elements.map(gen).join("");
      case AstTypes.Assertion:
        return genAssertion(node, state, gen);
      case AstTypes.Backreference:
        return genBackreference(node, state);
      case AstTypes.CapturingGroup:
        return genCapturingGroup(node, state, gen);
      case AstTypes.Character:
        return genCharacter(node, state);
      case AstTypes.CharacterClass:
        return genCharacterClass(node, state, gen);
      case AstTypes.CharacterClassIntersection:
        if (!state.useFlagV) {
          throw new Error("Use of class intersection requires min target ES2024");
        }
        return node.classes.map(gen).join("&&");
      case AstTypes.CharacterClassRange:
        return genCharacterClassRange(node, state);
      case AstTypes.CharacterSet:
        return genCharacterSet(node, state);
      case AstTypes.Flags:
        return genFlags(node, state);
      case AstTypes.Group:
        return genGroup(node, state, gen);
      case AstTypes.Pattern:
        return node.alternatives.map(gen).join("|");
      case AstTypes.Quantifier: {
        const quantified = gen(node.element);
        return quantified + getQuantifierStr(node);
      }
      case AstTypes.Recursion:
        return genRecursion(node, state);
      default:
        throw new Error(`Unexpected node type "${node.type}"`);
    }
  }

  const result = gen(ast);
  if (!minTargetEs2024) {
    // Below ES2024, flag v is disabled instead of forced
    const generatedOptions = result.options;
    delete generatedOptions.force.v;
    generatedOptions.disable.v = true;
    generatedOptions.unicodeSetsPlugin = null;
  }
  return result;
}
|
||
/**
Visitor whose handlers maintain a stack of the effective ignore-case modifier as groups are entered
and exited, and notify `state` when a node that can match cased characters is encountered.

Each handler receives a path-like object (only `node` and `skip` are read here) and a `state`
object that must provide `getCurrentModI`, `pushModI`, `popModI`, and `setHasCasedChar`.
*/
var FlagModifierVisitor = {
  AnyGroup: {
    enter({ node }, state) {
      const currentModI = state.getCurrentModI();
      // Groups without flag modifiers inherit the current value; otherwise the group's
      // enable/disable flags are applied on top of it
      state.pushModI(
        node.flags ? getNewCurrentFlags({ ignoreCase: currentModI }, node.flags).ignoreCase : currentModI
      );
    },
    exit(_, state) {
      state.popModI();
    }
  },
  // Backrefs are always reported as (potentially) matching cased chars
  Backreference(_, state) {
    state.setHasCasedChar();
  },
  Character({ node }, state) {
    if (charHasCase(cp(node.value))) {
      state.setHasCasedChar();
    }
  },
  CharacterClassRange({ node, skip }, state) {
    // NOTE(review): presumably prevents the traverser from descending into the range's min/max
    // nodes; confirm against the traverser implementation
    skip();
    // Only existence matters, so stop at the first char whose case variants fall outside the range
    if (getCasesOutsideCharClassRange(node, { firstOnly: true }).length) {
      state.setHasCasedChar();
    }
  },
  CharacterSet({ node }, state) {
    if (node.kind === AstCharacterSetKinds.property && UnicodePropertiesWithSpecificCase.has(node.value)) {
      state.setHasCasedChar();
    }
  }
};
// Chars that `getCharEscape` prefixes with a backslash when generating outside of a character class
var BaseEscapeChars = /* @__PURE__ */ new Set([
  "$",
  "(",
  ")",
  "*",
  "+",
  ".",
  "?",
  "[",
  "\\",
  "]",
  "^",
  "{",
  "|",
  "}"
]);
// Chars that `getCharEscape` prefixes with a backslash inside a character class when flag v is
// not in use
var CharClassEscapeChars = /* @__PURE__ */ new Set([
  "-",
  "\\",
  "]",
  "^",
  // Literal `[` doesn't require escaping with flag u, but this can help work around regex source
  // linters and regex syntax processors that expect unescaped `[` to create a nested class
  "["
]);
// Chars that `getCharEscape` prefixes with a backslash inside a character class when flag v is
// in use
var CharClassEscapeCharsFlagV = /* @__PURE__ */ new Set([
  "(",
  ")",
  "-",
  "/",
  "[",
  "\\",
  "]",
  "^",
  "{",
  "|",
  "}",
  // Double punctuators; also includes already-listed `-` and `^`
  "!",
  "#",
  "$",
  "%",
  "&",
  "*",
  "+",
  ",",
  ".",
  ":",
  ";",
  "<",
  "=",
  ">",
  "?",
  "@",
  "`",
  "~"
]);
// Code points that `getCharEscape` always renders with a fixed escape sequence, checked before
// any other escaping rule
var CharCodeEscapeMap = /* @__PURE__ */ new Map([
  [9, r`\t`], // horizontal tab
  [10, r`\n`], // line feed
  [11, r`\v`], // vertical tab
  [12, r`\f`], // form feed
  [13, r`\r`], // carriage return
  [8232, r`\u2028`], // line separator
  [8233, r`\u2029`], // paragraph separator
  [65279, r`\uFEFF`] // ZWNBSP/BOM
]);
// Matches a string consisting of exactly one code point that has the Unicode `Cased` property
var casedRe = /^\p{Cased}$/u;
/**
Checks whether a single-code-point string has the Unicode `Cased` property.
@param {string} char
@returns {boolean}
*/
function charHasCase(char) {
  return casedRe.test(char);
}
/**
Generates regex source for an assertion node.
@param {object} node Assertion node; `kind` and `negate` are read, plus `alternatives` for
  lookarounds.
@param {object} _ Generator state (unused).
@param {(node: object) => string} gen Generator applied to each lookaround alternative.
@returns {string}
@throws {Error} If the assertion kind isn't one of the handled kinds.
*/
function genAssertion(node, _, gen) {
  const { kind, negate, alternatives } = node;
  if (isLookaround(node)) {
    // Any lookaround that isn't a lookahead is emitted as a lookbehind (`<` prefix)
    const prefix = `${kind === AstAssertionKinds.lookahead ? "" : "<"}${negate ? "!" : "="}`;
    return `(?${prefix}${alternatives.map(gen).join("|")})`;
  }
  if (kind === AstAssertionKinds.string_end) {
    return "$";
  }
  if (kind === AstAssertionKinds.string_start) {
    return "^";
  }
  if (kind === AstAssertionKinds.word_boundary) {
    return negate ? r`\B` : r`\b`;
  }
  throw new Error(`Unexpected assertion kind "${kind}"`);
}
/**
Generates regex source for a numbered backreference.
@param {{ref: number | string}} node Backreference node; only numeric refs are accepted.
@param {object} state Generator state; reads `useFlagMods`, `accuracy`, `currentFlags`, and
  `captureMap` (filled in by `genCapturingGroup`).
@returns {string}
@throws {Error} If `ref` isn't a number, or if, without flag modifiers and with strict accuracy, a
  case-insensitive backref points at a group recorded as case-sensitive.
*/
function genBackreference({ ref }, state) {
  if (typeof ref !== "number") {
    throw new Error("Unexpected named backref in transformed AST");
  }
  if (!state.useFlagMods && state.accuracy === "strict" && state.currentFlags.ignoreCase && !state.captureMap.get(ref).ignoreCase) {
    throw new Error("Use of case-insensitive backref to case-sensitive group requires target ES2025 or non-strict accuracy");
  }
  return "\\" + ref;
}
/**
Generates regex source for a capturing group and records the group's case sensitivity in
`state.captureMap` (keyed by group number) for later backreference checks.
@param {object} node Capturing group node; reads `name`, `number`, `alternatives`, and
  `_originNumber`.
@param {object} state Generator state; reads `currentFlags.ignoreCase` and `avoidSubclass`.
@param {(node: object) => string} gen Generator applied to each alternative.
@returns {string}
*/
function genCapturingGroup({ name, number, alternatives, _originNumber }, state, gen) {
  state.captureMap.set(number, { ignoreCase: state.currentFlags.ignoreCase });
  const namePart = name ? `?<${name}>` : "";
  let markerPart = "";
  if (!state.avoidSubclass && _originNumber) {
    // All captures from/within expanded subroutines are marked as emulation groups, and some are
    // specially marked as emulation groups with transfer. `number` is based on the pattern after
    // subroutine expansion, whereas `_originNumber` points to the origin capture of an expanded
    // subroutine (or child capture) *prior* to subroutine expansion. `_originNumber` is
    // `undefined` if the current capture isn't from an expanded subroutine
    markerPart = `${_originNumber < number ? `$${_originNumber}` : ""}${emulationGroupMarker}`;
  }
  return `(${namePart}${markerPart}${alternatives.map(gen).join("|")})`;
}
/**
Generates regex source for a single literal character.
@param {{value: number}} node Character node; `value` is a code point.
@param {object} state Generator state; reads `lastNode`, `inCharClass`, `useFlagV`,
  `useAppliedIgnoreCase`, and `currentFlags.ignoreCase`.
@returns {string}
*/
function genCharacter({ value }, state) {
  const char = cp(value);
  const escaped = getCharEscape(value, {
    isAfterBackref: state.lastNode.type === AstTypes.Backreference,
    inCharClass: state.inCharClass,
    useFlagV: state.useFlagV
  });
  // Any char that needed escaping is emitted as its escape, without case expansion
  if (escaped !== char) {
    return escaped;
  }
  if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && charHasCase(char)) {
    // Case insensitivity is applied by listing the char's case variants explicitly
    const cases = getIgnoreCaseMatchChars(char);
    if (state.inCharClass) {
      return cases.join("");
    }
    return cases.length > 1 ? `[${cases.join("")}]` : cases[0];
  }
  return char;
}
/**
Generates regex source for a character class, unwrapping nested classes when that is possible.
@param {object} node Character class node; reads `negate`, `parent`, and `elements`.
@param {object} state Generator state; reads `useFlagV` and `verbose`, and toggles `inCharClass`
  while a top-level class is being generated.
@param {(node: object) => string} gen Generator applied to each element.
@returns {string}
@throws {Error} If a class nested directly in another class must stay nested but flag v isn't
  available.
*/
function genCharacterClass({ negate, parent, elements }, state, gen) {
  const genClass = () => `[${negate ? "^" : ""}${elements.map(gen).join("")}]`;
  if (!state.inCharClass) {
    // Outermost class; flag the state so children know they're inside a class
    state.inCharClass = true;
    const result = genClass();
    state.inCharClass = false;
    return result;
  }
  const firstType = elements[0]?.type;
  if (
    !negate &&
    firstType &&
    (
      // Allows many nested classes to work with `target` ES2018 which doesn't support nesting
      (!state.useFlagV || !state.verbose) &&
        parent.type === AstTypes.CharacterClass &&
        firstType !== AstTypes.CharacterClassIntersection ||
      !state.verbose &&
        parent.type === AstTypes.CharacterClassIntersection &&
        // JS doesn't allow intersection with union or ranges
        elements.length === 1 &&
        firstType !== AstTypes.CharacterClass &&
        firstType !== AstTypes.CharacterClassRange
    )
  ) {
    // Unwrap: emit the contents without surrounding brackets
    return elements.map(gen).join("");
  }
  if (!state.useFlagV && parent.type === AstTypes.CharacterClass) {
    throw new Error("Use of nested character class requires min target ES2024");
  }
  return genClass();
}
/**
Generates regex source for a character class range (`min-max`). When case insensitivity is being
applied manually, chars whose case variants fall outside the range are appended after the range.
@param {object} node Range node; reads `min.value` and `max.value` (code points).
@param {object} state Generator state; reads `useFlagV`, `useAppliedIgnoreCase`, and
  `currentFlags.ignoreCase`.
@returns {string}
*/
function genCharacterClassRange(node, state) {
  const min = node.min.value;
  const max = node.max.value;
  const escOpts = {
    isAfterBackref: false,
    inCharClass: true,
    useFlagV: state.useFlagV
  };
  const minStr = getCharEscape(min, escOpts);
  const maxStr = getCharEscape(max, escOpts);
  // A set, so identical generated pieces are emitted only once
  const extraChars = /* @__PURE__ */ new Set();
  if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase) {
    const charsOutsideRange = getCasesOutsideCharClassRange(node);
    const ranges = getCodePointRangesFromChars(charsOutsideRange);
    for (const value of ranges) {
      // Entries are either a single code point or a `[start, end]` pair
      extraChars.add(
        Array.isArray(value) ?
          `${getCharEscape(value[0], escOpts)}-${getCharEscape(value[1], escOpts)}` :
          getCharEscape(value, escOpts)
      );
    }
  }
  return `${minStr}-${maxStr}${[...extraChars].join("")}`;
}
/**
Generates regex source for a character set node (dot, digit, Unicode property, or word).
@param {object} node Character set node; reads `kind`, `negate`, `value`, and `key`.
@param {object} state Generator state; reads `currentFlags`, `appliedGlobalFlags.dotAll`,
  `useFlagMods`, and `useAppliedIgnoreCase`.
@returns {string}
@throws {Error} If the kind is unhandled, or if a case-specific Unicode property is used where
  case insensitivity is being applied manually.
*/
function genCharacterSet({ kind, negate, value, key }, state) {
  if (kind === AstCharacterSetKinds.dot) {
    if (!state.currentFlags.dotAll) {
      // Onig's only line break char is line feed, unlike JS
      return r`[^\n]`;
    }
    // A bare dot is used only when dotAll is applied as a global flag or flag modifiers are in
    // use; otherwise `[^]` is emitted
    return state.appliedGlobalFlags.dotAll || state.useFlagMods ? "." : "[^]";
  }
  if (kind === AstCharacterSetKinds.digit) {
    return negate ? r`\D` : r`\d`;
  }
  if (kind === AstCharacterSetKinds.property) {
    if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && UnicodePropertiesWithSpecificCase.has(value)) {
      throw new Error(`Unicode property "${value}" can't be case-insensitive when other chars have specific case`);
    }
    return `${negate ? r`\P` : r`\p`}{${key ? `${key}=` : ""}${value}}`;
  }
  if (kind === AstCharacterSetKinds.word) {
    return negate ? r`\W` : r`\w`;
  }
  throw new Error(`Unexpected character set kind "${kind}"`);
}
/**
Generates the flag string contributed by the flags node: `i` (taken from
`state.appliedGlobalFlags`), then `s` and `y` (taken from the node).
@param {object} node Flags node; reads `dotAll` and `sticky`.
@param {object} state Generator state; reads `appliedGlobalFlags.ignoreCase`.
@returns {string}
*/
function genFlags(node, state) {
  // The transformer should never turn on the properties for flags d, g, and m since Onig doesn't
  // have equivs. Flag m is never relied on since Onig uses different line break chars than JS
  const ignoreCase = state.appliedGlobalFlags.ignoreCase ? "i" : "";
  const dotAll = node.dotAll ? "s" : "";
  const sticky = node.sticky ? "y" : "";
  return ignoreCase + dotAll + sticky;
}
/**
Generates regex source for a noncapturing group. The group's flag modifiers (if any) are applied
to `state.currentFlags` while its contents are generated and the previous flags are restored
afterward.
@param {object} node Group node; reads `atomic`, `flags`, `parent`, and `alternatives`.
@param {object} state Generator state; reads `verbose` and `useFlagMods`, and temporarily
  replaces `currentFlags`.
@param {(node: object) => string} gen Generator applied to each alternative.
@returns {string}
*/
function genGroup({ atomic: atomic2, flags, parent, alternatives }, state, gen) {
  const currentFlags = state.currentFlags;
  if (flags) {
    state.currentFlags = getNewCurrentFlags(currentFlags, flags);
  }
  const contents = alternatives.map(gen).join("|");
  // The `(?:…)` wrapper is dropped only when nothing depends on it: non-verbose output, a single
  // alternative, not directly quantified, not atomic, and no flag modifiers that must be emitted
  const canOmitWrapper =
    !state.verbose &&
    alternatives.length === 1 &&
    parent.type !== AstTypes.Quantifier &&
    !atomic2 &&
    (!state.useFlagMods || !flags);
  const result = canOmitWrapper ?
    contents :
    `(?${getGroupPrefix(atomic2, flags, state.useFlagMods)}${contents})`;
  state.currentFlags = currentFlags;
  return result;
}
/**
Generates the recursion token consumed later by the `recursion` plugin: `(?R=N)` for ref `0`,
otherwise `\g<ref&R=N>`, where `N` is `state.recursionLimit`.
@param {{ref: number | string}} node
@param {{recursionLimit: number}} state
@returns {string}
*/
function genRecursion({ ref }, state) {
  const limit = state.recursionLimit;
  if (ref === 0) {
    return `(?R=${limit})`;
  }
  return r`\g<${ref}&R=${limit}>`;
}
/**
Collects the case variants of cased chars within a range that themselves fall outside the range.
@param {object} node Range node; reads `min.value` and `max.value` (code points).
@param {{firstOnly?: boolean}} [options] With `firstOnly`, stops after the first char in the
  range that has any variant outside the range.
@returns {Array<string>} Out-of-range case variants, in discovery order (may contain repeats).
*/
function getCasesOutsideCharClassRange(node, options) {
  const firstOnly = !!options?.firstOnly;
  const min = node.min.value;
  const max = node.max.value;
  const found = [];
  // Ranges starting before `A` (65) that run to exactly U+FFFF or to at least U+1FFFF, and ranges
  // from U+10000 to at least U+1FFFF, are reported as having no outside cases without being
  // scanned. NOTE(review): presumably a shortcut for huge ranges that already contain all case
  // variants; confirm
  if (min < 65 && (max === 65535 || max >= 131071) || min === 65536 && max >= 131071) {
    return found;
  }
  for (let i = min; i <= max; i++) {
    const char = cp(i);
    if (!charHasCase(char)) {
      continue;
    }
    const charsOutsideRange = getIgnoreCaseMatchChars(char).filter((caseOfChar) => {
      const num = caseOfChar.codePointAt(0);
      return num < min || num > max;
    });
    if (charsOutsideRange.length) {
      found.push(...charsOutsideRange);
      if (firstOnly) {
        break;
      }
    }
  }
  return found;
}
/**
Returns the regex source for a code point, escaped as needed for its context.
@param {number} codePoint
@param {object} context
@param {boolean} context.isAfterBackref Whether the char directly follows a backreference.
@param {boolean} context.inCharClass Whether the char is within a character class.
@param {boolean} context.useFlagV Whether flag v escaping rules apply within classes.
@returns {string}
*/
function getCharEscape(codePoint, { isAfterBackref, inCharClass, useFlagV }) {
  // Fixed escapes (e.g. `\t`, `\n`) take priority over everything else
  if (CharCodeEscapeMap.has(codePoint)) {
    return CharCodeEscapeMap.get(codePoint);
  }
  const needsCodeEscape =
    // Control chars, etc.; condition modeled on the Chrome developer console's display for strings
    codePoint < 32 ||
    codePoint > 126 && codePoint < 160 ||
    // Unicode planes 4-16; unassigned, special purpose, and private use area
    codePoint > 262143 ||
    // Avoid corrupting a preceding backref by immediately following it with a literal digit
    isAfterBackref && isDigitCharCode(codePoint);
  if (needsCodeEscape) {
    const hex = codePoint.toString(16).toUpperCase();
    return codePoint > 255 ? `\\u{${hex}}` : `\\x${hex.padStart(2, "0")}`;
  }
  const escapeChars = inCharClass ?
    (useFlagV ? CharClassEscapeCharsFlagV : CharClassEscapeChars) :
    BaseEscapeChars;
  const char = cp(codePoint);
  return (escapeChars.has(char) ? "\\" : "") + char;
}
/**
Converts a list of single-code-point strings into a sorted list of code points, collapsing runs
of consecutive code points into `[start, end]` pairs.
@param {Array<string>} chars May contain repeats; repeated code points are counted once so they
  can't produce redundant entries or split a run.
@returns {Array<number | [number, number]>} Standalone code points and inclusive ranges, ascending.
*/
function getCodePointRangesFromChars(chars) {
  // Deduplicate before sorting; a repeated code point would otherwise break the
  // "next === current + 1" run detection below
  const codePoints = [...new Set(chars.map((char) => char.codePointAt(0)))].sort((a, b) => a - b);
  const values = [];
  let start = null;
  for (let i = 0; i < codePoints.length; i++) {
    if (codePoints[i + 1] === codePoints[i] + 1) {
      // Within a run; remember where it began
      start ??= codePoints[i];
    } else if (start === null) {
      values.push(codePoints[i]);
    } else {
      values.push([start, codePoints[i]]);
      start = null;
    }
  }
  return values;
}
/**
Returns what follows `(?` when opening a noncapturing group: `>` for atomic groups, otherwise any
flag modifiers followed by `:`.
@param {boolean} atomic2 Whether the group is atomic.
@param {{enable?: object, disable?: object}} [flagMods] Flags the group enables/disables; only
  `ignoreCase` and `dotAll` are read.
@param {boolean} useFlagMods Whether flag modifiers may be emitted.
@returns {string}
*/
function getGroupPrefix(atomic2, flagMods, useFlagMods) {
  if (atomic2) {
    return ">";
  }
  let mods = "";
  if (flagMods && useFlagMods) {
    const { enable, disable } = flagMods;
    const enabled = (enable?.ignoreCase ? "i" : "") + (enable?.dotAll ? "s" : "");
    // The `-` separator is emitted whenever a `disable` object is present
    const disabled = (disable ? "-" : "") + (disable?.ignoreCase ? "i" : "") + (disable?.dotAll ? "s" : "");
    mods = enabled + disabled;
  }
  return `${mods}:`;
}
/**
Returns the source for a quantifier: the shortest equivalent base (`?`, `*`, `+`, `{n}`, `{n,}`,
or `{n,m}`) followed by `+` if possessive, nothing if greedy, or `?` if lazy.
@param {object} node Quantifier node; reads `min`, `max`, `greedy`, and `possessive`.
@returns {string}
*/
function getQuantifierStr({ min, max, greedy, possessive: possessive2 }) {
  let base;
  if (!min && max === 1) {
    base = "?";
  } else if (!min && max === Infinity) {
    base = "*";
  } else if (min === 1 && max === Infinity) {
    base = "+";
  } else if (min === max) {
    base = `{${min}}`;
  } else {
    base = `{${min},${max === Infinity ? "" : max}}`;
  }
  // Possessive takes precedence over the greedy/lazy distinction
  let suffix = "?";
  if (possessive2) {
    suffix = "+";
  } else if (greedy) {
    suffix = "";
  }
  return base + suffix;
}
/**
Checks whether a char code is one of the ASCII digits `0`-`9` (48-57).
@param {number} value
@returns {boolean}
*/
function isDigitCharCode(value) {
  return value > 47 && value < 58;
}

// src/subclass.js
/**
`RegExpSubclass` extension that remembers the arguments it was built from (`rawArgs`) and applies
an optional emulation strategy when executing.
*/
var EmulatedRegExp = class _EmulatedRegExp extends RegExpSubclass {
  /**
  Name of the emulation strategy applied in `exec`, if any.
  @private
  @type {string | null}
  */
  #strategy;
  /**
  Can be used to serialize the arguments used to create the instance.
  @type {{
    pattern: string;
    flags: string;
    options: EmulatedRegExpOptions;
  }}
  */
  rawArgs;
  /**
  @overload
  @param {string} pattern
  @param {string} [flags]
  @param {EmulatedRegExpOptions} [options]
  */
  /**
  @overload
  @param {EmulatedRegExp} pattern
  @param {string} [flags]
  */
  constructor(pattern, flags, options) {
    if (pattern instanceof RegExp) {
      // Copying an existing regex
      if (options) {
        throw new Error("Cannot provide options when copying a regexp");
      }
      super(pattern, flags);
      if (pattern instanceof _EmulatedRegExp) {
        this.#strategy = pattern.#strategy;
        // Copy instead of sharing the object, so that overriding `flags` below can't modify the
        // `rawArgs` of the instance being copied
        this.rawArgs = {
          ...pattern.rawArgs,
          options: { ...pattern.rawArgs.options }
        };
      } else {
        this.#strategy = null;
        this.rawArgs = {
          pattern: pattern.source,
          flags: pattern.flags,
          options: {}
        };
      }
      if (flags !== void 0) {
        this.rawArgs.flags = flags;
      }
    } else {
      const opts = {
        strategy: null,
        useEmulationGroups: false,
        ...options
      };
      super(pattern, flags, { useEmulationGroups: opts.useEmulationGroups });
      this.#strategy = opts.strategy;
      this.rawArgs = {
        pattern,
        flags: flags ?? "",
        // Only non-default options are recorded
        options: {
          ...opts.strategy && { strategy: opts.strategy },
          ...opts.useEmulationGroups && { useEmulationGroups: true }
        }
      };
    }
  }
  /**
  Called internally by all String/RegExp methods that use regexes. Provides special case handling
  that requires coupling with pattern changes during transpilation.
  @override
  @param {string} str
  @returns {RegExpExecArray | null}
  */
  exec(str) {
    const exec = super.exec;
    const useLastIndex = this.global || this.sticky;
    const pos = this.lastIndex;
    if (this.#strategy === "search_start_clip" && useLastIndex && pos) {
      // Search a copy of the string that starts at `lastIndex`, then shift the match details and
      // `lastIndex` back so they're relative to the full string
      this.lastIndex = 0;
      const match = exec.call(this, str.slice(pos));
      if (match) {
        adjustMatchDetailsForOffset(match, this, str, pos);
        this.lastIndex += pos;
      }
      return match;
    }
    return exec.call(this, str);
  }
};
/**
Updates, in place, a match produced by searching a slice of `input` that began at `offset`, so
that its `input`, `index`, and (with flag d) `indices` are relative to the full string.
@param {RegExpExecArray} match Match to modify.
@param {RegExp} re Regex that produced the match; only `hasIndices` is read.
@param {string} input The full, unsliced string.
@param {number} offset Position in `input` where the searched slice began.
*/
function adjustMatchDetailsForOffset(match, re, input, offset) {
  const shift = (pair) => [pair[0] + offset, pair[1] + offset];
  match.input = input;
  match.index += offset;
  if (!re.hasIndices) {
    return;
  }
  const indices = match.indices;
  for (let i = 0; i < indices.length; i++) {
    const arr = indices[i];
    // Entries for groups that didn't participate in the match are empty
    if (arr) {
      indices[i] = shift(arr);
    }
  }
  const groupIndices = indices.groups;
  if (groupIndices) {
    for (const key of Object.keys(groupIndices)) {
      const arr = groupIndices[key];
      if (arr) {
        groupIndices[key] = shift(arr);
      }
    }
  }
}

// node_modules/.pnpm/regex-recursion@5.1.1/node_modules/regex-recursion/src/index.js
var r3 = String.raw;
// Pattern source for `\g<nameOrNum&R=depth>` (recursion of a group)
var gRToken = r3`\\g<(?<gRNameOrNum>[^>&]+)&R=(?<gRDepth>[^>]+)>`;
// Pattern source for either recursion token: `(?R=depth)` (whole pattern) or the `\g` form
var recursiveToken = r3`\(\?R=(?<rDepth>[^\)]+)\)|${gRToken}`;
// Pattern source for the opening of a named capturing group; the lookahead excludes lookbehind
// (`(?<=`, `(?<!`)
var namedCapturingDelim = r3`\(\?<(?![=!])(?<captureName>[^>]+)>`;
// Tokenizer used by `recursion`; besides the tokens above it matches `(?`, an escaped char, or
// any single char. Has flag g, so `lastIndex` is stateful
var token = new RegExp(r3`${namedCapturingDelim}|${recursiveToken}|\(\?|\\?.`, "gsu");
var overlappingRecursionMsg = "Cannot use multiple overlapping recursions";
// Sticky regex matching an emulation group marker, optionally preceded by `$<number>`
var emulationGroupMarkerRe = new RegExp(r3`(?:\$[1-9]\d*)?${emulationGroupMarker.replace(/\$/g, r3`\$`)}`, "y");
/**
Expands recursion tokens (`(?R=N)` and `\g<nameOrNum&R=N>`) in a pattern string into nested
noncapturing groups, up to the specified depth.
@param {string} expression Pattern source.
@param {{useEmulationGroups?: boolean}} [data] Plugin data.
@returns {string} The expanded pattern, or `expression` unchanged if it has no recursion token.
@throws {Error} For a DEFINE group, overlapping recursions, an out-of-bounds depth, recursive `\g`
  used outside the group it references, or numbered backrefs combined with global recursion or
  with recursion of a group that contains captures.
*/
function recursion(expression, data) {
  if (!new RegExp(recursiveToken, "su").test(expression)) {
    return expression;
  }
  if (hasUnescaped(expression, r3`\(\?\(DEFINE\)`, Context.DEFAULT)) {
    throw new Error("DEFINE groups cannot be used with recursion");
  }
  const useEmulationGroups = !!data?.useEmulationGroups;
  const hasNumberedBackref = hasUnescaped(expression, r3`\\[1-9]`, Context.DEFAULT);
  // Maps each capture's name and number (as a string) to the position where its contents begin
  const groupContentsStartPos = /* @__PURE__ */ new Map();
  // Stack of the groups (of any kind) that enclose the current position
  const openGroups = [];
  let hasRecursed = false;
  let numCharClassesOpen = 0;
  let numCaptures = 0;
  let match;
  // `token` is a shared regex with flag g; restart scanning from the beginning
  token.lastIndex = 0;
  while (match = token.exec(expression)) {
    const { 0: m, groups: { captureName, rDepth, gRNameOrNum, gRDepth } } = match;
    if (m === "[") {
      numCharClassesOpen++;
    } else if (!numCharClassesOpen) {
      if (rDepth) {
        // Recursion of the whole pattern: `(?R=N)`
        assertMaxInBounds(rDepth);
        if (hasRecursed) {
          throw new Error(overlappingRecursionMsg);
        }
        if (hasNumberedBackref) {
          throw new Error("Numbered backrefs cannot be used with global recursion");
        }
        const pre = expression.slice(0, match.index);
        const post = expression.slice(token.lastIndex);
        if (hasUnescaped(post, recursiveToken, Context.DEFAULT)) {
          throw new Error(overlappingRecursionMsg);
        }
        return makeRecursive(pre, post, +rDepth, false, useEmulationGroups);
      } else if (gRNameOrNum) {
        // Recursion of a group: `\g<nameOrNum&R=N>`
        assertMaxInBounds(gRDepth);
        let isWithinReffedGroup = false;
        for (const g of openGroups) {
          if (g.name === gRNameOrNum || g.num === +gRNameOrNum) {
            isWithinReffedGroup = true;
            if (g.hasRecursedWithin) {
              throw new Error(overlappingRecursionMsg);
            }
            break;
          }
        }
        if (!isWithinReffedGroup) {
          throw new Error(r3`Recursive \g cannot be used outside the referenced group "\g<${gRNameOrNum}&R=${gRDepth}>"`);
        }
        const startPos = groupContentsStartPos.get(gRNameOrNum);
        const groupContents = getGroupContents(expression, startPos);
        if (hasNumberedBackref && hasUnescaped(groupContents, r3`${namedCapturingDelim}|\((?!\?)`, Context.DEFAULT)) {
          throw new Error("Numbered backrefs cannot be used with recursion of capturing groups");
        }
        const groupContentsPre = expression.slice(startPos, match.index);
        const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
        const expansion = makeRecursive(groupContentsPre, groupContentsPost, +gRDepth, true, useEmulationGroups);
        const pre = expression.slice(0, startPos);
        const post = expression.slice(startPos + groupContents.length);
        // Replace the group's contents with the expansion, then move the scan position to the
        // end of the expanded contents
        expression = `${pre}${expansion}${post}`;
        token.lastIndex += expansion.length - m.length - groupContentsPre.length - groupContentsPost.length;
        openGroups.forEach((g) => g.hasRecursedWithin = true);
        hasRecursed = true;
      } else if (captureName) {
        numCaptures++;
        groupContentsStartPos.set(String(numCaptures), token.lastIndex);
        groupContentsStartPos.set(captureName, token.lastIndex);
        openGroups.push({
          num: numCaptures,
          name: captureName
        });
      } else if (m.startsWith("(")) {
        const isUnnamedCapture = m === "(";
        if (isUnnamedCapture) {
          numCaptures++;
          // With emulation groups, the contents begin after any emulation group marker
          groupContentsStartPos.set(
            String(numCaptures),
            token.lastIndex + (useEmulationGroups ? emulationGroupMarkerLength(expression, token.lastIndex) : 0)
          );
        }
        openGroups.push(isUnnamedCapture ? { num: numCaptures } : {});
      } else if (m === ")") {
        openGroups.pop();
      }
    } else if (m === "]") {
      numCharClassesOpen--;
    }
  }
  return expression;
}
/**
Validates a recursion depth given as a string of decimal digits.
@param {string} max
@throws {Error} If `max` isn't a positive integer without leading zeros, or isn't within 2-100.
*/
function assertMaxInBounds(max) {
  const errMsg = `Max depth must be integer between 2 and 100; used ${max}`;
  const isPositiveInt = /^[1-9]\d*$/.test(max);
  if (!isPositiveInt) {
    throw new Error(errMsg);
  }
  const depth = +max;
  if (depth < 2 || depth > 100) {
    throw new Error(errMsg);
  }
}
/**
Builds the expansion for a recursion point: `pre`, then `maxDepth - 1` nested copies of
`(?:pre`, an empty group `(?:)` at the innermost level, the matching copies of `post)`, and
finally `post`.
@param {string} pre Source preceding the recursion token.
@param {string} post Source following the recursion token.
@param {number} maxDepth Recursion depth.
@param {boolean} isSubpattern Whether a group (rather than the whole pattern) is being recursed.
  If so, the names of captures in `pre`/`post` are collected and passed to `repeatWithDepth`.
@param {boolean} useEmulationGroups
@returns {string}
*/
function makeRecursive(pre, post, maxDepth, isSubpattern, useEmulationGroups) {
  const namesInRecursed = /* @__PURE__ */ new Set();
  if (isSubpattern) {
    forEachUnescaped(pre + post, namedCapturingDelim, ({ groups: { captureName } }) => {
      namesInRecursed.add(captureName);
    }, Context.DEFAULT);
  }
  const reps = maxDepth - 1;
  const names = isSubpattern ? namesInRecursed : null;
  const nestedPre = repeatWithDepth(`(?:${pre}`, reps, names, "forward", useEmulationGroups);
  const nestedPost = repeatWithDepth(`${post})`, reps, names, "backward", useEmulationGroups);
  return `${pre}${nestedPre}(?:)${nestedPost}${post}`;
}
/**
Concatenates `reps` copies of `expression`, rewriting group names in each copy so they're unique
per depth: named captures and `\k<name>` backrefs get the suffix `_$<depth>`. Depths start at 2
and count up for `"forward"`, or count down to 2 for `"backward"`.
@param {string} expression Source to repeat.
@param {number} reps Number of copies.
@param {Set<string> | null} namesInRecursed If provided, backrefs to names not in the set are
  left unchanged.
@param {'forward' | 'backward'} direction
@param {boolean} useEmulationGroups If true, the copies' captures are marked as emulation groups.
@returns {string}
*/
function repeatWithDepth(expression, reps, namesInRecursed, direction, useEmulationGroups) {
  const startNum = 2;
  const depthNum = (i) => direction === "backward" ? reps - i + startNum - 1 : i + startNum;
  // NOTE: Not currently handling *named* emulation groups that already exist in the pattern
  const needle = r3`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>${useEmulationGroups ? r3`|(?<unnamed>\()(?!\?)(?:${emulationGroupMarkerRe.source})?` : ""}`;
  let result = "";
  for (let i = 0; i < reps; i++) {
    const captureNum = depthNum(i);
    result += replaceUnescaped(
      expression,
      needle,
      ({ 0: m, index, groups: { captureName, backref, unnamed } }) => {
        if (backref && namesInRecursed && !namesInRecursed.has(backref)) {
          // Refers to a group outside of the recursed contents
          return m;
        }
        if (unnamed) {
          // Any existing marker is part of the match and is replaced by a plain marker
          return `(${emulationGroupMarker}`;
        }
        const suffix = `_$${captureNum}`;
        return captureName ?
          `(?<${captureName}${suffix}>${useEmulationGroups ? emulationGroupMarker : ""}` :
          r3`\k<${backref}${suffix}>`;
      },
      Context.DEFAULT
    );
  }
  return result;
}
/**
Returns the length of the emulation group marker (including any `$<number>` prefix) that starts
exactly at `index`, or `0` if there isn't one there.
@param {string} expression
@param {number} index
@returns {number}
*/
function emulationGroupMarkerLength(expression, index) {
  // The regex is sticky, so it can only match starting at `lastIndex`
  emulationGroupMarkerRe.lastIndex = index;
  const match = emulationGroupMarkerRe.exec(expression);
  return match ? match[0].length : 0;
}

// src/index.js
/**
Transpiles an Oniguruma pattern and returns the pieces needed to construct a native or emulated
JS regex: the pattern is tokenized, parsed, transformed, and generated, and the generated source
is then run through the recursion, possessive, and atomic plugins.
@param {string} pattern Oniguruma pattern.
@param {object} [options] Passed through `getOptions`.
@returns {{pattern: string, flags: string, options?: object}} `options` is present only when a
  strategy or emulation groups are needed, in which case the result is meant for `EmulatedRegExp`.
*/
function toDetails(pattern, options) {
  const opts = getOptions(options);
  const { avoidSubclass } = opts;
  const tokenized = tokenize(pattern, opts.flags, {
    captureGroup: opts.rules.captureGroup,
    singleline: opts.rules.singleline
  });
  const onigurumaAst = parse(tokenized, {
    skipBackrefValidation: opts.rules.allowOrphanBackrefs,
    verbose: opts.verbose
  });
  const regexAst = transform(onigurumaAst, {
    accuracy: opts.accuracy,
    asciiWordBoundaries: opts.rules.asciiWordBoundaries,
    avoidSubclass,
    bestEffortTarget: opts.target
  });
  const generated = generate(regexAst, opts);
  const pluginData = { useEmulationGroups: !avoidSubclass };
  const expandedPattern = atomic(possessive(recursion(generated.pattern, pluginData)), pluginData);
  // Flags d and g come from the options; flag u is used whenever flag v is disabled
  const flags = `${opts.hasIndices ? "d" : ""}${opts.global ? "g" : ""}${generated.flags}${generated.options.disable.v ? "u" : "v"}`;
  const result = {
    pattern: expandedPattern,
    flags
  };
  // Emulation groups are needed only if a marker survived into the final pattern
  const useEmulationGroups = !avoidSubclass && result.pattern.includes(emulationGroupMarker);
  const strategy = regexAst._strategy;
  if (useEmulationGroups || strategy) {
    result.options = {
      ...strategy && { strategy },
      ...useEmulationGroups && { useEmulationGroups }
    };
  }
  return result;
}
/**
Tokenizes and parses an Oniguruma pattern, returning the parser's result.
@param {string} pattern Oniguruma pattern.
@param {{flags?: string, rules?: {captureGroup?: boolean}}} [options] `flags` defaults to `""`
  and `rules.captureGroup` to `false`.
@returns {object} The value returned by `parse`.
*/
function toOnigurumaAst(pattern, options) {
  const flags = options?.flags ?? "";
  const captureGroup = options?.rules?.captureGroup ?? false;
  const tokenized = tokenize(pattern, flags, { captureGroup });
  return parse(tokenized);
}
/**
Transpiles an Oniguruma pattern to a JS regex. An `EmulatedRegExp` is returned when `toDetails`
reports emulation options; otherwise a native `RegExp` is returned.
@param {string} pattern Oniguruma pattern.
@param {object} [options] Passed to `toDetails`.
@returns {RegExp | EmulatedRegExp}
*/
function toRegExp(pattern, options) {
  const { pattern: source, flags, options: emulationOptions } = toDetails(pattern, options);
  return emulationOptions ?
    new EmulatedRegExp(source, flags, emulationOptions) :
    new RegExp(source, flags);
}
//# sourceMappingURL=index.js.map