Files
2025-07-24 18:46:24 +02:00

1877 lines
59 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// src/utils.js
// Short aliases used throughout the file
var cp = String.fromCodePoint;
var r = String.raw;
// Feature and bug detection for the `RegExp` implementation of the current environment. Every
// value is computed once, when this module is evaluated.
var envFlags = (() => {
  // Whether `new RegExp(pattern, flags)` succeeds
  const compiles = (pattern, flags) => {
    try {
      new RegExp(pattern, flags);
      return true;
    } catch {
      return false;
    }
  };
  // Pattern modifiers (flag groups) such as `(?i:)`
  const flagGroups = compiles("(?i:)");
  // Flag `v`
  const unicodeSets = compiles("", "v");
  return {
    flagGroups,
    unicodeSets,
    // True if flag `v` is available but an escaped hyphen after `\d` in a class fails to compile
    bugFlagVLiteralHyphenIsRange: unicodeSets ? !compiles(r`[\d\-a]`, "v") : false,
    // True if flag `v` is available but a negated class nested in a class still matches `a`
    bugNestedClassIgnoresNegation: unicodeSets && new RegExp("[[^a]]", "v").test("a")
  };
})();
/**
 * Applies flag modifiers to the currently active `dotAll`/`ignoreCase` flags.
 * A flag listed in `disable` always ends up off; otherwise it is on if it is enabled by the
 * modifiers or was already on.
 * @param {{dotAll?: boolean, ignoreCase?: boolean}} current
 * @param {{enable?: object, disable?: object}} mods
 * @returns {{dotAll: boolean, ignoreCase: boolean}}
 */
function getNewCurrentFlags(current, { enable, disable }) {
  const resolve = (flag) => {
    if (disable?.[flag]) {
      return false;
    }
    return Boolean(enable?.[flag] || current[flag]);
  };
  return {
    dotAll: resolve("dotAll"),
    ignoreCase: resolve("ignoreCase")
  };
}
/**
 * Returns the value stored for `key`, first storing `defaultValue` if the key is absent.
 * @param {Map} map
 * @param {*} key
 * @param {*} defaultValue
 * @returns {*} The value held by the map for `key` after the call.
 */
function getOrInsert(map, key, defaultValue) {
  const isMissing = !map.has(key);
  if (isMissing) {
    map.set(key, defaultValue);
  }
  return map.get(key);
}
/**
 * Checks whether `target` is at least as recent as `min`, comparing their `EsVersion` values.
 * @param {string} target Key of `EsVersion`.
 * @param {string} min Key of `EsVersion`.
 * @returns {boolean}
 */
function isMinTarget(target, min) {
  const targetVersion = EsVersion[target];
  const minVersion = EsVersion[min];
  return targetVersion >= minVersion;
}
/**
 * Returns `value` unchanged unless it is `null` or `undefined`.
 * @param {*} value
 * @param {string} [msg] Error message; defaults to "Value expected".
 * @returns {*} The given value.
 * @throws {Error} If `value` is nullish.
 */
function throwIfNullish(value, msg) {
  if (value != null) {
    return value;
  }
  throw new Error(msg ?? "Value expected");
}
// src/options.js
// Numeric value for each supported target name, so that targets can be compared with `>=` (see
// `isMinTarget`).
var EsVersion = {
ES2025: 2025,
ES2024: 2024,
ES2018: 2018
};
// Accepted values for the `target` option. Each key maps to itself; `auto` is replaced with a
// concrete target by `getOptions` based on `envFlags`.
var Target = (
/** @type {const} */
{
auto: "auto",
ES2025: "ES2025",
ES2024: "ES2024",
ES2018: "ES2018"
}
);
/**
 * Validates the given options and returns them merged over the defaults.
 * A `target` of `"auto"` (or one left `undefined`) is replaced with the newest target supported
 * by the current environment, as reported by `envFlags`.
 * @param {object} [options]
 * @returns {object} Options with all defaults filled in, including the nested `rules`.
 * @throws {Error} If `options` is not a plain object or `target` is not a key of `Target`.
 */
function getOptions(options = {}) {
  if ({}.toString.call(options) !== "[object Object]") {
    throw new Error("Unexpected options");
  }
  // Check own keys only, so inherited names like "toString" or "constructor" aren't accepted
  if (options.target !== void 0 && !Object.hasOwn(Target, options.target)) {
    throw new Error(`Unexpected target "${options.target}"`);
  }
  const opts = {
    // Sets the level of emulation rigor/strictness.
    accuracy: "default",
    // Disables advanced emulation that relies on returning a `RegExp` subclass, resulting in
    // certain patterns not being emulatable.
    avoidSubclass: false,
    // Oniguruma flags; a string with `i`, `m`, `x`, `D`, `S`, `W`, `y{g}` in any order (all
    // optional). Oniguruma's `m` is equivalent to JavaScript's `s` (`dotAll`).
    flags: "",
    // Include JavaScript flag `g` (`global`) in the result.
    global: false,
    // Include JavaScript flag `d` (`hasIndices`) in the result.
    hasIndices: false,
    // Delay regex construction until first use if the transpiled pattern is at least this length.
    lazyCompileLength: Infinity,
    // JavaScript version used for generated regexes. Using `auto` detects the best value based on
    // your environment. Later targets allow faster processing, simpler generated source, and
    // support for additional features.
    target: "auto",
    // Disables minifications that simplify the pattern without changing the meaning.
    verbose: false,
    ...options,
    // Advanced options that override standard behavior, error checking, and flags when enabled.
    rules: {
      // Useful with TextMate grammars that merge backreferences across patterns.
      allowOrphanBackrefs: false,
      // Use ASCII `\b` and `\B`, which increases search performance of generated regexes.
      asciiWordBoundaries: false,
      // Allow unnamed captures and numbered calls (backreferences and subroutines) when using
      // named capture. This is Oniguruma option `ONIG_OPTION_CAPTURE_GROUP`; on by default in
      // `vscode-oniguruma`.
      captureGroup: false,
      // Change the recursion depth limit from Oniguruma's `20` to an integer from `2` to `20`.
      recursionLimit: 20,
      // `^` as `\A`; `$` as `\Z`. Improves search performance of generated regexes without
      // changing the meaning if searching line by line. This is Oniguruma option
      // `ONIG_OPTION_SINGLELINE`.
      singleline: false,
      ...options.rules
    }
  };
  // An explicit `target: undefined` overrides the default via the spread above, so treat it the
  // same as `auto` rather than leaving the target unresolved
  if (opts.target === void 0 || opts.target === "auto") {
    opts.target = envFlags.flagGroups ? "ES2025" : envFlags.unicodeSets ? "ES2024" : "ES2018";
  }
  return opts;
}
// src/unicode.js
import { slug } from "oniguruma-parser/parser";
// ASCII whitespace as a character class: tab through carriage return (U+0009-U+000D) plus space.
// The tab is written as an escape so the range start can't be mistaken for (or silently turned
// into) a space, which would make the range reversed and invalid.
var asciiSpaceChar = "[\t-\r ]";
// Chars that are returned as-is by `getIgnoreCaseMatchChars`, without any case variants
var CharsWithoutIgnoreCaseExpansion = /* @__PURE__ */ new Set([
  cp(0x130), // İ
  cp(0x131) // ı
]);
// Character class source used for Unicode-aware word chars
var defaultWordChar = r`[\p{L}\p{M}\p{N}\p{Pc}]`;
/**
 * Lists the chars that should match `char` case insensitively, without duplicates.
 * Order of the result: uppercase (only if it is a single code point), alternative uppercase,
 * titlecase, lowercase, alternative lowercase. Chars in `CharsWithoutIgnoreCaseExpansion` are
 * returned alone.
 * @param {string} char
 * @returns {Array<string>}
 */
function getIgnoreCaseMatchChars(char) {
  if (CharsWithoutIgnoreCaseExpansion.has(char)) {
    return [char];
  }
  const lower = char.toLowerCase();
  const upper = lower.toUpperCase();
  const matches = /* @__PURE__ */ new Set();
  // Skip uppercase forms that expand to multiple code points
  if ([...upper].length === 1) {
    matches.add(upper);
  }
  const upperVariants = [
    LowerToAlternativeUpperCaseMap.get(lower),
    LowerToTitleCaseMap.get(lower)
  ];
  for (const variant of upperVariants) {
    if (variant) {
      matches.add(variant);
    }
  }
  matches.add(lower);
  const altLower = LowerToAlternativeLowerCaseMap.get(lower);
  if (altLower) {
    matches.add(altLower);
  }
  return Array.from(matches);
}
// Map of Unicode property names, keyed by the result of `slug(name)` with the name as written
// below as the value. The list is a single whitespace-separated string (it is split on every
// whitespace char), so each long name and each alias on a line becomes its own entry. Used for
// lookups via `JsUnicodePropertyMap.has(slug2(value))` and passed to the parser as
// `unicodePropertyMap` in `parseFragment`.
var JsUnicodePropertyMap = /* @__PURE__ */ new Map(
`C Other
Cc Control cntrl
Cf Format
Cn Unassigned
Co Private_Use
Cs Surrogate
L Letter
LC Cased_Letter
Ll Lowercase_Letter
Lm Modifier_Letter
Lo Other_Letter
Lt Titlecase_Letter
Lu Uppercase_Letter
M Mark Combining_Mark
Mc Spacing_Mark
Me Enclosing_Mark
Mn Nonspacing_Mark
N Number
Nd Decimal_Number digit
Nl Letter_Number
No Other_Number
P Punctuation punct
Pc Connector_Punctuation
Pd Dash_Punctuation
Pe Close_Punctuation
Pf Final_Punctuation
Pi Initial_Punctuation
Po Other_Punctuation
Ps Open_Punctuation
S Symbol
Sc Currency_Symbol
Sk Modifier_Symbol
Sm Math_Symbol
So Other_Symbol
Z Separator
Zl Line_Separator
Zp Paragraph_Separator
Zs Space_Separator
ASCII
ASCII_Hex_Digit AHex
Alphabetic Alpha
Any
Assigned
Bidi_Control Bidi_C
Bidi_Mirrored Bidi_M
Case_Ignorable CI
Cased
Changes_When_Casefolded CWCF
Changes_When_Casemapped CWCM
Changes_When_Lowercased CWL
Changes_When_NFKC_Casefolded CWKCF
Changes_When_Titlecased CWT
Changes_When_Uppercased CWU
Dash
Default_Ignorable_Code_Point DI
Deprecated Dep
Diacritic Dia
Emoji
Emoji_Component EComp
Emoji_Modifier EMod
Emoji_Modifier_Base EBase
Emoji_Presentation EPres
Extended_Pictographic ExtPict
Extender Ext
Grapheme_Base Gr_Base
Grapheme_Extend Gr_Ext
Hex_Digit Hex
IDS_Binary_Operator IDSB
IDS_Trinary_Operator IDST
ID_Continue IDC
ID_Start IDS
Ideographic Ideo
Join_Control Join_C
Logical_Order_Exception LOE
Lowercase Lower
Math
Noncharacter_Code_Point NChar
Pattern_Syntax Pat_Syn
Pattern_White_Space Pat_WS
Quotation_Mark QMark
Radical
Regional_Indicator RI
Sentence_Terminal STerm
Soft_Dotted SD
Terminal_Punctuation Term
Unified_Ideograph UIdeo
Uppercase Upper
Variation_Selector VS
White_Space space
XID_Continue XIDC
XID_Start XIDS`.split(/\s/).map((p) => [slug(p), p])
);
// Lowercase char -> additional lowercase char that should match it case insensitively
var LowerToAlternativeLowerCaseMap = /* @__PURE__ */ new Map([
  ["s", cp(0x17F)], // s, ſ
  [cp(0x17F), "s"] // ſ, s
]);
// Lowercase char -> additional uppercase char that `toUpperCase` doesn't produce
var LowerToAlternativeUpperCaseMap = /* @__PURE__ */ new Map([
  [cp(0xDF), cp(0x1E9E)], // ß, ẞ
  [cp(0x6B), cp(0x212A)], // k, K (Kelvin)
  [cp(0xE5), cp(0x212B)], // å, Å (Angstrom)
  [cp(0x3C9), cp(0x2126)] // ω, Ω (Ohm)
]);
// Lowercase char -> titlecase char, built from individual code points and code point ranges
var LowerToTitleCaseMap = new Map([
  titleEntry(0x1C5),
  titleEntry(0x1C8),
  titleEntry(0x1CB),
  titleEntry(0x1F2),
  ...titleRange(0x1F88, 0x1F8F),
  ...titleRange(0x1F98, 0x1F9F),
  ...titleRange(0x1FA8, 0x1FAF),
  titleEntry(0x1FBC),
  titleEntry(0x1FCC),
  titleEntry(0x1FFC)
]);
// POSIX class name -> pattern source used to represent it. The values that contain `&&` or a
// nested class rely on character class intersection/nesting.
var PosixClassMap = /* @__PURE__ */ new Map(/* @__PURE__ */ Object.entries({
  alnum: r`[\p{Alpha}\p{Nd}]`,
  alpha: r`\p{Alpha}`,
  ascii: r`\p{ASCII}`,
  blank: r`[\p{Zs}\t]`,
  cntrl: r`\p{Cc}`,
  digit: r`\p{Nd}`,
  graph: r`[\P{space}&&\P{Cc}&&\P{Cn}&&\P{Cs}]`,
  lower: r`\p{Lower}`,
  print: r`[[\P{space}&&\P{Cc}&&\P{Cn}&&\P{Cs}]\p{Zs}]`,
  // Updated value from Onig 6.9.9; changed from Unicode `\p{punct}`
  punct: r`[\p{P}\p{S}]`,
  space: r`\p{space}`,
  upper: r`\p{Upper}`,
  word: r`[\p{Alpha}\p{M}\p{Nd}\p{Pc}]`,
  xdigit: r`\p{AHex}`
}));
/**
 * Builds the list of numbers from `start` to `end` (inclusive), in steps of 1.
 * @param {number} start
 * @param {number} end
 * @returns {Array<number>} Empty if `start` is greater than `end`.
 */
function range(start, end) {
  const values = [];
  let current = start;
  while (current <= end) {
    values.push(current);
    current++;
  }
  return values;
}
/**
 * Creates a `[lowercase, char]` pair for the char with the given code point.
 * @param {number} codePoint
 * @returns {[string, string]}
 */
function titleEntry(codePoint) {
  const titleChar = cp(codePoint);
  const lowerChar = titleChar.toLowerCase();
  return [lowerChar, titleChar];
}
/**
 * Creates `titleEntry` pairs for every code point from `start` to `end` (inclusive).
 * @param {number} start
 * @param {number} end
 * @returns {Array<[string, string]>}
 */
function titleRange(start, end) {
  const entries = [];
  for (const codePoint of range(start, end)) {
    entries.push(titleEntry(codePoint));
  }
  return entries;
}
// Names (short and long forms) of Unicode properties that are specific to one letter case.
// The `Changes_When_*` properties (and their aliases) could be included, but they're very rare.
// Some other properties include a handful of chars with specific cases only, but these chars are
// generally extreme edge cases and using such properties case insensitively generally produces
// undesired behavior anyway
var UnicodePropertiesWithSpecificCase = /* @__PURE__ */ new Set([
  ["Lower", "Lowercase"],
  ["Upper", "Uppercase"],
  ["Ll", "Lowercase_Letter"],
  ["Lt", "Titlecase_Letter"],
  ["Lu", "Uppercase_Letter"]
].flat());
// src/transform.js
import { createAlternative, createAssertion, createBackreference, createCapturingGroup, createCharacter, createCharacterClass, createCharacterSet, createGroup, createLookaroundAssertion, createQuantifier, createSubroutine, createUnicodeProperty, hasOnlyChild, parse, slug as slug2 } from "oniguruma-parser/parser";
import { traverse } from "oniguruma-parser/traverser";
/**
 * Transforms the given AST in place by running three traversals over it (`FirstPassVisitor`,
 * `SecondPassVisitor`, `ThirdPassVisitor`), each with its own state object.
 * @param {object} ast Root node; must have a `flags` node.
 * @param {object} [options] May set `accuracy`, `asciiWordBoundaries`, `avoidSubclass`, and
 *   `bestEffortTarget`.
 * @returns {object} The same `ast`, with `_originMap` and `_strategy` added.
 */
function transform(ast, options) {
  // A couple edge cases exist where options `accuracy` and `bestEffortTarget` are used:
  // - `CharacterSet` kind `text_segment` (`\X`): An exact representation would require heavy
  //   Unicode data; a best-effort approximation requires knowing the target.
  // - `CharacterSet` kind `posix` with values `graph` and `print`: Their complex Unicode
  //   representations would be hard to change to ASCII versions after the fact in the generator
  //   based on `target`/`accuracy`, so produce the appropriate structure here.
  const { accuracy, asciiWordBoundaries, avoidSubclass, bestEffortTarget } = {
    accuracy: "default",
    asciiWordBoundaries: false,
    avoidSubclass: false,
    bestEffortTarget: "ES2025",
    ...options
  };
  addParentProperties(ast);

  // These flags are read before the first pass runs
  const { digitIsAscii, spaceIsAscii, wordIsAscii } = ast.flags;
  const firstPassState = {
    accuracy,
    asciiWordBoundaries,
    avoidSubclass,
    flagDirectivesByAlt: /* @__PURE__ */ new Map(),
    jsGroupNameMap: /* @__PURE__ */ new Map(),
    minTargetEs2024: isMinTarget(bestEffortTarget, "ES2024"),
    passedLookbehind: false,
    strategy: null,
    // Subroutines can appear before the groups they ref, so collect reffed nodes for a second pass
    subroutineRefMap: /* @__PURE__ */ new Map(),
    supportedGNodes: /* @__PURE__ */ new Set(),
    digitIsAscii,
    spaceIsAscii,
    wordIsAscii
  };
  traverse(ast, FirstPassVisitor, firstPassState);

  // Read after the first pass
  const { dotAll, ignoreCase } = ast.flags;
  const globalFlags = { dotAll, ignoreCase };
  const secondPassState = {
    currentFlags: globalFlags,
    prevFlags: null,
    globalFlags,
    groupOriginByCopy: /* @__PURE__ */ new Map(),
    groupsByName: /* @__PURE__ */ new Map(),
    multiplexCapturesToLeftByRef: /* @__PURE__ */ new Map(),
    openRefs: /* @__PURE__ */ new Map(),
    reffedNodesByReferencer: /* @__PURE__ */ new Map(),
    subroutineRefMap: firstPassState.subroutineRefMap
  };
  traverse(ast, SecondPassVisitor, secondPassState);

  const { groupsByName, reffedNodesByReferencer, groupOriginByCopy } = secondPassState;
  const thirdPassState = {
    groupsByName,
    highestOrphanBackref: 0,
    numCapturesToLeft: 0,
    reffedNodesByReferencer
  };
  traverse(ast, ThirdPassVisitor, thirdPassState);

  // Extra data attached to the root node
  ast._originMap = groupOriginByCopy;
  ast._strategy = firstPassState.strategy;
  return ast;
}
// Visitor for the first traversal in `transform`. Each handler receives a path object (the node
// plus helpers such as `replaceWith` and `remove`) and the shared first-pass state. Handlers
// rewrite nodes in place or throw for constructs that aren't supported, and record data in the
// state (e.g. `subroutineRefMap`, `supportedGNodes`, `strategy`, `passedLookbehind`).
var FirstPassVisitor = {
// Replaces an absence function of kind `repeater` with a group that repeats (greedy, 0 or more
// times) a negative lookaround for the body followed by `\p{Any}`; other kinds throw
AbsenceFunction({ node, parent, replaceWith }) {
const { body, kind } = node;
if (kind === "repeater") {
const innerGroup = createGroup();
innerGroup.body[0].body.push(
// Insert own alts as `body`
createLookaroundAssertion({ negate: true, body }),
createUnicodeProperty("Any")
);
const outerGroup = createGroup();
outerGroup.body[0].body.push(
createQuantifier("greedy", 0, Infinity, innerGroup)
);
replaceWith(setParentDeep(outerGroup, parent), { traverse: true });
} else {
throw new Error(`Unsupported absence function "(?~|"`);
}
},
Alternative: {
// Registers this alternative's flag directives for every sibling alternative that follows it
enter({ node, parent, key }, { flagDirectivesByAlt }) {
const flagDirectives = node.body.filter((el) => el.kind === "flags");
for (let i = key + 1; i < parent.body.length; i++) {
const forwardSiblingAlt = parent.body[i];
getOrInsert(flagDirectivesByAlt, forwardSiblingAlt, []).push(...flagDirectives);
}
},
// If flag directives from preceding alternatives were registered for this alternative, wraps
// its contents in a group that carries their combined flag modifiers
exit({ node }, { flagDirectivesByAlt }) {
if (flagDirectivesByAlt.get(node)?.length) {
const flags = getCombinedFlagModsFromFlagNodes(flagDirectivesByAlt.get(node));
if (flags) {
const flagGroup = createGroup({ flags });
flagGroup.body[0].body = node.body;
node.body = [setParentDeep(flagGroup, node)];
}
}
}
},
// Rewrites or rejects assertions by kind; `string_start` and `string_end` are left unchanged
Assertion({ node, parent, key, container, root, remove, replaceWith }, state) {
const { kind, negate } = node;
const { asciiWordBoundaries, avoidSubclass, supportedGNodes, wordIsAscii } = state;
if (kind === "text_segment_boundary") {
throw new Error(`Unsupported text segment boundary "\\${negate ? "Y" : "y"}"`);
} else if (kind === "line_end") {
// Lookaround with two alternatives: string end, or a line feed
replaceWith(setParentDeep(createLookaroundAssertion({ body: [
createAlternative({ body: [createAssertion("string_end")] }),
createAlternative({ body: [createCharacter(10)] })
// `\n`
] }), parent));
} else if (kind === "line_start") {
replaceWith(setParentDeep(parseFragment(r`(?<=\A|\n(?!\z))`, { skipLookbehindValidation: true }), parent));
} else if (kind === "search_start") {
if (supportedGNodes.has(node)) {
// Nodes collected by `Regex.enter` are handled with the `sticky` flag instead
root.flags.sticky = true;
remove();
} else {
const prev = container[key - 1];
if (prev && isAlwaysNonZeroLength(prev)) {
// Replace with an empty negative lookaround
replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
} else if (avoidSubclass) {
throw new Error(r`Uses "\G" in a way that requires a subclass`);
} else {
replaceWith(setParent(createAssertion("string_start"), parent));
state.strategy = "clip_search";
}
}
} else if (kind === "string_end" || kind === "string_start") {
} else if (kind === "string_end_newline") {
replaceWith(setParentDeep(parseFragment(r`(?=\n?\z)`), parent));
} else if (kind === "word_boundary") {
// Only rewritten when neither `wordIsAscii` nor `asciiWordBoundaries` is set; the replacement
// is built from lookarounds that use `defaultWordChar`
if (!wordIsAscii && !asciiWordBoundaries) {
const b = `(?:(?<=${defaultWordChar})(?!${defaultWordChar})|(?<!${defaultWordChar})(?=${defaultWordChar}))`;
const B = `(?:(?<=${defaultWordChar})(?=${defaultWordChar})|(?<!${defaultWordChar})(?!${defaultWordChar}))`;
replaceWith(setParentDeep(parseFragment(negate ? B : b), parent));
}
} else {
throw new Error(`Unexpected assertion kind "${kind}"`);
}
},
// Renames string refs that aren't valid JS group names
Backreference({ node }, { jsGroupNameMap }) {
let { ref } = node;
if (typeof ref === "string" && !isValidJsGroupName(ref)) {
ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
node.ref = ref;
}
},
// Renames groups whose name isn't a valid JS group name, and registers the group in
// `subroutineRefMap` by number and (if named) by name
CapturingGroup({ node }, { jsGroupNameMap, subroutineRefMap }) {
let { name } = node;
if (name && !isValidJsGroupName(name)) {
name = getAndStoreJsGroupName(name, jsGroupNameMap);
node.name = name;
}
subroutineRefMap.set(node.number, node);
if (name) {
subroutineRefMap.set(name, node);
}
},
// Wraps a range whose parent is of kind `intersection` in its own character class
CharacterClassRange({ node, parent, replaceWith }) {
if (parent.kind === "intersection") {
const cc = createCharacterClass({ body: [node] });
replaceWith(setParentDeep(cc, parent), { traverse: true });
}
},
// Rewrites character sets by kind. The ASCII flags (`digitIsAscii`, `spaceIsAscii`,
// `wordIsAscii`) are checked first and apply when either `kind` or `value` matches
CharacterSet({ node, parent, replaceWith }, { accuracy, minTargetEs2024, digitIsAscii, spaceIsAscii, wordIsAscii }) {
const { kind, negate, value } = node;
if (digitIsAscii && (kind === "digit" || value === "digit")) {
replaceWith(setParent(createCharacterSet("digit", { negate }), parent));
return;
}
if (spaceIsAscii && (kind === "space" || value === "space")) {
replaceWith(setParentDeep(setNegate(parseFragment(asciiSpaceChar), negate), parent));
return;
}
if (wordIsAscii && (kind === "word" || value === "word")) {
replaceWith(setParent(createCharacterSet("word", { negate }), parent));
return;
}
if (kind === "any") {
replaceWith(setParent(createUnicodeProperty("Any"), parent));
} else if (kind === "digit") {
replaceWith(setParent(createUnicodeProperty("Nd", { negate }), parent));
} else if (kind === "dot") {
} else if (kind === "text_segment") {
if (accuracy === "strict") {
throw new Error(r`Use of "\X" requires non-strict accuracy`);
}
// Fallback emoji pattern, used when the target is below ES2024
const eBase = "\\p{Emoji}(?:\\p{EMod}|\\uFE0F\\u20E3?|[\\x{E0020}-\\x{E007E}]+\\x{E007F})?";
const emoji = r`\p{RI}{2}|${eBase}(?:\u200D${eBase})*`;
replaceWith(setParentDeep(parseFragment(
// Close approximation of an extended grapheme cluster; see: <unicode.org/reports/tr29/>
r`(?>\r\n|${minTargetEs2024 ? r`\p{RGI_Emoji}` : emoji}|\P{M}\p{M}*)`,
// Allow JS property `RGI_Emoji` through
{ skipPropertyNameValidation: true }
), parent));
} else if (kind === "hex") {
replaceWith(setParent(createUnicodeProperty("AHex", { negate }), parent));
} else if (kind === "newline") {
replaceWith(setParentDeep(parseFragment(negate ? "[^\n]" : "(?>\r\n?|[\n\v\f\x85\u2028\u2029])"), parent));
} else if (kind === "posix") {
if (!minTargetEs2024 && (value === "graph" || value === "print")) {
if (accuracy === "strict") {
throw new Error(`POSIX class "${value}" requires min target ES2024 or non-strict accuracy`);
}
// ASCII-only approximation, written as the contents of a character class
let ascii = {
graph: "!-~",
print: " -~"
}[value];
if (negate) {
// Complement of the range: everything below its start and everything above its end
ascii = `\0-${cp(ascii.codePointAt(0) - 1)}${cp(ascii.codePointAt(2) + 1)}-\u{10FFFF}`;
}
replaceWith(setParentDeep(parseFragment(`[${ascii}]`), parent));
} else {
replaceWith(setParentDeep(setNegate(parseFragment(PosixClassMap.get(value)), negate), parent));
}
} else if (kind === "property") {
// Names not found in `JsUnicodePropertyMap` get the key `sc`
if (!JsUnicodePropertyMap.has(slug2(value))) {
node.key = "sc";
}
} else if (kind === "space") {
replaceWith(setParent(createUnicodeProperty("space", { negate }), parent));
} else if (kind === "word") {
replaceWith(setParentDeep(setNegate(parseFragment(defaultWordChar), negate), parent));
} else {
throw new Error(`Unexpected character set kind "${kind}"`);
}
},
// Handles directives of kind `flags` and `keep`; other kinds throw
Directive({ node, parent, root, remove, replaceWith, removeAllPrevSiblings, removeAllNextSiblings }) {
const { kind, flags } = node;
if (kind === "flags") {
if (!flags.enable && !flags.disable) {
remove();
} else {
// Move everything after the directive into a group that carries the directive's flags
const flagGroup = createGroup({ flags });
flagGroup.body[0].body = removeAllNextSiblings();
replaceWith(setParentDeep(flagGroup, parent), { traverse: true });
}
} else if (kind === "keep") {
const firstAlt = root.body[0];
const hasWrapperGroup = root.body.length === 1 && // Not emulatable if within a `CapturingGroup`
hasOnlyChild(firstAlt, { type: "Group" }) && firstAlt.body[0].body.length === 1;
const topLevel = hasWrapperGroup ? firstAlt.body[0] : root;
// Only supported directly within the top level, and only if the top level has one alternative
if (parent.parent !== topLevel || topLevel.body.length > 1) {
throw new Error(r`Uses "\K" in a way that's unsupported`);
}
// Move everything before the directive into a lookbehind
const lookbehind = createLookaroundAssertion({ behind: true });
lookbehind.body[0].body = removeAllPrevSiblings();
replaceWith(setParentDeep(lookbehind, parent));
} else {
throw new Error(`Unexpected directive kind "${kind}"`);
}
},
// Rejects unsupported flags, removes flag properties listed below, adds JS-specific flag
// properties, and sets `options` on the parent node
Flags({ node, parent }) {
if (node.posixIsAscii) {
throw new Error('Unsupported flag "P"');
}
if (node.textSegmentMode === "word") {
throw new Error('Unsupported flag "y{w}"');
}
[
"digitIsAscii",
// Flag D
"extended",
// Flag x
"posixIsAscii",
// Flag P
"spaceIsAscii",
// Flag S
"wordIsAscii",
// Flag W
"textSegmentMode"
// Flag y{g} or y{w}
].forEach((f) => delete node[f]);
Object.assign(node, {
// JS flag g; no Onig equiv
global: false,
// JS flag d; no Onig equiv
hasIndices: false,
// JS flag m; no Onig equiv but its behavior is always on in Onig. Onig's only line break
// char is line feed, unlike JS, so this flag isn't used since it would produce inaccurate
// results (also allows `^` and `$` to be used in the generator for string start and end)
multiline: false,
// JS flag y; no Onig equiv, but used for `\G` emulation
sticky: node.sticky ?? false
// Note: Regex+ doesn't allow explicitly adding flags it handles implicitly, so leave out
// properties `unicode` (JS flag u) and `unicodeSets` (JS flag v). Keep the existing values
// for `ignoreCase` (flag i) and `dotAll` (JS flag s, but Onig flag m)
});
parent.options = {
disable: {
// Onig uses different rules for flag x than Regex+, so disable the implicit flag
x: true,
// Onig has no flag to control "named capture only" mode but contextually applies its
// behavior when named capturing is used, so disable Regex+'s implicit flag for it
n: true
},
force: {
// Always add flag v because we're generating an AST that relies on it (it enables JS
// support for Onig features nested classes, intersection, Unicode properties, etc.).
// However, the generator might disable flag v based on its `target` option
v: true
}
};
},
// Cleans up a group's flag modifiers: drops `extended`, drops an enabled flag that is also
// disabled, then removes `enable`/`disable`/`flags` if they end up empty
Group({ node }) {
if (!node.flags) {
return;
}
const { enable, disable } = node.flags;
enable?.extended && delete enable.extended;
disable?.extended && delete disable.extended;
enable?.dotAll && disable?.dotAll && delete enable.dotAll;
enable?.ignoreCase && disable?.ignoreCase && delete enable.ignoreCase;
enable && !Object.keys(enable).length && delete node.flags.enable;
disable && !Object.keys(disable).length && delete node.flags.disable;
!node.flags.enable && !node.flags.disable && delete node.flags;
},
// Records that a lookbehind was encountered (checked in `Regex.exit`)
LookaroundAssertion({ node }, state) {
const { kind } = node;
if (kind === "lookbehind") {
state.passedLookbehind = true;
}
},
// Replaces a callout of kind `fail` with an empty negative lookaround; other kinds throw
NamedCallout({ node, parent, replaceWith }) {
const { kind } = node;
if (kind === "fail") {
replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
} else {
throw new Error(`Unsupported named callout "(*${kind.toUpperCase()}"`);
}
},
// Wraps a directly nested quantifier in a group
Quantifier({ node }) {
if (node.body.type === "Quantifier") {
const group = createGroup();
group.body[0].body.push(node.body);
node.body = setParentDeep(group, node);
}
},
Regex: {
// Collects the leading `\G` nodes of the top-level alternatives. They are added to
// `supportedGNodes` only if at least one alternative has a leading `\G` and no alternative
// lacks one. An alternative that consists solely of a `search_start` node is emptied and
// counts as neither
enter({ node }, { supportedGNodes }) {
const leadingGs = [];
let hasAltWithLeadG = false;
let hasAltWithoutLeadG = false;
for (const alt of node.body) {
if (alt.body.length === 1 && alt.body[0].kind === "search_start") {
alt.body.pop();
} else {
const leadingG = getLeadingG(alt.body);
if (leadingG) {
hasAltWithLeadG = true;
Array.isArray(leadingG) ? leadingGs.push(...leadingG) : leadingGs.push(leadingG);
} else {
hasAltWithoutLeadG = true;
}
}
}
if (hasAltWithLeadG && !hasAltWithoutLeadG) {
leadingGs.forEach((g) => supportedGNodes.add(g));
}
},
// With strict accuracy, throws if a strategy was set and a lookbehind was encountered
exit(_, { accuracy, passedLookbehind, strategy }) {
if (accuracy === "strict" && passedLookbehind && strategy) {
throw new Error(r`Uses "\G" in a way that requires non-strict accuracy`);
}
}
},
// Renames string refs that aren't valid JS group names
Subroutine({ node }, { jsGroupNameMap }) {
let { ref } = node;
if (typeof ref === "string" && !isValidJsGroupName(ref)) {
ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
node.ref = ref;
}
}
};
// Visitor for the second traversal in `transform`. Expands subroutines into copies of the groups
// they reference, turns re-entrant expansions into recursion nodes, tracks which capturing groups
// each backreference may refer to, and records duplicate group names for later removal.
var SecondPassVisitor = {
  // Stores, for each non-orphan backreference, the captures to its left registered for its ref
  Backreference({ node }, { multiplexCapturesToLeftByRef, reffedNodesByReferencer }) {
    const { orphan, ref } = node;
    if (!orphan) {
      reffedNodesByReferencer.set(node, [...multiplexCapturesToLeftByRef.get(ref).map(({ node: node2 }) => node2)]);
    }
  },
  CapturingGroup: {
    enter({
      node,
      parent,
      replaceWith
    }, {
      groupOriginByCopy,
      groupsByName,
      multiplexCapturesToLeftByRef,
      openRefs,
      reffedNodesByReferencer
    }) {
      const origin = groupOriginByCopy.get(node);
      // A copy made by subroutine expansion whose number is still open is replaced with a
      // recursion node that refers to the open group
      if (origin && openRefs.has(node.number)) {
        const recursion2 = setParent(createRecursion(node.number), parent);
        reffedNodesByReferencer.set(recursion2, openRefs.get(node.number));
        replaceWith(recursion2);
        return;
      }
      openRefs.set(node.number, node);
      // The list for the number is reset; the list for the name is kept if it already exists
      multiplexCapturesToLeftByRef.set(node.number, []);
      if (node.name) {
        getOrInsert(multiplexCapturesToLeftByRef, node.name, []);
      }
      const multiplexNodes = multiplexCapturesToLeftByRef.get(node.name ?? node.number);
      for (let i = 0; i < multiplexNodes.length; i++) {
        const multiplex = multiplexNodes[i];
        if (
          // This group is from subroutine expansion, and there's a multiplex value from either the
          // origin node or a prior subroutine expansion group with the same origin
          origin === multiplex.node || origin && origin === multiplex.origin ||
          // This group is not from subroutine expansion, and it comes after a subroutine expansion
          // group that refers to this group
          node === multiplex.origin
        ) {
          multiplexNodes.splice(i, 1);
          break;
        }
      }
      multiplexCapturesToLeftByRef.get(node.number).push({ node, origin });
      if (node.name) {
        multiplexCapturesToLeftByRef.get(node.name).push({ node, origin });
      }
      if (node.name) {
        const groupsWithSameName = getOrInsert(groupsByName, node.name, /* @__PURE__ */ new Map());
        let hasDuplicateNameToRemove = false;
        if (origin) {
          // Copies from subroutine expansion never keep their name
          hasDuplicateNameToRemove = true;
        } else {
          // Marked if an already recorded group with the same name keeps its name
          for (const groupInfo of groupsWithSameName.values()) {
            if (!groupInfo.hasDuplicateNameToRemove) {
              hasDuplicateNameToRemove = true;
              break;
            }
          }
        }
        groupsByName.get(node.name).set(node, { node, hasDuplicateNameToRemove });
      }
    },
    exit({ node }, { openRefs }) {
      openRefs.delete(node.number);
    }
  },
  Group: {
    // `state.prevFlags` is used as a stack with one entry per open group. A single slot isn't
    // enough: with nested groups, the inner group's `enter` would overwrite the value that the
    // outer group needs to restore on `exit`, leaving the outer group's flags active after it
    // has closed (and making `Subroutine` compare against the wrong flags)
    enter({ node }, state) {
      (state.prevFlags ??= []).push(state.currentFlags);
      if (node.flags) {
        state.currentFlags = getNewCurrentFlags(state.currentFlags, node.flags);
      }
    },
    exit(_, state) {
      state.currentFlags = state.prevFlags.pop();
    }
  },
  Subroutine({ node, parent, replaceWith }, state) {
    const { isRecursive, ref } = node;
    if (isRecursive) {
      // Find the enclosing capturing group that the recursion refers to, by name or number
      let reffed = parent;
      while (reffed = reffed.parent) {
        if (reffed.type === "CapturingGroup" && (reffed.name === ref || reffed.number === ref)) {
          break;
        }
      }
      state.reffedNodesByReferencer.set(node, reffed);
      return;
    }
    const reffedGroupNode = state.subroutineRefMap.get(ref);
    const isGlobalRecursion = ref === 0;
    const expandedSubroutine = isGlobalRecursion ? createRecursion(0) : (
      // The reffed group might itself contain subroutines, which are expanded during sub-traversal
      cloneCapturingGroup(reffedGroupNode, state.groupOriginByCopy, null)
    );
    let replacement = expandedSubroutine;
    if (!isGlobalRecursion) {
      // Flags that apply where the reffed group is defined, derived from its ancestor groups
      const reffedGroupFlagMods = getCombinedFlagModsFromFlagNodes(getAllParents(
        reffedGroupNode,
        (p) => p.type === "Group" && !!p.flags
      ));
      const reffedGroupFlags = reffedGroupFlagMods ? getNewCurrentFlags(state.globalFlags, reffedGroupFlagMods) : state.globalFlags;
      // If they differ from the flags active here, wrap the copy in a group that sets them
      if (!areFlagsEqual(reffedGroupFlags, state.currentFlags)) {
        replacement = createGroup({
          flags: getFlagModsFromFlags(reffedGroupFlags)
        });
        replacement.body[0].body.push(expandedSubroutine);
      }
    }
    replaceWith(setParentDeep(replacement, parent), { traverse: !isGlobalRecursion });
  }
};
// Visitor for the third traversal in `transform`. Renumbers capturing groups, removes group
// names marked as duplicates, resolves backreferences and recursion refs to the new numbers, and
// appends empty captures needed by orphan backreferences.
var ThirdPassVisitor = {
  Backreference({ node, parent, replaceWith }, state) {
    if (node.orphan) {
      // Only track the highest orphan ref; the missing captures are added in `Regex.exit`
      state.highestOrphanBackref = Math.max(state.highestOrphanBackref, node.ref);
      return;
    }
    const candidates = state.reffedNodesByReferencer.get(node);
    const participants = candidates.filter((candidate) => canParticipateWithNode(candidate, node));
    switch (participants.length) {
      case 0:
        // No capture can participate, so use an empty negative lookaround
        replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
        break;
      case 1:
        node.ref = participants[0].number;
        break;
      default: {
        // Multiple captures: an atomic group with one backreference per capture, with the
        // captures in reverse order
        const alternatives = [];
        for (let i = participants.length - 1; i >= 0; i--) {
          alternatives.push(createAlternative({
            body: [createBackreference(participants[i].number)]
          }));
        }
        const multiplexGroup = createGroup({ atomic: true, body: alternatives });
        replaceWith(setParentDeep(multiplexGroup, parent));
      }
    }
  },
  CapturingGroup({ node }, state) {
    // Number groups in the order they're visited
    state.numCapturesToLeft += 1;
    node.number = state.numCapturesToLeft;
    const { name } = node;
    if (name && state.groupsByName.get(name).get(node).hasDuplicateNameToRemove) {
      delete node.name;
    }
  },
  Regex: {
    exit({ node }, state) {
      // Append empty captures to the last alternative until the highest orphan ref exists
      let remaining = Math.max(state.highestOrphanBackref - state.numCapturesToLeft, 0);
      while (remaining > 0) {
        node.body.at(-1).body.push(createCapturingGroup());
        remaining--;
      }
    }
  },
  Subroutine({ node }, state) {
    // Only recursion nodes other than whole-pattern recursion (ref `0`) are updated
    if (node.isRecursive && node.ref !== 0) {
      node.ref = state.reffedNodesByReferencer.get(node).number;
    }
  }
};
/**
 * Sets a `parent` property on every node visited when traversing from `root`, using the parent
 * reported by the traverser.
 * @param {object} root
 */
function addParentProperties(root) {
  const parentLinker = {
    "*": ({ node, parent }) => {
      node.parent = parent;
    }
  };
  traverse(root, parentLinker);
}
/**
 * Checks whether two flag objects have the same `dotAll` and `ignoreCase` values.
 * @param {{dotAll: boolean, ignoreCase: boolean}} a
 * @param {{dotAll: boolean, ignoreCase: boolean}} b
 * @returns {boolean}
 */
function areFlagsEqual(a, b) {
  return ["dotAll", "ignoreCase"].every((flag) => a[flag] === b[flag]);
}
function canParticipateWithNode(capture, node) {
let rightmostPoint = node;
do {
if (rightmostPoint.type === "Regex") {
return false;
}
if (rightmostPoint.type === "Alternative") {
continue;
}
if (rightmostPoint === capture) {
return false;
}
const kidsOfParent = getKids(rightmostPoint.parent);
for (const kid of kidsOfParent) {
if (kid === rightmostPoint) {
break;
}
if (kid === capture || isAncestorOf(kid, capture)) {
return true;
}
}
} while (rightmostPoint = rightmostPoint.parent);
throw new Error("Unexpected path");
}
/**
 * Recursively copies a node tree, rewiring `parent` properties to point at the copies and
 * recording the origin of every copied `CapturingGroup` in `originMap`.
 * @param {object|Array} obj Node, or array of nodes, to copy.
 * @param {Map} originMap Receives copy -> origin entries. If `obj` is itself a registered copy,
 *   its own origin is used.
 * @param {object|Array|null} up Copy of the container that holds `obj`.
 * @param {object|null} [up2] Copy of the container that holds `up`; used as the parent when `up`
 *   is an array.
 * @returns {object|Array} The copy.
 */
function cloneCapturingGroup(obj, originMap, up, up2) {
  const copy = Array.isArray(obj) ? [] : {};
  Object.entries(obj).forEach(([prop, val]) => {
    if (prop === "parent") {
      // Arrays aren't nodes, so skip over them to the node that owns the array
      copy.parent = Array.isArray(up) ? up2 : up;
      return;
    }
    if (val && typeof val === "object") {
      copy[prop] = cloneCapturingGroup(val, originMap, copy, up);
      return;
    }
    if (prop === "type" && val === "CapturingGroup") {
      originMap.set(copy, originMap.get(obj) ?? obj);
    }
    copy[prop] = val;
  });
  return copy;
}
/**
 * Creates a subroutine node for `ref` that is flagged with `isRecursive: true`.
 * @param {string|number} ref
 * @returns {object}
 */
function createRecursion(ref) {
  return Object.assign(createSubroutine(ref), { isRecursive: true });
}
/**
 * Collects the ancestors of `node` by following `parent`, nearest first.
 * @param {object} node
 * @param {(ancestor: object) => boolean} [filterFn] If given, only ancestors it accepts are kept.
 * @returns {Array<object>}
 */
function getAllParents(node, filterFn) {
  const ancestors = [];
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    const isWanted = !filterFn || filterFn(ancestor);
    if (isWanted) {
      ancestors.push(ancestor);
    }
  }
  return ancestors;
}
/**
 * Returns the JS-compatible replacement for a group name, creating and storing it in `map` on
 * first use. The generated name is prefixed with `$`, the number of names stored so far, and
 * `_`, and has every char that isn't allowed in its position replaced with `_`.
 * @param {string} name
 * @param {Map<string, string>} map
 * @returns {string}
 */
function getAndStoreJsGroupName(name, map) {
  if (!map.has(name)) {
    const sanitized = name.replace(/^[^$_\p{IDS}]|[^$\u200C\u200D\p{IDC}]/ug, "_");
    map.set(name, `$${map.size}_${sanitized}`);
  }
  return map.get(name);
}
/**
 * Merges the `dotAll`/`ignoreCase` modifiers of the given nodes, processed in order. Enabling a
 * flag clears an earlier disable of it; disabling a flag leaves an earlier enable in place.
 * @param {Array<{flags: {enable?: object, disable?: object}}>} flagNodes
 * @returns {{enable?: object, disable?: object} | null} Null if no flag ends up enabled or
 *   disabled.
 */
function getCombinedFlagModsFromFlagNodes(flagNodes) {
  const enable = {};
  const disable = {};
  for (const { flags } of flagNodes) {
    for (const prop of ["dotAll", "ignoreCase"]) {
      if (flags.enable?.[prop]) {
        delete disable[prop];
        enable[prop] = true;
      }
      if (flags.disable?.[prop]) {
        disable[prop] = true;
      }
    }
  }
  const combined = {};
  if (Object.keys(enable).length) {
    combined.enable = enable;
  }
  if (Object.keys(disable).length) {
    combined.disable = disable;
  }
  return combined.enable || combined.disable ? combined : null;
}
/**
 * Converts absolute `dotAll`/`ignoreCase` values into flag modifiers: flags that are on are
 * listed under `enable` and flags that are off under `disable`.
 * @param {{dotAll: boolean, ignoreCase: boolean}} flags
 * @returns {{enable?: object, disable?: object}}
 */
function getFlagModsFromFlags({ dotAll, ignoreCase }) {
  const flagValues = [["dotAll", dotAll], ["ignoreCase", ignoreCase]];
  const toMods = (entries) => Object.fromEntries(entries.map(([flag]) => [flag, true]));
  const on = flagValues.filter(([, isOn]) => isOn);
  const off = flagValues.filter(([, isOn]) => !isOn);
  const mods = {};
  if (on.length) {
    mods.enable = toMods(on);
  }
  if (off.length) {
    mods.disable = toMods(off);
  }
  return mods;
}
/**
 * Returns the children of a node based on its `body`.
 * @param {object} node
 * @returns {Array|null} `body` itself if it's an array, a one-element array if it's any other
 *   truthy value, otherwise null.
 * @throws {Error} If `node` is falsy.
 */
function getKids(node) {
  if (!node) {
    throw new Error("Node expected");
  }
  const { body } = node;
  if (Array.isArray(body)) {
    return body;
  }
  return body ? [body] : null;
}
/**
 * Finds the `search_start` node(s) that lead the given elements, skipping elements that are
 * always zero length. For a leading group, every one of its alternatives must itself have a
 * leading `search_start`.
 * @param {Array<object>} els
 * @returns {object|Array<object>|null} A single node, a list of nodes (for groups), or null if
 *   there's no leading `search_start`.
 */
function getLeadingG(els) {
  const isRelevant = (el) => (
    el.kind === "search_start" ||
    isLoneGLookaround(el, { negate: false }) ||
    !isAlwaysZeroLength(el)
  );
  const first = els.find(isRelevant);
  if (!first) {
    return null;
  }
  if (first.kind === "search_start") {
    return first;
  }
  switch (first.type) {
    case "LookaroundAssertion":
      // A lookaround can only be found here if its sole content is `search_start`
      return first.body[0].body[0];
    case "CapturingGroup":
    case "Group": {
      const collected = [];
      for (const alt of first.body) {
        const leading = getLeadingG(alt.body);
        if (!leading) {
          return null;
        }
        collected.push(...(Array.isArray(leading) ? leading : [leading]));
      }
      return collected;
    }
    default:
      return null;
  }
}
/**
 * Checks whether `descendant` appears anywhere below `node`, following the children reported by
 * `getKids`.
 * @param {object} node
 * @param {object} descendant
 * @returns {boolean}
 */
function isAncestorOf(node, descendant) {
  const kids = getKids(node) ?? [];
  return kids.some((kid) => kid === descendant || isAncestorOf(kid, descendant));
}
/**
 * Checks whether a node is of a type treated as always matching zero chars: `Assertion`,
 * `Directive`, or `LookaroundAssertion`.
 * @param {{type: string}} node
 * @returns {boolean}
 */
function isAlwaysZeroLength({ type }) {
  const zeroLengthTypes = ["Assertion", "Directive", "LookaroundAssertion"];
  return zeroLengthTypes.includes(type);
}
/**
 * Checks whether a node always matches at least one char: a `Character`, `CharacterClass`, or
 * `CharacterSet`, or a quantifier with a truthy `min` whose body is one of those types.
 * @param {object} node
 * @returns {*} True or false, except that a quantifier with a falsy `min` gives back that `min`
 *   value.
 */
function isAlwaysNonZeroLength(node) {
  const consumingTypes = ["Character", "CharacterClass", "CharacterSet"];
  if (consumingTypes.includes(node.type)) {
    return true;
  }
  if (node.type !== "Quantifier") {
    return false;
  }
  return node.min && consumingTypes.includes(node.body.type);
}
/**
 * Checks whether a node is a lookaround with a single alternative whose only child is a
 * `search_start` assertion.
 * @param {object} node
 * @param {{negate?: boolean|null}} [options] If `negate` isn't null, the lookaround's `negate`
 *   value must equal it.
 * @returns {boolean}
 */
function isLoneGLookaround(node, options) {
  const { negate } = { negate: null, ...options };
  if (node.type !== "LookaroundAssertion") {
    return false;
  }
  if (negate !== null && node.negate !== negate) {
    return false;
  }
  if (node.body.length !== 1) {
    return false;
  }
  return hasOnlyChild(node.body[0], {
    type: "Assertion",
    kind: "search_start"
  });
}
/**
 * Checks whether `name` consists of a valid first char (`$`, `_`, or `ID_Start`) followed only
 * by valid continuation chars (`$`, U+200C, U+200D, or `ID_Continue`).
 * @param {string} name
 * @returns {boolean}
 */
function isValidJsGroupName(name) {
  const validName = /^[$_\p{IDS}][$\u200C\u200D\p{IDC}]*$/u;
  return validName.test(name);
}
/**
 * Parses a pattern and returns a node for it: the pattern's only node if it has a single
 * alternative with at most one element, otherwise a new group that holds its alternatives.
 * @param {string} pattern
 * @param {object} [options] Extra parser options; `unicodePropertyMap` is always overridden.
 * @returns {object}
 */
function parseFragment(pattern, options) {
  const parserOptions = {
    ...options,
    // Providing a custom set of Unicode property names avoids converting some JS Unicode
    // properties (ex: `\p{Alpha}`) to Onig POSIX classes
    unicodePropertyMap: JsUnicodePropertyMap
  };
  const { body: alts } = parse(pattern, parserOptions);
  const isSingleNode = alts.length <= 1 && alts[0].body.length <= 1;
  if (isSingleNode) {
    return alts[0].body[0];
  }
  return createGroup({ body: alts });
}
/**
 * Sets `negate` on the given node.
 * @param {object} node
 * @param {boolean} negate
 * @returns {object} The same node.
 */
function setNegate(node, negate) {
  return Object.assign(node, { negate });
}
/**
Assigns `parent` on the given node (in place) and hands the same node back for chaining.
@param {object} node
@param {object} parent
@returns The same `node`
*/
function setParent(node, parent) {
  return Object.assign(node, { parent });
}
/**
Runs `addParentProperties` on the node, then assigns `parent` on the node itself (so any `parent`
value written to the node by that call is overwritten) and returns the node.
@param {object} node
@param {object} parent
@returns The same `node`
*/
function setParentDeep(node, parent) {
  addParentProperties(node);
  return Object.assign(node, { parent });
}
// src/generate.js
import { createAlternative as createAlternative2, createCharacter as createCharacter2, createGroup as createGroup2 } from "oniguruma-parser/parser";
import { traverse as traverse2 } from "oniguruma-parser/traverser";
/**
Generates a pattern string, flags string, and options object from a transformed AST by
dispatching each node to the matching handler in `generator`.
@param {object} ast Root node; its `body`, `flags`, `options`, and `_originMap` are read. The AST's
  `options` object (including its nested `force`/`disable` objects) is left unmodified.
@param {object} [options] Passed through `getOptions`; `target`, `accuracy`, `verbose`, and
  `rules.recursionLimit` are used here
@returns {{
  pattern: string;
  flags: string;
  options: object;
  _captureTransfers: Map<number, Array<number>>;
  _hiddenCaptures: Array<number>;
}}
@throws {Error} If `rules.recursionLimit` isn't an integer from 2 to 20, or if a node has a type
  with no handler in `generator`
*/
function generate(ast, options) {
  const opts = getOptions(options);
  const minTargetEs2024 = isMinTarget(opts.target, "ES2024");
  const minTargetEs2025 = isMinTarget(opts.target, "ES2025");
  const recursionLimit = opts.rules.recursionLimit;
  if (!Number.isInteger(recursionLimit) || recursionLimit < 2 || recursionLimit > 20) {
    throw new Error("Invalid recursionLimit; use 2-20");
  }
  // Without ES2025 flag groups, pre-scan the AST to learn whether nodes with case are used under
  // case-insensitive and/or case-sensitive matching; `null` means "not seen"
  let hasCaseInsensitiveNode = null;
  let hasCaseSensitiveNode = null;
  if (!minTargetEs2025) {
    // Stack of the effective flag i state; the visitor pushes/pops it when entering/exiting groups
    const iStack = [ast.flags.ignoreCase];
    traverse2(ast, FlagModifierVisitor, {
      getCurrentModI: () => iStack.at(-1),
      popModI() {
        iStack.pop();
      },
      pushModI(isIOn) {
        iStack.push(isIOn);
      },
      setHasCasedChar() {
        if (iStack.at(-1)) {
          hasCaseInsensitiveNode = true;
        } else {
          hasCaseSensitiveNode = true;
        }
      }
    });
  }
  const appliedGlobalFlags = {
    dotAll: ast.flags.dotAll,
    // - Turn global flag i on if a case insensitive node was used and no case sensitive nodes were
    //   used (to avoid unnecessary node expansion).
    // - Turn global flag i off if a case sensitive node was used (since case sensitivity can't be
    //   forced without the use of ES2025 flag groups)
    ignoreCase: !!((ast.flags.ignoreCase || hasCaseInsensitiveNode) && !hasCaseSensitiveNode)
  };
  let lastNode = ast;
  const state = {
    accuracy: opts.accuracy,
    appliedGlobalFlags,
    captureMap: /* @__PURE__ */ new Map(),
    currentFlags: {
      dotAll: ast.flags.dotAll,
      ignoreCase: ast.flags.ignoreCase
    },
    inCharClass: false,
    lastNode,
    originMap: ast._originMap,
    recursionLimit,
    useAppliedIgnoreCase: !!(!minTargetEs2025 && hasCaseInsensitiveNode && hasCaseSensitiveNode),
    useFlagMods: minTargetEs2025,
    useFlagV: minTargetEs2024,
    verbose: opts.verbose
  };
  // Dispatches a node to its handler; `state.lastNode` always holds the node passed to the
  // previous `gen` call while a handler runs
  function gen(node) {
    state.lastNode = lastNode;
    lastNode = node;
    const fn = throwIfNullish(generator[node.type], `Unexpected node type "${node.type}"`);
    return fn(node, state, gen);
  }
  const result = {
    pattern: ast.body.map(gen).join("|"),
    // Could reset `lastNode` at this point via `lastNode = ast`, but it isn't needed by flags
    flags: gen(ast.flags),
    options: { ...ast.options }
  };
  if (!minTargetEs2024) {
    // Switch from flag v to u. The spread above is shallow, so `force` and `disable` are still the
    // AST's own objects; replace them with edited copies rather than mutating the caller's AST
    const force = { ...result.options.force };
    delete force.v;
    result.options.force = force;
    result.options.disable = { ...result.options.disable, v: true };
    result.options.unicodeSetsPlugin = null;
  }
  result._captureTransfers = /* @__PURE__ */ new Map();
  result._hiddenCaptures = [];
  // Flatten the per-capture data recorded during generation into the two result collections
  state.captureMap.forEach((value, key) => {
    if (value.hidden) {
      result._hiddenCaptures.push(key);
    }
    if (value.transferTo) {
      getOrInsert(result._captureTransfers, value.transferTo, []).push(key);
    }
  });
  return result;
}
/**
Visitor used to find out under which flag i state nodes with case are used. The `state` argument
must provide `getCurrentModI`, `pushModI`, `popModI`, and `setHasCasedChar`.
*/
var FlagModifierVisitor = {
  "*": {
    // On entering any group, push the flag i state that applies inside it
    enter({ node }, state) {
      if (!isAnyGroup(node)) {
        return;
      }
      const inheritedModI = state.getCurrentModI();
      let nextModI = inheritedModI;
      if (node.flags) {
        nextModI = getNewCurrentFlags({ ignoreCase: inheritedModI }, node.flags).ignoreCase;
      }
      state.pushModI(nextModI);
    },
    // On leaving any group, restore the enclosing flag i state
    exit({ node }, state) {
      if (!isAnyGroup(node)) {
        return;
      }
      state.popModI();
    }
  },
  // Backreferences are always reported as having case
  Backreference(_, state) {
    state.setHasCasedChar();
  },
  Character({ node }, state) {
    const char = cp(node.value);
    if (!charHasCase(char)) {
      return;
    }
    state.setHasCasedChar();
  },
  // The range's children aren't visited; the range is reported only if some char in it has a case
  // variant outside the range
  CharacterClassRange({ node, skip }, state) {
    skip();
    const casesOutsideRange = getCasesOutsideCharClassRange(node, { firstOnly: true });
    if (casesOutsideRange.length) {
      state.setHasCasedChar();
    }
  },
  CharacterSet({ node }, state) {
    const isProperty = node.kind === "property";
    if (isProperty && UnicodePropertiesWithSpecificCase.has(node.value)) {
      state.setHasCasedChar();
    }
  }
};
// Maps each node type to a function that returns the pattern text for that node. Every handler is
// called by `gen` (in `generate`) as `handler(node, state, gen)`, where `state` is the shared
// generation state and `gen` generates any node (used to recurse into children).
var generator = {
/**
Concatenates the generated text of each child node.
@param {AlternativeNode} node
*/
Alternative({ body }, _, gen) {
return body.map(gen).join("");
},
/**
Emits `$`, `^`, `\b`, or `\B`. Any other assertion kind is unexpected at this stage and throws.
@param {AssertionNode} node
*/
Assertion({ kind, negate }) {
if (kind === "string_end") {
return "$";
}
if (kind === "string_start") {
return "^";
}
if (kind === "word_boundary") {
return negate ? r`\B` : r`\b`;
}
throw new Error(`Unexpected assertion kind "${kind}"`);
},
/**
Emits a numbered backreference (`\N`). Named refs throw.
@param {BackreferenceNode} node
*/
Backreference({ ref }, state) {
if (typeof ref !== "number") {
throw new Error("Unexpected named backref in transformed AST");
}
// With strict accuracy and no flag groups: refuse a backref that is currently case-insensitive
// when the referenced group was recorded (by `CapturingGroup`) as case-sensitive
if (!state.useFlagMods && state.accuracy === "strict" && state.currentFlags.ignoreCase && !state.captureMap.get(ref).ignoreCase) {
throw new Error("Use of case-insensitive backref to case-sensitive group requires target ES2025 or non-strict accuracy");
}
return "\\" + ref;
},
/**
Emits a (possibly named) capturing group and records data about it in `state.captureMap`, keyed
by the group's number.
@param {CapturingGroupNode} node
*/
CapturingGroup(node, state, gen) {
const { body, name, number } = node;
// Remember the flag i state the group was generated under (read by `Backreference`)
const data = { ignoreCase: state.currentFlags.ignoreCase };
// A node with an entry in `originMap` is marked hidden; if its number is higher than its
// origin's, its value is also flagged for transfer to the origin's number
const origin = state.originMap.get(node);
if (origin) {
data.hidden = true;
if (number > origin.number) {
data.transferTo = origin.number;
}
}
state.captureMap.set(number, data);
return `(${name ? `?<${name}>` : ""}${body.map(gen).join("|")})`;
},
/**
Emits a single character, escaped if `getCharEscape` calls for it. When flag i is being applied
manually (`state.useAppliedIgnoreCase`) and is currently on, a char with case is expanded to its
case variants (wrapped in a class when outside a class and there is more than one variant).
@param {CharacterNode} node
*/
Character({ value }, state) {
const char = cp(value);
const escaped = getCharEscape(value, {
// `state.lastNode` is the node passed to the previous `gen` call
escDigit: state.lastNode.type === "Backreference",
inCharClass: state.inCharClass,
useFlagV: state.useFlagV
});
if (escaped !== char) {
return escaped;
}
if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && charHasCase(char)) {
const cases = getIgnoreCaseMatchChars(char);
return state.inCharClass ? cases.join("") : cases.length > 1 ? `[${cases.join("")}]` : cases[0];
}
return char;
},
/**
Emits a character class. Depending on state, a top-level class with negated child classes is
rewritten as a group of alternatives, and a nested class may be emitted without its own brackets.
Throws for class intersection or (non-unwrappable) nested classes when flag v isn't in use.
NOTE: The group rewrite mutates the AST (`node.body`, `node.parent`, and the `parent` of the
moved child classes).
@param {CharacterClassNode} node
*/
CharacterClass(node, state, gen) {
const { kind, negate, parent } = node;
let { body } = node;
if (kind === "intersection" && !state.useFlagV) {
throw new Error("Use of class intersection requires min target ES2024");
}
// Environment bug workaround: replace all literal hyphens in the class with a single hyphen
// placed at the start of the class
if (envFlags.bugFlagVLiteralHyphenIsRange && state.useFlagV && body.some(isLiteralHyphen)) {
body = [createCharacter2(45), ...body.filter((kid) => !isLiteralHyphen(kid))];
}
// Bracketed form: children are joined with `&&` for intersection, otherwise concatenated
const genClass = () => `[${negate ? "^" : ""}${body.map(gen).join(kind === "intersection" ? "&&" : "")}]`;
// Top-level class (not nested within another class)
if (!state.inCharClass) {
if (
// Already established `kind !== 'intersection'` if `!state.useFlagV`; don't check again
(!state.useFlagV || envFlags.bugNestedClassIgnoresNegation) && !negate
) {
const negatedChildClasses = body.filter(
(kid) => kid.type === "CharacterClass" && kid.kind === "union" && kid.negate
);
// Pull negated child classes out of the class, and emit a group with one alternative for
// the remaining class (if it still has contents) followed by one per negated child class
if (negatedChildClasses.length) {
const group = createGroup2();
const groupFirstAlt = group.body[0];
group.parent = parent;
groupFirstAlt.parent = group;
body = body.filter((kid) => !negatedChildClasses.includes(kid));
node.body = body;
if (body.length) {
node.parent = groupFirstAlt;
groupFirstAlt.body.push(node);
} else {
// Nothing is left in the class, so drop the group's (empty) first alternative
group.body.pop();
}
negatedChildClasses.forEach((cc) => {
const newAlt = createAlternative2({ body: [cc] });
cc.parent = newAlt;
newAlt.parent = group;
group.body.push(newAlt);
});
return gen(group);
}
}
// Flag the state so that descendants are generated using in-class rules
state.inCharClass = true;
const result = genClass();
state.inCharClass = false;
return result;
}
// Nested class: emit only its contents (no brackets) when the conditions below hold
const firstEl = body[0];
if (
// Already established that the parent is a char class via `inCharClass`; don't check again
kind === "union" && !negate && firstEl && // Allows many nested classes to work with `target` ES2018 which doesn't support nesting
((!state.useFlagV || !state.verbose) && parent.kind === "union" && !(envFlags.bugFlagVLiteralHyphenIsRange && state.useFlagV) || !state.verbose && parent.kind === "intersection" && // JS doesn't allow intersection with union or ranges
body.length === 1 && firstEl.type !== "CharacterClassRange")
) {
return body.map(gen).join("");
}
if (!state.useFlagV && parent.type === "CharacterClass") {
throw new Error("Use of nested character class requires min target ES2024");
}
return genClass();
},
/**
Emits `min-max`. When flag i is being applied manually and is currently on, also appends the
case variants (as chars or ranges) of the range's chars that fall outside the range.
@param {CharacterClassRangeNode} node
*/
CharacterClassRange(node, state) {
const min = node.min.value;
const max = node.max.value;
const escOpts = {
escDigit: false,
inCharClass: true,
useFlagV: state.useFlagV
};
const minStr = getCharEscape(min, escOpts);
const maxStr = getCharEscape(max, escOpts);
// A set, so identical generated strings are emitted once
const extraChars = /* @__PURE__ */ new Set();
if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase) {
const charsOutsideRange = getCasesOutsideCharClassRange(node);
const ranges = getCodePointRangesFromChars(charsOutsideRange);
// Each value is either a single code point or a `[start, end]` pair
ranges.forEach((value) => {
extraChars.add(
Array.isArray(value) ? `${getCharEscape(value[0], escOpts)}-${getCharEscape(value[1], escOpts)}` : getCharEscape(value, escOpts)
);
});
}
return `${minStr}-${maxStr}${[...extraChars].join("")}`;
},
/**
Emits the dot, `\d`/`\D`, `\p{…}`/`\P{…}`, or `\w`/`\W`. Any other kind throws.
@param {CharacterSetNode} node
*/
CharacterSet({ kind, negate, value, key }, state) {
if (kind === "dot") {
// With dotAll currently on: `.` can be used if flag s is applied globally or flag groups are
// in use; otherwise `[^]` is used. With dotAll off: any char except line feed
return state.currentFlags.dotAll ? state.appliedGlobalFlags.dotAll || state.useFlagMods ? "." : "[^]" : (
// Onig's only line break char is line feed, unlike JS
r`[^\n]`
);
}
if (kind === "digit") {
return negate ? r`\D` : r`\d`;
}
if (kind === "property") {
// A property listed in `UnicodePropertiesWithSpecificCase` can't be used while flag i is
// being applied manually and is currently on
if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && UnicodePropertiesWithSpecificCase.has(value)) {
throw new Error(`Unicode property "${value}" can't be case-insensitive when other chars have specific case`);
}
return `${negate ? r`\P` : r`\p`}{${key ? `${key}=` : ""}${value}}`;
}
if (kind === "word") {
return negate ? r`\W` : r`\w`;
}
throw new Error(`Unexpected character set kind "${kind}"`);
},
/**
Emits the flags string; only `i`, `s`, and `y` are generated here. Flag i comes from
`state.appliedGlobalFlags` rather than from the node.
@param {FlagsNode} node
*/
Flags(node, state) {
return (
// The transformer should never turn on the properties for flags d, g, m since Onig doesn't
// have equivs. Flag m is never used since Onig uses different line break chars than JS
// (node.hasIndices ? 'd' : '') +
// (node.global ? 'g' : '') +
// (node.multiline ? 'm' : '') +
(state.appliedGlobalFlags.ignoreCase ? "i" : "") + (node.dotAll ? "s" : "") + (node.sticky ? "y" : "")
);
},
/**
Emits a noncapturing group, applying the group's flag modifiers to `state.currentFlags` while
its contents are generated. The group wrapper is omitted when it isn't needed.
@param {GroupNode} node
*/
Group({ atomic: atomic2, body, flags, parent }, state, gen) {
// Saved so the enclosing flag state can be restored after generating the contents
const currentFlags = state.currentFlags;
if (flags) {
state.currentFlags = getNewCurrentFlags(currentFlags, flags);
}
const contents = body.map(gen).join("|");
// Emit bare contents only if: not verbose, there's a single alternative, the group isn't
// quantified, it isn't atomic, and it has no flags that will be emitted as flag modifiers
const result = !state.verbose && body.length === 1 && // Single alt
parent.type !== "Quantifier" && !atomic2 && (!state.useFlagMods || !flags) ? contents : `(?${getGroupPrefix(atomic2, flags, state.useFlagMods)}${contents})`;
state.currentFlags = currentFlags;
return result;
},
/**
Emits `(?=…)`, `(?!…)`, `(?<=…)`, or `(?<!…)`.
@param {LookaroundAssertionNode} node
*/
LookaroundAssertion({ body, kind, negate }, _, gen) {
const prefix = `${kind === "lookahead" ? "" : "<"}${negate ? "!" : "="}`;
return `(?${prefix}${body.map(gen).join("|")})`;
},
/**
Emits the quantified node followed by its quantifier.
@param {QuantifierNode} node
*/
Quantifier(node, _, gen) {
return gen(node.body) + getQuantifierStr(node);
},
/**
Emits recursion syntax that includes the recursion limit: `(?R=N)` for ref `0`, else
`\g<ref&R=N>`. Non-recursive subroutines throw.
@param {SubroutineNode & {isRecursive: true}} node
*/
Subroutine({ isRecursive, ref }, state) {
if (!isRecursive) {
throw new Error("Unexpected non-recursive subroutine in transformed AST");
}
const limit = state.recursionLimit;
return ref === 0 ? `(?R=${limit})` : r`\g<${ref}&R=${limit}>`;
}
};
// Chars that `getCharEscape` backslash-escapes outside of character classes
var BaseEscapeChars = /* @__PURE__ */ new Set("$()*+.?[\\]^{|}");
// Chars that `getCharEscape` backslash-escapes within character classes when flag v isn't used.
// Literal `[` doesn't require escaping with flag u, but this can help work around regex source
// linters and regex syntax processors that expect unescaped `[` to create a nested class
var CharClassEscapeChars = /* @__PURE__ */ new Set("-\\]^[");
// Chars that `getCharEscape` backslash-escapes within character classes when flag v is used
var CharClassEscapeCharsFlagV = /* @__PURE__ */ new Set([
  ..."()-/[\\]^{|}",
  // Double punctuators; also includes already-listed `-` and `^`
  ..."!#$%&*+,.:;<=>?@`~"
]);
// Code points that `getCharEscape` always replaces with a fixed escape sequence
var CharCodeEscapeMap = /* @__PURE__ */ new Map([
  [0x9, r`\t`], // horizontal tab
  [0xA, r`\n`], // line feed
  [0xB, r`\v`], // vertical tab
  [0xC, r`\f`], // form feed
  [0xD, r`\r`], // carriage return
  [0x2028, r`\u2028`], // line separator
  [0x2029, r`\u2029`], // paragraph separator
  [0xFEFF, r`\uFEFF`] // ZWNBSP/BOM
]);
// Matches a string that consists of exactly one code point with the Unicode `Cased` property
var casedRe = /^\p{Cased}$/u;
/**
Tests whether the given string is a single code point that has the Unicode `Cased` property.
@param {string} char
@returns {boolean}
*/
function charHasCase(char) {
  return casedRe.exec(char) !== null;
}
/**
Collects the case variants (per `getIgnoreCaseMatchChars`) of the chars in a class range that
fall outside of the range itself.
@param {object} node Range node; `node.min.value` and `node.max.value` are the code point bounds
@param {{firstOnly?: boolean}} [options] With `firstOnly`, scanning stops after the first char in
  the range that has any outside variants
@returns {Array<string>} Chars outside the range; can include duplicates
*/
function getCasesOutsideCharClassRange(node, options) {
  const firstOnly = !!options?.firstOnly;
  const { value: min } = node.min;
  const { value: max } = node.max;
  const found = [];
  // Certain very wide ranges are reported as having no outside variants without scanning them.
  // NOTE(review): presumably these ranges already contain all case variants of their chars; confirm
  const skipScan = min < 65 && (max === 65535 || max >= 131071) || min === 65536 && max >= 131071;
  if (skipScan) {
    return found;
  }
  for (let codePoint = min; codePoint <= max; codePoint++) {
    const char = cp(codePoint);
    if (charHasCase(char)) {
      const variantsOutside = getIgnoreCaseMatchChars(char).filter((variant) => {
        const variantCodePoint = variant.codePointAt(0);
        return variantCodePoint < min || variantCodePoint > max;
      });
      if (variantsOutside.length) {
        found.push(...variantsOutside);
        if (firstOnly) {
          break;
        }
      }
    }
  }
  return found;
}
/**
Returns the pattern text for a code point: a fixed escape from `CharCodeEscapeMap`, a numeric
escape (`\xNN` or `\u{N…}`), a backslash-escaped char, or the char itself.
@param {number} codePoint
@param {{
  escDigit?: boolean;
  inCharClass?: boolean;
  useFlagV?: boolean;
}} options `escDigit` forces a numeric escape for ASCII digits; `inCharClass` and `useFlagV`
  select which set of chars gets a backslash
@returns {string}
*/
function getCharEscape(codePoint, { escDigit, inCharClass, useFlagV }) {
  if (CharCodeEscapeMap.has(codePoint)) {
    return CharCodeEscapeMap.get(codePoint);
  }
  const needsNumericEscape = (
    // Control chars, etc.; condition modeled on the Chrome developer console's display for strings
    codePoint < 32 ||
    codePoint > 126 && codePoint < 160 ||
    // Unicode planes 4-16; unassigned, special purpose, and private use area
    codePoint > 262143 ||
    // Avoid corrupting a preceding backref by immediately following it with a literal digit
    escDigit && isDigitCharCode(codePoint)
  );
  if (needsNumericEscape) {
    const hex = codePoint.toString(16).toUpperCase();
    return codePoint > 255 ? `\\u{${hex}}` : `\\x${hex.padStart(2, "0")}`;
  }
  let escapeChars = BaseEscapeChars;
  if (inCharClass) {
    escapeChars = useFlagV ? CharClassEscapeCharsFlagV : CharClassEscapeChars;
  }
  const char = cp(codePoint);
  return escapeChars.has(char) ? `\\${char}` : char;
}
/**
Converts chars to their code points, sorted ascending, with each run of consecutive code points
collapsed into a `[start, end]` pair. Code points that aren't part of a run are given as plain
numbers. Input isn't deduplicated, so a repeated char can appear both alone and within a pair.
@param {Array<string>} chars
@returns {Array<number | [number, number]>}
*/
function getCodePointRangesFromChars(chars) {
  const sorted = chars.map((char) => char.codePointAt(0)).sort((x, y) => x - y);
  const result = [];
  let runStart = null;
  sorted.forEach((current, index) => {
    const runContinues = sorted[index + 1] === current + 1;
    if (runContinues) {
      if (runStart === null) {
        runStart = current;
      }
      return;
    }
    result.push(runStart === null ? current : [runStart, current]);
    runStart = null;
  });
  return result;
}
/**
Builds the text that follows `(?` at the start of a noncapturing group: `>` for atomic groups,
otherwise optional flag modifiers (ex: `i-s`) followed by `:`.
@param {boolean} atomic2 Whether the group is atomic
@param {{enable?: object; disable?: object} | null | undefined} flagMods The group's flag modifiers
@param {boolean} useFlagMods Whether flag modifiers should be emitted
@returns {string}
*/
function getGroupPrefix(atomic2, flagMods, useFlagMods) {
  if (atomic2) {
    return ">";
  }
  if (!(flagMods && useFlagMods)) {
    return ":";
  }
  const { enable, disable } = flagMods;
  const parts = [
    enable?.ignoreCase ? "i" : "",
    enable?.dotAll ? "s" : "",
    // The separator is emitted whenever a `disable` object exists, even if it's empty
    disable ? "-" : "",
    disable?.ignoreCase ? "i" : "",
    disable?.dotAll ? "s" : ""
  ];
  return `${parts.join("")}:`;
}
/**
Builds quantifier syntax for a quantifier node: `?`, `*`, `+`, `{n}`, `{n,}`, or `{n,m}`, followed
by `?` for lazy or `+` for possessive quantifiers.
@param {{kind: 'greedy' | 'lazy' | 'possessive'; max: number; min: number}} node
@returns {string}
*/
function getQuantifierStr({ kind, max, min }) {
  const suffixByKind = {
    greedy: "",
    lazy: "?",
    possessive: "+"
  };
  const getBase = () => {
    if (!min) {
      if (max === 1) {
        return "?";
      }
      if (max === Infinity) {
        return "*";
      }
    }
    if (min === 1 && max === Infinity) {
      return "+";
    }
    if (min === max) {
      return `{${min}}`;
    }
    return `{${min},${max === Infinity ? "" : max}}`;
  };
  return getBase() + suffixByKind[kind];
}
/**
Reports whether a node's type is `CapturingGroup`, `Group`, or `LookaroundAssertion`.
@param {{type: string}} node
@returns {boolean}
*/
function isAnyGroup({ type }) {
  switch (type) {
    case "CapturingGroup":
    case "Group":
    case "LookaroundAssertion":
      return true;
    default:
      return false;
  }
}
/**
Reports whether a char code falls strictly between 47 and 58; i.e., is an ASCII digit `0`-`9`.
@param {number} value
@returns {boolean}
*/
function isDigitCharCode(value) {
  const beforeZero = 47;
  const afterNine = 58;
  return beforeZero < value && value < afterNine;
}
/**
Reports whether a node is a `Character` whose value is code point 45 (`-`).
@param {{type: string; value?: number}} node
@returns {boolean}
*/
function isLiteralHyphen(node) {
  const { type, value } = node;
  if (type !== "Character") {
    return false;
  }
  return value === 45;
}
// src/subclass.js
/**
`RegExp` subclass that post-processes native match results:
- Captures marked hidden are removed from results (which renumbers the captures after them).
- Captures marked with `transferTo` overwrite the value of another capture when they participate.
- With strategy `clip_search`, a search from a nonzero `lastIndex` (flag g or y) runs against the
  string sliced at `lastIndex`, and the match details are then offset to the full string.
- With option `lazyCompile`, the native regex is built from an empty pattern, and the real pattern
  is compiled into a separate instance on the first call to `exec`.
*/
var EmulatedRegExp = class _EmulatedRegExp extends RegExp {
  /**
  Keyed by the capture's number in the native (unprocessed) match.
  @type {Map<number, {
    hidden?: true;
    transferTo?: number;
  }>}
  */
  #captureMap = /* @__PURE__ */ new Map();
  /**
  The instance whose native matcher runs searches. It's `this` when the instance's own native
  pattern is the real pattern, and `null` until the first `exec` when compilation is deferred.
  @type {RegExp | EmulatedRegExp | null}
  */
  #compiled = null;
  /**
  The real pattern, which can differ from the native source of a lazily compiled instance.
  @type {string}
  */
  #pattern;
  /**
  Capture numbers mapped to group names; built on demand by `createNameMap`.
  @type {Map<number, string>?}
  */
  #nameMap = null;
  /**
  @type {string?}
  */
  #strategy = null;
  /**
  Can be used to serialize the instance.
  @type {EmulatedRegExpOptions}
  */
  rawOptions = {};
  // Override the getter with one that works with lazy-compiled regexes
  get source() {
    return this.#pattern || "(?:)";
  }
  /**
  @overload
  @param {string} pattern
  @param {string} [flags]
  @param {EmulatedRegExpOptions} [options]
  */
  /**
  @overload
  @param {EmulatedRegExp} pattern
  @param {string} [flags]
  */
  constructor(pattern, flags, options) {
    // Whether this instance's own native pattern is a placeholder, so that the real pattern must
    // be compiled separately on first use
    let deferCompile = !!options?.lazyCompile;
    if (pattern instanceof RegExp) {
      // Copying a regex; also happens without user involvement since built-in methods like
      // `matchAll` and `split` construct a copy via the regex's constructor
      if (options) {
        throw new Error("Cannot provide options when copying a regexp");
      }
      const re = pattern;
      super(re, flags);
      this.#pattern = re.source;
      if (re instanceof _EmulatedRegExp) {
        this.#captureMap = re.#captureMap;
        this.#nameMap = re.#nameMap;
        this.#strategy = re.#strategy;
        this.rawOptions = re.rawOptions;
        // The native constructor copies the source regex's native pattern, which is empty if the
        // source was lazily compiled (whether or not it has been compiled since). In that case
        // this copy can't use itself as the matcher and must compile from `#pattern` instead
        deferCompile = re.#compiled !== re;
      }
    } else {
      const opts = {
        hiddenCaptures: [],
        strategy: null,
        transfers: [],
        ...options
      };
      super(deferCompile ? "" : pattern, flags);
      this.#pattern = pattern;
      this.#captureMap = createCaptureMap(opts.hiddenCaptures, opts.transfers);
      this.#strategy = opts.strategy;
      this.rawOptions = options ?? {};
    }
    if (!deferCompile) {
      this.#compiled = this;
    }
  }
  /**
  Called internally by all String/RegExp methods that use regexes.
  @override
  @param {string} str
  @returns {RegExpExecArray?}
  */
  exec(str) {
    // Deferred compilation: build the real matcher now, from the real pattern and without the
    // `lazyCompile` option
    if (!this.#compiled) {
      const { lazyCompile, ...rest } = this.rawOptions;
      this.#compiled = new _EmulatedRegExp(this.#pattern, this.flags, rest);
    }
    const useLastIndex = this.global || this.sticky;
    const pos = this.lastIndex;
    if (this.#strategy === "clip_search" && useLastIndex && pos) {
      // Search the remainder of the string as if it were the whole string, then shift the match
      // details and `lastIndex` so they're relative to the full string
      this.lastIndex = 0;
      const match = this.#execCore(str.slice(pos));
      if (match) {
        adjustMatchDetailsForOffset(match, pos, str, this.hasIndices);
        this.lastIndex += pos;
      }
      return match;
    }
    return this.#execCore(str);
  }
  /**
  Adds support for hidden and transfer captures.
  @param {string} str
  @returns {RegExpExecArray?}
  @throws {Error} If a participating capture's transfer target is hidden or unknown
  */
  #execCore(str) {
    // Run the native `exec` on the compiled instance, keeping `lastIndex` in sync both ways
    this.#compiled.lastIndex = this.lastIndex;
    const match = super.exec.call(this.#compiled, str);
    this.lastIndex = this.#compiled.lastIndex;
    if (!match || !this.#captureMap.size) {
      return match;
    }
    // Rebuild the captures on the match array (and on `indices`), skipping hidden captures
    const matchCopy = [...match];
    match.length = 1;
    let indicesCopy;
    if (this.hasIndices) {
      indicesCopy = [...match.indices];
      match.indices.length = 1;
    }
    // Native capture numbers mapped to their position in the rebuilt match (`null` if hidden)
    const mappedNums = [0];
    for (let i = 1; i < matchCopy.length; i++) {
      const { hidden, transferTo } = this.#captureMap.get(i) ?? {};
      if (hidden) {
        mappedNums.push(null);
      } else {
        mappedNums.push(match.length);
        match.push(matchCopy[i]);
        if (this.hasIndices) {
          match.indices.push(indicesCopy[i]);
        }
      }
      // Only captures that participated in the match are transferred
      if (transferTo && matchCopy[i] !== void 0) {
        const to = mappedNums[transferTo];
        if (!to) {
          throw new Error(`Invalid capture transfer to "${transferTo}"`);
        }
        match[to] = matchCopy[i];
        if (this.hasIndices) {
          match.indices[to] = indicesCopy[i];
        }
        // Keep the named group for the transfer target (if it has a name) in sync
        if (match.groups) {
          if (!this.#nameMap) {
            this.#nameMap = createNameMap(this.source);
          }
          const name = this.#nameMap.get(transferTo);
          if (name) {
            match.groups[name] = matchCopy[i];
            if (this.hasIndices) {
              match.indices.groups[name] = indicesCopy[i];
            }
          }
        }
      }
    }
    return match;
  }
};
/**
Shifts a match's positional details forward by `offset` and replaces its `input` (in place).
With `hasIndices`, every defined entry in `match.indices` and `match.indices.groups` is replaced
with a new shifted `[start, end]` pair.
@param {RegExpExecArray} match
@param {number} offset
@param {string} input
@param {boolean} hasIndices
*/
function adjustMatchDetailsForOffset(match, offset, input, hasIndices) {
  match.index += offset;
  match.input = input;
  if (!hasIndices) {
    return;
  }
  const shift = (span) => [span[0] + offset, span[1] + offset];
  const { indices } = match;
  indices.forEach((span, i) => {
    // Nonparticipating captures have no span
    if (span) {
      indices[i] = shift(span);
    }
  });
  const namedIndices = indices.groups;
  if (!namedIndices) {
    return;
  }
  for (const name of Object.keys(namedIndices)) {
    const span = namedIndices[name];
    if (span) {
      namedIndices[name] = shift(span);
    }
  }
}
/**
Builds a map from capture number to that capture's `hidden` and/or `transferTo` data.
@param {Iterable<number>} hiddenCaptures Numbers of captures to mark as hidden
@param {Iterable<[number, Iterable<number>]>} transfers Pairs of a target capture number and the
  numbers of the captures whose values transfer to it
@returns {Map<number, {
  hidden?: true;
  transferTo?: number;
}>}
*/
function createCaptureMap(hiddenCaptures, transfers) {
  const hiddenEntries = Array.from(hiddenCaptures, (num) => [num, { hidden: true }]);
  const captureMap = /* @__PURE__ */ new Map(hiddenEntries);
  for (const transfer of transfers) {
    const [target, sources] = transfer;
    for (const source of sources) {
      const data = getOrInsert(captureMap, source, {});
      data.transferTo = target;
    }
  }
  return captureMap;
}
function createNameMap(pattern) {
const re = /(?<capture>\((?:\?<(?![=!])(?<name>[^>]+)>|(?!\?)))|\\?./gsu;
const map = /* @__PURE__ */ new Map();
let numCharClassesOpen = 0;
let numCaptures = 0;
let match;
while (match = re.exec(pattern)) {
const { 0: m, groups: { capture, name } } = match;
if (m === "[") {
numCharClassesOpen++;
} else if (!numCharClassesOpen) {
if (capture) {
numCaptures++;
if (name) {
map.set(numCaptures, name);
}
}
} else if (m === "]") {
numCharClassesOpen--;
}
}
return map;
}
// src/index.js
import { parse as parse2 } from "oniguruma-parser/parser";
import { atomic, possessive } from "regex/internals";
import { recursion } from "regex-recursion";
function toRegExp(pattern, options) {
const d = toRegExpDetails(pattern, options);
if (d.options) {
return new EmulatedRegExp(d.pattern, d.flags, d.options);
}
return new RegExp(d.pattern, d.flags);
}
/**
Runs the full pipeline on a pattern (`parse2`, `transform`, `generate`, then the `recursion`,
`possessive`, and `atomic` post-processors) and returns the details needed to construct the regex.
@param {string} pattern
@param {object} [options] Passed through `getOptions`
@returns {{
  pattern: string;
  flags: string;
  options?: {
    hiddenCaptures?: Array<number>;
    transfers?: Array<[number, Array<number>]>;
    strategy?: string;
    lazyCompile?: true;
  };
}} `options` is included only when at least one of its properties applies, and never with
  `avoidSubclass`
@throws {Error} If `avoidSubclass` is used with a `lazyCompileLength` other than `Infinity`
*/
function toRegExpDetails(pattern, options) {
  const opts = getOptions(options);
  const { rules } = opts;
  const onigurumaAst = parse2(pattern, {
    flags: opts.flags,
    normalizeUnknownPropertyNames: true,
    rules: {
      captureGroup: rules.captureGroup,
      singleline: rules.singleline
    },
    skipBackrefValidation: rules.allowOrphanBackrefs,
    unicodePropertyMap: JsUnicodePropertyMap
  });
  const regexPlusAst = transform(onigurumaAst, {
    accuracy: opts.accuracy,
    asciiWordBoundaries: rules.asciiWordBoundaries,
    avoidSubclass: opts.avoidSubclass,
    bestEffortTarget: opts.target
  });
  const generated = generate(regexPlusAst, opts);
  // Each post-processor takes the pattern from the prior step; capture data is threaded from
  // `recursion` to `atomic`
  const afterRecursion = recursion(generated.pattern, {
    captureTransfers: generated._captureTransfers,
    hiddenCaptures: generated._hiddenCaptures,
    mode: "external"
  });
  const afterPossessive = possessive(afterRecursion.pattern);
  const afterAtomic = atomic(afterPossessive.pattern, {
    captureTransfers: afterRecursion.captureTransfers,
    hiddenCaptures: afterRecursion.hiddenCaptures
  });
  const indicesFlag = opts.hasIndices ? "d" : "";
  const globalFlag = opts.global ? "g" : "";
  const unicodeFlag = generated.options.disable.v ? "u" : "v";
  const details = {
    pattern: afterAtomic.pattern,
    flags: `${indicesFlag}${globalFlag}${generated.flags}${unicodeFlag}`
  };
  if (opts.avoidSubclass) {
    if (opts.lazyCompileLength !== Infinity) {
      throw new Error("Lazy compilation requires subclass");
    }
    return details;
  }
  const hiddenCaptures = afterAtomic.hiddenCaptures.sort((a, b) => a - b);
  const transfers = Array.from(afterAtomic.captureTransfers);
  const strategy = regexPlusAst._strategy;
  const lazyCompile = details.pattern.length >= opts.lazyCompileLength;
  // Collect only the subclass options that apply
  const subclassOptions = {};
  if (hiddenCaptures.length) {
    subclassOptions.hiddenCaptures = hiddenCaptures;
  }
  if (transfers.length) {
    subclassOptions.transfers = transfers;
  }
  if (strategy) {
    subclassOptions.strategy = strategy;
  }
  if (lazyCompile) {
    subclassOptions.lazyCompile = lazyCompile;
  }
  if (Object.keys(subclassOptions).length) {
    details.options = subclassOptions;
  }
  return details;
}
export {
EmulatedRegExp,
toRegExp,
toRegExpDetails
};
//# sourceMappingURL=index.js.map