1877 lines
59 KiB
JavaScript
1877 lines
59 KiB
JavaScript
// src/utils.js
|
||
// Shorthand for building a string from one or more Unicode code points.
var cp = String.fromCodePoint;
// Shorthand template tag that keeps backslashes verbatim (handy for regex source text).
var r = String.raw;
|
||
// Feature detection for the current JavaScript engine. Each probe runs once at module load and
// records whether constructing a regex that uses the feature succeeds.
var envFlags = {
  // `true` if a flag group / pattern modifier such as `(?i:)` can be compiled
  flagGroups: (() => {
    try {
      new RegExp("(?i:)");
    } catch {
      return false;
    }
    return true;
  })(),
  // `true` if JS flag `v` (`unicodeSets`) is accepted by the `RegExp` constructor
  unicodeSets: (() => {
    try {
      new RegExp("", "v");
    } catch {
      return false;
    }
    return true;
  })()
};
|
||
// Engine-bug probe, only meaningful when flag `v` is available: the flag is set to `true` when
// the class `[\d\-a]` (escaped, literal hyphen after a shorthand) fails to compile with flag `v`.
// Without flag `v` support the probe is skipped and the flag is `false`.
envFlags.bugFlagVLiteralHyphenIsRange = envFlags.unicodeSets ? (() => {
  try {
    new RegExp(r`[\d\-a]`, "v");
  } catch {
    return true;
  }
  return false;
})() : false;
|
||
// Engine-bug probe: with flag `v`, `[[^a]]` must not match "a". If it does, negation of the
// nested class is being ignored. Evaluates to `false` when flag `v` is unsupported.
envFlags.bugNestedClassIgnoresNegation = envFlags.unicodeSets && new RegExp("[[^a]]", "v").test("a");
|
||
/**
 * Computes the `dotAll`/`ignoreCase` state that results from applying flag modifiers on top of
 * the flags currently in effect.
 *
 * For each flag, `disable` wins over everything; otherwise the flag is on if it is enabled by the
 * modifiers or was already on in `current`.
 *
 * @param {{dotAll?: boolean, ignoreCase?: boolean}} current Flags in effect before the modifiers.
 * @param {{enable?: object, disable?: object}} mods Flag modifiers; either part may be absent.
 * @returns {{dotAll: boolean, ignoreCase: boolean}} A new flags object with strict booleans.
 */
function getNewCurrentFlags(current, { enable, disable }) {
  return {
    dotAll: !disable?.dotAll && !!(enable?.dotAll || current.dotAll),
    ignoreCase: !disable?.ignoreCase && !!(enable?.ignoreCase || current.ignoreCase)
  };
}
|
||
/**
 * Returns the value stored under `key`, first storing `defaultValue` if the map has no entry for
 * that key yet.
 *
 * @param {Map} map Map to read from (and possibly write to).
 * @param {*} key Key to look up.
 * @param {*} defaultValue Value inserted when `key` is missing.
 * @returns {*} The existing or newly inserted value.
 */
function getOrInsert(map, key, defaultValue) {
  if (!map.has(key)) {
    map.set(key, defaultValue);
  }
  return map.get(key);
}
|
||
/**
 * Checks whether `target` is at least as new as `min`, comparing their numeric values in
 * `EsVersion`.
 *
 * @param {string} target Key of `EsVersion` (e.g. "ES2024").
 * @param {string} min Key of `EsVersion` that is the minimum required.
 * @returns {boolean}
 */
function isMinTarget(target, min) {
  return EsVersion[target] >= EsVersion[min];
}
|
||
/**
 * Returns `value` unchanged unless it is `null` or `undefined`, in which case it throws.
 *
 * @param {*} value Value to check.
 * @param {string} [msg] Error message; defaults to "Value expected".
 * @returns {*} The same `value`.
 * @throws {Error} If `value` is nullish.
 */
function throwIfNullish(value, msg) {
  if (value == null) {
    throw new Error(msg ?? "Value expected");
  }
  return value;
}
|
||
|
||
// src/options.js
|
||
// Numeric value for each supported target name, so targets can be compared with `>=`.
var EsVersion = {
  ES2025: 2025,
  ES2024: 2024,
  ES2018: 2018
};
|
||
// Accepted values for the `target` option; each key maps to itself.
var Target = (
  /** @type {const} */
  {
    auto: "auto",
    ES2025: "ES2025",
    ES2024: "ES2024",
    ES2018: "ES2018"
  }
);
|
||
/**
 * Validates user options and merges them over the defaults.
 *
 * Top-level options are shallow-merged; `rules` is merged separately so that a partial `rules`
 * object doesn't discard the other rule defaults. A `target` of "auto" is resolved from the
 * engine features detected in `envFlags`.
 *
 * @param {object} [options] User options; must be a plain object.
 * @returns {object} Options with every default filled in and `target` resolved.
 * @throws {Error} If `options` isn't a plain object or `target` isn't a known target name.
 */
function getOptions(options = {}) {
  if ({}.toString.call(options) !== "[object Object]") {
    throw new Error("Unexpected options");
  }
  // Own-property check, so inherited keys like "toString" or "constructor" aren't accepted
  if (options.target !== void 0 && !{}.hasOwnProperty.call(Target, options.target)) {
    throw new Error(`Unexpected target "${options.target}"`);
  }
  const opts = {
    // Sets the level of emulation rigor/strictness.
    accuracy: "default",
    // Disables advanced emulation that relies on returning a `RegExp` subclass, resulting in
    // certain patterns not being emulatable.
    avoidSubclass: false,
    // Oniguruma flags; a string with `i`, `m`, `x`, `D`, `S`, `W`, `y{g}` in any order (all
    // optional). Oniguruma's `m` is equivalent to JavaScript's `s` (`dotAll`).
    flags: "",
    // Include JavaScript flag `g` (`global`) in the result.
    global: false,
    // Include JavaScript flag `d` (`hasIndices`) in the result.
    hasIndices: false,
    // Delay regex construction until first use if the transpiled pattern is at least this length.
    lazyCompileLength: Infinity,
    // JavaScript version used for generated regexes. Using `auto` detects the best value based on
    // your environment. Later targets allow faster processing, simpler generated source, and
    // support for additional features.
    target: "auto",
    // Disables minifications that simplify the pattern without changing the meaning.
    verbose: false,
    ...options,
    // Advanced options that override standard behavior, error checking, and flags when enabled.
    rules: {
      // Useful with TextMate grammars that merge backreferences across patterns.
      allowOrphanBackrefs: false,
      // Use ASCII `\b` and `\B`, which increases search performance of generated regexes.
      asciiWordBoundaries: false,
      // Allow unnamed captures and numbered calls (backreferences and subroutines) when using
      // named capture. This is Oniguruma option `ONIG_OPTION_CAPTURE_GROUP`; on by default in
      // `vscode-oniguruma`.
      captureGroup: false,
      // Change the recursion depth limit from Oniguruma's `20` to an integer `2`–`20`.
      recursionLimit: 20,
      // `^` as `\A`; `$` as `\Z`. Improves search performance of generated regexes without changing
      // the meaning if searching line by line. This is Oniguruma option `ONIG_OPTION_SINGLELINE`.
      singleline: false,
      ...options.rules
    }
  };
  if (opts.target === "auto") {
    // Pick the newest target whose required regex features the engine supports
    opts.target = envFlags.flagGroups ? "ES2025" : envFlags.unicodeSets ? "ES2024" : "ES2018";
  }
  return opts;
}
|
||
|
||
// src/unicode.js
|
||
import { slug } from "oniguruma-parser/parser";
|
||
// Character class source for ASCII whitespace: tab through carriage return (U+0009-U+000D), plus
// space. The tab is written as an escape so it can't be mangled into a space, which would turn
// the range into an invalid, reversed one.
var asciiSpaceChar = "[\t-\r ]";
|
||
// Characters that `getIgnoreCaseMatchChars` returns as-is, without computing any case variants.
var CharsWithoutIgnoreCaseExpansion = /* @__PURE__ */ new Set([
  cp(304),
  // İ
  cp(305)
  // ı
]);
|
||
// Character class source used for word characters when not restricted to ASCII: letters, marks,
// numbers, and connector punctuation.
var defaultWordChar = r`[\p{L}\p{M}\p{N}\p{Pc}]`;
|
||
/**
 * Lists the characters that should match `char` case-insensitively.
 *
 * Chars in `CharsWithoutIgnoreCaseExpansion` are returned alone. Otherwise the result collects,
 * in this order and without duplicates: the uppercase form (only when it is a single code
 * point), an alternative uppercase, a titlecase form, the lowercase form, and an alternative
 * lowercase. The alternative and titlecase forms come from the lookup maps keyed by the
 * lowercase form.
 *
 * @param {string} char A single character.
 * @returns {Array<string>} Distinct characters to match.
 */
function getIgnoreCaseMatchChars(char) {
  if (CharsWithoutIgnoreCaseExpansion.has(char)) {
    return [char];
  }
  const set = /* @__PURE__ */ new Set();
  const lower = char.toLowerCase();
  // Derive the uppercase form from the lowercase one so that all lookups share the same base
  const upper = lower.toUpperCase();
  const title = LowerToTitleCaseMap.get(lower);
  const altLower = LowerToAlternativeLowerCaseMap.get(lower);
  const altUpper = LowerToAlternativeUpperCaseMap.get(lower);
  // Skip uppercase forms that expand to more than one code point (counted by code point, not
  // by UTF-16 code unit)
  if ([...upper].length === 1) {
    set.add(upper);
  }
  if (altUpper) {
    set.add(altUpper);
  }
  if (title) {
    set.add(title);
  }
  set.add(lower);
  if (altLower) {
    set.add(altLower);
  }
  return [...set];
}
|
||
// Maps the slug of each listed Unicode property name and alias to the name as written in the
// list below. Each line of the list holds one property and its aliases, separated by spaces;
// every whitespace-separated token becomes one entry. The list lines must stay unindented and
// free of other characters since the text is split on every whitespace character.
var JsUnicodePropertyMap = /* @__PURE__ */ new Map(
`C Other
Cc Control cntrl
Cf Format
Cn Unassigned
Co Private_Use
Cs Surrogate
L Letter
LC Cased_Letter
Ll Lowercase_Letter
Lm Modifier_Letter
Lo Other_Letter
Lt Titlecase_Letter
Lu Uppercase_Letter
M Mark Combining_Mark
Mc Spacing_Mark
Me Enclosing_Mark
Mn Nonspacing_Mark
N Number
Nd Decimal_Number digit
Nl Letter_Number
No Other_Number
P Punctuation punct
Pc Connector_Punctuation
Pd Dash_Punctuation
Pe Close_Punctuation
Pf Final_Punctuation
Pi Initial_Punctuation
Po Other_Punctuation
Ps Open_Punctuation
S Symbol
Sc Currency_Symbol
Sk Modifier_Symbol
Sm Math_Symbol
So Other_Symbol
Z Separator
Zl Line_Separator
Zp Paragraph_Separator
Zs Space_Separator
ASCII
ASCII_Hex_Digit AHex
Alphabetic Alpha
Any
Assigned
Bidi_Control Bidi_C
Bidi_Mirrored Bidi_M
Case_Ignorable CI
Cased
Changes_When_Casefolded CWCF
Changes_When_Casemapped CWCM
Changes_When_Lowercased CWL
Changes_When_NFKC_Casefolded CWKCF
Changes_When_Titlecased CWT
Changes_When_Uppercased CWU
Dash
Default_Ignorable_Code_Point DI
Deprecated Dep
Diacritic Dia
Emoji
Emoji_Component EComp
Emoji_Modifier EMod
Emoji_Modifier_Base EBase
Emoji_Presentation EPres
Extended_Pictographic ExtPict
Extender Ext
Grapheme_Base Gr_Base
Grapheme_Extend Gr_Ext
Hex_Digit Hex
IDS_Binary_Operator IDSB
IDS_Trinary_Operator IDST
ID_Continue IDC
ID_Start IDS
Ideographic Ideo
Join_Control Join_C
Logical_Order_Exception LOE
Lowercase Lower
Math
Noncharacter_Code_Point NChar
Pattern_Syntax Pat_Syn
Pattern_White_Space Pat_WS
Quotation_Mark QMark
Radical
Regional_Indicator RI
Sentence_Terminal STerm
Soft_Dotted SD
Terminal_Punctuation Term
Unified_Ideograph UIdeo
Uppercase Upper
Variation_Selector VS
White_Space space
XID_Continue XIDC
XID_Start XIDS`.split(/\s/).map((p) => [slug(p), p])
);
|
||
// Lowercase chars that have a second lowercase form, mapped in both directions.
var LowerToAlternativeLowerCaseMap = /* @__PURE__ */ new Map([
  ["s", cp(383)],
  // s, ſ
  [cp(383), "s"]
  // ſ, s
]);
|
||
// Lowercase chars mapped to an additional uppercase form, looked up alongside the result of
// `toUpperCase()` in `getIgnoreCaseMatchChars`.
var LowerToAlternativeUpperCaseMap = /* @__PURE__ */ new Map([
  [cp(223), cp(7838)],
  // ß, ẞ
  [cp(107), cp(8490)],
  // k, K (Kelvin)
  [cp(229), cp(8491)],
  // å, Å (Angstrom)
  [cp(969), cp(8486)]
  // ω, Ω (Ohm)
]);
|
||
// Maps the lowercase form of each listed code point to the character at that code point. The
// entries are built by `titleEntry`/`titleRange` from individual code points and inclusive
// code point ranges.
var LowerToTitleCaseMap = new Map([
  titleEntry(453),
  titleEntry(456),
  titleEntry(459),
  titleEntry(498),
  ...titleRange(8072, 8079),
  ...titleRange(8088, 8095),
  ...titleRange(8104, 8111),
  titleEntry(8124),
  titleEntry(8140),
  titleEntry(8188)
]);
|
||
// Pattern source used to represent each POSIX class name. Values are parsed as pattern
// fragments when a POSIX class is transformed; several rely on class intersection (`&&`).
var PosixClassMap = /* @__PURE__ */ new Map([
  ["alnum", r`[\p{Alpha}\p{Nd}]`],
  ["alpha", r`\p{Alpha}`],
  ["ascii", r`\p{ASCII}`],
  ["blank", r`[\p{Zs}\t]`],
  ["cntrl", r`\p{Cc}`],
  ["digit", r`\p{Nd}`],
  ["graph", r`[\P{space}&&\P{Cc}&&\P{Cn}&&\P{Cs}]`],
  ["lower", r`\p{Lower}`],
  ["print", r`[[\P{space}&&\P{Cc}&&\P{Cn}&&\P{Cs}]\p{Zs}]`],
  ["punct", r`[\p{P}\p{S}]`],
  // Updated value from Onig 6.9.9; changed from Unicode `\p{punct}`
  ["space", r`\p{space}`],
  ["upper", r`\p{Upper}`],
  ["word", r`[\p{Alpha}\p{M}\p{Nd}\p{Pc}]`],
  ["xdigit", r`\p{AHex}`]
]);
|
||
/**
 * Builds an array of consecutive numbers from `start` to `end`, both inclusive.
 *
 * @param {number} start First value.
 * @param {number} end Last value; if less than `start` the result is empty.
 * @returns {Array<number>}
 */
function range(start, end) {
  const values = [];
  for (let i = start; i <= end; i++) {
    values.push(i);
  }
  return values;
}
|
||
/**
 * Creates a map entry for a code point: `[lowercase form of the char, the char itself]`.
 *
 * @param {number} codePoint Unicode code point.
 * @returns {[string, string]}
 */
function titleEntry(codePoint) {
  const char = cp(codePoint);
  return [char.toLowerCase(), char];
}
|
||
/**
 * Creates `titleEntry` map entries for every code point from `start` to `end`, inclusive.
 *
 * @param {number} start First code point.
 * @param {number} end Last code point.
 * @returns {Array<[string, string]>}
 */
function titleRange(start, end) {
  return range(start, end).map((codePoint) => titleEntry(codePoint));
}
|
||
// Names and aliases of Unicode properties that select characters of one specific case.
var UnicodePropertiesWithSpecificCase = /* @__PURE__ */ new Set([
  "Lower",
  "Lowercase",
  "Upper",
  "Uppercase",
  "Ll",
  "Lowercase_Letter",
  "Lt",
  "Titlecase_Letter",
  "Lu",
  "Uppercase_Letter"
  // The `Changes_When_*` properties (and their aliases) could be included, but they're very rare.
  // Some other properties include a handful of chars with specific cases only, but these chars are
  // generally extreme edge cases and using such properties case insensitively generally produces
  // undesired behavior anyway
]);
|
||
|
||
// src/transform.js
|
||
import { createAlternative, createAssertion, createBackreference, createCapturingGroup, createCharacter, createCharacterClass, createCharacterSet, createGroup, createLookaroundAssertion, createQuantifier, createSubroutine, createUnicodeProperty, hasOnlyChild, parse, slug as slug2 } from "oniguruma-parser/parser";
|
||
import { traverse } from "oniguruma-parser/traverser";
|
||
/**
 * Transforms a parsed AST in place by running three traversal passes over it, each with its own
 * visitor and state.
 *
 * The first pass state carries the options plus ASCII-mode flags read from `ast.flags`. The
 * second pass tracks the flags in effect and reuses the first pass's `subroutineRefMap`. The third
 * pass reuses `groupsByName` and `reffedNodesByReferencer` collected by the second pass.
 *
 * @param {object} ast Root AST node; mutated and returned.
 * @param {object} [options] May override `accuracy`, `asciiWordBoundaries`, `avoidSubclass`, and
 *   `bestEffortTarget`.
 * @returns {object} The same `ast`, with `_originMap` (copied group -> origin group) and
 *   `_strategy` (strategy name chosen by the first pass, or `null`) attached.
 */
function transform(ast, options) {
  const opts = {
    // A couple edge cases exist where options `accuracy` and `bestEffortTarget` are used:
    // - `CharacterSet` kind `text_segment` (`\X`): An exact representation would require heavy
    // Unicode data; a best-effort approximation requires knowing the target.
    // - `CharacterSet` kind `posix` with values `graph` and `print`: Their complex Unicode
    // representations would be hard to change to ASCII versions after the fact in the generator
    // based on `target`/`accuracy`, so produce the appropriate structure here.
    accuracy: "default",
    asciiWordBoundaries: false,
    avoidSubclass: false,
    bestEffortTarget: "ES2025",
    ...options
  };
  // The visitors navigate upward via `node.parent`, so set it on every node first
  addParentProperties(ast);
  const firstPassState = {
    accuracy: opts.accuracy,
    asciiWordBoundaries: opts.asciiWordBoundaries,
    avoidSubclass: opts.avoidSubclass,
    flagDirectivesByAlt: /* @__PURE__ */ new Map(),
    jsGroupNameMap: /* @__PURE__ */ new Map(),
    minTargetEs2024: isMinTarget(opts.bestEffortTarget, "ES2024"),
    passedLookbehind: false,
    strategy: null,
    // Subroutines can appear before the groups they ref, so collect reffed nodes for a second pass
    subroutineRefMap: /* @__PURE__ */ new Map(),
    supportedGNodes: /* @__PURE__ */ new Set(),
    digitIsAscii: ast.flags.digitIsAscii,
    spaceIsAscii: ast.flags.spaceIsAscii,
    wordIsAscii: ast.flags.wordIsAscii
  };
  traverse(ast, FirstPassVisitor, firstPassState);
  const globalFlags = {
    dotAll: ast.flags.dotAll,
    ignoreCase: ast.flags.ignoreCase
  };
  const secondPassState = {
    currentFlags: globalFlags,
    prevFlags: null,
    globalFlags,
    groupOriginByCopy: /* @__PURE__ */ new Map(),
    groupsByName: /* @__PURE__ */ new Map(),
    multiplexCapturesToLeftByRef: /* @__PURE__ */ new Map(),
    openRefs: /* @__PURE__ */ new Map(),
    reffedNodesByReferencer: /* @__PURE__ */ new Map(),
    subroutineRefMap: firstPassState.subroutineRefMap
  };
  traverse(ast, SecondPassVisitor, secondPassState);
  const thirdPassState = {
    groupsByName: secondPassState.groupsByName,
    highestOrphanBackref: 0,
    numCapturesToLeft: 0,
    reffedNodesByReferencer: secondPassState.reffedNodesByReferencer
  };
  traverse(ast, ThirdPassVisitor, thirdPassState);
  ast._originMap = secondPassState.groupOriginByCopy;
  ast._strategy = firstPassState.strategy;
  return ast;
}
|
||
// Visitor for the first traversal pass. It rewrites or rejects node kinds, records capturing
// groups in `subroutineRefMap`, renames group names that aren't valid JS names, converts flag
// directives into flag groups, and prepares the `Flags` node. Handlers receive a traversal path
// (`node`, `parent`, `replaceWith`, etc.) and the pass state created in `transform`.
var FirstPassVisitor = {
  // Absence repeater -> `(?:(?!body)\p{Any})*`; other absence function kinds are unsupported
  AbsenceFunction({ node, parent, replaceWith }) {
    const { body, kind } = node;
    if (kind === "repeater") {
      const innerGroup = createGroup();
      innerGroup.body[0].body.push(
        // Insert own alts as `body`
        createLookaroundAssertion({ negate: true, body }),
        createUnicodeProperty("Any")
      );
      const outerGroup = createGroup();
      outerGroup.body[0].body.push(
        createQuantifier("greedy", 0, Infinity, innerGroup)
      );
      replaceWith(setParentDeep(outerGroup, parent), { traverse: true });
    } else {
      throw new Error(`Unsupported absence function "(?~|"`);
    }
  },
  Alternative: {
    // Record this alternative's flag directives against every sibling alternative that follows it
    enter({ node, parent, key }, { flagDirectivesByAlt }) {
      const flagDirectives = node.body.filter((el) => el.kind === "flags");
      for (let i = key + 1; i < parent.body.length; i++) {
        const forwardSiblingAlt = parent.body[i];
        getOrInsert(flagDirectivesByAlt, forwardSiblingAlt, []).push(...flagDirectives);
      }
    },
    // Wrap the alternative's contents in a flag group that applies the combined flag directives
    // recorded for it by preceding sibling alternatives
    exit({ node }, { flagDirectivesByAlt }) {
      if (flagDirectivesByAlt.get(node)?.length) {
        const flags = getCombinedFlagModsFromFlagNodes(flagDirectivesByAlt.get(node));
        if (flags) {
          const flagGroup = createGroup({ flags });
          flagGroup.body[0].body = node.body;
          node.body = [setParentDeep(flagGroup, node)];
        }
      }
    }
  },
  Assertion({ node, parent, key, container, root, remove, replaceWith }, state) {
    const { kind, negate } = node;
    const { asciiWordBoundaries, avoidSubclass, supportedGNodes, wordIsAscii } = state;
    if (kind === "text_segment_boundary") {
      throw new Error(`Unsupported text segment boundary "\\${negate ? "Y" : "y"}"`);
    } else if (kind === "line_end") {
      // Lookahead for string end or a line feed
      replaceWith(setParentDeep(createLookaroundAssertion({ body: [
        createAlternative({ body: [createAssertion("string_end")] }),
        createAlternative({ body: [createCharacter(10)] })
        // `\n`
      ] }), parent));
    } else if (kind === "line_start") {
      replaceWith(setParentDeep(parseFragment(r`(?<=\A|\n(?!\z))`, { skipLookbehindValidation: true }), parent));
    } else if (kind === "search_start") {
      if (supportedGNodes.has(node)) {
        // Collected in `Regex.enter`: every top-level alternative leads with `\G`, so it's
        // expressed with the sticky flag instead
        root.flags.sticky = true;
        remove();
      } else {
        const prev = container[key - 1];
        if (prev && isAlwaysNonZeroLength(prev)) {
          // Replace with an empty negative lookahead
          replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
        } else if (avoidSubclass) {
          throw new Error(r`Uses "\G" in a way that requires a subclass`);
        } else {
          replaceWith(setParent(createAssertion("string_start"), parent));
          state.strategy = "clip_search";
        }
      }
    } else if (kind === "string_end" || kind === "string_start") {
      // Kept as-is
    } else if (kind === "string_end_newline") {
      replaceWith(setParentDeep(parseFragment(r`(?=\n?\z)`), parent));
    } else if (kind === "word_boundary") {
      // Left as-is when word boundaries are ASCII-based; otherwise expanded to lookarounds that
      // use `defaultWordChar`
      if (!wordIsAscii && !asciiWordBoundaries) {
        const b = `(?:(?<=${defaultWordChar})(?!${defaultWordChar})|(?<!${defaultWordChar})(?=${defaultWordChar}))`;
        const B = `(?:(?<=${defaultWordChar})(?=${defaultWordChar})|(?<!${defaultWordChar})(?!${defaultWordChar}))`;
        replaceWith(setParentDeep(parseFragment(negate ? B : b), parent));
      }
    } else {
      throw new Error(`Unexpected assertion kind "${kind}"`);
    }
  },
  // Rename named refs that aren't valid JS group names
  Backreference({ node }, { jsGroupNameMap }) {
    let { ref } = node;
    if (typeof ref === "string" && !isValidJsGroupName(ref)) {
      ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
      node.ref = ref;
    }
  },
  // Rename invalid JS group names, and index the group by number and (if named) by name so that
  // subroutines can find it in the second pass
  CapturingGroup({ node }, { jsGroupNameMap, subroutineRefMap }) {
    let { name } = node;
    if (name && !isValidJsGroupName(name)) {
      name = getAndStoreJsGroupName(name, jsGroupNameMap);
      node.name = name;
    }
    subroutineRefMap.set(node.number, node);
    if (name) {
      subroutineRefMap.set(name, node);
    }
  },
  // A range that is a direct child of an intersection gets wrapped in its own character class
  CharacterClassRange({ node, parent, replaceWith }) {
    if (parent.kind === "intersection") {
      const cc = createCharacterClass({ body: [node] });
      replaceWith(setParentDeep(cc, parent), { traverse: true });
    }
  },
  CharacterSet({ node, parent, replaceWith }, { accuracy, minTargetEs2024, digitIsAscii, spaceIsAscii, wordIsAscii }) {
    const { kind, negate, value } = node;
    // ASCII-mode flags take precedence, and also apply to sets whose `value` names the class
    if (digitIsAscii && (kind === "digit" || value === "digit")) {
      replaceWith(setParent(createCharacterSet("digit", { negate }), parent));
      return;
    }
    if (spaceIsAscii && (kind === "space" || value === "space")) {
      replaceWith(setParentDeep(setNegate(parseFragment(asciiSpaceChar), negate), parent));
      return;
    }
    if (wordIsAscii && (kind === "word" || value === "word")) {
      replaceWith(setParent(createCharacterSet("word", { negate }), parent));
      return;
    }
    if (kind === "any") {
      replaceWith(setParent(createUnicodeProperty("Any"), parent));
    } else if (kind === "digit") {
      replaceWith(setParent(createUnicodeProperty("Nd", { negate }), parent));
    } else if (kind === "dot") {
      // Kept as-is
    } else if (kind === "text_segment") {
      if (accuracy === "strict") {
        throw new Error(r`Use of "\X" requires non-strict accuracy`);
      }
      const eBase = "\\p{Emoji}(?:\\p{EMod}|\\uFE0F\\u20E3?|[\\x{E0020}-\\x{E007E}]+\\x{E007F})?";
      const emoji = r`\p{RI}{2}|${eBase}(?:\u200D${eBase})*`;
      replaceWith(setParentDeep(parseFragment(
        // Close approximation of an extended grapheme cluster; see: <unicode.org/reports/tr29/>
        r`(?>\r\n|${minTargetEs2024 ? r`\p{RGI_Emoji}` : emoji}|\P{M}\p{M}*)`,
        // Allow JS property `RGI_Emoji` through
        { skipPropertyNameValidation: true }
      ), parent));
    } else if (kind === "hex") {
      replaceWith(setParent(createUnicodeProperty("AHex", { negate }), parent));
    } else if (kind === "newline") {
      replaceWith(setParentDeep(parseFragment(negate ? "[^\n]" : "(?>\r\n?|[\n\v\f\x85\u2028\u2029])"), parent));
    } else if (kind === "posix") {
      if (!minTargetEs2024 && (value === "graph" || value === "print")) {
        if (accuracy === "strict") {
          throw new Error(`POSIX class "${value}" requires min target ES2024 or non-strict accuracy`);
        }
        // ASCII-only approximation, written as `first-last` of a single range
        let ascii = {
          graph: "!-~",
          print: " -~"
        }[value];
        if (negate) {
          // Everything below the range's first char and above its last char
          ascii = `\0-${cp(ascii.codePointAt(0) - 1)}${cp(ascii.codePointAt(2) + 1)}-\u{10FFFF}`;
        }
        replaceWith(setParentDeep(parseFragment(`[${ascii}]`), parent));
      } else {
        replaceWith(setParentDeep(setNegate(parseFragment(PosixClassMap.get(value)), negate), parent));
      }
    } else if (kind === "property") {
      // Names that aren't in the JS property map are given key `sc`
      if (!JsUnicodePropertyMap.has(slug2(value))) {
        node.key = "sc";
      }
    } else if (kind === "space") {
      replaceWith(setParent(createUnicodeProperty("space", { negate }), parent));
    } else if (kind === "word") {
      replaceWith(setParentDeep(setNegate(parseFragment(defaultWordChar), negate), parent));
    } else {
      throw new Error(`Unexpected character set kind "${kind}"`);
    }
  },
  Directive({ node, parent, root, remove, replaceWith, removeAllPrevSiblings, removeAllNextSiblings }) {
    const { kind, flags } = node;
    if (kind === "flags") {
      if (!flags.enable && !flags.disable) {
        remove();
      } else {
        // Convert to a flag group that contains everything following the directive
        const flagGroup = createGroup({ flags });
        flagGroup.body[0].body = removeAllNextSiblings();
        replaceWith(setParentDeep(flagGroup, parent), { traverse: true });
      }
    } else if (kind === "keep") {
      const firstAlt = root.body[0];
      const hasWrapperGroup = root.body.length === 1 && // Not emulatable if within a `CapturingGroup`
      hasOnlyChild(firstAlt, { type: "Group" }) && firstAlt.body[0].body.length === 1;
      const topLevel = hasWrapperGroup ? firstAlt.body[0] : root;
      if (parent.parent !== topLevel || topLevel.body.length > 1) {
        throw new Error(r`Uses "\K" in a way that's unsupported`);
      }
      // Everything before `\K` moves into a lookbehind
      const lookbehind = createLookaroundAssertion({ behind: true });
      lookbehind.body[0].body = removeAllPrevSiblings();
      replaceWith(setParentDeep(lookbehind, parent));
    } else {
      throw new Error(`Unexpected directive kind "${kind}"`);
    }
  },
  Flags({ node, parent }) {
    if (node.posixIsAscii) {
      throw new Error('Unsupported flag "P"');
    }
    if (node.textSegmentMode === "word") {
      throw new Error('Unsupported flag "y{w}"');
    }
    // Remove the Onig-only flag properties from the node
    [
      "digitIsAscii",
      // Flag D
      "extended",
      // Flag x
      "posixIsAscii",
      // Flag P
      "spaceIsAscii",
      // Flag S
      "wordIsAscii",
      // Flag W
      "textSegmentMode"
      // Flag y{g} or y{w}
    ].forEach((f) => delete node[f]);
    Object.assign(node, {
      // JS flag g; no Onig equiv
      global: false,
      // JS flag d; no Onig equiv
      hasIndices: false,
      // JS flag m; no Onig equiv but its behavior is always on in Onig. Onig's only line break
      // char is line feed, unlike JS, so this flag isn't used since it would produce inaccurate
      // results (also allows `^` and `$` to be used in the generator for string start and end)
      multiline: false,
      // JS flag y; no Onig equiv, but used for `\G` emulation
      sticky: node.sticky ?? false
      // Note: Regex+ doesn't allow explicitly adding flags it handles implicitly, so leave out
      // properties `unicode` (JS flag u) and `unicodeSets` (JS flag v). Keep the existing values
      // for `ignoreCase` (flag i) and `dotAll` (JS flag s, but Onig flag m)
    });
    parent.options = {
      disable: {
        // Onig uses different rules for flag x than Regex+, so disable the implicit flag
        x: true,
        // Onig has no flag to control "named capture only" mode but contextually applies its
        // behavior when named capturing is used, so disable Regex+'s implicit flag for it
        n: true
      },
      force: {
        // Always add flag v because we're generating an AST that relies on it (it enables JS
        // support for Onig features nested classes, intersection, Unicode properties, etc.).
        // However, the generator might disable flag v based on its `target` option
        v: true
      }
    };
  },
  // Normalize a group's flag modifiers: drop `extended`, let `disable` win when a flag is both
  // enabled and disabled, and remove `enable`/`disable`/`flags` once they're empty
  Group({ node }) {
    if (!node.flags) {
      return;
    }
    const { enable, disable } = node.flags;
    enable?.extended && delete enable.extended;
    disable?.extended && delete disable.extended;
    enable?.dotAll && disable?.dotAll && delete enable.dotAll;
    enable?.ignoreCase && disable?.ignoreCase && delete enable.ignoreCase;
    enable && !Object.keys(enable).length && delete node.flags.enable;
    disable && !Object.keys(disable).length && delete node.flags.disable;
    !node.flags.enable && !node.flags.disable && delete node.flags;
  },
  // Remember that a lookbehind was seen; checked in `Regex.exit`
  LookaroundAssertion({ node }, state) {
    const { kind } = node;
    if (kind === "lookbehind") {
      state.passedLookbehind = true;
    }
  },
  // `(*FAIL)` becomes an empty negative lookahead; other named callouts are unsupported
  NamedCallout({ node, parent, replaceWith }) {
    const { kind } = node;
    if (kind === "fail") {
      replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
    } else {
      throw new Error(`Unsupported named callout "(*${kind.toUpperCase()}"`);
    }
  },
  // A quantifier applied directly to another quantifier gets a group around the inner one
  Quantifier({ node }) {
    if (node.body.type === "Quantifier") {
      const group = createGroup();
      group.body[0].body.push(node.body);
      node.body = setParentDeep(group, node);
    }
  },
  Regex: {
    // Collect leading `\G` nodes per top-level alternative. They're only marked as supported
    // (handled via the sticky flag in `Assertion`) if every non-empty alternative leads with one
    enter({ node }, { supportedGNodes }) {
      const leadingGs = [];
      let hasAltWithLeadG = false;
      let hasAltWithoutLeadG = false;
      for (const alt of node.body) {
        if (alt.body.length === 1 && alt.body[0].kind === "search_start") {
          // An alternative that consists only of `\G` is emptied
          alt.body.pop();
        } else {
          const leadingG = getLeadingG(alt.body);
          if (leadingG) {
            hasAltWithLeadG = true;
            Array.isArray(leadingG) ? leadingGs.push(...leadingG) : leadingGs.push(leadingG);
          } else {
            hasAltWithoutLeadG = true;
          }
        }
      }
      if (hasAltWithLeadG && !hasAltWithoutLeadG) {
        leadingGs.forEach((g) => supportedGNodes.add(g));
      }
    },
    exit(_, { accuracy, passedLookbehind, strategy }) {
      if (accuracy === "strict" && passedLookbehind && strategy) {
        throw new Error(r`Uses "\G" in a way that requires non-strict accuracy`);
      }
    }
  },
  // Rename named refs that aren't valid JS group names
  Subroutine({ node }, { jsGroupNameMap }) {
    let { ref } = node;
    if (typeof ref === "string" && !isValidJsGroupName(ref)) {
      ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
      node.ref = ref;
    }
  }
};
|
||
// Visitor for the second traversal pass. It expands subroutines into copies of the groups they
// reference, turns re-entered copies into recursion nodes, tracks which captures each
// backreference can refer to, tracks the flags in effect, and collects groups by name so that
// duplicate names can be removed in the third pass.
var SecondPassVisitor = {
  // Snapshot the captures currently registered for this ref (those seen so far to the left)
  Backreference({ node }, { multiplexCapturesToLeftByRef, reffedNodesByReferencer }) {
    const { orphan, ref } = node;
    if (!orphan) {
      reffedNodesByReferencer.set(node, [...multiplexCapturesToLeftByRef.get(ref).map(({ node: node2 }) => node2)]);
    }
  },
  CapturingGroup: {
    enter({
      node,
      parent,
      replaceWith,
      skip
    }, {
      groupOriginByCopy,
      groupsByName,
      multiplexCapturesToLeftByRef,
      openRefs,
      reffedNodesByReferencer
    }) {
      // `origin` is set only for groups created by subroutine expansion
      const origin = groupOriginByCopy.get(node);
      if (origin && openRefs.has(node.number)) {
        // A copy of a group that is still open: replace it with a recursion instead of
        // expanding again
        const recursion2 = setParent(createRecursion(node.number), parent);
        reffedNodesByReferencer.set(recursion2, openRefs.get(node.number));
        replaceWith(recursion2);
        return;
      }
      openRefs.set(node.number, node);
      multiplexCapturesToLeftByRef.set(node.number, []);
      if (node.name) {
        getOrInsert(multiplexCapturesToLeftByRef, node.name, []);
      }
      const multiplexNodes = multiplexCapturesToLeftByRef.get(node.name ?? node.number);
      for (let i = 0; i < multiplexNodes.length; i++) {
        const multiplex = multiplexNodes[i];
        if (
          // This group is from subroutine expansion, and there's a multiplex value from either the
          // origin node or a prior subroutine expansion group with the same origin
          origin === multiplex.node || origin && origin === multiplex.origin || // This group is not from subroutine expansion, and it comes after a subroutine expansion
          // group that refers to this group
          node === multiplex.origin
        ) {
          multiplexNodes.splice(i, 1);
          break;
        }
      }
      multiplexCapturesToLeftByRef.get(node.number).push({ node, origin });
      if (node.name) {
        multiplexCapturesToLeftByRef.get(node.name).push({ node, origin });
      }
      if (node.name) {
        const groupsWithSameName = getOrInsert(groupsByName, node.name, /* @__PURE__ */ new Map());
        let hasDuplicateNameToRemove = false;
        if (origin) {
          // Copies from subroutine expansion never keep their name
          hasDuplicateNameToRemove = true;
        } else {
          // Only one group per name keeps it; mark this one for removal if an earlier group
          // with the same name is keeping its name
          for (const groupInfo of groupsWithSameName.values()) {
            if (!groupInfo.hasDuplicateNameToRemove) {
              hasDuplicateNameToRemove = true;
              break;
            }
          }
        }
        groupsByName.get(node.name).set(node, { node, hasDuplicateNameToRemove });
      }
    },
    exit({ node }, { openRefs }) {
      openRefs.delete(node.number);
    }
  },
  Group: {
    enter({ node }, state) {
      // Groups nest, so the flags in effect outside each open group are kept on a stack (created
      // lazily on `state`). A single saved value would be overwritten by nested groups, leaving
      // `currentFlags` wrong after the outer group is exited
      (state.outerFlagsStack ??= []).push(state.currentFlags);
      state.prevFlags = state.currentFlags;
      if (node.flags) {
        state.currentFlags = getNewCurrentFlags(state.currentFlags, node.flags);
      }
    },
    exit(_, state) {
      const stack = state.outerFlagsStack;
      // Fall back to `prevFlags` if `exit` is reached without a matching `enter`
      state.currentFlags = stack?.length ? stack.pop() : state.prevFlags;
    }
  },
  Subroutine({ node, parent, replaceWith }, state) {
    const { isRecursive, ref } = node;
    if (isRecursive) {
      // Find the enclosing capturing group that this recursion refers to, by name or number
      let reffed = parent;
      while (reffed = reffed.parent) {
        if (reffed.type === "CapturingGroup" && (reffed.name === ref || reffed.number === ref)) {
          break;
        }
      }
      state.reffedNodesByReferencer.set(node, reffed);
      return;
    }
    const reffedGroupNode = state.subroutineRefMap.get(ref);
    const isGlobalRecursion = ref === 0;
    const expandedSubroutine = isGlobalRecursion ? createRecursion(0) : (
      // The reffed group might itself contain subroutines, which are expanded during sub-traversal
      cloneCapturingGroup(reffedGroupNode, state.groupOriginByCopy, null)
    );
    let replacement = expandedSubroutine;
    if (!isGlobalRecursion) {
      // Flags that apply where the reffed group is defined, derived from its ancestor flag groups
      const reffedGroupFlagMods = getCombinedFlagModsFromFlagNodes(getAllParents(
        reffedGroupNode,
        (p) => p.type === "Group" && !!p.flags
      ));
      const reffedGroupFlags = reffedGroupFlagMods ? getNewCurrentFlags(state.globalFlags, reffedGroupFlagMods) : state.globalFlags;
      if (!areFlagsEqual(reffedGroupFlags, state.currentFlags)) {
        // Wrap the copy in a flag group so it keeps the flags of its origin
        replacement = createGroup({
          flags: getFlagModsFromFlags(reffedGroupFlags)
        });
        replacement.body[0].body.push(expandedSubroutine);
      }
    }
    replaceWith(setParentDeep(replacement, parent), { traverse: !isGlobalRecursion });
  }
};
|
||
// Visitor for the third traversal pass. It renumbers capturing groups in traversal order, removes
// group names marked as duplicates, resolves backreferences and recursions to the new group
// numbers, and appends empty captures to cover orphan backreferences.
var ThirdPassVisitor = {
  Backreference({ node, parent, replaceWith }, state) {
    if (node.orphan) {
      // Only track the highest number; the needed captures are added in `Regex.exit`
      state.highestOrphanBackref = Math.max(state.highestOrphanBackref, node.ref);
      return;
    }
    const reffedNodes = state.reffedNodesByReferencer.get(node);
    const participants = reffedNodes.filter((reffed) => canParticipateWithNode(reffed, node));
    if (!participants.length) {
      // No capture can participate: replace with an empty negative lookahead
      replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
    } else if (participants.length > 1) {
      // Several captures can participate: try a backreference to each, in reverse order, within
      // an atomic group
      const group = createGroup({
        atomic: true,
        body: participants.reverse().map((reffed) => createAlternative({
          body: [createBackreference(reffed.number)]
        }))
      });
      replaceWith(setParentDeep(group, parent));
    } else {
      node.ref = participants[0].number;
    }
  },
  CapturingGroup({ node }, state) {
    // Renumber by position, since subroutine expansion added groups
    node.number = ++state.numCapturesToLeft;
    if (node.name) {
      if (state.groupsByName.get(node.name).get(node).hasDuplicateNameToRemove) {
        delete node.name;
      }
    }
  },
  Regex: {
    exit({ node }, state) {
      // Append empty captures to the last alternative until the highest orphan backreference
      // number exists
      const numCapsNeeded = Math.max(state.highestOrphanBackref - state.numCapturesToLeft, 0);
      for (let i = 0; i < numCapsNeeded; i++) {
        const emptyCapture = createCapturingGroup();
        node.body.at(-1).body.push(emptyCapture);
      }
    }
  },
  Subroutine({ node }, state) {
    if (!node.isRecursive || node.ref === 0) {
      return;
    }
    // Point the recursion at the (renumbered) group recorded for it in the second pass
    node.ref = state.reffedNodesByReferencer.get(node).number;
  }
};
|
||
/**
 * Sets a `parent` property on every node visited while traversing from `root`, using the parent
 * reported by the traverser.
 *
 * @param {object} root Root AST node; nodes are mutated in place.
 */
function addParentProperties(root) {
  traverse(root, {
    "*"({ node, parent }) {
      node.parent = parent;
    }
  });
}
|
||
/**
 * Compares two flags objects by their `dotAll` and `ignoreCase` values (strict equality).
 *
 * @param {{dotAll: boolean, ignoreCase: boolean}} a
 * @param {{dotAll: boolean, ignoreCase: boolean}} b
 * @returns {boolean}
 */
function areFlagsEqual(a, b) {
  return a.dotAll === b.dotAll && a.ignoreCase === b.ignoreCase;
}
|
||
/**
 * Determines whether `capture` lies to the left of `node` along the path from `node` up to the
 * root, without `node` being inside `capture`.
 *
 * Walks up from `node`. At each level (other than `Alternative` nodes, which are stepped over),
 * the kids of the current point's parent that come before the current point are checked for
 * being, or containing, `capture`.
 *
 * @param {object} capture Capturing group node.
 * @param {object} node Node that references the capture.
 * @returns {boolean} `true` if found to the left; `false` if `node` is within `capture` or the
 *   root is reached first.
 * @throws {Error} If the parent chain ends without reaching a `Regex` node.
 */
function canParticipateWithNode(capture, node) {
  let rightmostPoint = node;
  do {
    if (rightmostPoint.type === "Regex") {
      return false;
    }
    if (rightmostPoint.type === "Alternative") {
      // Skip to the parent; `continue` still evaluates the loop condition below
      continue;
    }
    if (rightmostPoint === capture) {
      return false;
    }
    const kidsOfParent = getKids(rightmostPoint.parent);
    for (const kid of kidsOfParent) {
      if (kid === rightmostPoint) {
        // Only kids to the left of the current point are considered
        break;
      }
      if (kid === capture || isAncestorOf(kid, capture)) {
        return true;
      }
    }
  } while (rightmostPoint = rightmostPoint.parent);
  throw new Error("Unexpected path");
}
|
||
/**
 * Recursively deep-copies a node (or array of nodes), rewiring `parent` properties to the copies
 * and recording the origin of every copied capturing group.
 *
 * `parent` properties aren't followed; they're set from `up`/`up2` instead. For each copied
 * object whose `type` is "CapturingGroup", `originMap` maps the copy to the original's own origin
 * if it has one, else to the original.
 *
 * @param {object|Array} obj Node or array to copy.
 * @param {Map} originMap Receives copy -> origin entries for capturing groups.
 * @param {object|Array|null} up Copy of the direct container of `obj`; used as the copy's
 *   `parent`, unless it's an array.
 * @param {object|null} [up2] Copy of the container of `up`; used as `parent` when `up` is an
 *   array, since the parent of a node within an array is the node that holds the array.
 * @returns {object|Array} The copy.
 */
function cloneCapturingGroup(obj, originMap, up, up2) {
  const store = Array.isArray(obj) ? [] : {};
  for (const [key, value] of Object.entries(obj)) {
    if (key === "parent") {
      store.parent = Array.isArray(up) ? up2 : up;
    } else if (value && typeof value === "object") {
      store[key] = cloneCapturingGroup(value, originMap, store, up);
    } else {
      if (key === "type" && value === "CapturingGroup") {
        originMap.set(store, originMap.get(obj) ?? obj);
      }
      store[key] = value;
    }
  }
  return store;
}
|
||
/**
Creates a subroutine node for `ref` and marks it with `isRecursive = true`.
@param {string | number} ref Reference passed through to `createSubroutine`.
@returns {object} The flagged subroutine node.
*/
function createRecursion(ref) {
  const node = createSubroutine(ref);
  node.isRecursive = true;
  return node;
}
|
||
/**
Collects the ancestors of `node` by following `parent` links, nearest ancestor first.
@param {object} node Starting node (not included in the result).
@param {(ancestor: object) => unknown} [filterFn] When given, only ancestors for which it
returns a truthy value are kept.
@returns {Array<object>}
*/
function getAllParents(node, filterFn) {
  const results = [];
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (!filterFn || filterFn(ancestor)) {
      results.push(ancestor);
    }
  }
  return results;
}
|
||
/**
Returns the JS group name stored in `map` for `name`, creating and storing one on first use.
New names are `$<index>_<sanitized>`, where `<index>` is the map's size at creation time and
`<sanitized>` is `name` with each character not allowed by the pattern below replaced by `_`.
@param {string} name Original group name.
@param {Map<string, string>} map Cache of original name -> JS name; updated in place.
@returns {string}
*/
function getAndStoreJsGroupName(name, map) {
  if (map.has(name)) {
    return map.get(name);
  }
  // First char must be `$`, `_`, or ID_Start; later chars `$`, ZWNJ, ZWJ, or ID_Continue
  const sanitized = name.replace(/^[^$_\p{IDS}]|[^$\u200C\u200D\p{IDC}]/ug, "_");
  const jsName = `$${map.size}_${sanitized}`;
  map.set(name, jsName);
  return jsName;
}
|
||
/**
Folds the `flags` modifiers of a sequence of nodes into one `{enable, disable}` object covering
`dotAll` and `ignoreCase`. Nodes are applied in order: enabling a flag clears an earlier disable
of it; disabling a flag records the disable (an earlier enable is left in place).
@param {Array<{flags: {enable?: object, disable?: object}}>} flagNodes
@returns {{enable?: object, disable?: object} | null} Combined modifiers with empty sides
removed, or `null` when nothing is enabled or disabled.
*/
function getCombinedFlagModsFromFlagNodes(flagNodes) {
  const flagProps = ["dotAll", "ignoreCase"];
  const combinedFlags = { enable: {}, disable: {} };
  for (const { flags } of flagNodes) {
    for (const prop of flagProps) {
      if (flags.enable?.[prop]) {
        delete combinedFlags.disable[prop];
        combinedFlags.enable[prop] = true;
      }
      if (flags.disable?.[prop]) {
        combinedFlags.disable[prop] = true;
      }
    }
  }
  if (!Object.keys(combinedFlags.enable).length) {
    delete combinedFlags.enable;
  }
  if (!Object.keys(combinedFlags.disable).length) {
    delete combinedFlags.disable;
  }
  if (combinedFlags.enable || combinedFlags.disable) {
    return combinedFlags;
  }
  return null;
}
|
||
/**
Converts absolute `dotAll`/`ignoreCase` values into a modifiers object: each truthy flag is
listed under `enable`, each falsy flag under `disable`. A side with no flags is omitted.
@param {{dotAll: unknown, ignoreCase: unknown}} flags
@returns {{enable?: object, disable?: object}}
*/
function getFlagModsFromFlags({ dotAll, ignoreCase }) {
  const mods = {};
  if (dotAll || ignoreCase) {
    mods.enable = {};
    if (dotAll) {
      mods.enable.dotAll = true;
    }
    if (ignoreCase) {
      mods.enable.ignoreCase = true;
    }
  }
  if (!dotAll || !ignoreCase) {
    mods.disable = {};
    if (!dotAll) {
      mods.disable.dotAll = true;
    }
    if (!ignoreCase) {
      mods.disable.ignoreCase = true;
    }
  }
  return mods;
}
|
||
/**
Returns the children of `node` as an array, based on its `body` property.
@param {object} node
@returns {Array<object> | null} `body` itself when it is an array, `[body]` when it is a single
truthy value, otherwise `null`.
@throws {Error} If `node` is falsy.
*/
function getKids(node) {
  if (!node) {
    throw new Error("Node expected");
  }
  const { body } = node;
  if (Array.isArray(body)) {
    return body;
  }
  return body ? [body] : null;
}
|
||
/**
Finds the `search_start` assertion(s) that lead the element list `els`. Zero-length elements
other than a `search_start` assertion or a non-negated lone-`search_start` lookaround are
skipped when picking the first element to inspect.
@param {Array<object>} els Elements of one alternative.
@returns {object | Array<object> | null} The leading `search_start` node; for a leading group,
an array with the leading nodes of every alternative; `null` when the list (or any alternative
of a leading group) does not lead with one.
*/
function getLeadingG(els) {
  const firstToConsider = els.find((el) => {
    return el.kind === "search_start" || isLoneGLookaround(el, { negate: false }) || !isAlwaysZeroLength(el);
  });
  if (!firstToConsider) {
    return null;
  }
  if (firstToConsider.kind === "search_start") {
    return firstToConsider;
  }
  if (firstToConsider.type === "LookaroundAssertion") {
    // Only a lone-`search_start` lookaround can get here; return the assertion inside it
    return firstToConsider.body[0].body[0];
  }
  if (firstToConsider.type === "CapturingGroup" || firstToConsider.type === "Group") {
    const gNodesForGroup = [];
    for (const alt of firstToConsider.body) {
      const leadingG = getLeadingG(alt.body);
      if (!leadingG) {
        // Every alternative must lead with `search_start`
        return null;
      }
      if (Array.isArray(leadingG)) {
        gNodesForGroup.push(...leadingG);
      } else {
        gNodesForGroup.push(leadingG);
      }
    }
    return gNodesForGroup;
  }
  return null;
}
|
||
/**
Checks whether `descendant` appears anywhere below `node`, following children as reported by
`getKids`.
@param {object} node
@param {object} descendant
@returns {boolean}
*/
function isAncestorOf(node, descendant) {
  const kids = getKids(node) ?? [];
  return kids.some((kid) => kid === descendant || isAncestorOf(kid, descendant));
}
|
||
/**
Reports whether a node's `type` is `Assertion`, `Directive`, or `LookaroundAssertion`.
@param {{type: string}} node
@returns {boolean}
*/
function isAlwaysZeroLength({ type }) {
  return type === "Assertion" || type === "Directive" || type === "LookaroundAssertion";
}
|
||
/**
Reports whether `node` is a `Character`, `CharacterClass`, or `CharacterSet`, or a quantifier
with a truthy `min` whose body is one of those types.
@param {{type: string, min?: number, body?: {type: string}}} node
@returns {boolean} Always a strict boolean (a quantifier `min` of `0` yields `false`, not `0`).
*/
function isAlwaysNonZeroLength(node) {
  const types = [
    "Character",
    "CharacterClass",
    "CharacterSet"
  ];
  if (types.includes(node.type)) {
    return true;
  }
  return node.type === "Quantifier" && Boolean(node.min) && types.includes(node.body.type);
}
|
||
/**
Reports whether `node` is a lookaround with exactly one alternative whose only child (per
`hasOnlyChild`) is a `search_start` assertion.
@param {object} node
@param {{negate?: boolean | null}} [options] When `negate` is not `null`, the lookaround's
`negate` value must equal it. Defaults to `null` (either polarity).
@returns {boolean}
*/
function isLoneGLookaround(node, options) {
  const opts = {
    negate: null,
    ...options
  };
  if (node.type !== "LookaroundAssertion") {
    return false;
  }
  const negateMatches = opts.negate === null || node.negate === opts.negate;
  return negateMatches && node.body.length === 1 && hasOnlyChild(node.body[0], {
    type: "Assertion",
    kind: "search_start"
  });
}
|
||
/**
Tests `name` against the pattern used here for JS group names: a first character of `$`, `_`,
or ID_Start, followed by any number of `$`, ZWNJ, ZWJ, or ID_Continue characters.
@param {string} name
@returns {boolean}
*/
function isValidJsGroupName(name) {
  return /^[$_\p{IDS}][$\u200C\u200D\p{IDC}]*$/u.test(name);
}
|
||
/**
Parses `pattern` and returns it as a single node.
@param {string} pattern
@param {object} [options] Extra parser options; `unicodePropertyMap` is always overridden.
@returns {object} A new group wrapping all alternatives when the pattern has more than one
alternative or more than one element; otherwise the first element of the only alternative
(`undefined` if that alternative is empty).
*/
function parseFragment(pattern, options) {
  const ast = parse(pattern, {
    ...options,
    // Providing a custom set of Unicode property names avoids converting some JS Unicode
    // properties (ex: `\p{Alpha}`) to Onig POSIX classes
    unicodePropertyMap: JsUnicodePropertyMap
  });
  const alts = ast.body;
  const needsGroup = alts.length > 1 || alts[0].body.length > 1;
  return needsGroup ? createGroup({ body: alts }) : alts[0].body[0];
}
|
||
/**
Assigns `negate` to `node.negate` and returns the same node (for chaining).
@param {object} node
@param {boolean} negate
@returns {object} `node`
*/
function setNegate(node, negate) {
  node.negate = negate;
  return node;
}
|
||
/**
Assigns `parent` to `node.parent` (this node only) and returns the same node.
@param {object} node
@param {object} parent
@returns {object} `node`
*/
function setParent(node, parent) {
  node.parent = parent;
  return node;
}
|
||
/**
Adds `parent` properties throughout the subtree rooted at `node` (via `addParentProperties`),
then sets `node.parent` itself to `parent`.
@param {object} node Root of the subtree.
@param {object} parent Parent to record on `node`.
@returns {object} `node`
*/
function setParentDeep(node, parent) {
  addParentProperties(node);
  // Assigned after the traversal so the subtree root ends up with the caller's parent
  node.parent = parent;
  return node;
}
|
||
|
||
// src/generate.js
|
||
import { createAlternative as createAlternative2, createCharacter as createCharacter2, createGroup as createGroup2 } from "oniguruma-parser/parser";
|
||
import { traverse as traverse2 } from "oniguruma-parser/traverser";
|
||
/**
Generates output for a transformed AST.
@param {object} ast Transformed AST; `flags`, `body`, `options`, and `_originMap` are read.
@param {object} [options] User options, normalized through `getOptions`.
@returns {{
  pattern: string;
  flags: string;
  options: object;
  _captureTransfers: Map<number, Array<number>>;
  _hiddenCaptures: Array<number>;
}}
@throws {Error} If `rules.recursionLimit` is not an integer from 2 to 20.
*/
function generate(ast, options) {
  const opts = getOptions(options);
  const minTargetEs2024 = isMinTarget(opts.target, "ES2024");
  const minTargetEs2025 = isMinTarget(opts.target, "ES2025");
  const recursionLimit = opts.rules.recursionLimit;
  if (!Number.isInteger(recursionLimit) || recursionLimit < 2 || recursionLimit > 20) {
    throw new Error("Invalid recursionLimit; use 2-20");
  }
  let hasCaseInsensitiveNode = null;
  let hasCaseSensitiveNode = null;
  if (!minTargetEs2025) {
    // Track the effective state of flag i while traversing, to learn which case modes are used
    const iStack = [ast.flags.ignoreCase];
    traverse2(ast, FlagModifierVisitor, {
      getCurrentModI: () => iStack.at(-1),
      popModI() {
        iStack.pop();
      },
      pushModI(isIOn) {
        iStack.push(isIOn);
      },
      setHasCasedChar() {
        if (iStack.at(-1)) {
          hasCaseInsensitiveNode = true;
        } else {
          hasCaseSensitiveNode = true;
        }
      }
    });
  }
  const appliedGlobalFlags = {
    dotAll: ast.flags.dotAll,
    // - Turn global flag i on if a case insensitive node was used and no case sensitive nodes were
    //   used (to avoid unnecessary node expansion).
    // - Turn global flag i off if a case sensitive node was used (since case sensitivity can't be
    //   forced without the use of ES2025 flag groups)
    ignoreCase: !!((ast.flags.ignoreCase || hasCaseInsensitiveNode) && !hasCaseSensitiveNode)
  };
  let lastNode = ast;
  const state = {
    accuracy: opts.accuracy,
    appliedGlobalFlags,
    captureMap: /* @__PURE__ */ new Map(),
    currentFlags: {
      dotAll: ast.flags.dotAll,
      ignoreCase: ast.flags.ignoreCase
    },
    inCharClass: false,
    lastNode,
    originMap: ast._originMap,
    recursionLimit,
    useAppliedIgnoreCase: !!(!minTargetEs2025 && hasCaseInsensitiveNode && hasCaseSensitiveNode),
    useFlagMods: minTargetEs2025,
    useFlagV: minTargetEs2024,
    verbose: opts.verbose
  };
  function gen(node) {
    // Expose the previously generated node to the generator before moving on to this one
    state.lastNode = lastNode;
    lastNode = node;
    const fn = throwIfNullish(generator[node.type], `Unexpected node type "${node.type}"`);
    return fn(node, state, gen);
  }
  const result = {
    pattern: ast.body.map(gen).join("|"),
    // Could reset `lastNode` at this point via `lastNode = ast`, but it isn't needed by flags
    flags: gen(ast.flags),
    options: { ...ast.options }
  };
  if (!minTargetEs2024) {
    // The spread above is shallow, so copy the nested objects before editing them; otherwise
    // the caller's `ast.options.force`/`ast.options.disable` would be mutated
    const { v: _droppedForceV, ...force } = result.options.force;
    result.options.force = force;
    result.options.disable = { ...result.options.disable, v: true };
    result.options.unicodeSetsPlugin = null;
  }
  result._captureTransfers = /* @__PURE__ */ new Map();
  result._hiddenCaptures = [];
  state.captureMap.forEach((value, key) => {
    if (value.hidden) {
      result._hiddenCaptures.push(key);
    }
    if (value.transferTo) {
      getOrInsert(result._captureTransfers, value.transferTo, []).push(key);
    }
  });
  return result;
}
|
||
/**
Traversal visitor that reports, through callbacks on `state`, where cased characters occur.
`state` must provide `getCurrentModI`, `pushModI`, `popModI`, and `setHasCasedChar`.
*/
var FlagModifierVisitor = {
  "*": {
    // On entering any group, push the flag-i state that applies inside it
    enter({ node }, state) {
      if (isAnyGroup(node)) {
        const currentModI = state.getCurrentModI();
        const modIInGroup = node.flags ?
          getNewCurrentFlags({ ignoreCase: currentModI }, node.flags).ignoreCase :
          currentModI;
        state.pushModI(modIInGroup);
      }
    },
    exit({ node }, state) {
      if (isAnyGroup(node)) {
        state.popModI();
      }
    }
  },
  // Backreferences are always reported as cased
  Backreference(_, state) {
    state.setHasCasedChar();
  },
  Character({ node }, state) {
    if (charHasCase(cp(node.value))) {
      state.setHasCasedChar();
    }
  },
  CharacterClassRange({ node, skip }, state) {
    // The range is evaluated as a whole; don't visit its endpoints
    skip();
    if (getCasesOutsideCharClassRange(node, { firstOnly: true }).length) {
      state.setHasCasedChar();
    }
  },
  CharacterSet({ node }, state) {
    if (node.kind === "property" && UnicodePropertiesWithSpecificCase.has(node.value)) {
      state.setHasCasedChar();
    }
  }
};
|
||
/**
Per-node-type generators, keyed by node `type`. Each is called as `fn(node, state, gen)` and
returns the generated string for that node; `gen` generates a child node.
*/
var generator = {
  /**
  Concatenates the output of the alternative's elements.
  @param {AlternativeNode} node
  */
  Alternative({ body }, _, gen) {
    return body.map(gen).join("");
  },
  /**
  @param {AssertionNode} node
  @throws {Error} For any kind other than `string_end`, `string_start`, `word_boundary`.
  */
  Assertion({ kind, negate }) {
    if (kind === "string_end") {
      return "$";
    }
    if (kind === "string_start") {
      return "^";
    }
    if (kind === "word_boundary") {
      return negate ? r`\B` : r`\b`;
    }
    throw new Error(`Unexpected assertion kind "${kind}"`);
  },
  /**
  Emits a numbered backreference.
  @param {BackreferenceNode} node
  @throws {Error} If `ref` is not a number, or (strict accuracy without flag modifiers) if the
  backref is case-insensitive but the referenced group was recorded as case-sensitive.
  */
  Backreference({ ref }, state) {
    if (typeof ref !== "number") {
      throw new Error("Unexpected named backref in transformed AST");
    }
    if (!state.useFlagMods && state.accuracy === "strict" && state.currentFlags.ignoreCase && !state.captureMap.get(ref).ignoreCase) {
      throw new Error("Use of case-insensitive backref to case-sensitive group requires target ES2025 or non-strict accuracy");
    }
    return "\\" + ref;
  },
  /**
  Emits a capturing group and records its details in `state.captureMap` under its number.
  @param {CapturingGroupNode} node
  */
  CapturingGroup(node, state, gen) {
    const { body, name, number } = node;
    const data = { ignoreCase: state.currentFlags.ignoreCase };
    // Groups with an entry in the origin map are copies of another group
    const origin = state.originMap.get(node);
    if (origin) {
      data.hidden = true;
      if (number > origin.number) {
        data.transferTo = origin.number;
      }
    }
    state.captureMap.set(number, data);
    return `(${name ? `?<${name}>` : ""}${body.map(gen).join("|")})`;
  },
  /**
  Emits a single character, escaped as needed. When case insensitivity is being applied
  manually, a cased character is expanded to its case variants.
  @param {CharacterNode} node
  */
  Character({ value }, state) {
    const char = cp(value);
    const escaped = getCharEscape(value, {
      escDigit: state.lastNode.type === "Backreference",
      inCharClass: state.inCharClass,
      useFlagV: state.useFlagV
    });
    if (escaped !== char) {
      return escaped;
    }
    if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && charHasCase(char)) {
      const cases = getIgnoreCaseMatchChars(char);
      return state.inCharClass ? cases.join("") : cases.length > 1 ? `[${cases.join("")}]` : cases[0];
    }
    return char;
  },
  /**
  @param {CharacterClassNode} node
  @throws {Error} If an intersection or a nested class that can't be flattened is used without
  flag v.
  */
  CharacterClass(node, state, gen) {
    const { kind, negate, parent } = node;
    let { body } = node;
    if (kind === "intersection" && !state.useFlagV) {
      throw new Error("Use of class intersection requires min target ES2024");
    }
    if (envFlags.bugFlagVLiteralHyphenIsRange && state.useFlagV && body.some(isLiteralHyphen)) {
      // Move a literal hyphen to the front of the class (single copy)
      body = [createCharacter2(45), ...body.filter((kid) => !isLiteralHyphen(kid))];
    }
    // Reads `body` lazily, so later reassignments of `body` are picked up
    const genClass = () => `[${negate ? "^" : ""}${body.map(gen).join(kind === "intersection" ? "&&" : "")}]`;
    if (!state.inCharClass) {
      if (
        // Already established `kind !== 'intersection'` if `!state.useFlagV`; don't check again
        (!state.useFlagV || envFlags.bugNestedClassIgnoresNegation) && !negate
      ) {
        const negatedChildClasses = body.filter(
          (kid) => kid.type === "CharacterClass" && kid.kind === "union" && kid.negate
        );
        if (negatedChildClasses.length) {
          // Pull negated child classes out into alternatives of a new group:
          // the remaining class (if non-empty) first, then one alternative per negated class
          const group = createGroup2();
          const groupFirstAlt = group.body[0];
          group.parent = parent;
          groupFirstAlt.parent = group;
          body = body.filter((kid) => !negatedChildClasses.includes(kid));
          node.body = body;
          if (body.length) {
            node.parent = groupFirstAlt;
            groupFirstAlt.body.push(node);
          } else {
            group.body.pop();
          }
          negatedChildClasses.forEach((cc) => {
            const newAlt = createAlternative2({ body: [cc] });
            cc.parent = newAlt;
            newAlt.parent = group;
            group.body.push(newAlt);
          });
          return gen(group);
        }
      }
      state.inCharClass = true;
      const result = genClass();
      state.inCharClass = false;
      return result;
    }
    const firstEl = body[0];
    if (
      // Already established that the parent is a char class via `inCharClass`; don't check again
      kind === "union" && !negate && firstEl && // Allows many nested classes to work with `target` ES2018 which doesn't support nesting
      ((!state.useFlagV || !state.verbose) && parent.kind === "union" && !(envFlags.bugFlagVLiteralHyphenIsRange && state.useFlagV) || !state.verbose && parent.kind === "intersection" && // JS doesn't allow intersection with union or ranges
      body.length === 1 && firstEl.type !== "CharacterClassRange")
    ) {
      // Flatten: emit the nested class's contents without its own brackets
      return body.map(gen).join("");
    }
    if (!state.useFlagV && parent.type === "CharacterClass") {
      throw new Error("Use of nested character class requires min target ES2024");
    }
    return genClass();
  },
  /**
  Emits `min-max`, followed (when case insensitivity is applied manually) by the case variants
  of the range's characters that fall outside the range.
  @param {CharacterClassRangeNode} node
  */
  CharacterClassRange(node, state) {
    const min = node.min.value;
    const max = node.max.value;
    const escOpts = {
      escDigit: false,
      inCharClass: true,
      useFlagV: state.useFlagV
    };
    const minStr = getCharEscape(min, escOpts);
    const maxStr = getCharEscape(max, escOpts);
    const extraChars = /* @__PURE__ */ new Set();
    if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase) {
      const charsOutsideRange = getCasesOutsideCharClassRange(node);
      const ranges = getCodePointRangesFromChars(charsOutsideRange);
      ranges.forEach((value) => {
        extraChars.add(
          Array.isArray(value) ? `${getCharEscape(value[0], escOpts)}-${getCharEscape(value[1], escOpts)}` : getCharEscape(value, escOpts)
        );
      });
    }
    return `${minStr}-${maxStr}${[...extraChars].join("")}`;
  },
  /**
  @param {CharacterSetNode} node
  @throws {Error} For an unexpected kind, or for a case-specific Unicode property used
  case-insensitively while case insensitivity is applied manually.
  */
  CharacterSet({ kind, negate, value, key }, state) {
    if (kind === "dot") {
      return state.currentFlags.dotAll ? state.appliedGlobalFlags.dotAll || state.useFlagMods ? "." : "[^]" : (
        // Onig's only line break char is line feed, unlike JS
        r`[^\n]`
      );
    }
    if (kind === "digit") {
      return negate ? r`\D` : r`\d`;
    }
    if (kind === "property") {
      if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && UnicodePropertiesWithSpecificCase.has(value)) {
        throw new Error(`Unicode property "${value}" can't be case-insensitive when other chars have specific case`);
      }
      return `${negate ? r`\P` : r`\p`}{${key ? `${key}=` : ""}${value}}`;
    }
    if (kind === "word") {
      return negate ? r`\W` : r`\w`;
    }
    throw new Error(`Unexpected character set kind "${kind}"`);
  },
  /**
  Builds the flags string from flag i (as applied globally), s, and y.
  @param {FlagsNode} node
  */
  Flags(node, state) {
    return (
      // The transformer should never turn on the properties for flags d, g, m since Onig doesn't
      // have equivs. Flag m is never used since Onig uses different line break chars than JS
      // (node.hasIndices ? 'd' : '') +
      // (node.global ? 'g' : '') +
      // (node.multiline ? 'm' : '') +
      (state.appliedGlobalFlags.ignoreCase ? "i" : "") + (node.dotAll ? "s" : "") + (node.sticky ? "y" : "")
    );
  },
  /**
  Emits a non-capturing group. The wrapper is dropped (unless verbose) for a single-alternative,
  non-atomic group that isn't quantified and has no flag modifiers to emit.
  @param {GroupNode} node
  */
  Group({ atomic: atomic2, body, flags, parent }, state, gen) {
    const currentFlags = state.currentFlags;
    if (flags) {
      state.currentFlags = getNewCurrentFlags(currentFlags, flags);
    }
    const contents = body.map(gen).join("|");
    const result = !state.verbose && body.length === 1 && // Single alt
    parent.type !== "Quantifier" && !atomic2 && (!state.useFlagMods || !flags) ? contents : `(?${getGroupPrefix(atomic2, flags, state.useFlagMods)}${contents})`;
    // Restore the flags that applied before entering the group
    state.currentFlags = currentFlags;
    return result;
  },
  /**
  @param {LookaroundAssertionNode} node
  */
  LookaroundAssertion({ body, kind, negate }, _, gen) {
    const prefix = `${kind === "lookahead" ? "" : "<"}${negate ? "!" : "="}`;
    return `(?${prefix}${body.map(gen).join("|")})`;
  },
  /**
  @param {QuantifierNode} node
  */
  Quantifier(node, _, gen) {
    return gen(node.body) + getQuantifierStr(node);
  },
  /**
  Emits recursion syntax that embeds `state.recursionLimit`.
  @param {SubroutineNode & {isRecursive: true}} node
  @throws {Error} If the subroutine is not marked recursive.
  */
  Subroutine({ isRecursive, ref }, state) {
    if (!isRecursive) {
      throw new Error("Unexpected non-recursive subroutine in transformed AST");
    }
    const limit = state.recursionLimit;
    return ref === 0 ? `(?R=${limit})` : r`\g<${ref}&R=${limit}>`;
  }
};
|
||
// Characters that get a backslash prefix when generated outside of a character class
var BaseEscapeChars = /* @__PURE__ */ new Set([
  "$",
  "(",
  ")",
  "*",
  "+",
  ".",
  "?",
  "[",
  "\\",
  "]",
  "^",
  "{",
  "|",
  "}"
]);
|
||
// Characters that get a backslash prefix inside a character class when flag v isn't used
var CharClassEscapeChars = /* @__PURE__ */ new Set([
  "-",
  "\\",
  "]",
  "^",
  // Literal `[` doesn't require escaping with flag u, but this can help work around regex source
  // linters and regex syntax processors that expect unescaped `[` to create a nested class
  "["
]);
|
||
// Characters that get a backslash prefix inside a character class when flag v is used
var CharClassEscapeCharsFlagV = /* @__PURE__ */ new Set([
  "(",
  ")",
  "-",
  "/",
  "[",
  "\\",
  "]",
  "^",
  "{",
  "|",
  "}",
  // Double punctuators; also includes already-listed `-` and `^`
  "!",
  "#",
  "$",
  "%",
  "&",
  "*",
  "+",
  ",",
  ".",
  ":",
  ";",
  "<",
  "=",
  ">",
  "?",
  "@",
  "`",
  "~"
]);
|
||
// Code points that are always generated using the mapped escape sequence
var CharCodeEscapeMap = /* @__PURE__ */ new Map([
  [9, String.raw`\t`],
  // horizontal tab
  [10, String.raw`\n`],
  // line feed
  [11, String.raw`\v`],
  // vertical tab
  [12, String.raw`\f`],
  // form feed
  [13, String.raw`\r`],
  // carriage return
  [8232, String.raw`\u2028`],
  // line separator
  [8233, String.raw`\u2029`],
  // paragraph separator
  [65279, String.raw`\uFEFF`]
  // ZWNBSP/BOM
]);
|
||
// Matches a string that consists of exactly one code point with the Unicode `Cased` property
var casedRe = /^\p{Cased}$/u;
/**
Reports whether `char` is a single cased character (per `casedRe`).
@param {string} char
@returns {boolean}
*/
function charHasCase(char) {
  return casedRe.test(char);
}
|
||
/**
For each cased character within the range `node.min.value`-`node.max.value`, collects its
case-insensitive match characters whose code points fall outside the range.
@param {{min: {value: number}, max: {value: number}}} node Character class range node.
@param {{firstOnly?: boolean}} [options] With `firstOnly`, stop after the first in-range
character that yields any out-of-range variants.
@returns {Array<string>} Out-of-range variants, in discovery order (may contain repeats).
*/
function getCasesOutsideCharClassRange(node, options) {
  const firstOnly = !!options?.firstOnly;
  const min = node.min.value;
  const max = node.max.value;
  const found = [];
  // Skip scanning for these very wide ranges and report no outside cases
  if (min < 65 && (max === 65535 || max >= 131071) || min === 65536 && max >= 131071) {
    return found;
  }
  for (let i = min; i <= max; i++) {
    const char = cp(i);
    if (!charHasCase(char)) {
      continue;
    }
    const charsOutsideRange = getIgnoreCaseMatchChars(char).filter((caseOfChar) => {
      const num = caseOfChar.codePointAt(0);
      return num < min || num > max;
    });
    if (charsOutsideRange.length) {
      found.push(...charsOutsideRange);
      if (firstOnly) {
        break;
      }
    }
  }
  return found;
}
|
||
/**
Returns the pattern text for a code point: a mapped escape from `CharCodeEscapeMap`, a
`\xHH`/`\u{H...}` numeric escape, or the character itself (backslash-prefixed when it is in the
escape set that applies to the current context).
@param {number} codePoint
@param {{escDigit: boolean, inCharClass: boolean, useFlagV: boolean}} options `escDigit` forces
a numeric escape for ASCII digits; `inCharClass`/`useFlagV` select the escape set.
@returns {string}
*/
function getCharEscape(codePoint, { escDigit, inCharClass, useFlagV }) {
  if (CharCodeEscapeMap.has(codePoint)) {
    return CharCodeEscapeMap.get(codePoint);
  }
  if (
    // Control chars, etc.; condition modeled on the Chrome developer console's display for strings
    codePoint < 32 || codePoint > 126 && codePoint < 160 || // Unicode planes 4-16; unassigned, special purpose, and private use area
    codePoint > 262143 || // Avoid corrupting a preceding backref by immediately following it with a literal digit
    escDigit && isDigitCharCode(codePoint)
  ) {
    const hex = codePoint.toString(16).toUpperCase();
    return codePoint > 255 ? `\\u{${hex}}` : `\\x${hex.padStart(2, "0")}`;
  }
  const escapeChars = inCharClass ? useFlagV ? CharClassEscapeCharsFlagV : CharClassEscapeChars : BaseEscapeChars;
  const char = cp(codePoint);
  return (escapeChars.has(char) ? "\\" : "") + char;
}
|
||
/**
Converts characters to their code points and groups runs of consecutive code points.
Repeated characters are counted once, so they can't split a run into overlapping ranges.
@param {Array<string>} chars
@returns {Array<number | [number, number]>} In ascending order: a lone code point as a number,
a run of two or more consecutive code points as `[first, last]`.
*/
function getCodePointRangesFromChars(chars) {
  const uniqueCodePoints = new Set(chars.map((char) => char.codePointAt(0)));
  const codePoints = [...uniqueCodePoints].sort((a, b) => a - b);
  const values = [];
  let start = null;
  for (let i = 0; i < codePoints.length; i++) {
    if (codePoints[i + 1] === codePoints[i] + 1) {
      // The run continues; remember where it began
      start ??= codePoints[i];
    } else if (start === null) {
      values.push(codePoints[i]);
    } else {
      values.push([start, codePoints[i]]);
      start = null;
    }
  }
  return values;
}
|
||
/**
Returns the text that follows `(?` when opening a group: `>` for an atomic group, otherwise
optional flag modifiers followed by `:`.
Modifiers are only emitted when `useFlagMods` is set. A flag that is both enabled and disabled
is emitted as disabled only, and `-` is emitted only when at least one flag is disabled, since
`(?i-i:` and `(?-:` are syntax errors in JS.
@param {boolean} atomic2
@param {{enable?: {dotAll?: boolean, ignoreCase?: boolean}, disable?: {dotAll?: boolean, ignoreCase?: boolean}} | null | undefined} flagMods
@param {boolean} useFlagMods
@returns {string}
*/
function getGroupPrefix(atomic2, flagMods, useFlagMods) {
  if (atomic2) {
    return ">";
  }
  let mods = "";
  if (flagMods && useFlagMods) {
    const { enable, disable } = flagMods;
    const off = (disable?.ignoreCase ? "i" : "") + (disable?.dotAll ? "s" : "");
    // Disabling takes precedence when a flag is listed on both sides
    const on = (enable?.ignoreCase && !disable?.ignoreCase ? "i" : "") + (enable?.dotAll && !disable?.dotAll ? "s" : "");
    mods = on + (off ? `-${off}` : "");
  }
  return `${mods}:`;
}
|
||
/**
Returns the quantifier syntax for a quantifier node: `?`, `*`, `+`, `{n}`, or `{n,m}`/`{n,}`,
followed by the suffix for its kind (none for greedy, `?` for lazy, `+` for possessive).
@param {{kind: string, max: number, min: number}} node
@returns {string}
@throws {Error} If `kind` is not `greedy`, `lazy`, or `possessive`.
*/
function getQuantifierStr({ kind, max, min }) {
  const suffixByKind = {
    greedy: "",
    lazy: "?",
    possessive: "+"
  };
  if (!Object.hasOwn(suffixByKind, kind)) {
    // Without this, the text "undefined" would silently be appended to the pattern
    throw new Error(`Unexpected quantifier kind "${kind}"`);
  }
  let base;
  if (!min && max === 1) {
    base = "?";
  } else if (!min && max === Infinity) {
    base = "*";
  } else if (min === 1 && max === Infinity) {
    base = "+";
  } else if (min === max) {
    base = `{${min}}`;
  } else {
    base = `{${min},${max === Infinity ? "" : max}}`;
  }
  return base + suffixByKind[kind];
}
|
||
/**
Reports whether a node's `type` is `CapturingGroup`, `Group`, or `LookaroundAssertion`.
@param {{type: string}} node
@returns {boolean}
*/
function isAnyGroup({ type }) {
  return type === "CapturingGroup" || type === "Group" || type === "LookaroundAssertion";
}
|
||
/**
Reports whether `value` is the code of an ASCII digit `0`-`9` (48 through 57).
@param {number} value
@returns {boolean}
*/
function isDigitCharCode(value) {
  return value > 47 && value < 58;
}
|
||
/**
Reports whether a node is a `Character` whose value is 45 (the code point of `-`).
@param {{type: string, value?: number}} node
@returns {boolean}
*/
function isLiteralHyphen({ type, value }) {
  return type === "Character" && value === 45;
}
|
||
|
||
// src/subclass.js
|
||
/**
`RegExp` subclass that post-processes match results: captures marked hidden are removed from
results, captures marked with a transfer have their value copied to another capture, and the
`clip_search` strategy runs the search on the input sliced at `lastIndex`. Compilation of the
pattern can be deferred until the first `exec` call.
*/
var EmulatedRegExp = class _EmulatedRegExp extends RegExp {
  /**
  @type {Map<number, {
    hidden?: true;
    transferTo?: number;
  }>}
  */
  #captureMap = /* @__PURE__ */ new Map();
  /**
  The regex that performs matching; `null` until compiled when using lazy compilation.
  @type {RegExp | EmulatedRegExp | null}
  */
  #compiled = null;
  /**
  @type {string}
  */
  #pattern;
  /**
  Capture number -> group name; built on first need from the source.
  @type {Map<number, string>?}
  */
  #nameMap = null;
  /**
  @type {string?}
  */
  #strategy = null;
  /**
  Can be used to serialize the instance.
  @type {EmulatedRegExpOptions}
  */
  rawOptions = {};
  // Override the getter with one that works with lazy-compiled regexes
  get source() {
    return this.#pattern || "(?:)";
  }
  /**
  @overload
  @param {string} pattern
  @param {string} [flags]
  @param {EmulatedRegExpOptions} [options]
  */
  /**
  @overload
  @param {EmulatedRegExp} pattern
  @param {string} [flags]
  */
  constructor(pattern, flags, options) {
    const lazyCompile = !!options?.lazyCompile;
    if (pattern instanceof RegExp) {
      if (options) {
        throw new Error("Cannot provide options when copying a regexp");
      }
      const re = pattern;
      super(re, flags);
      this.#pattern = re.source;
      if (re instanceof _EmulatedRegExp) {
        // Carry over the emulation details of the copied instance
        this.#captureMap = re.#captureMap;
        this.#nameMap = re.#nameMap;
        this.#strategy = re.#strategy;
        this.rawOptions = re.rawOptions;
      }
    } else {
      const opts = {
        hiddenCaptures: [],
        strategy: null,
        transfers: [],
        ...options
      };
      // With lazy compilation, construct with an empty pattern and compile the real one later
      super(lazyCompile ? "" : pattern, flags);
      this.#pattern = pattern;
      this.#captureMap = createCaptureMap(opts.hiddenCaptures, opts.transfers);
      this.#strategy = opts.strategy;
      this.rawOptions = options ?? {};
    }
    if (!lazyCompile) {
      this.#compiled = this;
    }
  }
  /**
  Called internally by all String/RegExp methods that use regexes.
  @override
  @param {string} str
  @returns {RegExpExecArray?}
  */
  exec(str) {
    if (!this.#compiled) {
      // First use of a lazy-compiled regex: build the real one, without the lazy option
      const { lazyCompile, ...rest } = this.rawOptions;
      this.#compiled = new _EmulatedRegExp(this.#pattern, this.flags, rest);
    }
    const useLastIndex = this.global || this.sticky;
    const pos = this.lastIndex;
    if (this.#strategy === "clip_search" && useLastIndex && pos) {
      // Search the input from `pos` onward as if it were the whole string, then shift the
      // reported positions back by `pos`
      this.lastIndex = 0;
      const match = this.#execCore(str.slice(pos));
      if (match) {
        adjustMatchDetailsForOffset(match, pos, str, this.hasIndices);
        this.lastIndex += pos;
      }
      return match;
    }
    return this.#execCore(str);
  }
  /**
  Adds support for hidden and transfer captures.
  @param {string} str
  @returns {RegExpExecArray?}
  @throws {Error} If a capture's transfer target is hidden or not yet seen.
  */
  #execCore(str) {
    // Keep `lastIndex` in sync with the compiled regex, which may be a separate instance
    this.#compiled.lastIndex = this.lastIndex;
    const match = super.exec.call(this.#compiled, str);
    this.lastIndex = this.#compiled.lastIndex;
    if (!match || !this.#captureMap.size) {
      return match;
    }
    // Rebuild the match array (and indices) from copies, keeping only visible captures
    const matchCopy = [...match];
    match.length = 1;
    let indicesCopy;
    if (this.hasIndices) {
      indicesCopy = [...match.indices];
      match.indices.length = 1;
    }
    // Original capture number -> position in the rebuilt match (`null` for hidden captures)
    const mappedNums = [0];
    for (let i = 1; i < matchCopy.length; i++) {
      const { hidden, transferTo } = this.#captureMap.get(i) ?? {};
      if (hidden) {
        mappedNums.push(null);
      } else {
        mappedNums.push(match.length);
        match.push(matchCopy[i]);
        if (this.hasIndices) {
          match.indices.push(indicesCopy[i]);
        }
      }
      if (transferTo && matchCopy[i] !== void 0) {
        const to = mappedNums[transferTo];
        if (!to) {
          // Report the requested target; `to` itself is always falsy here
          throw new Error(`Invalid capture transfer to "${transferTo}"`);
        }
        match[to] = matchCopy[i];
        if (this.hasIndices) {
          match.indices[to] = indicesCopy[i];
        }
        if (match.groups) {
          if (!this.#nameMap) {
            this.#nameMap = createNameMap(this.source);
          }
          const name = this.#nameMap.get(transferTo);
          if (name) {
            match.groups[name] = matchCopy[i];
            if (this.hasIndices) {
              match.indices.groups[name] = indicesCopy[i];
            }
          }
        }
      }
    }
    return match;
  }
};
|
||
/**
Shifts the positions recorded on a match by `offset` and replaces its `input`. Used after
matching against a slice of the original string. Mutates `match` in place.
@param {RegExpExecArray} match
@param {number} offset Amount added to `index` and to every start/end pair in `indices`.
@param {string} input The full original string.
@param {boolean} hasIndices Whether `match.indices` (and `indices.groups`) should be shifted.
*/
function adjustMatchDetailsForOffset(match, offset, input, hasIndices) {
  match.index += offset;
  match.input = input;
  if (!hasIndices) {
    return;
  }
  const shift = ([start, end]) => [start + offset, end + offset];
  const indices = match.indices;
  for (let i = 0; i < indices.length; i++) {
    // Entries for non-participating captures are left as they are
    if (indices[i]) {
      indices[i] = shift(indices[i]);
    }
  }
  const groupIndices = indices.groups;
  if (groupIndices) {
    for (const key of Object.keys(groupIndices)) {
      if (groupIndices[key]) {
        groupIndices[key] = shift(groupIndices[key]);
      }
    }
  }
}
|
||
/**
Builds a map from capture number to its emulation details.
@param {Iterable<number>} hiddenCaptures Capture numbers to mark with `hidden: true`.
@param {Iterable<[number, Iterable<number>]>} transfers Pairs of a target capture number and
the capture numbers that transfer to it; each source is marked with `transferTo`.
@returns {Map<number, {hidden?: true, transferTo?: number}>}
*/
function createCaptureMap(hiddenCaptures, transfers) {
  const captureMap = /* @__PURE__ */ new Map();
  for (const num of hiddenCaptures) {
    captureMap.set(num, {
      hidden: true
    });
  }
  for (const [to, from] of transfers) {
    for (const num of from) {
      // Reuses the entry of a hidden capture so both properties end up on one object
      getOrInsert(captureMap, num, {}).transferTo = to;
    }
  }
  return captureMap;
}
|
||
/**
Scans a regex pattern string and maps capture numbers to names for named capturing groups.
Capturing groups are counted in order of their opening paren; parens inside character classes
and escaped characters are not counted.
@param {string} pattern
@returns {Map<number, string>} Capture number -> group name (named groups only).
*/
function createNameMap(pattern) {
  // Matches the start of a capturing group (named or unnamed), or else any single character,
  // optionally preceded by a backslash
  const re = /(?<capture>\((?:\?<(?![=!])(?<name>[^>]+)>|(?!\?)))|\\?./gsu;
  const map = /* @__PURE__ */ new Map();
  let numCharClassesOpen = 0;
  let numCaptures = 0;
  for (let match = re.exec(pattern); match; match = re.exec(pattern)) {
    const { 0: m, groups: { capture, name } } = match;
    if (m === "[") {
      numCharClassesOpen++;
    } else if (!numCharClassesOpen) {
      if (capture) {
        numCaptures++;
        if (name) {
          map.set(numCaptures, name);
        }
      }
    } else if (m === "]") {
      numCharClassesOpen--;
    }
  }
  return map;
}
|
||
|
||
// src/index.js
|
||
import { parse as parse2 } from "oniguruma-parser/parser";
|
||
import { atomic, possessive } from "regex/internals";
|
||
import { recursion } from "regex-recursion";
|
||
/**
Converts a pattern to a regex via `toRegExpDetails`.
@param {string} pattern
@param {object} [options]
@returns {RegExp | EmulatedRegExp} An `EmulatedRegExp` when the details include subclass
options; otherwise a native `RegExp`.
*/
function toRegExp(pattern, options) {
  const d = toRegExpDetails(pattern, options);
  return d.options ?
    new EmulatedRegExp(d.pattern, d.flags, d.options) :
    new RegExp(d.pattern, d.flags);
}
|
||
/**
Converts a pattern into the details needed to construct a regex: parses it, transforms and
generates the result, then runs the output through the recursion, possessive, and atomic
processors in that order.
@param {string} pattern
@param {object} [options] Normalized through `getOptions`.
@returns {{pattern: string, flags: string, options?: object}} `options` (for `EmulatedRegExp`)
is present only when not avoiding the subclass and there are hidden captures, capture
transfers, a strategy, or lazy compilation applies.
@throws {Error} If `avoidSubclass` is combined with a finite `lazyCompileLength`.
*/
function toRegExpDetails(pattern, options) {
  const opts = getOptions(options);
  const onigurumaAst = parse2(pattern, {
    flags: opts.flags,
    normalizeUnknownPropertyNames: true,
    rules: {
      captureGroup: opts.rules.captureGroup,
      singleline: opts.rules.singleline
    },
    skipBackrefValidation: opts.rules.allowOrphanBackrefs,
    unicodePropertyMap: JsUnicodePropertyMap
  });
  const regexPlusAst = transform(onigurumaAst, {
    accuracy: opts.accuracy,
    asciiWordBoundaries: opts.rules.asciiWordBoundaries,
    avoidSubclass: opts.avoidSubclass,
    bestEffortTarget: opts.target
  });
  const generated = generate(regexPlusAst, opts);
  const recursionResult = recursion(generated.pattern, {
    captureTransfers: generated._captureTransfers,
    hiddenCaptures: generated._hiddenCaptures,
    mode: "external"
  });
  const possessiveResult = possessive(recursionResult.pattern);
  const atomicResult = atomic(possessiveResult.pattern, {
    captureTransfers: recursionResult.captureTransfers,
    hiddenCaptures: recursionResult.hiddenCaptures
  });
  const details = {
    pattern: atomicResult.pattern,
    flags: `${opts.hasIndices ? "d" : ""}${opts.global ? "g" : ""}${generated.flags}${generated.options.disable.v ? "u" : "v"}`
  };
  if (opts.avoidSubclass) {
    if (opts.lazyCompileLength !== Infinity) {
      throw new Error("Lazy compilation requires subclass");
    }
    return details;
  }
  const hiddenCaptures = atomicResult.hiddenCaptures.sort((a, b) => a - b);
  const transfers = Array.from(atomicResult.captureTransfers);
  const strategy = regexPlusAst._strategy;
  const lazyCompile = details.pattern.length >= opts.lazyCompileLength;
  if (hiddenCaptures.length || transfers.length || strategy || lazyCompile) {
    // Include only the options that are in use
    details.options = {
      ...hiddenCaptures.length && { hiddenCaptures },
      ...transfers.length && { transfers },
      ...strategy && { strategy },
      ...lazyCompile && { lazyCompile }
    };
  }
  return details;
}
|
||
export {
|
||
EmulatedRegExp,
|
||
toRegExp,
|
||
toRegExpDetails
|
||
};
|
||
//# sourceMappingURL=index.js.map
|