118 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			118 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * @typedef CoreOptions
 * @property {ReadonlyArray<string>} [subset=[]]
 *   Characters to escape; when given, only these characters are escaped.
 * @property {boolean} [escapeOnly=false]
 *   Whether to only escape possibly dangerous characters.
 *   Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``.
 *
 * @typedef FormatOptions
 * @property {(code: number, next: number, options: CoreWithFormatOptions) => string} format
 *   Format strategy.
 *
 * @typedef {CoreOptions & FormatOptions & import('./util/format-smart.js').FormatSmartOptions} CoreWithFormatOptions
 *   Core options combined with the format strategy and its options.
 */
 | |
| 
 | |
// Characters that are escaped when no explicit subset is given:
// `"`, `&`, `'`, `<`, `>`, and `` ` ``.
const defaultSubsetRegex = /["&'<>`]/g
// A high surrogate directly followed by a low surrogate (one astral character).
const surrogatePairsRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
// Single code units that are escaped in the non-`escapeOnly` mode:
// U+0001–U+0009, U+000B, U+000C, U+000E–U+001F, U+007F, U+0081, U+008D,
// U+008F, U+0090, U+009D, and everything from U+00A0 through U+FFFF.
// NUL, LF, CR, and printable ASCII are not matched.
const controlCharactersRegex =
  // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
  /[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g
// Characters with a special meaning in regular expression source.
const regexEscapeRegex = /[|\\{}()[\]^$+*?.]/g

// Compiled expressions, keyed by the identity of the subset array they were
// built from.
/** @type {WeakMap<ReadonlyArray<string>, RegExp>} */
const subsetToRegexCache = new WeakMap()
 | |
| 
 | |
/**
 * Encode certain characters in `value`.
 *
 * The characters matched by `options.subset` (or, without a subset, by
 * `defaultSubsetRegex`) are passed to `options.format` first.
 * When `options.subset` or `options.escapeOnly` is given, nothing else is
 * encoded; otherwise surrogate pairs and everything matched by
 * `controlCharactersRegex` are encoded as well.
 *
 * @param {string} value
 *   Value to encode.
 * @param {CoreWithFormatOptions} options
 *   Configuration; `options.format` receives the code point to encode, the
 *   code unit that follows the match (`NaN` at the end of the input), and
 *   `options` itself.
 * @returns {string}
 *   Encoded value.
 */
export function core(value, options) {
  value = value.replace(
    options.subset
      ? charactersToExpressionCached(options.subset)
      : defaultSubsetRegex,
    encode
  )

  if (options.subset || options.escapeOnly) {
    return value
  }

  return (
    value
      // Surrogate pairs.
      .replace(surrogatePairsRegex, encode)
      // Remaining single code units matched by `controlCharactersRegex`.
      .replace(controlCharactersRegex, encode)
  )

  /**
   * Encode one match, which is either a single code unit or a surrogate pair.
   *
   * @param {string} match
   *   Matched character.
   * @param {number} index
   *   Position of `match` in `all`.
   * @param {string} all
   *   Whole string that is being replaced in.
   * @returns {string}
   *   Encoded character.
   */
  function encode(match, index, all) {
    // `codePointAt` combines a surrogate pair into one code point and yields
    // the code unit itself for anything else, so an astral character listed
    // in `subset` is encoded whole instead of as its high surrogate only.
    const code = match.codePointAt(0)

    // An empty match (possible with an empty entry in `subset`) has no
    // character to encode.
    if (code === undefined) {
      return ''
    }

    return options.format(code, all.charCodeAt(index + match.length), options)
  }
}
 | |
| 
 | |
/**
 * Get the expression for `subset`, compiling it with `charactersToExpression`
 * only the first time a given array is seen.
 *
 * Results are stored in `subsetToRegexCache`, a `WeakMap` keyed by the array
 * itself, so a different array with the same contents is compiled again.
 * This can improve performance when tooling passes the same subset array
 * repeatedly.
 *
 * @param {ReadonlyArray<string>} subset
 *   Characters to match.
 * @returns {RegExp}
 *   Expression for `subset`.
 */
function charactersToExpressionCached(subset) {
  const known = subsetToRegexCache.get(subset)

  if (known) {
    return known
  }

  const expression = charactersToExpression(subset)
  subsetToRegexCache.set(subset, expression)
  return expression
}
 | |
| 
 | |
/**
 * Build a global expression that matches any of the strings in `subset`.
 *
 * Each entry is escaped so that it is matched literally.
 * Empty entries are ignored, and when nothing is left the result is an
 * expression that never matches (an empty alternative would instead match
 * the empty string at every position).
 *
 * @param {ReadonlyArray<string>} subset
 *   Characters to match.
 * @returns {RegExp}
 *   Expression with the `g` flag set.
 */
function charactersToExpression(subset) {
  /** @type {Array<string>} */
  const groups = []

  for (const character of subset) {
    if (character) {
      groups.push(character.replace(regexEscapeRegex, '\\$&'))
    }
  }

  // `(?!)` is an empty negative lookahead, which fails everywhere.
  const source = groups.length > 0 ? '(?:' + groups.join('|') + ')' : '(?!)'

  return new RegExp(source, 'g')
}
 |