Files

190 lines
4.7 KiB
JavaScript

/**
* @import {Root} from 'hast'
* @import {ParserError} from 'parse5'
* @import {Value} from 'vfile'
* @import {ErrorCode, Options} from './types.js'
*/
import {ok as assert} from 'devlop'
import {fromParse5} from 'hast-util-from-parse5'
import {parse, parseFragment} from 'parse5'
import {VFile} from 'vfile'
import {VFileMessage} from 'vfile-message'
import {errors} from './errors.js'
const base = 'https://html.spec.whatwg.org/multipage/parsing.html#parse-error-'
const dashToCamelRe = /-[a-z]/g
const formatCRe = /%c(?:([-+])(\d+))?/g
const formatXRe = /%x/g
const fatalities = {2: true, 1: false, 0: null}
/** @type {Readonly<Options>} */
const emptyOptions = {}
/**
* Turn serialized HTML into a hast tree.
*
* @param {VFile | Value} value
* Serialized HTML to parse.
* @param {Readonly<Options> | null | undefined} [options]
* Configuration (optional).
* @returns {Root}
* Tree.
*/
export function fromHtml(value, options) {
const settings = options || emptyOptions
const onerror = settings.onerror
const file = value instanceof VFile ? value : new VFile(value)
const parseFunction = settings.fragment ? parseFragment : parse
const document = String(file)
const p5Document = parseFunction(document, {
sourceCodeLocationInfo: true,
// Note `parse5` types currently do not allow `undefined`.
onParseError: settings.onerror ? internalOnerror : null,
scriptingEnabled: false
})
// `parse5` returns document which are always mapped to roots.
return /** @type {Root} */ (
fromParse5(p5Document, {
file,
space: settings.space,
verbose: settings.verbose
})
)
/**
* Handle a parse error.
*
* @param {ParserError} error
* Parse5 error.
* @returns {undefined}
* Nothing.
*/
function internalOnerror(error) {
const code = error.code
const name = camelcase(code)
const setting = settings[name]
const config = setting === null || setting === undefined ? true : setting
const level = typeof config === 'number' ? config : config ? 1 : 0
if (level) {
const info = errors[name]
assert(info, 'expected known error from `parse5`')
const message = new VFileMessage(format(info.reason), {
place: {
start: {
line: error.startLine,
column: error.startCol,
offset: error.startOffset
},
end: {
line: error.endLine,
column: error.endCol,
offset: error.endOffset
}
},
ruleId: code,
source: 'hast-util-from-html'
})
if (file.path) {
message.file = file.path
message.name = file.path + ':' + message.name
}
message.fatal = fatalities[level]
message.note = format(info.description)
message.url = info.url === false ? undefined : base + code
assert(onerror, '`internalOnerror` is not passed if `onerror` is not set')
onerror(message)
}
/**
* Format a human readable string about an error.
*
* @param {string} value
* Value to format.
* @returns {string}
* Formatted.
*/
function format(value) {
return value.replace(formatCRe, formatC).replace(formatXRe, formatX)
/**
* Format the character.
*
* @param {string} _
* Match.
* @param {string} $1
* Sign (`-` or `+`, optional).
* @param {string} $2
* Offset.
* @returns {string}
* Formatted.
*/
function formatC(_, $1, $2) {
const offset =
($2 ? Number.parseInt($2, 10) : 0) * ($1 === '-' ? -1 : 1)
const char = document.charAt(error.startOffset + offset)
return visualizeCharacter(char)
}
/**
* Format the character code.
*
* @returns {string}
* Formatted.
*/
function formatX() {
return visualizeCharacterCode(document.charCodeAt(error.startOffset))
}
}
}
}
/**
* @param {string} value
* Error code in dash case.
* @returns {ErrorCode}
* Error code in camelcase.
*/
function camelcase(value) {
// This should match an error code.
return /** @type {ErrorCode} */ (value.replace(dashToCamelRe, dashToCamel))
}
/**
* @param {string} $0
* Match.
* @returns {string}
* Camelcased.
*/
function dashToCamel($0) {
return $0.charAt(1).toUpperCase()
}
/**
* @param {string} char
* Character.
* @returns {string}
* Formatted.
*/
function visualizeCharacter(char) {
return char === '`' ? '` ` `' : char
}
/**
* @param {number} charCode
* Character code.
* @returns {string}
* Formatted.
*/
function visualizeCharacterCode(charCode) {
return '0x' + charCode.toString(16).toUpperCase()
}