Refactor routing in App component to enhance navigation and improve error handling by integrating dynamic routes and updating the NotFound route.

This commit is contained in:
becarta
2025-05-23 12:43:00 +02:00
parent f40db0f5c9
commit a544759a3b
11127 changed files with 1647032 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'
/**
* Augment the types of `micromark-util-types` with the token field and the
* token types that this extension adds.
*/
declare module 'micromark-util-types' {
/**
* Augment token with a field to improve performance.
*
* The syntax extension sets this on the token of the last event after a
* backwards scan for an unbalanced `labelLink` / `labelImage` found nothing,
* so that a later scan can stop as soon as it reaches this token.
*/
interface Token {
_gfmAutolinkLiteralWalkedInto?: boolean
}
/**
* Token types.
*
* `literalAutolink` wraps a whole literal; exactly one of the other three
* is entered inside it, depending on the kind of literal (email, `http` /
* `https`, or `www.`).
*/
interface TokenTypeMap {
literalAutolink: 'literalAutolink'
literalAutolinkEmail: 'literalAutolinkEmail'
literalAutolinkHttp: 'literalAutolinkHttp'
literalAutolinkWww: 'literalAutolinkWww'
}
}

View File

@@ -0,0 +1,2 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'

View File

@@ -0,0 +1,10 @@
/**
* Create an HTML extension for `micromark` to support GitHub autolink literal
* when serializing to HTML.
*
* The extension handles the exit of `literalAutolinkEmail`,
* `literalAutolinkHttp`, and `literalAutolinkWww` tokens.
*
* @returns {HtmlExtension}
* Extension for `micromark` that can be passed in `htmlExtensions` to
* support GitHub autolink literal when serializing to HTML.
*/
export function gfmAutolinkLiteralHtml(): HtmlExtension;
import type { HtmlExtension } from 'micromark-util-types';

View File

@@ -0,0 +1,56 @@
/**
* @import {CompileContext, Handle, HtmlExtension, Token} from 'micromark-util-types'
*/
import {sanitizeUri} from 'micromark-util-sanitize-uri'
/**
 * Build the `micromark` HTML extension that serializes GitHub autolink
 * literals.
 *
 * @returns {HtmlExtension}
 *   Extension to pass in `htmlExtensions`; it hooks the `exit` of the email,
 *   http, and www literal tokens so that each is written as a link.
 */
export function gfmAutolinkLiteralHtml() {
  const exit = {
    literalAutolinkEmail: literalAutolinkEmail,
    literalAutolinkHttp: literalAutolinkHttp,
    literalAutolinkWww: literalAutolinkWww
  }

  return {exit: exit}
}
/**
 * Exit handler for `www.` literals.
 *
 * The source text has no scheme, so `http://` is put in front of it to form
 * the link target.
 *
 * @this {CompileContext}
 * @type {Handle}
 */
function literalAutolinkWww(token) {
  const scheme = 'http://'
  anchorFromToken.apply(this, [token, scheme])
}
/**
 * Exit handler for email literals.
 *
 * The source text is only the address, so `mailto:` is put in front of it to
 * form the link target.
 *
 * @this {CompileContext}
 * @type {Handle}
 */
function literalAutolinkEmail(token) {
  const scheme = 'mailto:'
  anchorFromToken.apply(this, [token, scheme])
}
/**
 * Exit handler for protocol literals.
 *
 * The source text already starts with its scheme, so it is used as the link
 * target without a prefix.
 *
 * @this {CompileContext}
 * @type {Handle}
 */
function literalAutolinkHttp(token) {
  anchorFromToken.apply(this, [token])
}
/**
 * Write an `<a>` element for an autolink literal.
 *
 * The source text of the token is used twice: as the sanitized `href`
 * (optionally behind `protocol`) and, encoded, as the visible label.
 *
 * @this {CompileContext}
 * @param {Token} token
 *   Token of the literal to serialize.
 * @param {string | null | undefined} [protocol]
 *   Scheme to put in front of the text to form the URL (optional).
 * @returns {undefined}
 *   Nothing.
 */
function anchorFromToken(token, protocol) {
  const literal = this.sliceSerialize(token)
  const prefix = protocol || ''
  const href = sanitizeUri(prefix + literal)

  this.tag(`<a href="${href}">`)
  this.raw(this.encode(literal))
  this.tag('</a>')
}

View File

@@ -0,0 +1,10 @@
/**
* Create an extension for `micromark` to support GitHub autolink literal
* syntax.
*
* The extension adds its constructs to `text` content.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* autolink literal syntax.
*/
export function gfmAutolinkLiteral(): Extension;
import type { Extension } from 'micromark-util-types';

View File

@@ -0,0 +1,983 @@
/**
* @import {Code, ConstructRecord, Event, Extension, Previous, State, TokenizeContext, Tokenizer} from 'micromark-util-types'
*/
import {
asciiAlpha,
asciiAlphanumeric,
asciiControl,
markdownLineEndingOrSpace,
unicodePunctuation,
unicodeWhitespace
} from 'micromark-util-character'
import {codes} from 'micromark-util-symbol'
// Partial constructs: building blocks that the tokenizers in this file only
// run through `effects.check` or `effects.attempt`.
const wwwPrefix = {tokenize: tokenizeWwwPrefix, partial: true}
const domain = {tokenize: tokenizeDomain, partial: true}
const path = {tokenize: tokenizePath, partial: true}
const trail = {tokenize: tokenizeTrail, partial: true}
const emailDomainDotTrail = {
tokenize: tokenizeEmailDomainDotTrail,
partial: true
}
// Named constructs: the three kinds of autolink literal.
// Each has a `previous` function that decides, from the character before the
// literal, whether the literal may start there.
const wwwAutolink = {
name: 'wwwAutolink',
tokenize: tokenizeWwwAutolink,
previous: previousWww
}
const protocolAutolink = {
name: 'protocolAutolink',
tokenize: tokenizeProtocolAutolink,
previous: previousProtocol
}
const emailAutolink = {
name: 'emailAutolink',
tokenize: tokenizeEmailAutolink,
previous: previousEmail
}
/**
* Map of character codes to the construct(s) that can start at that
* character; it is filled in at module level below and returned as the `text`
* field of the extension.
*
* @type {ConstructRecord}
*/
const text = {}
/**
 * Build the `micromark` syntax extension for GitHub autolink literals.
 *
 * @returns {Extension}
 *   Extension to pass in `extensions`; its `text` field is the module-level
 *   record of constructs keyed by start character.
 */
export function gfmAutolinkLiteral() {
  /** @type {Extension} */
  const extension = {text: text}
  return extension
}
// Register the constructs on every character a literal can start with.
//
// Every ASCII alphanumeric can start an email literal.
// Each pair below is a half-open range `[first, end)` of character codes.
/** @type {Array<[number, number]>} */
const alphanumericRanges = [
  [codes.digit0, codes.colon],
  [codes.uppercaseA, codes.leftSquareBracket],
  [codes.lowercaseA, codes.leftCurlyBrace]
]

for (const [first, end] of alphanumericRanges) {
  for (let code = first; code < end; code++) {
    text[code] = emailAutolink
  }
}

// So can the other characters that are allowed in atext.
for (const code of [
  codes.plusSign,
  codes.dash,
  codes.dot,
  codes.underscore
]) {
  text[code] = emailAutolink
}

// `h` / `H` can also start a protocol literal.
for (const code of [codes.uppercaseH, codes.lowercaseH]) {
  text[code] = [emailAutolink, protocolAutolink]
}

// `w` / `W` can also start a `www.` literal.
for (const code of [codes.uppercaseW, codes.lowercaseW]) {
  text[code] = [emailAutolink, wwwAutolink]
}
// To do: perform email autolink literals on events, afterwards.
// That's where `markdown-rs` and `cmark-gfm` perform it.
// It should look for `@`, then for atext backwards, and then for a label
// forwards.
// To do: `mailto:`, `xmpp:` protocol as prefix.
/**
 * Tokenizer for email autolink literals.
 *
 * ```markdown
 * > | a contact@example.org b
 *       ^^^^^^^^^^^^^^^^^^^
 * ```
 *
 * Consumes atext, an `@`, and a domain.
 * It only succeeds when the domain has content, contains a dot, and ends in
 * an ASCII letter.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeEmailAutolink(effects, ok, nok) {
  const self = this
  // Whether a dot that is not trailing was seen in the domain.
  let sawDot = false
  // Whether an alphanumeric, `-`, or `_` was seen in the domain.
  let sawLabelCharacter = false

  return start

  /**
   * At the first character of the local part.
   *
   * @type {State}
   */
  function start(code) {
    const canStart =
      gfmAtext(code) &&
      previousEmail.call(self, self.previous) &&
      !previousUnbalanced(self.events)

    if (!canStart) {
      return nok(code)
    }

    effects.enter('literalAutolink')
    effects.enter('literalAutolinkEmail')
    return atext(code)
  }

  /**
   * In the local part, before `@`.
   *
   * @type {State}
   */
  function atext(code) {
    if (code === codes.atSign) {
      effects.consume(code)
      return emailDomain
    }

    if (gfmAtext(code)) {
      effects.consume(code)
      return atext
    }

    return nok(code)
  }

  /**
   * In the domain, after `@`.
   *
   * The reference code is more involved because it also handles the `@`
   * itself, of which there may be just one.
   * Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318>
   *
   * @type {State}
   */
  function emailDomain(code) {
    if (code === codes.dot) {
      // A dot only belongs to the domain when an alphanumeric follows it
      // (not `-` or `_`); look ahead to find out.
      const next = effects.check(
        emailDomainDotTrail,
        emailDomainAfter,
        emailDomainDot
      )
      return next(code)
    }

    const isLabelCharacter =
      code === codes.dash ||
      code === codes.underscore ||
      asciiAlphanumeric(code)

    if (!isLabelCharacter) {
      // To do: `/` if xmpp.
      // Note: normally we'd truncate trailing punctuation from the link.
      // However, email autolink literals cannot contain any of those
      // markers, except for `.`, and that one is only consumed when it
      // isn't trailing, so there is nothing to truncate.
      return emailDomainAfter(code)
    }

    sawLabelCharacter = true
    effects.consume(code)
    return emailDomain
  }

  /**
   * In the domain, on a dot that is followed by more domain.
   *
   * @type {State}
   */
  function emailDomainDot(code) {
    sawDot = true
    effects.consume(code)
    return emailDomain
  }

  /**
   * After the domain.
   *
   * @type {State}
   */
  function emailDomainAfter(code) {
    // The domain must not be empty, must include a dot, and must end in an
    // alphabetical.
    // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>.
    const valid = sawLabelCharacter && sawDot && asciiAlpha(self.previous)

    if (!valid) {
      return nok(code)
    }

    effects.exit('literalAutolinkEmail')
    effects.exit('literalAutolink')
    return ok(code)
  }
}
/**
 * Tokenizer for `www.` autolink literals.
 *
 * ```markdown
 * > | a www.example.org b
 *       ^^^^^^^^^^^^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeWwwAutolink(effects, ok, nok) {
  const self = this

  return wwwStart

  /**
   * At the first `w`.
   *
   * @type {State}
   */
  function wwwStart(code) {
    const startsWithW =
      code === codes.uppercaseW || code === codes.lowercaseW

    if (
      !startsWithW ||
      !previousWww.call(self, self.previous) ||
      previousUnbalanced(self.events)
    ) {
      return nok(code)
    }

    effects.enter('literalAutolink')
    effects.enter('literalAutolinkWww')

    // The prefix is only *checked*, so the `www.` that was parsed is
    // discarded again; when it matches, it is parsed once more as the start
    // of the domain, which is followed by the path.
    const afterDomain = effects.attempt(path, wwwAfter)
    const afterPrefix = effects.attempt(domain, afterDomain, nok)
    return effects.check(wwwPrefix, afterPrefix, nok)(code)
  }

  /**
   * After the path: the literal is done.
   *
   * @type {State}
   */
  function wwwAfter(code) {
    effects.exit('literalAutolinkWww')
    effects.exit('literalAutolink')
    return ok(code)
  }
}
/**
 * Tokenizer for protocol (`http://`, `https://`) autolink literals.
 *
 * ```markdown
 * > | a https://example.org b
 *       ^^^^^^^^^^^^^^^^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeProtocolAutolink(effects, ok, nok) {
  const self = this
  // Letters of the scheme seen so far.
  let scheme = ''
  // Whether the first of the two slashes was consumed.
  let sawSlash = false

  return protocolStart

  /**
   * At the `h` that starts the scheme.
   *
   * @type {State}
   */
  function protocolStart(code) {
    const startsWithH =
      code === codes.uppercaseH || code === codes.lowercaseH

    if (
      !startsWithH ||
      !previousProtocol.call(self, self.previous) ||
      previousUnbalanced(self.events)
    ) {
      return nok(code)
    }

    effects.enter('literalAutolink')
    effects.enter('literalAutolinkHttp')
    scheme += String.fromCodePoint(code)
    effects.consume(code)
    return protocolPrefixInside
  }

  /**
   * In the scheme, up to and including the `:`.
   *
   * @type {State}
   */
  function protocolPrefixInside(code) {
    // `5` is the size of `https`.
    if (asciiAlpha(code) && scheme.length < 5) {
      // @ts-expect-error: definitely number.
      scheme += String.fromCodePoint(code)
      effects.consume(code)
      return protocolPrefixInside
    }

    if (code !== codes.colon) {
      return nok(code)
    }

    const protocol = scheme.toLowerCase()

    if (protocol !== 'http' && protocol !== 'https') {
      return nok(code)
    }

    effects.consume(code)
    return protocolSlashesInside
  }

  /**
   * In the two slashes after the `:`.
   *
   * @type {State}
   */
  function protocolSlashesInside(code) {
    if (code !== codes.slash) {
      return nok(code)
    }

    effects.consume(code)

    if (sawSlash) {
      return afterProtocol
    }

    sawSlash = true
    return protocolSlashesInside
  }

  /**
   * After `//`, before the domain.
   *
   * @type {State}
   */
  function afterProtocol(code) {
    // To do: this is different from `markdown-rs`:
    // https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182
    const cannotStartDomain =
      code === codes.eof ||
      asciiControl(code) ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code) ||
      unicodePunctuation(code)

    if (cannotStartDomain) {
      return nok(code)
    }

    return effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(
      code
    )
  }

  /**
   * After the path: the literal is done.
   *
   * @type {State}
   */
  function protocolAfter(code) {
    effects.exit('literalAutolinkHttp')
    effects.exit('literalAutolink')
    return ok(code)
  }
}
/**
 * Tokenizer for the `www.` prefix.
 *
 * ```markdown
 * > | a www.example.org b
 *       ^^^^
 * ```
 *
 * Matches exactly three `w`s (in either case) and a dot, followed by
 * anything but the end of the content.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeWwwPrefix(effects, ok, nok) {
  // Number of `w`s consumed so far.
  let count = 0

  return wwwPrefixInside

  /**
   * In the `w`s, or at the dot after them.
   *
   * @type {State}
   */
  function wwwPrefixInside(code) {
    const isW = code === codes.uppercaseW || code === codes.lowercaseW

    if (isW && count < 3) {
      count++
      effects.consume(code)
      return wwwPrefixInside
    }

    if (count === 3 && code === codes.dot) {
      effects.consume(code)
      return wwwPrefixAfter
    }

    return nok(code)
  }

  /**
   * After the dot.
   *
   * @type {State}
   */
  function wwwPrefixAfter(code) {
    // If there is *anything*, we can link.
    if (code === codes.eof) {
      return nok(code)
    }

    return ok(code)
  }
}
/**
 * Tokenizer for the domain of a `www.` or protocol literal.
 *
 * ```markdown
 * > | a https://example.org b
 *               ^^^^^^^^^^^
 * ```
 *
 * Fails when the domain is empty or when one of its last two segments
 * contains an underscore.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeDomain(effects, ok, nok) {
  // Whether the segment after the last dot contains an underscore.
  let underscoreInFinalSegment = false
  // Whether the segment before the last dot contains an underscore.
  let underscoreInPenultimateSegment = false
  // Whether a character other than `.` and `_` was consumed.
  let sawCharacter = false

  return domainInside

  /**
   * In the domain.
   *
   * @type {State}
   */
  function domainInside(code) {
    // `.` and `_` are trailing punctuation markers: look ahead to see
    // whether only more trailing markers and then an end follow, in which
    // case the domain stops before them.
    if (code === codes.dot || code === codes.underscore) {
      return effects.check(trail, domainAfter, domainAtPunctuation)(code)
    }

    // GH documents that only alphanumerics (other than `-`, `.`, and `_`)
    // can occur, which sounds like ASCII only, but they also support
    // `www.點看.com`, so that's Unicode.
    // Instead of some new production for Unicode alphanumerics, markdown
    // already has that for Unicode punctuation and whitespace, so use those.
    // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
    const atEnd =
      code === codes.eof ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code) ||
      (code !== codes.dash && unicodePunctuation(code))

    if (atEnd) {
      return domainAfter(code)
    }

    sawCharacter = true
    effects.consume(code)
    return domainInside
  }

  /**
   * On a `.` or `_` that turned out not to be trailing.
   *
   * @type {State}
   */
  function domainAtPunctuation(code) {
    if (code === codes.underscore) {
      underscoreInFinalSegment = true
    } else {
      // A dot starts a new segment: what was the final segment becomes the
      // penultimate one.
      underscoreInPenultimateSegment = underscoreInFinalSegment
      underscoreInFinalSegment = false
    }

    effects.consume(code)
    return domainInside
  }

  /**
   * After the domain.
   *
   * @type {State}
   */
  function domainAfter(code) {
    // Note: GH says that a dot is needed, but that's not true:
    // <https://github.com/github/cmark-gfm/issues/279>
    const valid =
      sawCharacter &&
      !underscoreInPenultimateSegment &&
      !underscoreInFinalSegment

    return valid ? ok(code) : nok(code)
  }
}
/**
 * Tokenizer for the path of a `www.` or protocol literal.
 *
 * ```markdown
 * > | a https://example.org/stuff b
 *                          ^^^^^^
 * ```
 *
 * This never fails: an empty path is fine.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizePath(effects, ok) {
  // Number of `(` seen.
  let opened = 0
  // Number of `)` consumed as part of the path.
  let closed = 0

  return pathInside

  /**
   * In the path.
   *
   * @type {State}
   */
  function pathInside(code) {
    if (code === codes.leftParenthesis) {
      opened++
      effects.consume(code)
      return pathInside
    }

    // To do: `markdown-rs` also needs this.
    // A `)` for which there are fewer closings than openings so far is
    // taken as part of the path without checking for a trail.
    if (code === codes.rightParenthesis && closed < opened) {
      return pathAtPunctuation(code)
    }

    switch (code) {
      case codes.exclamationMark:
      case codes.quotationMark:
      case codes.ampersand:
      case codes.apostrophe:
      case codes.rightParenthesis:
      case codes.asterisk:
      case codes.comma:
      case codes.dot:
      case codes.colon:
      case codes.semicolon:
      case codes.lessThan:
      case codes.questionMark:
      case codes.rightSquareBracket:
      case codes.underscore:
      case codes.tilde: {
        // Potential trailing punctuation: the path stops here when only
        // more trailing markers and then an end follow.
        return effects.check(trail, ok, pathAtPunctuation)(code)
      }

      default: {
        break
      }
    }

    if (
      code === codes.eof ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code)
    ) {
      return ok(code)
    }

    effects.consume(code)
    return pathInside
  }

  /**
   * On punctuation that turned out not to be trailing.
   *
   * @type {State}
   */
  function pathAtPunctuation(code) {
    if (code === codes.rightParenthesis) {
      closed++
    }

    effects.consume(code)
    return pathInside
  }
}
/**
 * Tokenizer for trailing punctuation.
 *
 * ```markdown
 * > | https://example.com").
 *                        ^^^
 * ```
 *
 * Calls `ok` when this *is* a trail followed by an end, which means the whole
 * trail is not part of the link.
 * Calls `nok` when this is part of the link.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeTrail(effects, ok, nok) {
  return trailInside

  /**
   * In the trail of a domain or path.
   *
   * @type {State}
   */
  function trailInside(code) {
    switch (code) {
      // Regular trailing punctuation.
      case codes.exclamationMark:
      case codes.quotationMark:
      case codes.apostrophe:
      case codes.rightParenthesis:
      case codes.asterisk:
      case codes.comma:
      case codes.dot:
      case codes.colon:
      case codes.semicolon:
      case codes.questionMark:
      case codes.underscore:
      case codes.tilde: {
        effects.consume(code)
        return trailInside
      }

      // `&` followed by one or more alphabeticals and then a `;` is as a
      // whole considered trailing punctuation.
      // In all other cases, it continues the URL.
      case codes.ampersand: {
        effects.consume(code)
        return trailCharacterReferenceStart
      }

      // Needed because we allow literals after `[`, as we fix:
      // <https://github.com/github/cmark-gfm/issues/278>.
      // What comes after the `]` decides.
      case codes.rightSquareBracket: {
        effects.consume(code)
        return trailBracketAfter
      }

      // `<` is an end, and so is the end of the content.
      case codes.lessThan:
      case codes.eof: {
        return ok(code)
      }

      default: {
        break
      }
    }

    // Whitespace is an end too; anything else is part of the link.
    if (markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
      return ok(code)
    }

    return nok(code)
  }

  /**
   * In the trail, after `]`.
   *
   * > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug.
   * > See end of <https://github.com/github/cmark-gfm/issues/278> for more.
   *
   * @type {State}
   */
  function trailBracketAfter(code) {
    // Whitespace, or something that could start a resource or reference, is
    // the end.
    const atEnd =
      code === codes.eof ||
      code === codes.leftParenthesis ||
      code === codes.leftSquareBracket ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code)

    // Otherwise, switch back to the regular trail.
    return atEnd ? ok(code) : trailInside(code)
  }

  /**
   * In a character-reference-like trail, after `&`.
   *
   * @type {State}
   */
  function trailCharacterReferenceStart(code) {
    // When non-alpha, it's not a trail.
    if (!asciiAlpha(code)) {
      return nok(code)
    }

    return trailCharacterReferenceInside(code)
  }

  /**
   * In a character-reference-like trail, in the name.
   *
   * @type {State}
   */
  function trailCharacterReferenceInside(code) {
    // Well-formed: switch back to the regular trail.
    if (code === codes.semicolon) {
      effects.consume(code)
      return trailInside
    }

    if (asciiAlpha(code)) {
      effects.consume(code)
      return trailCharacterReferenceInside
    }

    // It's not a trail.
    return nok(code)
  }
}
/**
 * Tokenizer for a dot in an email domain that may be trailing.
 *
 * ```markdown
 * > | contact@example.org.
 *                        ^
 * ```
 *
 * Calls `ok` when the dot *is* a trail, which means it is not part of the
 * link.
 * Calls `nok` when the dot is part of the link.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeEmailDomainDotTrail(effects, ok, nok) {
  return onDot

  /**
   * On the dot (the caller only gets here on a dot).
   *
   * @type {State}
   */
  function onDot(code) {
    effects.consume(code)
    return afterDot
  }

  /**
   * After the dot.
   *
   * @type {State}
   */
  function afterDot(code) {
    // An alphanumeric continues the domain, so the dot was not a trail.
    if (asciiAlphanumeric(code)) {
      return nok(code)
    }

    return ok(code)
  }
}
/**
 * Check whether a `www.` literal may start after `code`.
 *
 * That is the case at the start of the content, after whitespace, and after
 * `(`, `*`, `_`, `[`, `]`, or `~`.
 *
 * See:
 * <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
 *
 * @type {Previous}
 */
function previousWww(code) {
  switch (code) {
    case codes.eof:
    case codes.leftParenthesis:
    case codes.asterisk:
    case codes.underscore:
    case codes.leftSquareBracket:
    case codes.rightSquareBracket:
    case codes.tilde: {
      return true
    }

    default: {
      return markdownLineEndingOrSpace(code)
    }
  }
}
/**
 * Check whether a protocol literal may start after `code`: anywhere, except
 * directly after an ASCII letter.
 *
 * See:
 * <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>.
 *
 * @type {Previous}
 */
function previousProtocol(code) {
  if (asciiAlpha(code)) {
    return false
  }

  return true
}
/**
 * Check whether an email literal may start after `code`.
 *
 * A literal cannot start after a slash or in the middle of atext.
 * The reference code is a bit weird, but that's what it results in; other
 * than those, every preceding character is allowed.
 * Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>.
 *
 * @this {TokenizeContext}
 * @type {Previous}
 */
function previousEmail(code) {
  return code !== codes.slash && !gfmAtext(code)
}
/**
 * Check whether `code` can occur in the part of an email literal before the
 * `@`: an ASCII alphanumeric, `+`, `-`, `.`, or `_`.
 *
 * @param {Code} code
 *   Character code to check.
 * @returns {boolean}
 *   Whether `code` is atext.
 */
function gfmAtext(code) {
  switch (code) {
    case codes.plusSign:
    case codes.dash:
    case codes.dot:
    case codes.underscore: {
      return true
    }

    default: {
      return asciiAlphanumeric(code)
    }
  }
}
/**
 * Check whether there is an unbalanced `labelLink` or `labelImage` somewhere
 * before the current position.
 *
 * Events are scanned from last to first.
 * When nothing is found, the token of the last event is marked with
 * `_gfmAutolinkLiteralWalkedInto`, so that a later scan can stop there
 * instead of walking all the way back again.
 *
 * @param {Array<Event>} events
 *   Events so far; the token of the last one may be marked.
 * @returns {boolean}
 *   Whether an unbalanced label was found.
 */
function previousUnbalanced(events) {
  for (let index = events.length - 1; index >= 0; index--) {
    const token = events[index][1]
    const isLabel = token.type === 'labelLink' || token.type === 'labelImage'

    if (isLabel && !token._balanced) {
      return true
    }

    // Everything before this token was already scanned without finding an
    // unbalanced bracket.
    if (token._gfmAutolinkLiteralWalkedInto) {
      break
    }
  }

  if (events.length > 0) {
    // Mark the last token as “walked into” w/o finding anything.
    events[events.length - 1][1]._gfmAutolinkLiteralWalkedInto = true
  }

  return false
}

View File

@@ -0,0 +1,24 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'
/**
* Augment the types of `micromark-util-types` with the token field and the
* token types that this extension adds.
*/
declare module 'micromark-util-types' {
/**
* Augment token with a field to improve performance.
*
* The syntax extension sets this on the token of the last event after a
* backwards scan for an unbalanced `labelLink` / `labelImage` found nothing,
* so that a later scan can stop as soon as it reaches this token.
*/
interface Token {
_gfmAutolinkLiteralWalkedInto?: boolean
}
/**
* Token types.
*
* `literalAutolink` wraps a whole literal; exactly one of the other three
* is entered inside it, depending on the kind of literal (email, `http` /
* `https`, or `www.`).
*/
interface TokenTypeMap {
literalAutolink: 'literalAutolink'
literalAutolinkEmail: 'literalAutolinkEmail'
literalAutolinkHttp: 'literalAutolinkHttp'
literalAutolinkWww: 'literalAutolinkWww'
}
}

View File

@@ -0,0 +1,2 @@
export { gfmAutolinkLiteral } from './lib/syntax.js';
export { gfmAutolinkLiteralHtml } from './lib/html.js';

View File

@@ -0,0 +1,10 @@
/**
* Create an HTML extension for `micromark` to support GitHub autolink literal
* when serializing to HTML.
*
* The extension handles the exit of `literalAutolinkEmail`,
* `literalAutolinkHttp`, and `literalAutolinkWww` tokens.
*
* @returns {HtmlExtension}
* Extension for `micromark` that can be passed in `htmlExtensions` to
* support GitHub autolink literal when serializing to HTML.
*/
export function gfmAutolinkLiteralHtml(): HtmlExtension;
import type { HtmlExtension } from 'micromark-util-types';

View File

@@ -0,0 +1,60 @@
/**
* @import {CompileContext, Handle, HtmlExtension, Token} from 'micromark-util-types'
*/
import { sanitizeUri } from 'micromark-util-sanitize-uri';
/**
 * Build the `micromark` HTML extension that serializes GitHub autolink
 * literals.
 *
 * @returns {HtmlExtension}
 *   Extension to pass in `htmlExtensions`; it hooks the `exit` of the email,
 *   http, and www literal tokens so that each is written as a link.
 */
export function gfmAutolinkLiteralHtml() {
  const exit = {
    literalAutolinkEmail: literalAutolinkEmail,
    literalAutolinkHttp: literalAutolinkHttp,
    literalAutolinkWww: literalAutolinkWww
  };

  return {exit: exit};
}
/**
 * Exit handler for `www.` literals.
 *
 * The source text has no scheme, so `http://` is put in front of it to form
 * the link target.
 *
 * @this {CompileContext}
 * @type {Handle}
 */
function literalAutolinkWww(token) {
  const scheme = 'http://';
  anchorFromToken.apply(this, [token, scheme]);
}
/**
 * Exit handler for email literals.
 *
 * The source text is only the address, so `mailto:` is put in front of it to
 * form the link target.
 *
 * @this {CompileContext}
 * @type {Handle}
 */
function literalAutolinkEmail(token) {
  const scheme = 'mailto:';
  anchorFromToken.apply(this, [token, scheme]);
}
/**
 * Exit handler for protocol literals.
 *
 * The source text already starts with its scheme, so it is used as the link
 * target without a prefix.
 *
 * @this {CompileContext}
 * @type {Handle}
 */
function literalAutolinkHttp(token) {
  anchorFromToken.apply(this, [token]);
}
/**
 * Write an `<a>` element for an autolink literal.
 *
 * The source text of the token is used twice: as the sanitized `href`
 * (optionally behind `protocol`) and, encoded, as the visible label.
 *
 * @this {CompileContext}
 * @param {Token} token
 *   Token of the literal to serialize.
 * @param {string | null | undefined} [protocol]
 *   Scheme to put in front of the text to form the URL (optional).
 * @returns {undefined}
 *   Nothing.
 */
function anchorFromToken(token, protocol) {
  const literal = this.sliceSerialize(token);
  const prefix = protocol || '';
  const href = sanitizeUri(prefix + literal);

  this.tag(`<a href="${href}">`);
  this.raw(this.encode(literal));
  this.tag('</a>');
}

View File

@@ -0,0 +1,10 @@
/**
* Create an extension for `micromark` to support GitHub autolink literal
* syntax.
*
* The extension adds its constructs to `text` content.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* autolink literal syntax.
*/
export function gfmAutolinkLiteral(): Extension;
import type { Extension } from 'micromark-util-types';

View File

@@ -0,0 +1,854 @@
/**
* @import {Code, ConstructRecord, Event, Extension, Previous, State, TokenizeContext, Tokenizer} from 'micromark-util-types'
*/
import { asciiAlpha, asciiAlphanumeric, asciiControl, markdownLineEndingOrSpace, unicodePunctuation, unicodeWhitespace } from 'micromark-util-character';
// Partial constructs: building blocks that the tokenizers in this file only
// run through `effects.check` or `effects.attempt`.
const wwwPrefix = {
tokenize: tokenizeWwwPrefix,
partial: true
};
const domain = {
tokenize: tokenizeDomain,
partial: true
};
const path = {
tokenize: tokenizePath,
partial: true
};
const trail = {
tokenize: tokenizeTrail,
partial: true
};
const emailDomainDotTrail = {
tokenize: tokenizeEmailDomainDotTrail,
partial: true
};
// Named constructs: the three kinds of autolink literal.
// Each has a `previous` function that decides, from the character before the
// literal, whether the literal may start there.
const wwwAutolink = {
name: 'wwwAutolink',
tokenize: tokenizeWwwAutolink,
previous: previousWww
};
const protocolAutolink = {
name: 'protocolAutolink',
tokenize: tokenizeProtocolAutolink,
previous: previousProtocol
};
const emailAutolink = {
name: 'emailAutolink',
tokenize: tokenizeEmailAutolink,
previous: previousEmail
};
/**
* Map of character codes to the construct(s) that can start at that
* character; it is filled in at module level below and returned as the `text`
* field of the extension.
*
* @type {ConstructRecord}
*/
const text = {};
/**
 * Build the `micromark` syntax extension for GitHub autolink literals.
 *
 * @returns {Extension}
 *   Extension to pass in `extensions`; its `text` field is the module-level
 *   record of constructs keyed by start character.
 */
export function gfmAutolinkLiteral() {
  /** @type {Extension} */
  const extension = {text: text};
  return extension;
}
// Register the constructs on every character a literal can start with.
//
// Every ASCII alphanumeric can start an email literal.
// Each pair below is a half-open range `[first, end)` of character codes:
// `0`–`9`, `A`–`Z`, and `a`–`z`.
/** @type {Array<[number, number]>} */
const alphanumericRanges = [
  [48, 58],
  [65, 91],
  [97, 123]
];

for (const [first, end] of alphanumericRanges) {
  for (let code = first; code < end; code++) {
    text[code] = emailAutolink;
  }
}

// So can the other characters that are allowed in atext: `+`, `-`, `.`, `_`.
for (const code of [43, 45, 46, 95]) {
  text[code] = emailAutolink;
}

// `H` / `h` can also start a protocol literal.
for (const code of [72, 104]) {
  text[code] = [emailAutolink, protocolAutolink];
}

// `W` / `w` can also start a `www.` literal.
for (const code of [87, 119]) {
  text[code] = [emailAutolink, wwwAutolink];
}
// To do: perform email autolink literals on events, afterwards.
// That's where `markdown-rs` and `cmark-gfm` perform it.
// It should look for `@`, then for atext backwards, and then for a label
// forwards.
// To do: `mailto:`, `xmpp:` protocol as prefix.
/**
 * Tokenizer for email autolink literals.
 *
 * ```markdown
 * > | a contact@example.org b
 *       ^^^^^^^^^^^^^^^^^^^
 * ```
 *
 * Consumes atext, an `@`, and a domain.
 * It only succeeds when the domain has content, contains a dot, and ends in
 * an ASCII letter.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeEmailAutolink(effects, ok, nok) {
  const self = this;
  // Whether a dot that is not trailing was seen in the domain.
  let sawDot = false;
  // Whether an alphanumeric, `-`, or `_` was seen in the domain.
  let sawLabelCharacter = false;

  return start;

  /**
   * At the first character of the local part.
   *
   * @type {State}
   */
  function start(code) {
    const canStart =
      gfmAtext(code) &&
      previousEmail.call(self, self.previous) &&
      !previousUnbalanced(self.events);

    if (!canStart) {
      return nok(code);
    }

    effects.enter('literalAutolink');
    effects.enter('literalAutolinkEmail');
    return atext(code);
  }

  /**
   * In the local part, before `@`.
   *
   * @type {State}
   */
  function atext(code) {
    // `@`
    if (code === 64) {
      effects.consume(code);
      return emailDomain;
    }

    if (gfmAtext(code)) {
      effects.consume(code);
      return atext;
    }

    return nok(code);
  }

  /**
   * In the domain, after `@`.
   *
   * The reference code is more involved because it also handles the `@`
   * itself, of which there may be just one.
   * Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318>
   *
   * @type {State}
   */
  function emailDomain(code) {
    // `.`
    if (code === 46) {
      // A dot only belongs to the domain when an alphanumeric follows it
      // (not `-` or `_`); look ahead to find out.
      const next = effects.check(
        emailDomainDotTrail,
        emailDomainAfter,
        emailDomainDot
      );
      return next(code);
    }

    // `-`, `_`, or alphanumerical.
    const isLabelCharacter =
      code === 45 || code === 95 || asciiAlphanumeric(code);

    if (!isLabelCharacter) {
      // To do: `/` if xmpp.
      // Note: normally we'd truncate trailing punctuation from the link.
      // However, email autolink literals cannot contain any of those
      // markers, except for `.`, and that one is only consumed when it
      // isn't trailing, so there is nothing to truncate.
      return emailDomainAfter(code);
    }

    sawLabelCharacter = true;
    effects.consume(code);
    return emailDomain;
  }

  /**
   * In the domain, on a dot that is followed by more domain.
   *
   * @type {State}
   */
  function emailDomainDot(code) {
    sawDot = true;
    effects.consume(code);
    return emailDomain;
  }

  /**
   * After the domain.
   *
   * @type {State}
   */
  function emailDomainAfter(code) {
    // The domain must not be empty, must include a dot, and must end in an
    // alphabetical.
    // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>.
    const valid = sawLabelCharacter && sawDot && asciiAlpha(self.previous);

    if (!valid) {
      return nok(code);
    }

    effects.exit('literalAutolinkEmail');
    effects.exit('literalAutolink');
    return ok(code);
  }
}
/**
 * Tokenizer for `www.` autolink literals.
 *
 * ```markdown
 * > | a www.example.org b
 *       ^^^^^^^^^^^^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeWwwAutolink(effects, ok, nok) {
  const self = this;

  return wwwStart;

  /**
   * At the first `w`.
   *
   * @type {State}
   */
  function wwwStart(code) {
    // `W` or `w`.
    const startsWithW = code === 87 || code === 119;

    if (
      !startsWithW ||
      !previousWww.call(self, self.previous) ||
      previousUnbalanced(self.events)
    ) {
      return nok(code);
    }

    effects.enter('literalAutolink');
    effects.enter('literalAutolinkWww');

    // The prefix is only *checked*, so the `www.` that was parsed is
    // discarded again; when it matches, it is parsed once more as the start
    // of the domain, which is followed by the path.
    const afterDomain = effects.attempt(path, wwwAfter);
    const afterPrefix = effects.attempt(domain, afterDomain, nok);
    return effects.check(wwwPrefix, afterPrefix, nok)(code);
  }

  /**
   * After the path: the literal is done.
   *
   * @type {State}
   */
  function wwwAfter(code) {
    effects.exit('literalAutolinkWww');
    effects.exit('literalAutolink');
    return ok(code);
  }
}
/**
 * Tokenizer for protocol (`http://`, `https://`) autolink literals.
 *
 * ```markdown
 * > | a https://example.org b
 *       ^^^^^^^^^^^^^^^^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeProtocolAutolink(effects, ok, nok) {
  const self = this;
  // Letters of the scheme seen so far.
  let scheme = '';
  // Whether the first of the two slashes was consumed.
  let sawSlash = false;

  return protocolStart;

  /**
   * At the `h` that starts the scheme.
   *
   * @type {State}
   */
  function protocolStart(code) {
    // `H` or `h`.
    const startsWithH = code === 72 || code === 104;

    if (
      !startsWithH ||
      !previousProtocol.call(self, self.previous) ||
      previousUnbalanced(self.events)
    ) {
      return nok(code);
    }

    effects.enter('literalAutolink');
    effects.enter('literalAutolinkHttp');
    scheme += String.fromCodePoint(code);
    effects.consume(code);
    return protocolPrefixInside;
  }

  /**
   * In the scheme, up to and including the `:`.
   *
   * @type {State}
   */
  function protocolPrefixInside(code) {
    // `5` is the size of `https`.
    if (asciiAlpha(code) && scheme.length < 5) {
      // @ts-expect-error: definitely number.
      scheme += String.fromCodePoint(code);
      effects.consume(code);
      return protocolPrefixInside;
    }

    // Anything but `:` ends the attempt.
    if (code !== 58) {
      return nok(code);
    }

    const protocol = scheme.toLowerCase();

    if (protocol !== 'http' && protocol !== 'https') {
      return nok(code);
    }

    effects.consume(code);
    return protocolSlashesInside;
  }

  /**
   * In the two slashes after the `:`.
   *
   * @type {State}
   */
  function protocolSlashesInside(code) {
    // Anything but `/` ends the attempt.
    if (code !== 47) {
      return nok(code);
    }

    effects.consume(code);

    if (sawSlash) {
      return afterProtocol;
    }

    sawSlash = true;
    return protocolSlashesInside;
  }

  /**
   * After `//`, before the domain.
   *
   * @type {State}
   */
  function afterProtocol(code) {
    // To do: this is different from `markdown-rs`:
    // https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182
    const cannotStartDomain =
      code === null ||
      asciiControl(code) ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code) ||
      unicodePunctuation(code);

    if (cannotStartDomain) {
      return nok(code);
    }

    return effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(
      code
    );
  }

  /**
   * After the path: the literal is done.
   *
   * @type {State}
   */
  function protocolAfter(code) {
    effects.exit('literalAutolinkHttp');
    effects.exit('literalAutolink');
    return ok(code);
  }
}
/**
 * Tokenizer for the `www.` prefix of a www autolink literal.
 *
 * Accepts exactly three `w`/`W` characters followed by a `.`, and then
 * requires that something (anything but the end of the input) follows.
 *
 * ```markdown
 * > | a www.example.org b
 *       ^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeWwwPrefix(effects, ok, nok) {
  // Number of `w`/`W` characters consumed so far (at most three).
  let count = 0;
  return wwwPrefixInside;

  /**
   * Inside the `www.` prefix: at a `w`, `W`, or the closing dot.
   *
   * ```markdown
   * > | www.example.com
   *     ^^^^
   * ```
   *
   * @type {State}
   */
  function wwwPrefixInside(code) {
    const isW = code === 87 || code === 119;
    if (isW && count < 3) {
      count += 1;
      effects.consume(code);
      return wwwPrefixInside;
    }
    // The dot only counts once all three `w`s are in.
    if (count === 3 && code === 46) {
      effects.consume(code);
      return wwwPrefixAfter;
    }
    return nok(code);
  }

  /**
   * Directly after the `www.` prefix.
   *
   * ```markdown
   * > | www.example.com
   *         ^
   * ```
   *
   * @type {State}
   */
  function wwwPrefixAfter(code) {
    // Anything at all may follow; only the end of the input is rejected.
    if (code === null) {
      return nok(code);
    }
    return ok(code);
  }
}
/**
 * Tokenizer for the domain part of an autolink literal.
 *
 * Consumes domain codes until an end is reached, then succeeds only when at
 * least one regular code was consumed and neither of the last two
 * dot-separated segments contains an underscore.
 *
 * ```markdown
 * > | a https://example.org b
 *               ^^^^^^^^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeDomain(effects, ok, nok) {
  /**
   * Whether the current (last) segment contains an underscore.
   *
   * @type {boolean}
   */
  let underscoreInLast = false;
  /**
   * Whether the segment before the current one contains an underscore.
   *
   * @type {boolean}
   */
  let underscoreInPenultimate = false;
  /**
   * Whether at least one code other than `.` / `_` was consumed.
   *
   * @type {boolean}
   */
  let hasContent = false;
  return domainInside;

  /**
   * Inside the domain.
   *
   * ```markdown
   * > | https://example.com/a
   *             ^^^^^^^^^^^
   * ```
   *
   * @type {State}
   */
  function domainInside(code) {
    // `.` and `_` may be trailing punctuation: check whether this marker,
    // optionally followed by more trailing markers, is followed by an end.
    if (code === 46 || code === 95) {
      const next = effects.check(trail, domainAfter, domainAtPunctuation);
      return next(code);
    }

    // GitHub documents that only alphanumerics (plus `-`, `.`, and `_`) can
    // occur, which sounds like ASCII only, but it also supports
    // `www.點看.com`, so that’s Unicode.
    // Rather than a new production for Unicode alphanumerics, reuse what
    // markdown already has for Unicode punctuation and whitespace.
    // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
    const atEnd =
      code === null ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code) ||
      (code !== 45 && unicodePunctuation(code));

    if (atEnd) {
      return domainAfter(code);
    }

    hasContent = true;
    effects.consume(code);
    return domainInside;
  }

  /**
   * Inside the domain, at a `.` or `_` that turned out not to be trailing.
   *
   * ```markdown
   * > | https://example.com
   *                    ^
   * ```
   *
   * @type {State}
   */
  function domainAtPunctuation(code) {
    if (code === 95) {
      // An underscore in the current segment.
      underscoreInLast = true;
    } else {
      // Otherwise it’s a `.`: a new segment starts, so what was the last
      // segment becomes the penultimate one.
      underscoreInPenultimate = underscoreInLast;
      underscoreInLast = false;
    }

    effects.consume(code);
    return domainInside;
  }

  /**
   * After the domain.
   *
   * ```markdown
   * > | https://example.com/a
   *                        ^
   * ```
   *
   * @type {State}
   */
  function domainAfter(code) {
    // Note: GitHub says a dot is needed, but that’s not true:
    // <https://github.com/github/cmark-gfm/issues/279>
    const invalid = underscoreInPenultimate || underscoreInLast || !hasContent;
    return invalid ? nok(code) : ok(code);
  }
}
/**
 * Tokenizer for the path part of an autolink literal.
 *
 * Consumes codes until an end (end of input, line ending, or whitespace) or
 * until trailing punctuation followed by an end, while keeping track of
 * parentheses so that balanced closing parens stay inside the path.
 *
 * ```markdown
 * > | a https://example.org/stuff b
 *                          ^^^^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizePath(effects, ok) {
  // Number of `(` consumed so far.
  let openings = 0;
  // Number of `)` consumed so far.
  let closings = 0;
  return pathInside;

  /**
   * Inside the path.
   *
   * ```markdown
   * > | https://example.com/a
   *                        ^^
   * ```
   *
   * @type {State}
   */
  function pathInside(code) {
    if (code === 40) {
      openings += 1;
      effects.consume(code);
      return pathInside;
    }

    // To do: `markdown-rs` also needs this.
    // A closing paren that still has a matching opening one is always part
    // of the path, so we don’t check for a trail.
    if (code === 41 && closings < openings) {
      return pathAtPunctuation(code);
    }

    switch (code) {
      // Potential trailing punctuation: check whether this marker,
      // optionally followed by more trailing markers, is followed by an end.
      case 33:
      case 34:
      case 38:
      case 39:
      case 41:
      case 42:
      case 44:
      case 46:
      case 58:
      case 59:
      case 60:
      case 63:
      case 93:
      case 95:
      case 126: {
        const next = effects.check(trail, ok, pathAtPunctuation);
        return next(code);
      }

      default: {
        break;
      }
    }

    const atEnd =
      code === null ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code);

    if (atEnd) {
      return ok(code);
    }

    effects.consume(code);
    return pathInside;
  }

  /**
   * Inside the path, at punctuation that turned out not to be trailing.
   *
   * ```markdown
   * > | https://example.com/a"b
   *                          ^
   * ```
   *
   * @type {State}
   */
  function pathAtPunctuation(code) {
    // Keep the paren balance up to date.
    if (code === 41) {
      closings += 1;
    }

    effects.consume(code);
    return pathInside;
  }
}
/**
 * Tokenizer for a trail: punctuation at the end of a domain or path.
 *
 * Calls `ok` when this *is* a trail followed by an end, which means the
 * whole trail is not part of the link.
 * Calls `nok` when the punctuation is followed by something else, which
 * means it *is* part of the link.
 *
 * ```markdown
 * > | https://example.com").
 *                        ^^^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeTrail(effects, ok, nok) {
  return trail;

  /**
   * Inside the trail of a domain or path.
   *
   * ```markdown
   * > | https://example.com").
   *                        ^
   * ```
   *
   * @type {State}
   */
  function trail(code) {
    switch (code) {
      // Regular trailing punctuation.
      case 33:
      case 34:
      case 39:
      case 41:
      case 42:
      case 44:
      case 46:
      case 58:
      case 59:
      case 63:
      case 95:
      case 126: {
        effects.consume(code);
        return trail;
      }

      // `&` followed by one or more alphabeticals and then a `;` is, as a
      // whole, considered trailing punctuation.
      // In all other cases it is a continuation of the URL.
      case 38: {
        effects.consume(code);
        return trailCharacterReferenceStart;
      }

      // Needed because we allow literals after `[`, as we fix:
      // <https://github.com/github/cmark-gfm/issues/278>.
      // Check that it is not followed by `(` or `[`.
      case 93: {
        effects.consume(code);
        return trailBracketAfter;
      }

      // `<` is an end, and so is the end of the input.
      case 60:
      case null: {
        return ok(code);
      }

      default: {
        // Whitespace is an end too; anything else means no trail.
        const atWhitespace =
          markdownLineEndingOrSpace(code) || unicodeWhitespace(code);
        return atWhitespace ? ok(code) : nok(code);
      }
    }
  }

  /**
   * Inside the trail, after `]`.
   *
   * > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug.
   * > See end of <https://github.com/github/cmark-gfm/issues/278> for more.
   *
   * ```markdown
   * > | https://example.com](
   *                         ^
   * ```
   *
   * @type {State}
   */
  function trailBracketAfter(code) {
    // Whitespace, or something that could start a resource or reference, is
    // the end; otherwise switch back to the regular trail.
    const atEnd =
      code === null ||
      code === 40 ||
      code === 91 ||
      markdownLineEndingOrSpace(code) ||
      unicodeWhitespace(code);
    return atEnd ? ok(code) : trail(code);
  }

  /**
   * Inside a character-reference-like trail, directly after `&`.
   *
   * ```markdown
   * > | https://example.com&amp;).
   *                         ^
   * ```
   *
   * @type {State}
   */
  function trailCharacterReferenceStart(code) {
    // At least one alphabetical is required; otherwise it’s not a trail.
    if (asciiAlpha(code)) {
      return trailCharacterReferenceInside(code);
    }

    return nok(code);
  }

  /**
   * Inside a character-reference-like trail, in the name.
   *
   * ```markdown
   * > | https://example.com&amp;).
   *                         ^
   * ```
   *
   * @type {State}
   */
  function trailCharacterReferenceInside(code) {
    // Anything other than `;` or an alphabetical: it’s not a trail.
    if (code !== 59 && !asciiAlpha(code)) {
      return nok(code);
    }

    effects.consume(code);
    // A `;` closes the well-formed reference: switch back to the trail.
    return code === 59 ? trail : trailCharacterReferenceInside;
  }
}
/**
 * Tokenizer for a dot that may trail the domain of an email autolink.
 *
 * Calls `ok` when the dot *is* a trail (it is not followed by an ASCII
 * alphanumeric), which means the dot is not part of the link.
 * Calls `nok` when the dot *is* part of the link.
 *
 * ```markdown
 * > | contact@example.org.
 *                        ^
 * ```
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeEmailDomainDotTrail(effects, ok, nok) {
  return start;

  /**
   * At the dot.
   *
   * ```markdown
   * > | contact@example.org.
   *                    ^   ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    // No check here: the code is consumed unconditionally and is expected
    // to be the dot.
    effects.consume(code);
    return after;
  }

  /**
   * After the dot.
   *
   * ```markdown
   * > | contact@example.org.
   *                     ^   ^
   * ```
   *
   * @type {State}
   */
  function after(code) {
    // An alphanumeric continues the domain, so the dot was not a trail.
    if (asciiAlphanumeric(code)) {
      return nok(code);
    }

    return ok(code);
  }
}
/**
 * Check whether a www autolink literal may start after `code`.
 *
 * Allowed: the start of the input, `(`, `*`, `_`, `[`, `]`, `~`, or
 * whatever `markdownLineEndingOrSpace` accepts.
 *
 * See:
 * <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
 *
 * @type {Previous}
 */
function previousWww(code) {
  switch (code) {
    case null:
    case 40:
    case 42:
    case 95:
    case 91:
    case 93:
    case 126: {
      return true;
    }

    default: {
      return markdownLineEndingOrSpace(code);
    }
  }
}
/**
 * Check whether a protocol autolink literal may start after `code`:
 * anything goes, except for an ASCII letter.
 *
 * See:
 * <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>.
 *
 * @type {Previous}
 */
function previousProtocol(code) {
  const afterLetter = asciiAlpha(code);
  return !afterLetter;
}
/**
 * Check whether an email autolink literal may start after `code`.
 *
 * @this {TokenizeContext}
 * @type {Previous}
 */
function previousEmail(code) {
  // A slash “inside” atext is not allowed, and neither is more atext.
  // The reference code is a bit weird, but that’s what it results in.
  // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>.
  // Every other preceding character is fine.
  return code !== 47 && !gfmAtext(code);
}
/**
 * Check whether `code` is one of `+`, `-`, `.`, `_`, or an ASCII
 * alphanumeric.
 *
 * @param {Code} code
 *   Character code to check.
 * @returns {boolean}
 *   Whether `code` matches.
 */
function gfmAtext(code) {
  switch (code) {
    case 43:
    case 45:
    case 46:
    case 95: {
      return true;
    }

    default: {
      return asciiAlphanumeric(code);
    }
  }
}
/**
 * Check whether there is a `labelLink` or `labelImage` token without a
 * truthy `_balanced` field earlier in `events`.
 *
 * Walks backwards through the events.
 * When nothing unbalanced is found, the token of the last event is marked
 * with `_gfmAutolinkLiteralWalkedInto`, so that a later walk can stop as
 * soon as it reaches that token.
 *
 * @param {Array<Event>} events
 *   Events to search through; the last token may get marked.
 * @returns {boolean}
 *   Whether an unbalanced label token was found.
 */
function previousUnbalanced(events) {
  for (let position = events.length - 1; position >= 0; position -= 1) {
    const token = events[position][1];
    const isLabel = token.type === 'labelLink' || token.type === 'labelImage';

    if (isLabel && !token._balanced) {
      return true;
    }

    // If we’ve seen this token, and it was marked as not having any
    // unbalanced bracket before it, there is no need to walk further.
    if (token._gfmAutolinkLiteralWalkedInto) {
      break;
    }
  }

  if (events.length > 0) {
    // Mark the last token as “walked into” without finding anything.
    events[events.length - 1][1]._gfmAutolinkLiteralWalkedInto = true;
  }

  return false;
}

View File

@@ -0,0 +1,22 @@
(The MIT License)
Copyright (c) 2020 Titus Wormer <tituswormer@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,116 @@
{
"name": "micromark-extension-gfm-autolink-literal",
"version": "2.1.0",
"description": "micromark extension to support GFM autolink literals",
"license": "MIT",
"keywords": [
"micromark",
"micromark-extension",
"literal",
"url",
"autolink",
"auto",
"link",
"gfm",
"markdown",
"unified"
],
"repository": "micromark/micromark-extension-gfm-autolink-literal",
"bugs": "https://github.com/micromark/micromark-extension-gfm-autolink-literal/issues",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
},
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"dev/",
"lib/",
"index.d.ts",
"index.js"
],
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
},
"dependencies": {
"micromark-util-character": "^2.0.0",
"micromark-util-sanitize-uri": "^2.0.0",
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"c8": "^10.0.0",
"create-gfm-fixtures": "^1.0.0",
"micromark": "^4.0.0",
"micromark-build": "^2.0.0",
"prettier": "^3.0.0",
"rehype": "^13.0.0",
"remark-cli": "^12.0.0",
"remark-preset-wooorm": "^10.0.0",
"type-coverage": "^2.0.0",
"typescript": "^5.0.0",
"xo": "^0.58.0"
},
"scripts": {
"prepack": "npm run build && npm run format",
"build": "tsc --build --clean && tsc --build && type-coverage && micromark-build",
"format": "remark . -qfo && prettier . -w --log-level warn && xo --fix",
"test-api-prod": "node --conditions production test/index.js",
"test-api-dev": "node --conditions development test/index.js",
"test-api": "npm run test-api-dev && npm run test-api-prod",
"test-coverage": "c8 --100 --reporter lcov npm run test-api",
"test": "npm run build && npm run format && npm run test-coverage"
},
"prettier": {
"bracketSpacing": false,
"semi": false,
"singleQuote": true,
"tabWidth": 2,
"trailingComma": "none",
"useTabs": false
},
"remarkConfig": {
"plugins": [
"remark-preset-wooorm"
]
},
"typeCoverage": {
"atLeast": 100,
"detail": true,
"ignoreCatch": true,
"strict": true
},
"xo": {
"prettier": true,
"rules": {
"complexity": "off",
"unicorn/no-this-assignment": "off",
"unicorn/prefer-at": "off",
"unicorn/prefer-string-replace-all": "off"
},
"overrides": [
{
"files": [
"**/*.ts"
],
"rules": {
"@typescript-eslint/consistent-type-definitions": 0
}
},
{
"files": [
"test/**/*.js"
],
"rules": {
"no-await-in-loop": 0
}
}
]
}
}

View File

@@ -0,0 +1,422 @@
# micromark-extension-gfm-autolink-literal
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][size-badge]][size]
[![Sponsors][sponsors-badge]][collective]
[![Backers][backers-badge]][collective]
[![Chat][chat-badge]][chat]
[micromark][] extensions to support GFM [literal autolinks][spec].
## Contents
* [What is this?](#what-is-this)
* [When to use this](#when-to-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [`gfmAutolinkLiteral()`](#gfmautolinkliteral)
* [`gfmAutolinkLiteralHtml()`](#gfmautolinkliteralhtml)
* [Bugs](#bugs)
* [Authoring](#authoring)
* [HTML](#html)
* [CSS](#css)
* [Syntax](#syntax)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Related](#related)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package contains extensions that add support for the extra autolink syntax
enabled by GFM to [`micromark`][micromark].
GitHub employs different algorithms to autolink: one at parse time and one at
transform time (similar to how @mentions are done at transform time).
This difference can be observed because character references and escapes are
handled differently.
But also because issues/PRs/comments omit (perhaps by accident?) the second
algorithm for `www.`, `http://`, and `https://` links (but not for email links).
As this is a syntax extension, it focuses on the first algorithm.
The second algorithm is performed by
[`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal].
The `html` part of this micromark extension does not operate on an AST and hence
can’t perform the second algorithm.
The implementation of autolink literal on github.com is currently buggy.
The bugs have been reported on [`cmark-gfm`][cmark-gfm].
This micromark extension matches github.com except for its bugs.
## When to use this
This project is useful when you want to support autolink literals in markdown.
You can use these extensions when you are working with [`micromark`][micromark].
To support all GFM features, use
[`micromark-extension-gfm`][micromark-extension-gfm] instead.
When you need a syntax tree, combine this package with
[`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal].
All these packages are used in [`remark-gfm`][remark-gfm], which focusses on
making it easier to transform content by abstracting these internals away.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-extension-gfm-autolink-literal
```
In Deno with [`esm.sh`][esmsh]:
```js
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://esm.sh/micromark-extension-gfm-autolink-literal@2'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://esm.sh/micromark-extension-gfm-autolink-literal@2?bundle'
</script>
```
## Use
```js
import {micromark} from 'micromark'
import {
gfmAutolinkLiteral,
gfmAutolinkLiteralHtml
} from 'micromark-extension-gfm-autolink-literal'
const output = micromark('Just a URL: www.example.com.', {
extensions: [gfmAutolinkLiteral()],
htmlExtensions: [gfmAutolinkLiteralHtml()]
})
console.log(output)
```
Yields:
```html
<p>Just a URL: <a href="http://www.example.com">www.example.com</a>.</p>
```
## API
This package exports the identifiers
[`gfmAutolinkLiteral`][api-gfm-autolink-literal] and
[`gfmAutolinkLiteralHtml`][api-gfm-autolink-literal-html].
There is no default export.
The export map supports the [`development` condition][development].
Run `node --conditions development module.js` to get instrumented dev code.
Without this condition, production code is loaded.
### `gfmAutolinkLiteral()`
Create an extension for `micromark` to support GitHub autolink literal
syntax.
###### Returns
Extension for `micromark` that can be passed in `extensions` to enable GFM
autolink literal syntax ([`Extension`][micromark-extension]).
### `gfmAutolinkLiteralHtml()`
Create an HTML extension for `micromark` to support GitHub autolink literal
when serializing to HTML.
###### Returns
Extension for `micromark` that can be passed in `htmlExtensions` to support
GitHub autolink literal when serializing to HTML
([`HtmlExtension`][micromark-html-extension]).
## Bugs
GitHub’s own algorithm to parse autolink literals contains three bugs.
A smaller bug is left unfixed in this project for consistency.
Two main bugs are not present in this project.
The issues relating to autolink literals are:
* [GFM autolink extension (`www.`, `https?://` parts): links don’t work when
after bracket](https://github.com/github/cmark-gfm/issues/278)\
fixed here ✅
* [GFM autolink extension (`www.` part): uppercase does not match on
issues/PRs/comments](https://github.com/github/cmark-gfm/issues/280)\
fixed here ✅
* [GFM autolink extension (`www.` part): the word `www`
matches](https://github.com/github/cmark-gfm/issues/279)\
present here for consistency
## Authoring
It is recommended to use labels, either with a resource or a definition,
instead of autolink literals, as those allow relative URLs and descriptive
text to explain the URL in prose.
## HTML
GFM autolink literals relate to the `<a>` element in HTML.
See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info.
When an email autolink is used, the string `mailto:` is prepended when
generating the `href` attribute of the hyperlink.
When a www autolink is used, the string `http://` is prepended.
## CSS
As hyperlinks are the fundamental thing that makes the web, you will most
definitely have CSS for `a` elements already.
The same CSS can be used for autolink literals, too.
GitHub itself does not apply interesting CSS to autolink literals.
For any link, it currently (June 2022) [uses][css]:
```css
a {
background-color: transparent;
color: #58a6ff;
text-decoration: none;
}
a:active,
a:hover {
outline-width: 0;
}
a:hover {
text-decoration: underline;
}
a:not([href]) {
color: inherit;
text-decoration: none;
}
```
## Syntax
Autolink literals form with, roughly, the following BNF:
```bnf
gfm_autolink_literal ::= http_autolink | www_autolink | email_autolink
; Restriction: the code before must be `www_autolink_before`.
; Restriction: the code after `.` must not be eof.
www_autolink ::= 3('w' | 'W') '.' [domain [path]]
www_autolink_before ::= eof | eol | space_or_tab | '(' | '*' | '_' | '[' | ']' | '~'
; Restriction: the code before must be `http_autolink_before`.
; Restriction: the code after the protocol must be `http_autolink_protocol_after`.
http_autolink ::= ('h' | 'H') 2('t' | 'T') ('p' | 'P') ['s' | 'S'] ':' 2'/' domain [path]
http_autolink_before ::= byte - ascii_alpha
http_autolink_protocol_after ::= byte - eof - eol - ascii_control - unicode_whitespace - unicode_punctuation
; Restriction: the code before must be `email_autolink_before`.
; Restriction: `ascii_digit` may not occur in the last label part of the label.
email_autolink ::= 1*('+' | '-' | '.' | '_' | ascii_alphanumeric) '@' 1*(1*label_segment label_dot_cont) 1*label_segment
email_autolink_before ::= byte - ascii_alpha - '/'
; Restriction: `_` may not occur in the last two domain parts.
domain ::= 1*(url_ampt_cont | domain_punct_cont | '-' | byte - eof - ascii_control - unicode_whitespace - unicode_punctuation)
; Restriction: must not be followed by `punct`.
domain_punct_cont ::= '.' | '_'
; Restriction: must not be followed by `char-ref`.
url_ampt_cont ::= '&'
; Restriction: a counter `balance = 0` is increased for every `(`, and decreased for every `)`.
; Restriction: `)` must not be `paren_at_end`.
path ::= 1*(url_ampt_cont | path_punctuation_cont | '(' | ')' | byte - eof - eol - space_or_tab)
; Restriction: must not be followed by `punct`.
path_punctuation_cont ::= trailing_punctuation - '<'
; Restriction: must be followed by `punct` and `balance` must be less than `0`.
paren_at_end ::= ')'
label_segment ::= label_dash_underscore_cont | ascii_alpha | ascii_digit
; Restriction: if followed by `punct`, the whole email autolink is invalid.
label_dash_underscore_cont ::= '-' | '_'
; Restriction: must not be followed by `punct`.
label_dot_cont ::= '.'
punct ::= *trailing_punctuation ( byte - eof - eol - space_or_tab - '<' )
char_ref ::= *ascii_alpha ';' path_end
trailing_punctuation ::= '!' | '"' | '\'' | ')' | '*' | ',' | '.' | ':' | ';' | '<' | '?' | '_' | '~'
```
The grammar for GFM autolink literal is very relaxed: basically anything
except for whitespace is allowed after a prefix.
To use whitespace characters and otherwise impossible characters, in URLs,
you can use percent encoding:
```markdown
https://example.com/alpha%20bravo
```
Yields:
```html
<p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p>
```
There are several cases where incorrect encoding of URLs would, in other
languages, result in a parse error.
In markdown, there are no errors, and URLs are normalized.
In addition, many characters are percent encoded
([`sanitizeUri`][micromark-util-sanitize-uri]).
For example:
```markdown
www.a👍b%
```
Yields:
```html
<p><a href="http://www.a%F0%9F%91%8Db%25">www.a👍b%</a></p>
```
There is a big difference between how www and protocol literals work
compared to how email literals work.
The first two are done when parsing, and work like anything else in
markdown.
But email literals are handled afterwards: when everything is parsed, we
look back at the events to figure out if there were email addresses.
This particularly affects how they interleave with character escapes and
character references.
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-extension-gfm-autolink-literal@^2`, compatible with Node.js 16.
This package works with `micromark` version `3` and later.
## Security
This package is safe.
Unlike other links in CommonMark, which allow arbitrary protocols, this
construct always produces safe links.
## Related
* [`micromark-extension-gfm`][micromark-extension-gfm]
— support all of GFM
* [`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal]
— support GFM autolink literals in mdast
* [`mdast-util-gfm`][mdast-util-gfm]
— support all of GFM in mdast
* [`remark-gfm`][remark-gfm]
— support all of GFM in remark
## Contribute
See [`contributing.md` in `micromark/.github`][contributing] for ways to get
started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organization, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark-extension-gfm-autolink-literal/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark-extension-gfm-autolink-literal/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark-extension-gfm-autolink-literal.svg
[coverage]: https://codecov.io/github/micromark/micromark-extension-gfm-autolink-literal
[downloads-badge]: https://img.shields.io/npm/dm/micromark-extension-gfm-autolink-literal.svg
[downloads]: https://www.npmjs.com/package/micromark-extension-gfm-autolink-literal
[size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-extension-gfm-autolink-literal
[size]: https://bundlejs.com/?q=micromark-extension-gfm-autolink-literal
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[collective]: https://opencollective.com/unified
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[npm]: https://docs.npmjs.com/cli/install
[esmsh]: https://esm.sh
[license]: license
[author]: https://wooorm.com
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[typescript]: https://www.typescriptlang.org
[development]: https://nodejs.org/api/packages.html#packages_resolving_user_conditions
[micromark]: https://github.com/micromark/micromark
[micromark-extension-gfm]: https://github.com/micromark/micromark-extension-gfm
[micromark-util-sanitize-uri]: https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri
[micromark-extension]: https://github.com/micromark/micromark#syntaxextension
[micromark-html-extension]: https://github.com/micromark/micromark#htmlextension
[mdast-util-gfm]: https://github.com/syntax-tree/mdast-util-gfm
[mdast-util-gfm-autolink-literal]: https://github.com/syntax-tree/mdast-util-gfm-autolink-literal
[remark-gfm]: https://github.com/remarkjs/remark-gfm
[spec]: https://github.github.com/gfm/#autolinks-extension-
[html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
[css]: https://github.com/sindresorhus/github-markdown-css
[cmark-gfm]: https://github.com/github/cmark-gfm
[api-gfm-autolink-literal]: #gfmautolinkliteral
[api-gfm-autolink-literal-html]: #gfmautolinkliteralhtml