Refactor routing in App component to enhance navigation and improve error handling by integrating dynamic routes and updating the NotFound route.
This commit is contained in:
11
node_modules/parse-latin/lib/expressions.d.ts
generated
vendored
Normal file
11
node_modules/parse-latin/lib/expressions.d.ts
generated
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
/*
 * Type declarations for the regular expressions exported by
 * `expressions.js`. The patterns themselves live in that module.
 */
export declare const affixSymbol: RegExp;
export declare const digitStart: RegExp;
export declare const lowerInitial: RegExp;
export declare const newLine: RegExp;
export declare const numerical: RegExp;
export declare const punctuation: RegExp;
export declare const surrogates: RegExp;
export declare const terminalMarker: RegExp;
export declare const whiteSpace: RegExp;
export declare const word: RegExp;
export declare const wordSymbolInner: RegExp;
|
19
node_modules/parse-latin/lib/expressions.js
generated
vendored
Normal file
19
node_modules/parse-latin/lib/expressions.js
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
82
node_modules/parse-latin/lib/index.d.ts
generated
vendored
Normal file
82
node_modules/parse-latin/lib/index.d.ts
generated
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
/**
 * Parser that turns natural language into an nlcst syntax tree.
 */
export declare class ParseLatin {
    /**
     * Create a new parser.
     *
     * Also supports a `retext`-like call, where an instance is created for
     * each file and the file is handed over on construction.
     *
     * @param doc - Value to parse (optional).
     * @param file - Corresponding file (optional).
     */
    constructor(doc?: string | null, file?: VFile | null);

    /** Document given on construction, if any. */
    doc: string | undefined;

    /** Transforms applied to a root node. */
    tokenizeRootPlugins: Plugin<Root>[];

    /** Transforms applied to a paragraph node. */
    tokenizeParagraphPlugins: Plugin<Paragraph>[];

    /** Transforms applied to a sentence node. */
    tokenizeSentencePlugins: Plugin<Sentence>[];

    /**
     * Turn natural language into a syntax tree.
     *
     * @param value - Value to parse (optional).
     * @returns Tree.
     */
    parse(value?: string | null): Root;

    /**
     * Parse as a root.
     *
     * @param value - Value to parse (optional).
     * @returns Built tree.
     */
    tokenizeRoot(value?: string | null): Root;

    /**
     * Parse as a paragraph.
     *
     * @param value - Value to parse (optional).
     * @returns Built tree.
     */
    tokenizeParagraph(value?: string | null): Paragraph;

    /**
     * Parse as a sentence.
     *
     * @param value - Value to parse (optional).
     * @returns Built tree.
     */
    tokenizeSentence(value?: string | null): Sentence;

    /**
     * Transform a `value` into a list of nlcst nodes.
     *
     * @param value - Value to parse (optional).
     * @returns Built sentence content.
     */
    tokenize(value?: string | null): SentenceContent[];
}
|
||||
/* Node types taken from `nlcst`. */
export type Nodes = import('nlcst').Nodes;
export type Parents = import('nlcst').Parents;
export type Root = import('nlcst').Root;
export type RootContent = import('nlcst').RootContent;
export type Paragraph = import('nlcst').Paragraph;
export type Sentence = import('nlcst').Sentence;
export type SentenceContent = import('nlcst').SentenceContent;

/* File type taken from `vfile`. */
export type VFile = import('vfile').VFile;

/**
 * Transform a node.
 *
 * A plugin receives the node and returns nothing.
 */
export type Plugin<Node extends Nodes> = (node: Node) => void | undefined;
|
352
node_modules/parse-latin/lib/index.js
generated
vendored
Normal file
352
node_modules/parse-latin/lib/index.js
generated
vendored
Normal file
@@ -0,0 +1,352 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Nodes} Nodes
|
||||
* @typedef {import('nlcst').Parents} Parents
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
* @typedef {import('nlcst').Root} Root
|
||||
* @typedef {import('nlcst').RootContent} RootContent
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
* @typedef {import('nlcst').SentenceContent} SentenceContent
|
||||
* @typedef {import('vfile').VFile} VFile
|
||||
*/
|
||||
|
||||
/**
|
||||
* @template {Nodes} Node
|
||||
* Node type.
|
||||
* @callback Plugin
|
||||
* Transform a node.
|
||||
* @param {Node} node
|
||||
* The node.
|
||||
* @returns {undefined | void}
|
||||
* Nothing.
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {mergeAffixExceptions} from './plugin/merge-affix-exceptions.js'
|
||||
import {mergeAffixSymbol} from './plugin/merge-affix-symbol.js'
|
||||
import {breakImplicitSentences} from './plugin/break-implicit-sentences.js'
|
||||
import {makeFinalWhiteSpaceSiblings} from './plugin/make-final-white-space-siblings.js'
|
||||
import {makeInitialWhiteSpaceSiblings} from './plugin/make-initial-white-space-siblings.js'
|
||||
import {mergeFinalWordSymbol} from './plugin/merge-final-word-symbol.js'
|
||||
import {mergeInitialDigitSentences} from './plugin/merge-initial-digit-sentences.js'
|
||||
import {mergeInitialLowerCaseLetterSentences} from './plugin/merge-initial-lower-case-letter-sentences.js'
|
||||
import {mergeInitialWordSymbol} from './plugin/merge-initial-word-symbol.js'
|
||||
import {mergeInitialisms} from './plugin/merge-initialisms.js'
|
||||
import {mergeInnerWordSymbol} from './plugin/merge-inner-word-symbol.js'
|
||||
import {mergeInnerWordSlash} from './plugin/merge-inner-word-slash.js'
|
||||
import {mergeNonWordSentences} from './plugin/merge-non-word-sentences.js'
|
||||
import {mergePrefixExceptions} from './plugin/merge-prefix-exceptions.js'
|
||||
import {mergeRemainingFullStops} from './plugin/merge-remaining-full-stops.js'
|
||||
import {removeEmptyNodes} from './plugin/remove-empty-nodes.js'
|
||||
import {patchPosition} from './plugin/patch-position.js'
|
||||
import {
|
||||
newLine,
|
||||
punctuation,
|
||||
surrogates,
|
||||
terminalMarker,
|
||||
whiteSpace,
|
||||
word
|
||||
} from './expressions.js'
|
||||
|
||||
// PARSE LATIN
|
||||
|
||||
/**
 * Parser that turns natural language into an nlcst syntax tree.
 */
export class ParseLatin {
  /**
   * Create a new parser.
   *
   * Besides the plain `new ParseLatin(doc)` form, a `retext`-like call is
   * supported: an instance per file, with the file passed on construction.
   * When `file` is given it is used instead of `doc`; the chosen value is
   * stringified and kept on `this.doc`.
   *
   * @param {string | null | undefined} [doc]
   *   Value to parse (optional).
   * @param {VFile | null | undefined} [file]
   *   Corresponding file (optional).
   */
  constructor(doc, file) {
    const source = file || doc

    /** @type {string | undefined} */
    this.doc = source ? String(source) : undefined

    // Each instance gets its own copy of the plugin lists it inherits.
    /** @type {Array<Plugin<Root>>} */
    this.tokenizeRootPlugins = [...this.tokenizeRootPlugins]
    /** @type {Array<Plugin<Paragraph>>} */
    this.tokenizeParagraphPlugins = [...this.tokenizeParagraphPlugins]
    /** @type {Array<Plugin<Sentence>>} */
    this.tokenizeSentencePlugins = [...this.tokenizeSentencePlugins]
  }

  /**
   * Turn natural language into a syntax tree.
   *
   * Falls back to the document given on construction when `value` is falsy.
   *
   * @param {string | null | undefined} [value]
   *   Value to parse (optional).
   * @returns {Root}
   *   Tree.
   */
  parse(value) {
    return this.tokenizeRoot(value || this.doc)
  }

  /**
   * Parse as a root.
   *
   * The value is tokenized as one paragraph, which is then split after each
   * white space child matching `newLine`; the root plugins run afterwards.
   *
   * @param {string | null | undefined} [value]
   *   Value to parse (optional).
   * @returns {Root}
   *   Built tree.
   */
  tokenizeRoot(value) {
    /** @type {Root} */
    const root = {
      type: 'RootNode',
      children: splitNode(
        this.tokenizeParagraph(value),
        'WhiteSpaceNode',
        newLine
      )
    }

    // Run plugins in list order, stopping at the first missing entry.
    for (let at = 0; this.tokenizeRootPlugins[at]; at++) {
      this.tokenizeRootPlugins[at](root)
    }

    return root
  }

  /**
   * Parse as a paragraph.
   *
   * The value is tokenized as one sentence, which is then split after each
   * punctuation child matching `terminalMarker`; the paragraph plugins run
   * afterwards.
   *
   * @param {string | null | undefined} [value]
   *   Value to parse (optional).
   * @returns {Paragraph}
   *   Built tree.
   */
  tokenizeParagraph(value) {
    /** @type {Paragraph} */
    const paragraph = {
      type: 'ParagraphNode',
      children: splitNode(
        this.tokenizeSentence(value),
        'PunctuationNode',
        terminalMarker
      )
    }

    // Run plugins in list order, stopping at the first missing entry.
    for (let at = 0; this.tokenizeParagraphPlugins[at]; at++) {
      this.tokenizeParagraphPlugins[at](paragraph)
    }

    return paragraph
  }

  /**
   * Parse as a sentence.
   *
   * @param {string | null | undefined} [value]
   *   Value to parse (optional).
   * @returns {Sentence}
   *   Built tree.
   */
  tokenizeSentence(value) {
    /** @type {Sentence} */
    const sentence = {type: 'SentenceNode', children: this.tokenize(value)}

    // Run plugins in list order, stopping at the first missing entry.
    for (let at = 0; this.tokenizeSentencePlugins[at]; at++) {
      this.tokenizeSentencePlugins[at](sentence)
    }

    return sentence
  }

  /**
   * Transform a `value` into a list of nlcst nodes.
   *
   * Characters are read one at a time and classified as white space,
   * punctuation, word, or symbol. Adjacent characters are collected into one
   * node while the run continues; each node carries its start and end point.
   *
   * @param {string | null | undefined} [value]
   *   Value to parse (optional).
   * @returns {Array<SentenceContent>}
   *   Built sentence content.
   */
  tokenize(value) {
    /** @type {Array<SentenceContent>} */
    const nodes = []

    if (!value) {
      return nodes
    }

    // Point of the character about to be read.
    const cursor = {line: 1, column: 1, offset: 0}
    // Index and point at which the run being collected began.
    let runFrom = 0
    let runPoint = {...cursor}
    /** @type {SentenceContent['type'] | undefined} */
    let lastType
    /** @type {string | undefined} */
    let lastChar

    /**
     * Append a node for the characters from `runFrom` up to `end`.
     *
     * @param {SentenceContent['type']} type
     *   Node type to build.
     * @param {number} end
     *   Index just past the last character of the run.
     * @returns {undefined}
     *   Nothing.
     */
    const flush = (type, end) => {
      const text = value.slice(runFrom, end)

      if (type === 'WordNode') {
        // Words wrap their text in a `TextNode` child.
        nodes.push({
          type: 'WordNode',
          children: [
            {
              type: 'TextNode',
              value: text,
              position: {start: runPoint, end: {...cursor}}
            }
          ],
          position: {start: runPoint, end: {...cursor}}
        })
      } else {
        nodes.push({
          type,
          value: text,
          position: {start: runPoint, end: {...cursor}}
        })
      }
    }

    for (let index = 0; index < value.length; index++) {
      const char = value.charAt(index)
      /** @type {SentenceContent['type']} */
      let type

      if (whiteSpace.test(char)) {
        type = 'WhiteSpaceNode'
      } else if (punctuation.test(char)) {
        type = 'PunctuationNode'
      } else if (word.test(char)) {
        type = 'WordNode'
      } else {
        type = 'SymbolNode'
      }

      if (runFrom < index && lastType) {
        // A run continues only while the type is unchanged and either: it is
        // a word or white space, the character repeats the previous one, or
        // the character is a surrogate.
        const continues =
          lastType === type &&
          (type === 'WordNode' ||
            type === 'WhiteSpaceNode' ||
            char === lastChar ||
            surrogates.test(char))

        if (!continues) {
          flush(lastType, index)
          runFrom = index
          runPoint = {...cursor}
        }
      }

      if (char === '\n' && lastChar === '\r') {
        // Second half of a CRLF pair: the line was already advanced on `\r`.
      } else if (char === '\r' || char === '\n') {
        cursor.line++
        cursor.column = 1
      } else {
        cursor.column++
      }

      cursor.offset++
      lastType = type
      lastChar = char
    }

    // Emit whatever is still pending once the input is exhausted.
    if (lastType && runFrom < value.length) {
      flush(lastType, value.length)
    }

    return nodes
  }
}
|
||||
|
||||
/**
 * Default transform lists, placed on the prototype.
 * The constructor copies each list onto the instance.
 */
Object.assign(ParseLatin.prototype, {
  // List of transforms handling a sentence.
  tokenizeSentencePlugins: [
    mergeInitialWordSymbol,
    mergeFinalWordSymbol,
    mergeInnerWordSymbol,
    mergeInnerWordSlash,
    mergeInitialisms,
    patchPosition
  ],

  // List of transforms handling a paragraph.
  tokenizeParagraphPlugins: [
    mergeNonWordSentences,
    mergeAffixSymbol,
    mergeInitialLowerCaseLetterSentences,
    mergeInitialDigitSentences,
    mergePrefixExceptions,
    mergeAffixExceptions,
    mergeRemainingFullStops,
    makeInitialWhiteSpaceSiblings,
    makeFinalWhiteSpaceSiblings,
    breakImplicitSentences,
    removeEmptyNodes,
    patchPosition
  ],

  // List of transforms handling a root.
  tokenizeRootPlugins: [
    makeInitialWhiteSpaceSiblings,
    makeFinalWhiteSpaceSiblings,
    removeEmptyNodes,
    patchPosition
  ]
})
|
||||
|
||||
/**
 * Split one parent node into several nodes of the same type.
 *
 * A piece is closed after every child of type `childType` whose text matches
 * `expression`, and after the last child. Each piece takes its position from
 * its first and last child when both have one.
 *
 * @template {Parents} Node
 *   Node type.
 * @param {Node} node
 *   Node to split.
 * @param {Node['children'][number]['type']} childType
 *   Split this node type.
 * @param {RegExp} expression
 *   Split on this regex.
 * @returns {Array<Node>}
 *   The given node, split into several nodes.
 */
function splitNode(node, childType, expression) {
  /** @type {Array<Node>} */
  const pieces = []
  const kids = node.children
  let from = 0

  for (let at = 0; at < kids.length; at++) {
    const token = kids[at]
    const isLast = at === kids.length - 1

    // The expression is only consulted for non-final children of the
    // requested type.
    if (
      !isLast &&
      !(token.type === childType && expression.test(toString(token)))
    ) {
      continue
    }

    const head = kids[from]
    /** @type {Node} */
    // @ts-expect-error: fine
    const piece = {type: node.type, children: kids.slice(from, at + 1)}

    if (head.position && token.position) {
      piece.position = {start: head.position.start, end: token.position.end}
    }

    pieces.push(piece)
    from = at + 1
  }

  return pieces
}
|
3
node_modules/parse-latin/lib/plugin/break-implicit-sentences.d.ts
generated
vendored
Normal file
3
node_modules/parse-latin/lib/plugin/break-implicit-sentences.d.ts
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
export type Sentence = import('nlcst').Sentence;
/** Paragraph transform; implemented in `break-implicit-sentences.js`. */
export declare const breakImplicitSentences: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Paragraph>;
|
59
node_modules/parse-latin/lib/plugin/break-implicit-sentences.js
generated
vendored
Normal file
59
node_modules/parse-latin/lib/plugin/break-implicit-sentences.js
generated
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Break a sentence in two at the first inner white space node that spans more
// than one line break. The white space becomes a sibling between the two
// sentences.
export const breakImplicitSentences = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  (child, index, parent) => {
    if (child.type !== 'SentenceNode') {
      return
    }

    const siblings = child.children

    // The first and last child are never split points.
    for (let at = 1; at < siblings.length - 1; at++) {
      const gap = siblings[at]

      // Two or more line breaks split the text into three or more parts.
      const parts =
        gap.type === 'WhiteSpaceNode'
          ? toString(gap).split(/\r\n|\r|\n/).length
          : 0

      if (parts >= 3) {
        const before = siblings[at - 1]
        const after = siblings[at + 1]

        child.children = siblings.slice(0, at)

        /** @type {Sentence} */
        const rest = {type: 'SentenceNode', children: siblings.slice(at + 1)}

        parent.children.splice(index + 1, 0, gap, rest)

        if (child.position && before.position && after.position) {
          // The new sentence inherits the old end; the old one now stops at
          // the child before the gap.
          const oldEnd = child.position.end
          child.position.end = before.position.end
          rest.position = {start: after.position.start, end: oldEnd}
        }

        return index + 1
      }
    }
  }
)
|
3
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.d.ts
generated
vendored
Normal file
3
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.d.ts
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
export type Root = import('nlcst').Root;
/** Paragraph/root transform; implemented in `make-final-white-space-siblings.js`. */
export declare const makeFinalWhiteSpaceSiblings: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Paragraph | Root>;
|
33
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js
generated
vendored
Normal file
33
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js
generated
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
* @typedef {import('nlcst').Root} Root
|
||||
*/
|
||||
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Move white space that ends a child up one level, so that it becomes the
// next sibling of that child.
export const makeFinalWhiteSpaceSiblings = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph | Root>}
   */
  (child, index, parent) => {
    if (!('children' in child)) {
      return
    }

    const kids = child.children
    const last = kids[kids.length - 1]

    if (!last || last.type !== 'WhiteSpaceNode') {
      return
    }

    // Detach the trailing white space and insert it right after `child`.
    kids.pop()
    parent.children.splice(index + 1, 0, last)

    const newLast = kids[kids.length - 1]

    if (newLast && newLast.position && child.position) {
      child.position.end = newLast.position.end
    }

    // Next, iterate over the current node again.
    return index
  }
)
|
3
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.d.ts
generated
vendored
Normal file
3
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.d.ts
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
export type Root = import('nlcst').Root;
/** Paragraph/root visitor; implemented in `make-initial-white-space-siblings.js`. */
export declare const makeInitialWhiteSpaceSiblings: import("../../node_modules/unist-util-visit-children/lib/index.js").Visit<Paragraph | Root>;
|
28
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js
generated
vendored
Normal file
28
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js
generated
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
* @typedef {import('nlcst').Root} Root
|
||||
*/
|
||||
|
||||
import {visitChildren} from 'unist-util-visit-children'
|
||||
|
||||
// Move white space that starts a child up one level, so that it becomes the
// previous sibling of that child.
export const makeInitialWhiteSpaceSiblings = visitChildren(
  /**
   * @type {import('unist-util-visit-children').Visitor<Paragraph | Root>}
   */
  (child, index, parent) => {
    if (!('children' in child && child.children)) {
      return
    }

    const kids = child.children
    const first = kids[0]

    if (!first || first.type !== 'WhiteSpaceNode') {
      return
    }

    // Detach the leading white space and insert it right before `child`.
    kids.shift()
    parent.children.splice(index, 0, first)

    const newFirst = kids[0]

    if (newFirst && newFirst.position && child.position) {
      child.position.start = newFirst.position.start
    }
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
/** Paragraph transform; implemented in `merge-affix-exceptions.js`. */
export declare const mergeAffixExceptions: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Paragraph>;
|
54
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js
generated
vendored
Normal file
54
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js
generated
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Merge a sentence into its previous sentence when the first word, symbol, or
// punctuation node in it is a comma or a semicolon.
export const mergeAffixExceptions = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  (child, index, parent) => {
    const previous = parent.children[index - 1]

    if (
      !previous ||
      !('children' in previous) ||
      !('children' in child) ||
      child.children.length === 0
    ) {
      return
    }

    // Other node types are skipped; the first word, symbol, or punctuation
    // node decides.
    for (let at = 0; child.children[at]; at++) {
      const node = child.children[at]

      if (node.type === 'WordNode') {
        return
      }

      if (node.type !== 'SymbolNode' && node.type !== 'PunctuationNode') {
        continue
      }

      const text = toString(node)

      if (text !== ',' && text !== ';') {
        return
      }

      previous.children.push(...child.children)

      // Update position.
      if (previous.position && child.position) {
        previous.position.end = child.position.end
      }

      parent.children.splice(index, 1)

      // Next, iterate over the node *now* at the current position.
      return index
    }
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-affix-symbol.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-affix-symbol.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
/** Paragraph transform; implemented in `merge-affix-symbol.js`. */
export declare const mergeAffixSymbol: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Paragraph>;
|
47
node_modules/parse-latin/lib/plugin/merge-affix-symbol.js
generated
vendored
Normal file
47
node_modules/parse-latin/lib/plugin/merge-affix-symbol.js
generated
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
// Closing or final punctuation, or terminal markers that should still be
|
||||
// included in the previous sentence, even though they follow the sentence’s
|
||||
// terminal marker.
|
||||
import {affixSymbol} from '../expressions.js'
|
||||
|
||||
// Move a leading symbol or punctuation node that matches `affixSymbol` out of
// a sentence and onto the end of the sentence before it.
export const mergeAffixSymbol = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  (child, index, parent) => {
    if (!('children' in child) || child.children.length === 0 || index <= 0) {
      return
    }

    const previous = parent.children[index - 1]
    const [first, second] = child.children

    if (!previous || previous.type !== 'SentenceNode') {
      return
    }

    if (first.type !== 'SymbolNode' && first.type !== 'PunctuationNode') {
      return
    }

    if (!affixSymbol.test(toString(first))) {
      return
    }

    // Remove `first` from this sentence and append it to the previous one.
    child.children.shift()
    previous.children.push(first)

    // Update position.
    if (first.position && previous.position) {
      previous.position.end = first.position.end
    }

    if (second && second.position && child.position) {
      child.position.start = second.position.start
    }

    // Next, iterate over the previous node again.
    return index - 1
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export type Sentence = import('nlcst').Sentence;
/** Sentence transform; implemented in `merge-final-word-symbol.js`. */
export declare const mergeFinalWordSymbol: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Sentence>;
|
45
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js
generated
vendored
Normal file
45
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js
generated
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Merge a `-` into the word before it, unless a word also follows it.
export const mergeFinalWordSymbol = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Sentence>}
   */
  (child, index, parent) => {
    if (index <= 0) {
      return
    }

    if (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') {
      return
    }

    if (toString(child) !== '-') {
      return
    }

    const siblings = parent.children
    const before = siblings[index - 1]
    const after = siblings[index + 1]
    const wordFollows = after && after.type === 'WordNode'

    if (wordFollows || !before || before.type !== 'WordNode') {
      return
    }

    // Remove `child` from parent.
    siblings.splice(index, 1)

    // Add the punctuation mark at the end of the previous node.
    before.children.push(child)

    // Update position.
    if (before.position && child.position) {
      before.position.end = child.position.end
    }

    // Next, iterate over the node *now* at the current position (which was
    // the next node).
    return index
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
/** Paragraph transform; implemented in `merge-initial-digit-sentences.js`. */
export declare const mergeInitialDigitSentences: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Paragraph>;
|
39
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js
generated
vendored
Normal file
39
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js
generated
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
import {digitStart} from '../expressions.js'
|
||||
|
||||
// Merge a sentence into its previous sentence when its first child is a word
// matching the `digitStart` expression.
export const mergeInitialDigitSentences = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  (child, index, parent) => {
    const previous = parent.children[index - 1]

    if (
      !previous ||
      previous.type !== 'SentenceNode' ||
      child.type !== 'SentenceNode'
    ) {
      return
    }

    const first = child.children[0]

    if (!first || first.type !== 'WordNode') {
      return
    }

    if (!digitStart.test(toString(first))) {
      return
    }

    // Move every child over, then drop the now-merged sentence.
    previous.children.push(...child.children)
    parent.children.splice(index, 1)

    // Update position.
    if (previous.position && child.position) {
      previous.position.end = child.position.end
    }

    // Next, iterate over the node *now* at the current position.
    return index
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export type Paragraph = import('nlcst').Paragraph;
/** Paragraph transform; implemented in `merge-initial-lower-case-letter-sentences.js`. */
export declare const mergeInitialLowerCaseLetterSentences: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Paragraph>;
|
52
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js
generated
vendored
Normal file
52
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js
generated
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
// Initial lowercase letter.
|
||||
import {lowerInitial} from '../expressions.js'
|
||||
|
||||
// Merge a sentence into its previous sentence when its first word matches
// `lowerInitial` and no symbol or punctuation comes before that word.
export const mergeInitialLowerCaseLetterSentences = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  (child, index, parent) => {
    if (child.type !== 'SentenceNode' || index <= 0) {
      return
    }

    const previous = parent.children[index - 1]
    const kids = child.children

    if (kids.length === 0 || previous.type !== 'SentenceNode') {
      return
    }

    for (let at = 0; kids[at]; at++) {
      const node = kids[at]

      // A symbol or punctuation before any word rules the merge out.
      if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
        return
      }

      if (node.type !== 'WordNode') {
        continue
      }

      // The first word decides.
      if (!lowerInitial.test(toString(node))) {
        return
      }

      previous.children.push(...kids)
      parent.children.splice(index, 1)

      // Update position.
      if (previous.position && child.position) {
        previous.position.end = child.position.end
      }

      // Next, iterate over the node *now* at the current position.
      return index
    }
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
export type Sentence = import('nlcst').Sentence;
/** Sentence transform; implemented in `merge-initial-word-symbol.js`. */
export declare const mergeInitialWordSymbol: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<Sentence>;
|
47
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js
generated
vendored
Normal file
47
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js
generated
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Merge an initial `&` (symbol or punctuation) into the word that follows it.
export const mergeInitialWordSymbol = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Sentence>}
   */
  function (child, index, parent) {
    // Only symbol/punctuation nodes are candidates…
    if (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') {
      return
    }

    // …and of those, only an ampersand.
    if (toString(child) !== '&') {
      return
    }

    const siblings = parent.children
    const following = siblings[index + 1]

    // A word directly before the mark means it is not *initial*: leave it.
    if (index > 0 && siblings[index - 1].type === 'WordNode') {
      return
    }

    // Without a word directly after the mark there is nothing to merge into.
    if (!following || following.type !== 'WordNode') {
      return
    }

    // Move the mark out of the sentence and to the front of the next word.
    siblings.splice(index, 1)
    following.children.unshift(child)

    // The word now starts where the mark started.
    if (following.position && child.position) {
      following.position.start = child.position.start
    }

    // Next, iterate over the node at the previous position, as it's now
    // adjacent to a following word.
    return index - 1
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-initialisms.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-initialisms.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/** Modifier applied to the children of a `Sentence`; declared for `merge-initialisms.js`. */
export const mergeInitialisms: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<import("nlcst").Sentence>;
|
||||
export type Sentence = import('nlcst').Sentence;
|
72
node_modules/parse-latin/lib/plugin/merge-initialisms.js
generated
vendored
Normal file
72
node_modules/parse-latin/lib/plugin/merge-initialisms.js
generated
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
import {numerical} from '../expressions.js'
|
||||
|
||||
// Merge initialisms: when a full stop follows a word whose children alternate
// between single characters and full stops, the stop becomes part of the word.
export const mergeInitialisms = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Sentence>}
   */
  function (child, index, parent) {
    // Only a `.` punctuation node with something in front of it qualifies.
    if (
      index < 1 ||
      child.type !== 'PunctuationNode' ||
      toString(child) !== '.'
    ) {
      return
    }

    const word = parent.children[index - 1]

    if (word.type !== 'WordNode') {
      return
    }

    const parts = word.children

    // Require an odd number of parts, and more than one.
    if (!parts || parts.length === 1 || parts.length % 2 === 0) {
      return
    }

    let onlyDigits = true

    // Walk the parts of the word from the last one to the first.
    for (let at = parts.length - 1; at >= 0; at--) {
      const value = toString(parts[at])

      if (at % 2 === 0) {
        // Initialisms consist of one character values.
        if (value.length > 1) {
          return
        }

        if (!numerical.test(value)) {
          onlyDigits = false
        }

        continue
      }

      if (value === '.') {
        continue
      }

      // A non-`.` separator: stop scanning when it sits before the final
      // separator slot, otherwise this is not an initialism.
      if (at < parts.length - 2) {
        break
      }

      return
    }

    // Values made up solely of `numerical` matches are left alone.
    if (onlyDigits) {
      return
    }

    // Move the full stop from the sentence into the word.
    parent.children.splice(index, 1)
    parts.push(child)

    // The word now ends where the full stop ended.
    if (word.position && child.position) {
      word.position.end = child.position.end
    }

    // Next, iterate over the node *now* at the current position.
    return index
  }
)
|
4
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.d.ts
generated
vendored
Normal file
4
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.d.ts
generated
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
/** Modifier applied to the children of a `Sentence`; declared for `merge-inner-word-slash.js`. */
export const mergeInnerWordSlash: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<import("nlcst").Sentence>;
|
||||
export type Sentence = import('nlcst').Sentence;
|
||||
export type SentenceContent = import('nlcst').SentenceContent;
|
||||
export type WordContent = import('nlcst').WordContent;
|
57
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js
generated
vendored
Normal file
57
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js
generated
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
* @typedef {import('nlcst').SentenceContent} SentenceContent
|
||||
* @typedef {import('nlcst').WordContent} WordContent
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Merge a `/` (and the word after it, if any) into the preceding word, as long
// as the words on both sides are shorter than three characters.
export const mergeInnerWordSlash = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Sentence>}
   */
  function (child, index, parent) {
    const nodes = parent.children
    const before = nodes[index - 1]

    // A slash only joins onto a word directly in front of it.
    if (!before || before.type !== 'WordNode') {
      return
    }

    if (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') {
      return
    }

    if (toString(child) !== '/') {
      return
    }

    const beforeText = toString(before)
    const after = nodes[index + 1]
    // The word directly after the slash, when there is one.
    const followingWord = after && after.type === 'WordNode' ? after : undefined
    const afterText = followingWord ? toString(followingWord) : ''

    // Both sides must be short (an absent or empty following word counts as
    // short).
    if (beforeText.length >= 3 || afterText.length >= 3) {
      return
    }

    /** @type {Array<WordContent>} */
    const merged = followingWord ? [child, ...followingWord.children] : [child]

    // Add all found tokens to the children of the preceding word.
    before.children.push(...merged)

    // Drop the slash, plus the following word when it was merged.
    nodes.splice(index, followingWord ? 2 : 1)

    /** @type {SentenceContent} */
    const tail = followingWord || child

    // The preceding word now ends where the last merged node ended.
    if (before.position && tail.position) {
      before.position.end = tail.position.end
    }

    // Next, iterate over the node *now* at the current position.
    return index
  }
)
|
3
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.d.ts
generated
vendored
Normal file
3
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.d.ts
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
/** Modifier applied to the children of a `Sentence`; declared for `merge-inner-word-symbol.js`. */
export const mergeInnerWordSymbol: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<import("nlcst").Sentence>;
|
||||
export type Sentence = import('nlcst').Sentence;
|
||||
export type WordContent = import('nlcst').WordContent;
|
79
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
generated
vendored
Normal file
79
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
generated
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
* @typedef {import('nlcst').WordContent} WordContent
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
// Symbols part of surrounding words.
|
||||
import {wordSymbolInner} from '../expressions.js'
|
||||
|
||||
// Merge words that are joined by inner-word symbols (as matched by
// `wordSymbolInner`) into the word in front of them.
export const mergeInnerWordSymbol = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Sentence>}
   */
  function (child, index, parent) {
    if (index < 1) {
      return
    }

    if (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') {
      return
    }

    const nodes = parent.children
    const head = nodes[index - 1]

    if (!head || head.type !== 'WordNode') {
      return
    }

    /** @type {Array<WordContent>} */
    const absorbed = []
    /** @type {Array<WordContent>} */
    let pending = []
    // Exclusive end of the range to remove: one past the last word consumed.
    let stop = index

    // Scan forward, starting at `child` itself:
    // - a word flushes the pending symbols plus its own children;
    // - an inner-word symbol is held back until a word follows;
    // - anything else ends the scan.
    for (let cursor = index; cursor < nodes.length; cursor++) {
      const node = nodes[cursor]

      if (node.type === 'WordNode') {
        absorbed.push(...pending, ...node.children)
        pending = []
        stop = cursor + 1
        continue
      }

      if (
        (node.type === 'SymbolNode' || node.type === 'PunctuationNode') &&
        wordSymbolInner.test(toString(node))
      ) {
        pending.push(node)
        continue
      }

      break
    }

    // Nothing was flushed, so there is nothing to merge.
    if (absorbed.length === 0) {
      return
    }

    // Remove every (one or more) inner-word punctuation marks and words that
    // were consumed; symbols still pending after the last word stay put.
    nodes.splice(index, stop - index)

    // Add all found tokens to the children of the preceding word.
    head.children.push(...absorbed)

    const last = absorbed[absorbed.length - 1]

    // The preceding word now ends where the last merged token ended.
    if (head.position && last.position) {
      head.position.end = last.position.end
    }

    // Next, iterate over the node *now* at the current position.
    return index
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/** Modifier applied to the children of a `Paragraph`; declared for `merge-non-word-sentences.js`. */
export const mergeNonWordSentences: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<import("nlcst").Paragraph>;
|
||||
export type Paragraph = import('nlcst').Paragraph;
|
56
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js
generated
vendored
Normal file
56
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js
generated
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Merge a sentence that contains no word tokens into a neighbour: the previous
// sibling when that has children, otherwise the next sibling.
export const mergeNonWordSentences = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  function (child, index, parent) {
    if (!('children' in child)) {
      return
    }

    // Any word makes this a regular sentence: leave it alone.
    const hasWord = child.children.some(function (node) {
      return node.type === 'WordNode'
    })

    if (hasWord) {
      return
    }

    const siblings = parent.children
    const before = siblings[index - 1]

    if (before && 'children' in before) {
      before.children.push(...child.children)

      // Remove the child.
      siblings.splice(index, 1)

      // Patch position.
      if (before.position && child.position) {
        before.position.end = child.position.end
      }

      // Next, iterate over the node *now* at the current position (which was
      // the next node).
      return index
    }

    const after = siblings[index + 1]

    if (!after || !('children' in after)) {
      return
    }

    after.children.unshift(...child.children)

    // Patch position.
    if (after.position && child.position) {
      after.position.start = child.position.start
    }

    // Remove the child.  Note that, unlike the branch above, no index is
    // returned on this path.
    siblings.splice(index, 1)
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/** Modifier applied to the children of a `Paragraph`; declared for `merge-prefix-exceptions.js`. */
export const mergePrefixExceptions: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<import("nlcst").Paragraph>;
|
||||
export type Paragraph = import('nlcst').Paragraph;
|
78
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js
generated
vendored
Normal file
78
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js
generated
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Words after which a full stop should not be treated as a terminal sentence
// marker.
// The pattern is anchored at both ends and only lists digits and lower case
// letters, so text must be lower-cased before testing to match
// case-insensitively.
const abbreviationPrefix = new RegExp(
  '^(' +
    [
      // One to three digits.
      '[0-9]{1,3}',
      // A single letter.
      '[a-z]',
      // Common Latin Abbreviations:
      // Based on: <https://en.wikipedia.org/wiki/List_of_Latin_abbreviations>.
      // Where only the abbreviations written without joining full stops,
      // but with a final full stop, were extracted.
      //
      // circa, capitulus, confer, compare, centum weight, eadem, (et) alii,
      // et cetera, floruit, foliis, ibidem, idem, nemine && contradicente,
      // opere && citato, (per) cent, (per) procurationem, (pro) tempore,
      // sic erat scriptum, (et) sequentia, statim, videlicet.
      'al',
      'ca',
      'cap',
      'cca',
      'cent',
      'cf',
      'cit',
      'con',
      'cp',
      'cwt',
      'ead',
      'etc',
      'ff',
      'fl',
      'ibid',
      'id',
      'nem',
      'op',
      'pro',
      'seq',
      'sic',
      'stat',
      'tem',
      'viz'
    ].join('|') +
    ')$'
)
|
||||
|
||||
// Merge a sentence into its next sentence, when the sentence ends with a
// known abbreviation followed by a full stop.
export const mergePrefixExceptions = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph>}
   */
  function (child, index, parent) {
    if (!('children' in child)) {
      return
    }

    const content = child.children

    // Need at least a word and a full stop.
    if (content.length <= 1) {
      return
    }

    const stop = content[content.length - 1]

    // The sentence has to end in a `.` symbol/punctuation node…
    if (
      !stop ||
      (stop.type !== 'PunctuationNode' && stop.type !== 'SymbolNode') ||
      toString(stop) !== '.'
    ) {
      return
    }

    const word = content[content.length - 2]

    // …directly preceded by a word that is a listed abbreviation (compared in
    // lower case).
    if (
      !word ||
      word.type !== 'WordNode' ||
      !abbreviationPrefix.test(toString(word).toLowerCase())
    ) {
      return
    }

    // Merge period into abbreviation.
    word.children.push(stop)
    content.pop()

    // The word now ends where the full stop ended.
    if (stop.position && word.position) {
      word.position.end = stop.position.end
    }

    // Merge sentences: only when the next sibling is a sentence.
    const following = parent.children[index + 1]

    if (!following || following.type !== 'SentenceNode') {
      return
    }

    content.push(...following.children)
    parent.children.splice(index + 1, 1)

    // This sentence now ends where the merged sentence ended.
    if (following.position && child.position) {
      child.position.end = following.position.end
    }

    // Step back so that the current node is iterated over again.
    return index - 1
  }
)
|
2
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.d.ts
generated
vendored
Normal file
2
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.d.ts
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/** Visitor applied to the children of a `Paragraph`; declared for `merge-remaining-full-stops.js`. */
export const mergeRemainingFullStops: import("../../node_modules/unist-util-visit-children/lib/index.js").Visit<import("nlcst").Paragraph>;
|
||||
export type Paragraph = import('nlcst').Paragraph;
|
99
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js
generated
vendored
Normal file
99
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js
generated
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
*/
|
||||
|
||||
import {toString} from 'nlcst-to-string'
|
||||
import {visitChildren} from 'unist-util-visit-children'
|
||||
// Full stop characters that should not be treated as terminal sentence markers:
|
||||
// A case-insensitive abbreviation.
|
||||
import {terminalMarker} from '../expressions.js'
|
||||
|
||||
// Merge full stops that do not terminate the sentence into the previous word
// (if available), or the next word (if available).
export const mergeRemainingFullStops = visitChildren(
  /**
   * @type {import('unist-util-visit-children').Visitor<Paragraph>}
   */
  // eslint-disable-next-line complexity
  function (child, _, _parent) {
    if (!('children' in child)) {
      return
    }

    const content = child.children
    let seenDelimiter = false

    // Walk the sentence from its last node to its first.
    for (let at = content.length - 1; at >= 0; at--) {
      const token = content[at]

      // This is a sentence without terminal marker, so we 'fool' the code to
      // make it think we have found one.
      if (token.type === 'WordNode') {
        seenDelimiter = true
        continue
      }

      // Everything else that is not a symbol or punctuation mark is skipped.
      if (token.type !== 'SymbolNode' && token.type !== 'PunctuationNode') {
        continue
      }

      const value = toString(token)

      // Skip marks that are not terminal markers.
      if (!terminalMarker.test(value)) {
        continue
      }

      // Ignore the first terminal marker found (starting at the end), as it
      // should not be merged.
      if (!seenDelimiter) {
        seenDelimiter = true
        continue
      }

      // Only merge a single full stop.
      if (value !== '.') {
        continue
      }

      const before = content[at - 1]
      const after = content[at + 1]

      if (before && before.type === 'WordNode') {
        const afterNext = content[at + 2]

        // Leave the full stop alone when it is followed by a space and
        // another full stop, such as: `{.} .`
        if (
          after &&
          afterNext &&
          after.type === 'WhiteSpaceNode' &&
          toString(afterNext) === '.'
        ) {
          continue
        }

        // Move the full stop from the sentence to the end of the previous
        // word.
        content.splice(at, 1)
        before.children.push(token)

        // The word now ends where the full stop ended.
        if (token.position && before.position) {
          before.position.end = token.position.end
        }

        // Step over the word that was just extended.
        at--
      } else if (after && after.type === 'WordNode') {
        // Move the full stop from the sentence to the start of the next word.
        content.splice(at, 1)
        after.children.unshift(token)

        // The word now starts where the full stop started.
        if (token.position && after.position) {
          after.position.start = token.position.start
        }
      }
    }
  }
)
|
6
node_modules/parse-latin/lib/plugin/patch-position.d.ts
generated
vendored
Normal file
6
node_modules/parse-latin/lib/plugin/patch-position.d.ts
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/** Visitor applied to the children of a `Sentence`, `Paragraph`, or `Root`; declared for `patch-position.js`. */
export const patchPosition: import("../../node_modules/unist-util-visit-children/lib/index.js").Visit<import("nlcst").Sentence | import("nlcst").Paragraph | import("nlcst").Root>;
|
||||
export type Node = import('unist').Node;
|
||||
export type Paragraph = import('nlcst').Paragraph;
|
||||
export type Position = import('unist').Position;
|
||||
export type Root = import('nlcst').Root;
|
||||
export type Sentence = import('nlcst').Sentence;
|
49
node_modules/parse-latin/lib/plugin/patch-position.js
generated
vendored
Normal file
49
node_modules/parse-latin/lib/plugin/patch-position.js
generated
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @typedef {import('unist').Node} Node
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
* @typedef {import('unist').Position} Position
|
||||
* @typedef {import('nlcst').Root} Root
|
||||
* @typedef {import('nlcst').Sentence} Sentence
|
||||
*/
|
||||
|
||||
import {visitChildren} from 'unist-util-visit-children'
|
||||
|
||||
// Fill in missing `start`/`end` positions on a parent node from its first and
// last child.
export const patchPosition = visitChildren(
  /**
   * @type {import('unist-util-visit-children').Visitor<Paragraph | Root | Sentence>}
   */
  function (child, index, node) {
    const childPosition = child.position

    // A child without positional info has nothing to contribute.
    if (!childPosition) {
      return
    }

    // First child: supplies the start of the parent when that is missing.
    if (
      index < 1 &&
      /* c8 ignore next */
      (!node.position || !node.position.start)
    ) {
      patch(node)
      node.position.start = childPosition.start
    }

    // Last child: supplies the end of the parent when that is missing.
    if (
      index === node.children.length - 1 &&
      (!node.position || !node.position.end)
    ) {
      patch(node)
      node.position.end = childPosition.end
    }
  }
)
|
||||
|
||||
/**
|
||||
* @param {Node} node
|
||||
* @returns {asserts node is Node & {position: Position}}
|
||||
*/
|
||||
function patch(node) {
|
||||
if (!node.position) {
|
||||
// @ts-expect-error: fine, we’ll fill it later.
|
||||
node.position = {}
|
||||
}
|
||||
}
|
3
node_modules/parse-latin/lib/plugin/remove-empty-nodes.d.ts
generated
vendored
Normal file
3
node_modules/parse-latin/lib/plugin/remove-empty-nodes.d.ts
generated
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
/** Modifier applied to the children of a `Paragraph` or `Root`; declared for `remove-empty-nodes.js`. */
export const removeEmptyNodes: import("../../node_modules/unist-util-modify-children/lib/index.js").Modify<import("nlcst").Paragraph | import("nlcst").Root>;
|
||||
export type Paragraph = import('nlcst').Paragraph;
|
||||
export type Root = import('nlcst').Root;
|
23
node_modules/parse-latin/lib/plugin/remove-empty-nodes.js
generated
vendored
Normal file
23
node_modules/parse-latin/lib/plugin/remove-empty-nodes.js
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* @typedef {import('nlcst').Paragraph} Paragraph
|
||||
* @typedef {import('nlcst').Root} Root
|
||||
*/
|
||||
|
||||
import {modifyChildren} from 'unist-util-modify-children'
|
||||
|
||||
// Remove children that are parents themselves but have no children.
export const removeEmptyNodes = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Paragraph | Root>}
   */

  function (child, index, parent) {
    // Nodes without a `children` field, and non-empty parents, are kept.
    if (!('children' in child) || child.children.length > 0) {
      return
    }

    parent.children.splice(index, 1)

    // Next, iterate over the node *now* at the current position (which was the
    // next node).
    return index
  }
)
|
Reference in New Issue
Block a user