1
0

downgrade to kirby v3

This commit is contained in:
Philip Wagner
2024-09-01 10:47:15 +02:00
parent a4b2aece7b
commit af86acb7a1
1085 changed files with 54743 additions and 65042 deletions

View File

@@ -21,21 +21,37 @@ use Kirby\Toolkit\Str;
*/
class Element
{
public function __construct(
protected DOMElement $node,
protected array $marks = []
) {
/**
* @var array
*/
protected $marks;
/**
* @var \DOMElement
*/
protected $node;
/**
* @param \DOMElement $node
* @param array $marks
*/
public function __construct(DOMElement $node, array $marks = [])
{
// default list of allowed marks; innerHtml() falls back to
// these when it is called without an explicit marks argument
$this->marks = $marks;
// the wrapped DOM element that all accessor methods read from
$this->node = $node;
}
/**
* Returns the attribute value or
* the given fallback if the attribute does not exist
*
* @param string $attr
* @param string|null $fallback
* @return string|null
*/
public function attr(
string $attr,
string|null $fallback = null
): string|null {
if ($this->node->hasAttribute($attr) === true) {
public function attr(string $attr, string $fallback = null): ?string
{
if ($this->node->hasAttribute($attr)) {
return $this->node->getAttribute($attr) ?? $fallback;
}
@@ -44,6 +60,8 @@ class Element
/**
* Returns a list of all child elements
*
* @return \DOMNodeList
*/
public function children(): DOMNodeList
{
@@ -52,6 +70,8 @@ class Element
/**
* Returns an array with all class names
*
* @return array
*/
public function classList(): array
{
@@ -60,16 +80,20 @@ class Element
/**
* Returns the value of the class attribute
*
* @return string|null
*/
public function className(): string|null
public function className(): ?string
{
return $this->attr('class');
}
/**
* Returns the original dom element
*
* @return \DOMElement
*/
public function element(): DOMElement
public function element()
{
return $this->node;
}
@@ -77,6 +101,9 @@ class Element
/**
* Returns an array with all nested elements
* that could be found for the given query
*
* @param string $query
* @return array
*/
public function filter(string $query): array
{
@@ -94,8 +121,11 @@ class Element
/**
* Tries to find a single nested element by
* query and otherwise returns null
*
* @param string $query
* @return \Kirby\Parsley\Element|null
*/
public function find(string $query): static|null
public function find(string $query)
{
if ($result = $this->query($query)[0]) {
return new static($result);
@@ -108,16 +138,17 @@ class Element
* Returns the inner HTML of the element
*
* @param array|null $marks List of allowed marks
* @return string
*/
public function innerHtml(array|null $marks = null): string
public function innerHtml(array $marks = null): string
{
$marks ??= $this->marks;
$inline = new Inline($this->node, $marks);
return $inline->innerHtml();
return (new Inline($this->node, $marks ?? $this->marks))->innerHtml();
}
/**
* Returns the contents as plain text
*
* @return string
*/
public function innerText(): string
{
@@ -126,31 +157,40 @@ class Element
/**
* Returns the full HTML for the element
*
* @param array|null $marks
* @return string
*/
public function outerHtml(array|null $marks = null): string
public function outerHtml(array $marks = null): string
{
return $this->node->ownerDocument->saveHtml($this->node);
}
/**
* Searches nested elements
*
* @param string $query
* @return \DOMNodeList|false
*/
public function query(string $query): DOMNodeList|null
public function query(string $query)
{
$path = new DOMXPath($this->node->ownerDocument);
return $path->query($query, $this->node);
return (new DOMXPath($this->node->ownerDocument))->query($query, $this->node);
}
/**
* Removes the element from the DOM
*
* @return void
*/
public function remove(): void
public function remove()
{
$this->node->parentNode->removeChild($this->node);
}
/**
* Returns the name of the element
*
* @return string
*/
public function tagName(): string
{

View File

@@ -2,10 +2,8 @@
namespace Kirby\Parsley;
use DOMElement;
use DOMNode;
use DOMNodeList;
use DOMText;
use Kirby\Toolkit\Html;
/**
@@ -22,27 +20,33 @@ use Kirby\Toolkit\Html;
*/
class Inline
{
protected string $html = '';
protected array $marks = [];
/**
* @var string
*/
protected $html = '';
/**
* @var array
*/
protected $marks = [];
/**
* @param \DOMNode $node
* @param array $marks
*/
public function __construct(DOMNode $node, array $marks = [])
{
$this->createMarkRules($marks);
$html = static::parseNode($node, $this->marks) ?? '';
// only trim HTML if it doesn't consist of only spaces
if (trim($html) !== '') {
$html = trim($html);
}
$this->html = $html;
$this->html = trim(static::parseNode($node, $this->marks) ?? '');
}
/**
* Loads all mark rules
*
* @param array $marks
* @return array
*/
protected function createMarkRules(array $marks): array
protected function createMarkRules(array $marks)
{
foreach ($marks as $mark) {
$this->marks[$mark['tag']] = $mark;
@@ -52,22 +56,25 @@ class Inline
}
/**
* Get all allowed attributes for a DOMElement
* Get all allowed attributes for a DOMNode
* as clean array
*
* @param DOMNode $node
* @param array $marks
* @return array
*/
public static function parseAttrs(
DOMElement $node,
array $marks = []
): array {
public static function parseAttrs(DOMNode $node, array $marks = []): array
{
$attrs = [];
$mark = $marks[$node->tagName];
$defaults = $mark['defaults'] ?? [];
foreach ($mark['attrs'] ?? [] as $attr) {
$attrs[$attr] = match ($node->hasAttribute($attr)) {
true => $node->getAttribute($attr),
default => $defaults[$attr] ?? null
};
if ($node->hasAttribute($attr)) {
$attrs[$attr] = $node->getAttribute($attr);
} else {
$attrs[$attr] = $defaults[$attr] ?? null;
}
}
return $attrs;
@@ -76,11 +83,13 @@ class Inline
/**
* Parses all children and creates clean HTML
* for each of them.
*
* @param \DOMNodeList $children
* @param array $marks
* @return string
*/
public static function parseChildren(
DOMNodeList $children,
array $marks
): string {
public static function parseChildren(DOMNodeList $children, array $marks): string
{
$html = '';
foreach ($children as $child) {
$html .= static::parseNode($child, $marks);
@@ -91,11 +100,12 @@ class Inline
/**
* Go through all child elements and create
* clean inner HTML for them
*
* @param DOMNode $node
* @param array $marks
* @return string|null
*/
public static function parseInnerHtml(
DOMElement $node,
array $marks = []
): string|null {
public static function parseInnerHtml(DOMNode $node, array $marks = []): ?string
{
$html = static::parseChildren($node->childNodes, $marks);
// trim the inner HTML for paragraphs
@@ -113,46 +123,50 @@ class Inline
/**
* Converts the given node to clean HTML
*
* @param \DOMNode $node
* @param array $marks
* @return string|null
*/
public static function parseNode(DOMNode $node, array $marks = []): string|null
public static function parseNode(DOMNode $node, array $marks = []): ?string
{
if ($node instanceof DOMText) {
if (is_a($node, 'DOMText') === true) {
return Html::encode($node->textContent);
}
if ($node instanceof DOMElement) {
// unknown marks
if (array_key_exists($node->tagName, $marks) === false) {
return static::parseChildren($node->childNodes, $marks);
}
// collect all allowed attributes
$attrs = static::parseAttrs($node, $marks);
// close self-closing elements
if (Html::isVoid($node->tagName) === true) {
return '<' . $node->tagName . Html::attr($attrs, null, ' ') . ' />';
}
$innerHtml = static::parseInnerHtml($node, $marks);
// skip empty paragraphs
if ($innerHtml === null && $node->tagName === 'p') {
return null;
}
// create the outer html for the element
$html = '<' . $node->tagName . Html::attr($attrs, null, ' ') . '>';
$html .= $innerHtml;
$html .= '</' . $node->tagName . '>';
return $html;
// ignore comments
if (is_a($node, 'DOMComment') === true) {
return null;
}
return null;
// unknown marks
if (array_key_exists($node->tagName, $marks) === false) {
return static::parseChildren($node->childNodes, $marks);
}
// collect all allowed attributes
$attrs = static::parseAttrs($node, $marks);
// close self-closing elements
if (Html::isVoid($node->tagName) === true) {
return '<' . $node->tagName . Html::attr($attrs, null, ' ') . ' />';
}
$innerHtml = static::parseInnerHtml($node, $marks);
// skip empty paragraphs
if ($innerHtml === null && $node->tagName === 'p') {
return null;
}
// create the outer html for the element
return '<' . $node->tagName . Html::attr($attrs, null, ' ') . '>' . $innerHtml . '</' . $node->tagName . '>';
}
/**
* Returns the HTML contents of the element
*
* @return string
*/
public function innerHtml(): string
{

View File

@@ -2,10 +2,7 @@
namespace Kirby\Parsley;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
use Kirby\Parsley\Schema\Plain;
use Kirby\Toolkit\Dom;
@@ -23,25 +20,65 @@ use Kirby\Toolkit\Dom;
*/
class Parsley
{
protected array $blocks = [];
protected DOMDocument $doc;
protected Dom $dom;
protected array $inline = [];
protected array $marks = [];
protected array $nodes = [];
protected Schema $schema;
protected array $skip = [];
/**
* @var array
*/
protected $blocks = [];
public static bool $useXmlExtension = true;
/**
* @var \DOMDocument
*/
protected $doc;
public function __construct(string $html, Schema|null $schema = null)
/**
* @var \Kirby\Toolkit\Dom
*/
protected $dom;
/**
* @var array
*/
protected $inline = [];
/**
* @var array
*/
protected $marks = [];
/**
* @var array
*/
protected $nodes = [];
/**
* @var \Kirby\Parsley\Schema
*/
protected $schema;
/**
* @var array
*/
protected $skip = [];
/**
* @var bool
*/
public static $useXmlExtension = true;
/**
* @param string $html
* @param \Kirby\Parsley\Schema|null $schema
*/
public function __construct(string $html, Schema $schema = null)
{
// fail gracefully if the XML extension is not installed
// or should be skipped
if ($this->useXmlExtension() === false) {
$this->blocks[] = [
'type' => 'markdown',
'content' => ['text' => $html]
'type' => 'markdown',
'content' => [
'text' => $html,
]
];
return;
}
@@ -73,6 +110,8 @@ class Parsley
/**
* Returns all detected blocks
*
* @return array
*/
public function blocks(): array
{
@@ -81,6 +120,9 @@ class Parsley
/**
* Load all node rules from the schema
*
* @param array $nodes
* @return array
*/
public function createNodeRules(array $nodes): array
{
@@ -94,6 +136,9 @@ class Parsley
/**
* Checks if the given element contains
* any other block level elements
*
* @param \DOMNode $element
* @return bool
*/
public function containsBlock(DOMNode $element): bool
{
@@ -102,10 +147,7 @@ class Parsley
}
foreach ($element->childNodes as $childNode) {
if (
$this->isBlock($childNode) === true ||
$this->containsBlock($childNode)
) {
if ($this->isBlock($childNode) === true || $this->containsBlock($childNode)) {
return true;
}
}
@@ -120,8 +162,10 @@ class Parsley
* if the type matches, or will be appended.
*
* The inline cache will be reset afterwards
*
* @return void
*/
public function endInlineBlock(): void
public function endInlineBlock()
{
if (empty($this->inline) === true) {
return;
@@ -130,7 +174,7 @@ class Parsley
$html = [];
foreach ($this->inline as $inline) {
$node = new Inline($inline, $this->marks);
$node = new Inline($inline, $this->marks);
$html[] = $node->innerHTML();
}
@@ -147,8 +191,11 @@ class Parsley
* Creates a fallback block type for the given
* element. The element can either be an element object
* or a simple HTML/plain text string
*
* @param \Kirby\Parsley\Element|string $element
* @return array|null
*/
public function fallback(Element|string $element): array|null
public function fallback($element): ?array
{
if ($fallback = $this->schema->fallback($element)) {
return $fallback;
@@ -159,26 +206,32 @@ class Parsley
/**
* Checks if the given DOMNode is a block element
*
* @param DOMNode $element
* @return bool
*/
public function isBlock(DOMNode $element): bool
{
if ($element instanceof DOMElement) {
return array_key_exists($element->tagName, $this->nodes) === true;
if (is_a($element, 'DOMElement') === false) {
return false;
}
return false;
return array_key_exists($element->tagName, $this->nodes) === true;
}
/**
* Checks if the given DOMNode is an inline element
*
* @param \DOMNode $element
* @return bool
*/
public function isInline(DOMNode $element): bool
{
if ($element instanceof DOMText) {
if (is_a($element, 'DOMText') === true) {
return true;
}
if ($element instanceof DOMElement) {
if (is_a($element, 'DOMElement') === true) {
// all spans will be treated as inline elements
if ($element->tagName === 'span') {
return true;
@@ -199,17 +252,17 @@ class Parsley
return false;
}
public function mergeOrAppend(array $block): void
/**
* @param array $block
* @return void
*/
public function mergeOrAppend(array $block)
{
$lastIndex = count($this->blocks) - 1;
$lastItem = $this->blocks[$lastIndex] ?? null;
// merge with previous block
if (
$block['type'] === 'text' &&
$lastItem &&
$lastItem['type'] === 'text'
) {
if ($block['type'] === 'text' && $lastItem && $lastItem['type'] === 'text') {
$this->blocks[$lastIndex]['content']['text'] .= ' ' . $block['content']['text'];
// append
@@ -221,6 +274,9 @@ class Parsley
/**
* Parses the given DOM node and tries to
* convert it to a block or a list of blocks
*
* @param \DOMNode $element
* @return bool
*/
public function parseNode(DOMNode $element): bool
{
@@ -232,18 +288,15 @@ class Parsley
}
// inline context
if ($this->isInline($element) === true) {
if ($this->isInline($element)) {
$this->inline[] = $element;
return true;
} else {
$this->endInlineBlock();
}
$this->endInlineBlock();
// known block nodes
if ($this->isBlock($element) === true) {
/**
* @var DOMElement $element
*/
if ($parser = ($this->nodes[$element->tagName]['parse'] ?? null)) {
if ($result = $parser(new Element($element, $this->marks))) {
$this->blocks[] = $result;
@@ -254,9 +307,6 @@ class Parsley
// has only unknown children (div, etc.)
if ($this->containsBlock($element) === false) {
/**
* @var DOMElement $element
*/
if (in_array($element->tagName, $this->skip) === true) {
return false;
}
@@ -289,6 +339,9 @@ class Parsley
return true;
}
/**
* @return bool
*/
public function useXmlExtension(): bool
{
if (static::$useXmlExtension !== true) {

View File

@@ -18,8 +18,11 @@ class Schema
/**
* Returns the fallback block when no
* other block type can be detected
*
* @param \Kirby\Parsley\Element|string $element
* @return array|null
*/
public function fallback(Element|string $element): array|null
public function fallback($element): ?array
{
return null;
}
@@ -27,6 +30,8 @@ class Schema
/**
* Returns a list of allowed inline marks
* and their parsing rules
*
* @return array
*/
public function marks(): array
{
@@ -36,6 +41,8 @@ class Schema
/**
* Returns a list of allowed nodes and
* their parsing rules
*
* @return array
*/
public function nodes(): array
{
@@ -45,6 +52,8 @@ class Schema
/**
* Returns a list of all elements that should be
* skipped and not be parsed at all
*
* @return array
*/
public function skip(): array
{

View File

@@ -2,8 +2,6 @@
namespace Kirby\Parsley\Schema;
use DOMElement;
use DOMText;
use Kirby\Parsley\Element;
use Kirby\Toolkit\Str;
@@ -21,22 +19,22 @@ use Kirby\Toolkit\Str;
*/
class Blocks extends Plain
{
/**
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function blockquote(Element $node): array
{
$text = [];
$citation = null;
$text = [];
// get all the text for the quote
foreach ($node->children() as $child) {
if ($child instanceof DOMText) {
if (is_a($child, 'DOMText') === true) {
$text[] = trim($child->textContent);
}
if (
$child instanceof DOMElement &&
$child->tagName !== 'footer'
) {
$element = new Element($child);
$text[] = $element->innerHTML($this->marks());
if (is_a($child, 'DOMElement') === true && $child->tagName !== 'footer') {
$text[] = (new Element($child))->innerHTML($this->marks());
}
}
@@ -44,7 +42,9 @@ class Blocks extends Plain
$text = implode('', array_filter($text));
// get the citation from the footer
$citation = $node->find('footer')?->innerHTML($this->marks());
if ($footer = $node->find('footer')) {
$citation = $footer->innerHTML($this->marks());
}
return [
'content' => [
@@ -58,10 +58,13 @@ class Blocks extends Plain
/**
* Creates the fallback block type
* if no other block can be found
*
* @param \Kirby\Parsley\Element|string $element
* @return array|null
*/
public function fallback(Element|string $element): array|null
public function fallback($element): ?array
{
if ($element instanceof Element) {
if (is_a($element, Element::class) === true) {
$html = $element->innerHtml();
// wrap the inner HTML in a p tag if it doesn't
@@ -91,6 +94,9 @@ class Blocks extends Plain
/**
* Converts a heading element to a heading block
*
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function heading(Element $node): array
{
@@ -111,14 +117,21 @@ class Blocks extends Plain
];
}
/**
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function iframe(Element $node): array
{
$src = $node->attr('src');
$figcaption = $node->find('ancestor::figure[1]//figcaption');
$caption = $figcaption?->innerHTML($this->marks());
$caption = null;
$src = $node->attr('src');
// avoid parsing the caption twice
$figcaption?->remove();
if ($figcaption = $node->find('ancestor::figure[1]//figcaption')) {
$caption = $figcaption->innerHTML($this->marks());
// avoid parsing the caption twice
$figcaption->remove();
}
// reverse engineer video URLs
if (preg_match('!player.vimeo.com\/video\/([0-9]+)!i', $src, $array) === 1) {
@@ -150,14 +163,25 @@ class Blocks extends Plain
];
}
/**
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function img(Element $node): array
{
$link = $node->find('ancestor::a')?->attr('href');
$figcaption = $node->find('ancestor::figure[1]//figcaption');
$caption = $figcaption?->innerHTML($this->marks());
$caption = null;
$link = null;
// avoid parsing the caption twice
$figcaption?->remove();
if ($figcaption = $node->find('ancestor::figure[1]//figcaption')) {
$caption = $figcaption->innerHTML($this->marks());
// avoid parsing the caption twice
$figcaption->remove();
}
if ($a = $node->find('ancestor::a')) {
$link = $a->attr('href');
}
return [
'content' => [
@@ -173,6 +197,9 @@ class Blocks extends Plain
/**
* Converts a list element to HTML
*
* @param \Kirby\Parsley\Element $node
* @return string
*/
public function list(Element $node): string
{
@@ -182,30 +209,30 @@ class Blocks extends Plain
$innerHtml = '';
foreach ($li->children() as $child) {
if ($child instanceof DOMText) {
if (is_a($child, 'DOMText') === true) {
$innerHtml .= $child->textContent;
} elseif ($child instanceof DOMElement) {
} elseif (is_a($child, 'DOMElement') === true) {
$child = new Element($child);
$list = ['ul', 'ol'];
$innerHtml .= match (in_array($child->tagName(), $list)) {
true => $this->list($child),
default => $child->innerHTML($this->marks())
};
if (in_array($child->tagName(), ['ul', 'ol']) === true) {
$innerHtml .= $this->list($child);
} else {
$innerHtml .= $child->innerHTML($this->marks());
}
}
}
$html[] = '<li>' . trim($innerHtml) . '</li>';
}
$outerHtml = '<' . $node->tagName() . '>';
$outerHtml .= implode($html);
$outerHtml .= '</' . $node->tagName() . '>';
return $outerHtml;
return '<' . $node->tagName() . '>' . implode($html) . '</' . $node->tagName() . '>';
}
/**
* Returns a list of allowed inline marks
* and their parsing rules
*
* @return array
*/
public function marks(): array
{
@@ -214,7 +241,7 @@ class Blocks extends Plain
'tag' => 'a',
'attrs' => ['href', 'rel', 'target', 'title'],
'defaults' => [
'rel' => 'noreferrer'
'rel' => 'noopener noreferrer'
]
],
[
@@ -264,79 +291,114 @@ class Blocks extends Plain
* their parsing rules
*
* @codeCoverageIgnore
* @return array
*/
public function nodes(): array
{
return [
[
'tag' => 'blockquote',
'parse' => fn (Element $node) => $this->blockquote($node)
'parse' => function (Element $node) {
return $this->blockquote($node);
}
],
[
'tag' => 'h1',
'parse' => fn (Element $node) => $this->heading($node)
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h2',
'parse' => fn (Element $node) => $this->heading($node)
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h3',
'parse' => fn (Element $node) => $this->heading($node)
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h4',
'parse' => fn (Element $node) => $this->heading($node)
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h5',
'parse' => fn (Element $node) => $this->heading($node)
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h6',
'parse' => fn (Element $node) => $this->heading($node)
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'hr',
'parse' => fn (Element $node) => ['type' => 'line']
'parse' => function (Element $node) {
return [
'type' => 'line'
];
}
],
[
'tag' => 'iframe',
'parse' => fn (Element $node) => $this->iframe($node)
'parse' => function (Element $node) {
return $this->iframe($node);
}
],
[
'tag' => 'img',
'parse' => fn (Element $node) => $this->img($node)
'parse' => function (Element $node) {
return $this->img($node);
}
],
[
'tag' => 'ol',
'parse' => fn (Element $node) => [
'content' => [
'text' => $this->list($node)
],
'type' => 'list',
]
'parse' => function (Element $node) {
return [
'content' => [
'text' => $this->list($node)
],
'type' => 'list',
];
}
],
[
'tag' => 'pre',
'parse' => fn (Element $node) => $this->pre($node)
'parse' => function (Element $node) {
return $this->pre($node);
}
],
[
'tag' => 'table',
'parse' => fn (Element $node) => $this->table($node)
'parse' => function (Element $node) {
return $this->table($node);
}
],
[
'tag' => 'ul',
'parse' => fn (Element $node) => [
'content' => [
'text' => $this->list($node)
],
'type' => 'list',
]
'parse' => function (Element $node) {
return [
'content' => [
'text' => $this->list($node)
],
'type' => 'list',
];
}
],
];
}
/**
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function pre(Element $node): array
{
$language = 'text';
@@ -359,6 +421,10 @@ class Blocks extends Plain
];
}
/**
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function table(Element $node): array
{
return [

View File

@@ -23,10 +23,13 @@ class Plain extends Schema
/**
* Creates the fallback block type
* if no other block can be found
*
* @param \Kirby\Parsley\Element|string $element
* @return array|null
*/
public function fallback(Element|string $element): array|null
public function fallback($element): ?array
{
if ($element instanceof Element) {
if (is_a($element, Element::class) === true) {
$text = $element->innerText();
} elseif (is_string($element) === true) {
$text = trim($element);
@@ -49,6 +52,8 @@ class Plain extends Schema
/**
* Returns a list of all elements that
* should be skipped during parsing
*
* @return array
*/
public function skip(): array
{