829 lines
27 KiB
PHP
829 lines
27 KiB
PHP
<?php
|
|
|
|
/*
|
|
* This file is part of the commonmark-php package.
|
|
*
|
|
* (c) Colin O'Dell <colinodell@gmail.com>
|
|
*
|
|
* Original code based on stmd.js
|
|
* - (c) John MacFarlane
|
|
*
|
|
* For the full copyright and license information, please view the LICENSE
|
|
* file that was distributed with this source code.
|
|
*/
|
|
if (!defined('__TYPECHO_ROOT_DIR__')) exit;
|
|
|
|
|
|
/**
|
|
* Parses inline elements
|
|
*/
|
|
class CommonMark_InlineParser
|
|
{
|
|
/**
|
|
* @var string
|
|
*/
|
|
protected $subject;
|
|
|
|
/**
|
|
* @var int
|
|
*/
|
|
protected $labelNestLevel = 0; // Used by parseLinkLabel method
|
|
|
|
/**
|
|
* @var int
|
|
*/
|
|
protected $pos = 0;
|
|
|
|
/**
|
|
* @var ReferenceMap
|
|
*/
|
|
protected $refmap;
|
|
|
|
/**
|
|
* @var RegexHelper
|
|
*/
|
|
protected $regexHelper;
|
|
|
|
/**
|
|
* Constrcutor
|
|
*/
|
|
public function __construct()
|
|
{
|
|
$this->refmap = new CommonMark_Reference_ReferenceMap();
|
|
}
|
|
|
|
/**
|
|
* If re matches at current position in the subject, advance
|
|
* position in subject and return the match; otherwise return null
|
|
* @param string $re
|
|
*
|
|
* @return string|null The match (if found); null otherwise
|
|
*/
|
|
protected function match($re)
|
|
{
|
|
$matches = array();
|
|
$subject = substr($this->subject, $this->pos);
|
|
if (!preg_match($re, $subject, $matches, PREG_OFFSET_CAPTURE)) {
|
|
return null;
|
|
}
|
|
|
|
// [0][0] contains the matched text
|
|
// [0][1] contains the index of that match
|
|
$this->pos += $matches[0][1] + strlen($matches[0][0]);
|
|
|
|
return $matches[0][0];
|
|
}
|
|
|
|
/**
|
|
* Returns the character at the current subject position, or null if
|
|
* there are no more characters
|
|
*
|
|
* @return string|null
|
|
*/
|
|
protected function peek()
|
|
{
|
|
$ch = substr($this->subject, $this->pos, 1);
|
|
return false !== $ch && strlen($ch) > 0 ? $ch : null;
|
|
}
|
|
|
|
/**
|
|
* Parse zero or more space characters, including at most one newline
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function spnl()
|
|
{
|
|
$this->match('/^ *(?:\n *)?/');
|
|
|
|
return 1;
|
|
}
|
|
|
|
// All of the parsers below try to match something at the current position
|
|
// in the subject. If they succeed in matching anything, they
|
|
// push an inline element onto the 'inlines' list. They return the
|
|
// number of characters parsed (possibly 0).
|
|
|
|
/**
|
|
* Attempt to parse backticks, adding either a backtick code span or a
|
|
* literal sequence of backticks to the 'inlines' list.
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int Number of characters parsed
|
|
*/
|
|
protected function parseBackticks(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
$startpos = $this->pos;
|
|
$ticks = $this->match('/^`+/');
|
|
if (!$ticks) {
|
|
return 0;
|
|
}
|
|
|
|
$afterOpenTicks = $this->pos;
|
|
$foundCode = false;
|
|
$match = null;
|
|
while (!$foundCode && ($match = $this->match('/`+/m'))) {
|
|
if ($match == $ticks) {
|
|
$c = substr($this->subject, $afterOpenTicks, $this->pos - $afterOpenTicks - strlen($ticks));
|
|
$c = preg_replace('/[ \n]+/', ' ', $c);
|
|
$inlines->add(CommonMark_Element_InlineCreator::createCode(trim($c)));
|
|
|
|
return ($this->pos - $startpos);
|
|
}
|
|
}
|
|
|
|
// If we go here, we didn't match a closing backtick sequence
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString($ticks));
|
|
$this->pos = $afterOpenTicks;
|
|
|
|
return ($this->pos - $startpos);
|
|
}
|
|
|
|
/**
|
|
* Parse a backslash-escaped special character, adding either the escaped
|
|
* character, a hard line break (if the backslash is followed by a newline),
|
|
* or a literal backslash to the 'inlines' list.
|
|
*
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseEscaped(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
$subject = $this->subject;
|
|
$pos = $this->pos;
|
|
if ($subject[$pos] === '\\') {
|
|
if (isset($subject[$pos + 1]) && $subject[$pos + 1] === "\n") {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createHardbreak());
|
|
$this->pos = $this->pos + 2;
|
|
|
|
return 2;
|
|
} elseif (isset($subject[$pos + 1]) && preg_match(
|
|
'/' . CommonMark_Util_RegexHelper::REGEX_ESCAPABLE . '/',
|
|
$subject[$pos + 1]
|
|
)
|
|
) {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString($subject[$pos + 1]));
|
|
$this->pos = $this->pos + 2;
|
|
|
|
return 2;
|
|
} else {
|
|
$this->pos++;
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString('\\'));
|
|
|
|
return 1;
|
|
}
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse an autolink (URL or email in pointy brackets)
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseAutolink(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
$emailRegex = '/^<([a-zA-Z0-9.!#$%&\'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/';
|
|
$otherLinkRegex = '/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i';
|
|
|
|
if ($m = $this->match($emailRegex)) {
|
|
$email = substr($m, 1, -1);
|
|
$inlines->add(CommonMark_Element_InlineCreator::createLink('mailto:' . $email, $email));
|
|
|
|
return strlen($m);
|
|
} elseif ($m = $this->match($otherLinkRegex)) {
|
|
$dest = substr($m, 1, -1);
|
|
$inlines->add(CommonMark_Element_InlineCreator::createLink($dest, $dest));
|
|
|
|
return strlen($m);
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse a raw HTML tag
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseHtmlTag(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
if ($m = $this->match(CommonMark_Util_RegexHelper::getInstance()->getHtmlTagRegex())) {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createHtml($m));
|
|
|
|
return strlen($m);
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Scan a sequence of characters == c, and return information about
|
|
* the number of delimiters and whether they are positioned such that
|
|
* they can open and/or close emphasis or strong emphasis. A utility
|
|
* function for strong/emph parsing.
|
|
*
|
|
* @param string $char
|
|
*
|
|
* @return array
|
|
*/
|
|
protected function scanDelims($char)
|
|
{
|
|
$numDelims = 0;
|
|
$startPos = $this->pos;
|
|
|
|
$charBefore = $this->pos === 0 ? "\n" : $this->subject[$this->pos - 1];
|
|
|
|
while ($this->peek() === $char) {
|
|
$numDelims++;
|
|
$this->pos++;
|
|
}
|
|
|
|
$charAfter = $this->peek() ? : "\n";
|
|
|
|
$canOpen = $numDelims > 0 && $numDelims <= 3 && !preg_match('/\s/', $charAfter);
|
|
$canClose = $numDelims > 0 && $numDelims <= 3 && !preg_match('/\s/', $charBefore);
|
|
if ($char === '_') {
|
|
$canOpen = $canOpen && !preg_match('/[a-z0-9]/i', $charBefore);
|
|
$canClose = $canClose && !preg_match('/[a-z0-9]/i', $charAfter);
|
|
}
|
|
|
|
$this->pos = $startPos;
|
|
|
|
return compact('numDelims', 'canOpen', 'canClose');
|
|
}
|
|
|
|
/**
|
|
* @param ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseEmphasis(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
$startPos = $this->pos;
|
|
$firstClose = 0;
|
|
$nxt = $this->peek();
|
|
if ($nxt == '*' || $nxt == '_') {
|
|
$c = $nxt;
|
|
} else {
|
|
return 0;
|
|
}
|
|
|
|
// Get opening delimiters
|
|
$res = $this->scanDelims($c);
|
|
$numDelims = $res['numDelims'];
|
|
$this->pos += $numDelims;
|
|
|
|
// We provisionally add a literal string. If we match appropriate
|
|
// closing delimiters, we'll change this to Strong or Emph.
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString(substr($this->subject, $this->pos - $numDelims, $numDelims)));
|
|
// Record the position of this opening delimiter:
|
|
$delimPos = $inlines->count() - 1;
|
|
|
|
if (!$res['canOpen'] || $numDelims === 0) {
|
|
return 0;
|
|
}
|
|
|
|
$firstCloseDelims = 0;
|
|
switch ($numDelims) {
|
|
case 1: // we started with * or _
|
|
while (true) {
|
|
$res = $this->scanDelims($c);
|
|
if ($res['numDelims'] >= 1 && $res['canClose']) {
|
|
$this->pos += 1;
|
|
// Convert the inline at delimpos, currently a string with the delim,
|
|
// into an Emph whose contents are the succeeding inlines
|
|
$inlines->get($delimPos)->setType(CommonMark_Element_InlineElement::TYPE_EMPH);
|
|
$inlines->get($delimPos)->setContents($inlines->slice($delimPos + 1));
|
|
$inlines->splice($delimPos + 1);
|
|
break;
|
|
} else {
|
|
if ($this->parseInline($inlines) === 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ($this->pos - $startPos);
|
|
|
|
case 2: // We started with ** or __
|
|
while (true) {
|
|
$res = $this->scanDelims($c);
|
|
if ($res['numDelims'] >= 2 && $res['canClose']) {
|
|
$this->pos += 2;
|
|
$inlines->get($delimPos)->setType(CommonMark_Element_InlineElement::TYPE_STRONG);
|
|
$inlines->get($delimPos)->setContents($inlines->slice($delimPos + 1));
|
|
$inlines->splice($delimPos + 1);
|
|
break;
|
|
} else {
|
|
if ($this->parseInline($inlines) === 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ($this->pos - $startPos);
|
|
|
|
case 3: // We started with *** or ___
|
|
while (true) {
|
|
$res = $this->scanDelims($c);
|
|
if ($res['numDelims'] >= 1 && $res['numDelims'] <= 3 && $res['canClose'] && $res['numDelims'] != $firstCloseDelims) {
|
|
if ($firstCloseDelims === 1 && $numDelims > 2) {
|
|
$res['numDelims'] = 2;
|
|
} elseif ($firstCloseDelims === 2) {
|
|
$res['numDelims'] = 1;
|
|
} elseif ($res['numDelims'] === 3) {
|
|
// If we opened with ***, then we interpret *** as ** followed by *
|
|
// giving us <strong><em>
|
|
$res['numDelims'] = 1;
|
|
}
|
|
|
|
$this->pos += $res['numDelims'];
|
|
|
|
if ($firstClose > 0) { // if we've already passed the first closer:
|
|
$targetInline = $inlines->get($delimPos);
|
|
if ($firstCloseDelims === 1) {
|
|
$targetInline->setType(CommonMark_Element_InlineElement::TYPE_STRONG);
|
|
$targetInline->setContents(
|
|
array(
|
|
CommonMark_Element_InlineCreator::createEmph(
|
|
$inlines->slice($delimPos + 1, $firstClose - $delimPos - 1)
|
|
)
|
|
)
|
|
);
|
|
} else {
|
|
$targetInline->setType(CommonMark_Element_InlineElement::TYPE_EMPH);
|
|
$targetInline->setContents(
|
|
array(
|
|
CommonMark_Element_InlineCreator::createStrong(
|
|
$inlines->slice($delimPos + 1, $firstClose - $delimPos - 1)
|
|
)
|
|
)
|
|
);
|
|
}
|
|
|
|
$targetInline->setContents($targetInline->getContents() + $inlines->slice($firstClose + 1));
|
|
$inlines->splice($delimPos + 1);
|
|
break;
|
|
} else {
|
|
// this is the first closer; for now, add literal string;
|
|
// we'll change this when he hit the second closer
|
|
$str = substr($this->subject, $this->pos - $res['numDelims'], $this->pos);
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString($str));
|
|
$firstClose = $inlines->count() - 1;
|
|
$firstCloseDelims = $res['numDelims'];
|
|
}
|
|
} else {
|
|
// Parse another inline element, til we hit the end
|
|
if ($this->parseInline($inlines) === 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ($this->pos - $startPos);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse link title (sans quotes)
|
|
*
|
|
* @return null|string The string, or null if no match
|
|
*/
|
|
protected function parseLinkTitle()
|
|
{
|
|
if ($title = $this->match(CommonMark_Util_RegexHelper::getInstance()->getLinkTitleRegex())) {
|
|
// Chop off quotes from title and unescape
|
|
return CommonMark_Util_RegexHelper::unescape(substr($title, 1, strlen($title) - 2));
|
|
} else {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse link destination
|
|
*
|
|
* @return null|string The string, or null if no match
|
|
*/
|
|
protected function parseLinkDestination()
|
|
{
|
|
if ($res = $this->match(CommonMark_Util_RegexHelper::getInstance()->getLinkDestinationBracesRegex())) {
|
|
// Chop off surrounding <..>:
|
|
return CommonMark_Util_RegexHelper::unescape(substr($res, 1, strlen($res) - 2));
|
|
} else {
|
|
$res = $this->match(CommonMark_Util_RegexHelper::getInstance()->getLinkDestinationRegex());
|
|
if ($res !== null) {
|
|
return CommonMark_Util_RegexHelper::unescape($res);
|
|
} else {
|
|
return null;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return int
|
|
*/
|
|
protected function parseLinkLabel()
|
|
{
|
|
if ($this->peek() != '[') {
|
|
return 0;
|
|
}
|
|
|
|
$startPos = $this->pos;
|
|
$nestLevel = 0;
|
|
if ($this->labelNestLevel > 0) {
|
|
// If we've already checked to the end of this subject
|
|
// for a label, even with a different starting [, we
|
|
// know we won't find one here and we can just return.
|
|
// This avoids lots of backtracking.
|
|
// Note: nest level 1 would be: [foo [bar]
|
|
// nest level 2 would be: [foo [bar [baz]
|
|
$this->labelNestLevel--;
|
|
|
|
return 0;
|
|
}
|
|
|
|
$this->pos++; // Advance past [
|
|
while (($c = $this->peek()) !== null && ($c != ']' || $nestLevel > 0)) {
|
|
switch ($c) {
|
|
case '`':
|
|
$this->parseBackticks(new CommonMark_Util_ArrayCollection());
|
|
break;
|
|
case '<':
|
|
$this->parseAutolink(new CommonMark_Util_ArrayCollection()) || $this->parseHtmlTag(
|
|
new CommonMark_Util_ArrayCollection()
|
|
) || $this->parseString(new CommonMark_Util_ArrayCollection()); // TODO: Does PHP support this use of "||"?
|
|
break;
|
|
case '[': // nested []
|
|
$nestLevel++;
|
|
$this->pos++;
|
|
break;
|
|
case ']': //nested []
|
|
$nestLevel--;
|
|
$this->pos++;
|
|
break;
|
|
case '\\':
|
|
$this->parseEscaped(new CommonMark_Util_ArrayCollection());
|
|
break;
|
|
default:
|
|
$this->parseString(new CommonMark_Util_ArrayCollection());
|
|
}
|
|
}
|
|
|
|
if ($c === ']') {
|
|
$this->labelNestLevel = 0;
|
|
$this->pos++; // advance past ]
|
|
|
|
return $this->pos - $startPos;
|
|
} else {
|
|
if ($c === null) {
|
|
$this->labelNestLevel = $nestLevel;
|
|
}
|
|
|
|
$this->pos = $startPos;
|
|
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse raw link label, including surrounding [], and return
|
|
* inline contents.
|
|
*
|
|
* @param string $s
|
|
*
|
|
* @return ArrayCollection|InlineElementInterface[] Inline contents
|
|
*/
|
|
private function parseRawLabel($s)
|
|
{
|
|
// note: parse without a refmap; we don't want links to resolve
|
|
// in nested brackets!
|
|
$parser = new self();
|
|
$substring = substr($s, 1, strlen($s) - 2);
|
|
|
|
return $parser->parse($substring, new CommonMark_Reference_ReferenceMap());
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse a link. If successful, add the link to inlines.
|
|
* @param ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseLink(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
$startPos = $this->pos;
|
|
$n = $this->parseLinkLabel();
|
|
if ($n === 0) {
|
|
return 0;
|
|
}
|
|
|
|
$rawLabel = substr($this->subject, $startPos, $n);
|
|
|
|
// if we got this far, we've parsed a label.
|
|
// Try to parse an explicit link: [label](url "title")
|
|
if ($this->peek() == '(') {
|
|
$this->pos++;
|
|
if ($this->spnl() &&
|
|
(($dest = $this->parseLinkDestination()) !== null) &&
|
|
$this->spnl()
|
|
) {
|
|
// make sure there's a space before the title:
|
|
if (preg_match('/^\\s/', $this->subject[$this->pos - 1])) {
|
|
$title = $this->parseLinkTitle() ? : '';
|
|
} else {
|
|
$title = null;
|
|
}
|
|
|
|
if ($this->spnl() && $this->match('/^\\)/')) {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createLink($dest, $this->parseRawLabel($rawLabel), $title));
|
|
|
|
return $this->pos - $startPos;
|
|
}
|
|
}
|
|
|
|
$this->pos = $startPos;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// If we're here, it wasn't an explicit link. Try to parse a reference link.
|
|
// first, see if there's another label
|
|
$savePos = $this->pos;
|
|
$this->spnl();
|
|
$beforeLabel = $this->pos;
|
|
$n = $this->parseLinkLabel();
|
|
if ($n == 2) {
|
|
// empty second label
|
|
$refLabel = $rawLabel;
|
|
} elseif ($n > 0) {
|
|
$refLabel = substr($this->subject, $beforeLabel, $n);
|
|
} else {
|
|
$this->pos = $savePos;
|
|
$refLabel = $rawLabel;
|
|
}
|
|
|
|
// Lookup rawLabel in refmap
|
|
if ($link = $this->refmap->getReference($refLabel)) {
|
|
$inlines->add(
|
|
CommonMark_Element_InlineCreator::createLink($link->getDestination(), $this->parseRawLabel($rawLabel), $link->getTitle())
|
|
);
|
|
|
|
return $this->pos - $startPos;
|
|
}
|
|
|
|
// Nothing worked, rewind:
|
|
$this->pos = $startPos;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse an entity, adding to inlines if successful
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseEntity(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
if ($m = $this->match('/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i')) {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createEntity($m));
|
|
|
|
return strlen($m);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Parse a run of ordinary characters, or a single character with
|
|
* a special meaning in markdown, as a plain string, adding to inlines.
|
|
*
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseString(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
if ($m = $this->match(CommonMark_Util_RegexHelper::getInstance()->getMainRegex())) {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString($m));
|
|
|
|
return strlen($m);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Parse a newline. If it was preceded by two spaces, return a hard
|
|
* line break; otherwise a soft line break.
|
|
*
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseNewline(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
if ($this->peek() == "\n") {
|
|
$this->pos++;
|
|
$last = $inlines->last();
|
|
if ($last && $last->getType() == CommonMark_Element_InlineElement::TYPE_STRING && substr($last->getContents(), -2) == ' ') {
|
|
$last->setContents(rtrim($last->getContents(), ' '));
|
|
$inlines->add(CommonMark_Element_InlineCreator::createHardbreak());
|
|
} else {
|
|
if ($last && $last->getType() == CommonMark_Element_InlineElement::TYPE_STRING && substr(
|
|
$last->getContents(),
|
|
-1
|
|
) == ' '
|
|
) {
|
|
$last->setContents(substr($last->getContents(), 0, -1));
|
|
}
|
|
$inlines->add(CommonMark_Element_InlineCreator::createSoftbreak());
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* @param ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*
|
|
* @throws \RuntimeException
|
|
*/
|
|
protected function parseImage(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
if ($this->match('/^!/')) {
|
|
$n = $this->parseLink($inlines);
|
|
if ($n === 0) {
|
|
$inlines->add(CommonMark_Element_InlineCreator::createString('!'));
|
|
|
|
return 1;
|
|
}
|
|
|
|
/** @var InlineElementInterface $last */
|
|
$last = $inlines->last();
|
|
|
|
if ($last && $last->getType() == CommonMark_Element_InlineElement::TYPE_LINK) {
|
|
$last->setType(CommonMark_Element_InlineElement::TYPE_IMAGE);
|
|
|
|
return $n + 1;
|
|
} else {
|
|
// This shouldn't happen
|
|
throw new RuntimeException('Unknown error occurred while attempting to parse an image');
|
|
}
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse the next inline element in subject, advancing subject position
|
|
* and adding the result to 'inlines'.
|
|
*
|
|
* @param \ColinODell\CommonMark\Util\ArrayCollection $inlines
|
|
*
|
|
* @return int
|
|
*/
|
|
protected function parseInline(CommonMark_Util_ArrayCollection $inlines)
|
|
{
|
|
$c = $this->peek();
|
|
$res = null;
|
|
|
|
switch ($c) {
|
|
case "\n":
|
|
$res = $this->parseNewline($inlines);
|
|
break;
|
|
case '\\':
|
|
$res = $this->parseEscaped($inlines);
|
|
break;
|
|
case '`':
|
|
$res = $this->parseBackticks($inlines);
|
|
break;
|
|
case '*':
|
|
case '_':
|
|
$res = $this->parseEmphasis($inlines);
|
|
break;
|
|
case '[':
|
|
$res = $this->parseLink($inlines);
|
|
break;
|
|
case '!':
|
|
$res = $this->parseImage($inlines);
|
|
break;
|
|
case '<':
|
|
$res = $this->parseAutolink($inlines) ? : $this->parseHtmlTag($inlines);
|
|
break;
|
|
case '&':
|
|
$res = $this->parseEntity($inlines);
|
|
break;
|
|
default:
|
|
// Nothing
|
|
}
|
|
|
|
return $res ? : $this->parseString($inlines);
|
|
}
|
|
|
|
/**
|
|
* Parse s as a list of inlines, using refmap to resolve references.
|
|
*
|
|
* @param string $s
|
|
* @param ReferenceMap $refMap
|
|
*
|
|
* @return ArrayCollection|InlineElementInterface[]
|
|
*/
|
|
protected function parseInlines($s, CommonMark_Reference_ReferenceMap $refMap)
|
|
{
|
|
$this->subject = $s;
|
|
$this->pos = 0;
|
|
$this->refmap = $refMap;
|
|
$inlines = new CommonMark_Util_ArrayCollection();
|
|
while ($this->parseInline($inlines)) {
|
|
;
|
|
}
|
|
|
|
return $inlines;
|
|
}
|
|
|
|
/**
|
|
* @param string $s
|
|
* @param ReferenceMap $refMap
|
|
*
|
|
* @return ArrayCollection|Element\InlineElementInterface[]
|
|
*/
|
|
public function parse($s, CommonMark_Reference_ReferenceMap $refMap)
|
|
{
|
|
return $this->parseInlines($s, $refMap);
|
|
}
|
|
|
|
/**
|
|
* Attempt to parse a link reference, modifying refmap.
|
|
* @param string $s
|
|
* @param ReferenceMap $refMap
|
|
*
|
|
* @return int
|
|
*/
|
|
public function parseReference($s, CommonMark_Reference_ReferenceMap $refMap)
|
|
{
|
|
$this->subject = $s;
|
|
$this->pos = 0;
|
|
$startPos = $this->pos;
|
|
|
|
// label:
|
|
$matchChars = $this->parseLinkLabel();
|
|
if ($matchChars === 0) {
|
|
return 0;
|
|
} else {
|
|
$label = substr($this->subject, 0, $matchChars);
|
|
}
|
|
|
|
// colon:
|
|
if ($this->peek() === ':') {
|
|
$this->pos++;
|
|
} else {
|
|
$this->pos = $startPos;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// link url
|
|
$this->spnl();
|
|
|
|
$destination = $this->parseLinkDestination();
|
|
if ($destination === null || strlen($destination) === 0) {
|
|
$this->pos = $startPos;
|
|
|
|
return 0;
|
|
}
|
|
|
|
$beforeTitle = $this->pos;
|
|
$this->spnl();
|
|
$title = $this->parseLinkTitle();
|
|
if ($title === null) {
|
|
$title = '';
|
|
// rewind before spaces
|
|
$this->pos = $beforeTitle;
|
|
}
|
|
|
|
// make sure we're at line end:
|
|
if ($this->match('/^ *(?:\n|$)/') === null) {
|
|
$this->pos = $startPos;
|
|
|
|
return 0;
|
|
}
|
|
|
|
if (!$refMap->contains($label)) {
|
|
$refMap->addReference(new CommonMark_Reference_Reference($label, $destination, $title));
|
|
}
|
|
|
|
return $this->pos - $startPos;
|
|
}
|
|
}
|