typecho/var/Typecho/AutoP.php

<?php

/**
 * AutoP
 *
 * @copyright Copyright (c) 2012 Typecho Team. (http://typecho.org)
 * @author Joyqi <magike.net@gmail.com>
 * @license GNU General Public License 2.0
 */
class AutoP
{
    // 作为段落的标签
    const BLOCK = 'p|pre|div|blockquote|form|ul|ol|dd|table|ins|h1|h2|h3|h4|h5|h6';

    /**
     * 唯一id
     *
     * @access private
     * @var integer
     */
    private $_uniqueId = 0;

    /**
     * 存储的段落
     *
     * @access private
     * @var array
     */
    private $_blocks = array();

    /**
     * 生成唯一的id, 为了速度考虑最多支持1万个tag的处理
     *
     * @access private
     * @return string
     */
    private function makeUniqueId()
    {
        return ':' . str_pad($this->_uniqueId ++, 4, '0', STR_PAD_LEFT);
    }

    /**
     * 用段落方法处理换行
     *
     * @access private
     * @param string $text
     * @return string
     */
    private function cutByBlock($text)
    {
        $space = "( |　)";
        $text = preg_replace("/{$space}*\n{$space}*/is", "\n", $text);
        $text = preg_replace("/\s*<p:([0-9]{4})\/>\s*/is", "</p><p:\\1/><p>", $text);
        $text = preg_replace("/\n{2,}/", "</p><p>", $text);
        $text = nl2br($text);
        $text = preg_replace("/(<p>)?\s*<p:([0-9]{4})\/>\s*(<\/p>)?/is", "<p:\\2/>", $text);
        $text = preg_replace("/<p>{$space}*<\/p>/is", '', $text);
        $text = preg_replace("/\s*<p>\s*$/is", '', $text);
        $text = preg_replace("/^\s*<\/p>\s*/is", '', $text);
        return $text;
    }

    /**
     * 修复段落开头和结尾
     *
     * @access private
     * @param string $text
     * @return string
     */
    private function fixPragraph($text)
    {
        $text = trim($text);
        if (!preg_match("/^<(" . self::BLOCK . ")(\s|>)/i", $text)) {
            $text = '<p>' . $text;
        }

        if (!preg_match("/<\/(" . self::BLOCK . ")>$/i", $text)) {
            $text = $text . '</p>';
        }

        return $text;
    }

    /**
     * 替换段落的回调函数
     *
     * @access public
     * @param array $matches 匹配值
     * @return string
     */
    public function replaceBlockCallback($matches)
    {
        $tagMatch = '|' . $matches[1] . '|';
        $text = $matches[4];

        switch (true) {
            /** 用br处理换行 */
            case false !== strpos('|li|dd|dt|td|p|a|span|code|cite|strong|sup|sub|small|del|u|i|b|ins|h1|h2|h3|h4|h5|h6|', $tagMatch):
                $text = nl2br(trim($text));
                break;
            /** 用段落处理换行 */
            case false !== strpos('|div|blockquote|form|', $tagMatch):
                $text = $this->cutByBlock($text);
                if (false !== strpos($text, '</p><p>')) {
                    $text = $this->fixPragraph($text);
                }
                break;
            default:
                break;
        }

        /** 没有段落能力的标签 */
        if (false !== strpos('|a|span|code|cite|strong|sup|sub|small|del|u|i|b|', $tagMatch)) {
            $key = '<b' . $matches[2] . '/>';
        } else {
            $key = '<p' . $matches[2] . '/>';
        }

        $this->_blocks[$key] = "<{$matches[1]}{$matches[3]}>{$text}</{$matches[1]}>";
        return $key;
    }

    /**
     * 自动分段
     *
     * @param string $text
     * @static
     * @access private
     * @return string
     */
    public function parse($text)
    {
        /** 重置计数器 */
        $this->_uniqueId = 0;
        $this->_blocks = array();

        /** 将已有的段落后面的换行处理掉 */
        $text = preg_replace(array("/<\/p>\s+<p(\s*)/is", "/\s*<br\s*\/?>\s*/is"), array("</p><p\\1", "<br />"), trim($text));

        /** 将所有非自闭合标签解析为唯一的字符串 */
        $foundTagCount = 0;
        $textLength = strlen($text);
        $uniqueIdList = array();

        if (preg_match_all("/<\/\s*([a-z0-9]+)>/is", $text, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[0] as $key => $match) {
                $tag = $matches[1][$key][0];

                $leftOffset = $match[1] - $textLength;
                $posSingle = strrpos($text, '<' . $tag . '>', $leftOffset);
                $posFix = strrpos($text, '<' . $tag . ' ', $leftOffset);
                $pos = false;

                switch (true) {
                    case (false !== $posSingle && false !== $posFix):
                        $pos = max($posSingle, $posFix);
                        break;
                    case false === $posSingle && false !== $posFix:
                        $pos = $posFix;
                        break;
                    case false !== $posSingle && false === $posFix:
                        $pos = $posSingle;
                        break;
                    default:
                        break;
                }

                if (false !== $pos) {
                    $uniqueId = $this->makeUniqueId();
                    $uniqueIdList[$uniqueId] = $tag;
                    $tagLength = strlen($tag);

                    $text = substr_replace($text, $uniqueId, $pos + 1 + $tagLength, 0);
                    $text = substr_replace($text, $uniqueId, $match[1] + 7 + $foundTagCount * 10 + $tagLength, 0); // 7 = 5 + 2
                    $foundTagCount ++;
                }
            }
        }

        foreach ($uniqueIdList as $uniqueId => $tag) {
            $text = preg_replace_callback("/<({$tag})({$uniqueId})([^>]*)>(.*)<\/\\1\\2>/is",
                array($this, 'replaceBlockCallback'), $text, 1);
        }

        $text = $this->cutByBlock($text);
        $blocks = array_reverse($this->_blocks);

        foreach ($blocks as $blockKey => $blockValue) {
            $text = str_replace($blockKey, $blockValue, $text);
        }

        return $this->fixPragraph($text);
    }
}