prepare();

        if (ltrim($text) === '') {
            return '';
        }

        $text = $this->preprocess($text);
        $this->prepareMarkers($text);

        $absy = $this->parseBlocks(explode("\n", $text));
        $markup = $this->renderAbsy($absy);
        $markup = $this->postprocess($markup);

        $this->cleanup();

        return $markup;
    }

    /**
     * Parses a paragraph ignoring block elements.
     *
     * The text is pre-processed, scanned for inline markers, parsed with
     * `parseInline()` only, rendered and finally post-processed. Input that
     * is empty after `ltrim()` yields an empty string.
     *
     * @param string $text - The text to parse.
     * @return string - Parsed markup.
     */
    public function parseParagraph($text): string
    {
        $this->prepare();

        if (ltrim($text) === '') {
            return '';
        }

        $text = $this->preprocess($text);
        $this->prepareMarkers($text);

        $absy = $this->parseInline($text);
        $markup = $this->renderAbsy($absy);
        $markup = $this->postprocess($markup);

        $this->cleanup();

        return $markup;
    }

    /**
     * Pre-processes text before parsing.
     *
     * Tabs are expanded to spaces when `$convertTabsToSpaces` is enabled, and
     * every line ending ("\r\n", "\n\r" or "\r") is normalized to "\n".
     *
     * @param string $text - The text to parse.
     * @return string - The pre-processed text.
     */
    protected function preprocess($text): string
    {
        if ($this->convertTabsToSpaces) {
            // Markdown measures block indentation in steps of four columns,
            // so a tab must expand to four spaces; a single space would
            // silently flatten indented content.
            $text = str_replace("\t", '    ', $text);
        }

        return str_replace(["\r\n", "\n\r", "\r"], "\n", $text);
    }

    /**
     * Post-processes markup after parsing.
     *
     * Trailing newlines are stripped, and both raw NUL bytes and numeric
     * character references to code point zero (`&#0;`, `&#x0;`, `&#X000;`,
     * ...) are replaced with U+FFFD REPLACEMENT CHARACTER.
     *
     * @param string $markup - Parsed markup.
     * @return string - Post-processed markup.
     */
    protected function postprocess($markup): string
    {
        $safeChr = "\u{FFFD}";

        $markup = rtrim($markup, "\n");
        $markup = str_replace("\0", $safeChr, $markup);

        // The pattern must be anchored to "&#": without it any "0;" in
        // ordinary text (e.g. "x = 10;") would be rewritten as well.
        $sanitized = preg_replace('/&#[Xx]?0+;/', $safeChr, $markup);

        // preg_replace() returns null on a PCRE error; keep the markup in
        // that case instead of violating the string return type.
        return $sanitized ?? $markup;
    }

    /**
     * Get the identifier for this rendering context.
     *
     * @return string - The identifier.
     */
    public function getContextId(): string
    {
        return $this->_contextId;
    }

    /**
     * Set the identifier for this rendering context.
     *
     * The value is cast to a string and the characters `&`, `<`, `>`, `"`
     * and the space character are removed before it is stored.
     *
     * @param string $string - Identifier to set.
     * @return string - The identifier as stored (after stripping).
     */
    public function setContextId($string): string
    {
        $id = str_replace(
            ['&', '<', '>', '"', ' '],
            '',
            strval($string)
        );

        return $this->_contextId = $id;
    }

    /**
     * This method will be called before `parse()` and `parseParagraph()`.
     * You can override it to do some initialization work.
*/
    protected function prepare(): void
    {
    }

    /**
     * This method will be called after `parse()` and `parseParagraph()`.
     * You can override it to do cleanup.
     */
    protected function cleanup(): void
    {
    }

    #---------------------------------------------
    # Block parsing
    #---------------------------------------------

    // Cached result of blockTypes(); stays null until the first call.
    private $_blockTypes;

    /**
     * Detect registered block types.
     *
     * Block types are discovered from the protected methods of this object
     * whose name starts with `identify`; the remainder of the method name is
     * the block type. The entries of `$blockPriorities` come first, followed
     * by the detected names, with duplicates removed. The list is computed
     * once and cached.
     *
     * @return array - A list of block element types available.
     */
    protected function blockTypes(): array
    {
        if ($this->_blockTypes === null) {
            // Detect block types via "identify" methods.
            $reflection = new ReflectionClass($this);
            $detected = array_filter(
                array_map(
                    static function ($method) {
                        $methodName = $method->getName();

                        // Strip the 8-character "identify" prefix.
                        return str_starts_with($methodName, 'identify')
                            ? substr($methodName, 8)
                            : false;
                    },
                    $reflection->getMethods(ReflectionMethod::IS_PROTECTED)
                )
            );

            // Merge the predefined call order with the array of detected methods.
            $this->_blockTypes = array_unique(
                array_merge($this->blockPriorities, $detected),
                SORT_STRING
            );
        }

        return $this->_blockTypes;
    }

    /**
     * Given a set of lines and an index of a current line it uses
     * the registered block types to detect the type of this line.
     *
     * The `identify<Type>()` methods are tried in the order returned by
     * `blockTypes()`; the first one that returns a truthy value wins.
     *
     * @param array $lines
     * @param integer $current
     * @return string - Name of the matching block type as returned by
     *     `blockTypes()`, or `paragraph` when none matches.
     */
    protected function detectLineType($lines, $current): string
    {
        $line = $lines[$current];

        foreach ($this->blockTypes() as $blockType) {
            if ($this->{'identify' . $blockType}($line, $lines, $current)) {
                return $blockType;
            }
        }

        // Consider the line a normal paragraph if no other block type matches.
        return 'paragraph';
    }

    /**
     * Parse block elements by calling `detectLineType()` to identify them
     * and call consume function afterwards.
     *
     * When the nesting limit is reached the lines are returned unparsed as
     * one `text` element, unless `$maximumNestingLevelThrow` is set.
     *
     * @param array $lines
     * @return array
     * @throws RuntimeException - If the maximum nesting level is reached
     *     and `$maximumNestingLevelThrow` is enabled.
     */
    protected function parseBlocks($lines): array
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // Maximum depth is reached; do not parse input.
            if ($this->maximumNestingLevelThrow) {
                throw new RuntimeException(
                    'Parser exceeded maximum nesting level'
                );
            }

            return [['text', implode("\n", $lines)]];
        }

        $this->_depth++;

        try {
            $blocks = [];

            // Convert lines to blocks.
            for ($i = 0, $count = count($lines); $i < $count; $i++) {
                if (rtrim($lines[$i]) === '') {
                    // Skip empty lines.
                    continue;
                }

                // Identify beginning of a block and parse the content.
                // The consumer reports the last line it used, so the loop
                // resumes right after the block.
                [$block, $i] = $this->parseBlock($lines, $i);
                if ($block !== false) {
                    $blocks[] = $block;
                }
            }

            return $blocks;
        } finally {
            // Restore the depth even when a nested parser throws, so the
            // instance stays usable for subsequent parse calls.
            $this->_depth--;
        }
    }

    /**
     * Parses the block at current line by identifying the block type
     * and parsing the content.
     *
     * @param array $lines
     * @param integer $current
     * @return array - Array of two elements:
     *     (array) The parsed block;
     *     (int) The next line index to be parsed.
     */
    protected function parseBlock($lines, $current): array
    {
        // Identify block type for this line.
        $blockType = $this->detectLineType($lines, $current);

        // Call consume method for the detected block type
        // to consume further lines.
        return $this->{'consume' . $blockType}($lines, $current);
    }

    /**
     * Renders a Markdown abstract syntax tree as HTML.
     *
     * Each element is rendered by the `render<Type>()` method named after
     * its first entry. While an element is being rendered, its type sits on
     * top of the `$context` stack.
     *
     * @param array $blocks
     * @return string
     */
    protected function renderAbsy($blocks): string
    {
        $output = '';

        foreach ($blocks as $block) {
            array_unshift($this->context, $block[0]);

            try {
                $output .= $this->{'render' . $block[0]}($block);
            } finally {
                // Keep the context stack balanced if a renderer throws.
                array_shift($this->context);
            }
        }

        return $output;
    }

    /**
     * Consume lines for a paragraph.
     *
     * Lines are collected (trimmed) up to the first blank line or the end
     * of input, joined with "\n" and parsed as inline content.
     *
     * @param array $lines
     * @param integer $current
     * @return array - The paragraph block and the index of the last
     *     consumed line.
     */
    protected function consumeParagraph($lines, $current): array
    {
        $content = [];

        // Consume until blank line...
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            if (ltrim($lines[$i]) === '') {
                break;
            }

            $content[] = trim($lines[$i]);
        }

        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];

        // $i points one past the last consumed line.
        return [$block, $i - 1];
    }

    /**
     * Render a paragraph block.
*
     * The rendered inline content is wrapped in a `<p>` element and
     * terminated with a newline.
     *
     * @param array $block
     * @return string
     */
    protected function renderParagraph($block): string
    {
        return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
    }

    #---------------------------------------------
    # Inline parsing
    #---------------------------------------------

    // Markers present in the current text, grouped by their first character:
    // [first char => [marker => parser method name]], longest marker first.
    private $_inlineMarkers = [];

    /**
     * Returns a map of inline markers to the corresponding parser methods.
     *
     * This array defines handler methods for inline markdown markers.
     * When a marker is found in the text, the handler method is called with the text
     * starting at the position of the marker.
     *
     * Note that markers starting with whitespace may slow down the parser,
     * so it may be better to use [[renderText]] to deal with them instead.
     *
     * You may override this method to define a set of markers and parsing methods.
     * The default implementation looks for protected methods starting with `parse`
     * with a matching `Markers` method. E.g. parseEscape() and parseEscapeMarkers().
     *
     * @return array - A map of markers to parser methods.
     */
    protected function inlineMarkers(): array
    {
        $markers = [];

        // Detect "parse" functions.
        $reflection = new ReflectionClass($this);
        foreach ($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
            $methodName = $method->getName();

            if (
                !str_starts_with($methodName, 'parse')
                || str_ends_with($methodName, 'Markers')
            ) {
                continue;
            }

            // Only parsers with a companion "<name>Markers" method take part.
            $markersMethod = $methodName . 'Markers';
            if (!method_exists($this, $markersMethod)) {
                continue;
            }

            foreach ($this->{$markersMethod}() as $marker) {
                $markers[$marker] = $methodName;
            }
        }

        return $markers;
    }

    /**
     * Prepare markers that are used in the text to parse.
     *
     * Only markers that occur in `$text` are kept. They are grouped by their
     * first character, and each group is ordered longest marker first so
     * that `parseInline()` always tries the most specific marker before any
     * of its prefixes.
     *
     * @param string $text
     */
    protected function prepareMarkers($text): void
    {
        $this->_inlineMarkers = [];

        foreach ($this->inlineMarkers() as $marker => $method) {
            // Purely numeric markers come back as integer array keys.
            $marker = (string) $marker;

            if ($marker === '' || !str_contains($text, $marker)) {
                continue;
            }

            $this->_inlineMarkers[$marker[0]][$marker] = $method;
        }

        // Put the longest marker first. Comparing a new marker with the head
        // of the list only is not enough once three or more markers share a
        // first character, so sort each group as a whole.
        foreach ($this->_inlineMarkers as &$group) {
            uksort(
                $group,
                static fn ($a, $b): int => strlen((string) $b) <=> strlen((string) $a)
            );
        }
        unset($group);
    }

    /**
     * Parses inline elements of the language.
     *
     * The text is scanned for the first characters of the prepared markers.
     * Text between markers becomes `text` elements; at a marker the matching
     * parser method is called and must return the parsed element together
     * with the number of bytes it consumed. A marker character that no
     * parser claims is emitted as plain text.
     *
     * When the nesting limit is reached the text is returned unparsed as one
     * `text` element, unless `$maximumNestingLevelThrow` is set.
     *
     * @param string $text - The inline text to parse.
     * @return array
     * @throws RuntimeException - If the maximum nesting level is reached
     *     and `$maximumNestingLevelThrow` is enabled.
     */
    protected function parseInline($text): array
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // Maximum depth is reached; do not parse input.
            if ($this->maximumNestingLevelThrow) {
                throw new RuntimeException(
                    'Parser exceeded maximum nesting level'
                );
            }

            return [['text', $text]];
        }

        $this->_depth++;

        try {
            $markers = implode('', array_keys($this->_inlineMarkers));
            $paragraph = [];

            // Compare against '' explicitly: empty() would also reject the
            // valid marker set "0".
            while (
                $markers !== ''
                && ($found = strpbrk($text, $markers)) !== false
            ) {
                // strpbrk() returns the tail of $text starting at the first
                // marker character, so the offset follows from the lengths.
                $pos = strlen($text) - strlen($found);

                // Add the text up to next marker to the paragraph.
                if ($pos !== 0) {
                    $paragraph[] = ['text', substr($text, 0, $pos)];
                }
                $text = $found;

                $parsed = false;
                foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
                    if (!str_starts_with($text, (string) $marker)) {
                        continue;
                    }

                    // Parse the marker.
                    array_unshift($this->context, $method);
                    try {
                        [$output, $offset] = $this->$method($text);
                    } finally {
                        // Keep the context stack balanced if a parser throws.
                        array_shift($this->context);
                    }

                    $paragraph[] = $output;
                    $text = substr($text, $offset);
                    $parsed = true;
                    break;
                }

                if (!$parsed) {
                    // No parser matched: emit the marker character verbatim.
                    $paragraph[] = ['text', substr($text, 0, 1)];
                    $text = substr($text, 1);
                }
            }

            $paragraph[] = ['text', $text];

            return $paragraph;
        } finally {
            // Restore the depth even when a nested parser throws, so the
            // instance stays usable for subsequent parse calls.
            $this->_depth--;
        }
    }

    /**
     * Declares inline markers for the corresponding parser method.
     *
     * @return array - The single marker `\`.
     */
    protected function parseEscapeMarkers(): array
    {
        return ['\\'];
    }

    /**
     * Parses escaped special characters.
*
     * A backslash followed by one of `$escapeCharacters` yields that
     * character (HTML-escaped) and consumes two bytes; otherwise the
     * backslash itself is emitted as text and one byte is consumed.
     *
     * @marker \
     * @param string $text - Text starting at the backslash.
     * @return array - The parsed `text` element and the consumed length.
     */
    protected function parseEscape($text): array
    {
        if (
            isset($text[1])
            && in_array($text[1], $this->escapeCharacters, true)
        ) {
            $chr = $this->escapeHtmlEntities($text[1], ENT_COMPAT);

            return [['text', $chr], 2];
        }

        return [['text', $text[0]], 1];
    }

    /**
     * This function renders plain text sections in the markdown text.
     * It can be used to work on normal text sections.
     * E.g. to highlight keywords or do special escaping.
     *
     * @param array $block - A `text` element; index 1 holds the text.
     * @return string - The text, returned unchanged.
     */
    protected function renderText($block): string
    {
        return $block[1];
    }

    /**
     * Add backslash to escapeable characters in text.
     *
     * @param string $text
     * @return string - The text with every character listed in
     *     `$escapeCharacters` prefixed by a backslash.
     */
    protected function escapeBackslash($text): string
    {
        $strtr = [];
        foreach ($this->escapeCharacters as $chr) {
            $strtr[$chr] = "\\$chr";
        }

        return strtr($text, $strtr);
    }

    /**
     * Remove backslash from escaped characters in text.
     *
     * @param string $text
     * @return string - The text with the backslash removed in front of
     *     every character listed in `$escapeCharacters`.
     */
    protected function unEscapeBackslash($text): string
    {
        $strtr = [];
        foreach ($this->escapeCharacters as $chr) {
            $strtr["\\$chr"] = $chr;
        }

        return strtr($text, $strtr);
    }

    /**
     * Encode HTML special characters as HTML entities.
     *
     * The document type flag (`ENT_HTML5` or `ENT_HTML401`) is chosen from
     * `$html5` and combined with the caller's flags. Invalid UTF-8 sequences
     * are replaced with U+FFFD instead of discarding the whole string.
     *
     * @param string $text
     * @param integer $flags
     * @return string
     * @see https://www.php.net/manual/en/function.htmlspecialchars
     */
    protected function escapeHtmlEntities($text, $flags = 0): string
    {
        $ent = $this->html5 ? ENT_HTML5 : ENT_HTML401;

        // Without ENT_SUBSTITUTE, htmlspecialchars() returns '' for input
        // containing an invalid code unit sequence, silently dropping text.
        return htmlspecialchars($text, $flags | $ent | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Decode HTML entities to corresponding characters.
     *
     * The document type flag (`ENT_HTML5` or `ENT_HTML401`) is chosen from
     * `$html5` and combined with the caller's flags.
     *
     * @param string $text
     * @param integer $flags
     * @return string
     * @see https://www.php.net/manual/en/function.html-entity-decode
     */
    protected function unEscapeHtmlEntities($text, $flags = 0): string
    {
        $ent = $this->html5 ? ENT_HTML5 : ENT_HTML401;

        return html_entity_decode($text, $flags | $ent, 'UTF-8');
    }
}