<?php

declare(strict_types=1);

namespace Jfcherng\Diff\Renderer\Html;

use Jfcherng\Diff\Factory\LineRendererFactory;
use Jfcherng\Diff\Renderer\RendererConstant;
use Jfcherng\Diff\SequenceMatcher;
use Jfcherng\Diff\Utility\ReverseIterator;
use Jfcherng\Utility\MbString;

/**
 * Combined HTML diff generator.
 *
 * Renders the old and the new text into a single-column table in which
 * "replace" blocks are, when possible, merged into one row that carries both
 * the deleted and the inserted fragments.
 *
 * Note that this renderer always has no line number.
 */
final class Combined extends AbstractHtml
{
    /**
     * {@inheritdoc}
     */
    public const INFO = [
        'desc' => 'Combined',
        'type' => 'Html',
    ];

    /**
     * {@inheritdoc}
     */
    public const AUTO_FORMAT_CHANGES = false;

    /**
     * {@inheritdoc}
     *
     * NOTE(review): the method name spelling ("rederer") is kept on purpose;
     * it presumably overrides a method of the same name declared by the
     * parent class, which is not visible from this file.
     */
    protected function redererChanges(array $changes): string
    {
        if (empty($changes)) {
            return $this->getResultForIdenticals();
        }

        $wrapperClasses = \array_merge(
            $this->options['wrapperClasses'],
            ['diff', 'diff-html', 'diff-combined']
        );

        return
            '<table class="' . \implode(' ', $wrapperClasses) . '">' .
                $this->renderTableHeader() .
                $this->renderTableHunks($changes) .
            '</table>';
    }

    /**
     * Render the table header.
     *
     * @return string the "<thead>" markup, or an empty string when the
     *                "showHeader" option is falsy
     */
    protected function renderTableHeader(): string
    {
        if (!$this->options['showHeader']) {
            return '';
        }

        return
            '<thead>' .
                '<tr>' .
                    '<th>' . $this->_('differences') . '</th>' .
                '</tr>' .
            '</thead>';
    }

    /**
     * Render the separator which is placed between two hunks.
     */
    protected function renderTableSeparateBlock(): string
    {
        return
            '<tbody class="skipped">' .
                '<tr>' .
                    '<td></td>' .
                '</tr>' .
            '</tbody>';
    }

    /**
     * Render table hunks.
     *
     * @param array[][] $hunks each hunk has many blocks
     *
     * @return string the concatenated "<tbody>" markup of all blocks
     */
    protected function renderTableHunks(array $hunks): string
    {
        $ret = '';

        foreach ($hunks as $i => $hunk) {
            // a separator goes before every hunk except the first one
            if ($i > 0 && $this->options['separateBlock']) {
                $ret .= $this->renderTableSeparateBlock();
            }

            foreach ($hunk as $block) {
                $ret .= $this->renderTableBlock($block);
            }
        }

        return $ret;
    }

    /**
     * Render a table block by dispatching on its "tag".
     *
     * @param array $block the block
     *
     * @return string a "<tbody>" element whose class reflects the block tag
     */
    protected function renderTableBlock(array $block): string
    {
        switch ($block['tag']) {
            case SequenceMatcher::OP_EQ:
                $content = $this->renderTableBlockEqual($block);
                break;
            case SequenceMatcher::OP_INS:
                $content = $this->renderTableBlockInsert($block);
                break;
            case SequenceMatcher::OP_DEL:
                $content = $this->renderTableBlockDelete($block);
                break;
            case SequenceMatcher::OP_REP:
                $content = $this->renderTableBlockReplace($block);
                break;
            default:
                // unknown tags produce an empty body
                $content = '';
        }

        return
            '<tbody class="change change-' . self::TAG_CLASS_MAP[$block['tag']] . '">' .
                $content .
            '</tbody>';
    }

    /**
     * Render the table block: equal.
     *
     * @param array $block the block
     *
     * @return string one row per line of the "new" side
     */
    protected function renderTableBlockEqual(array $block): string
    {
        $block['new']['lines'] = $this->customFormatLines(
            $block['new']['lines'],
            SequenceMatcher::OP_EQ
        );

        $ret = '';

        // note that although we are in a OP_EQ situation,
        // the old and the new may not be exactly the same
        // because of ignoreCase, ignoreWhitespace, etc
        foreach ($block['new']['lines'] as $newLine) {
            // we could only pick either the old or the new to show
            // here we pick the new one to let the user know what it is now
            $ret .= $this->renderTableRow('new', SequenceMatcher::OP_EQ, $newLine);
        }

        return $ret;
    }

    /**
     * Render the table block: insert.
     *
     * @param array $block the block
     *
     * @return string one row per line of the "new" side
     */
    protected function renderTableBlockInsert(array $block): string
    {
        $block['new']['lines'] = $this->customFormatLines(
            $block['new']['lines'],
            SequenceMatcher::OP_INS
        );

        $ret = '';

        foreach ($block['new']['lines'] as $newLine) {
            $ret .= $this->renderTableRow('new', SequenceMatcher::OP_INS, $newLine);
        }

        return $ret;
    }

    /**
     * Render the table block: delete.
     *
     * @param array $block the block
     *
     * @return string one row per line of the "old" side
     */
    protected function renderTableBlockDelete(array $block): string
    {
        $block['old']['lines'] = $this->customFormatLines(
            $block['old']['lines'],
            SequenceMatcher::OP_DEL
        );

        $ret = '';

        foreach ($block['old']['lines'] as $oldLine) {
            $ret .= $this->renderTableRow('old', SequenceMatcher::OP_DEL, $oldLine);
        }

        return $ret;
    }

    /**
     * Render the table block: replace.
     *
     * Every old/new line pair is merged into a single row when possible.
     * As soon as one pair turns out to be not merge-able, the WHOLE block is
     * rendered in the separated form (all deleted rows followed by all
     * inserted rows) and any merged rows collected so far are discarded, so
     * that no line is shown twice.
     *
     * @param array $block the block
     *
     * @return string the rows of the block
     */
    protected function renderTableBlockReplace(array $block): string
    {
        if ($this->options['detailLevel'] === 'none') {
            return
                $this->renderTableBlockDelete($block) .
                $this->renderTableBlockInsert($block);
        }

        $ret = '';

        $oldLines = $block['old']['lines'];
        $newLines = $block['new']['lines'];

        $oldLinesCount = \count($oldLines);
        $newLinesCount = \count($newLines);

        // if the line count changes, we treat the old and the new as
        // "a line with \n in it" and then do a one-line-to-one-line diff
        if ($oldLinesCount !== $newLinesCount) {
            [$oldLines, $newLines] = $this->markReplaceBlockDiff($oldLines, $newLines);
            $oldLinesCount = $newLinesCount = 1;
        }

        $oldLines = $this->customFormatLines($oldLines, SequenceMatcher::OP_DEL);
        $newLines = $this->customFormatLines($newLines, SequenceMatcher::OP_INS);

        // now $oldLines must have the same line count as $newLines
        for ($no = 0; $no < $newLinesCount; ++$no) {
            $mergedLine = $this->mergeReplaceLines($oldLines[$no], $newLines[$no]);

            // not merge-able: fall back to the separated form for the whole
            // block; returning here (rather than appending) prevents lines
            // which were already merged from being rendered a second time
            if ($mergedLine === null) {
                return
                    $this->renderTableBlockDelete($block) .
                    $this->renderTableBlockInsert($block);
            }

            $ret .= $this->renderTableRow('rep', SequenceMatcher::OP_REP, $mergedLine);
        }

        return $ret;
    }

    /**
     * Render a content row of the output table.
     *
     * @param string $tdClass the <td> class
     * @param int    $op      the operation
     * @param string $line    the line, emitted as-is (callers are expected
     *                        to have made it HTML-safe already)
     */
    protected function renderTableRow(string $tdClass, int $op, string $line): string
    {
        return
            '<tr data-type="' . self::SYMBOL_MAP[$op] . '">' .
                '<td class="' . $tdClass . '">' . $line . '</td>' .
            '</tr>';
    }

    /**
     * Merge two "replace"-type lines into a single line.
     *
     * The implementation concept is that if we remove all closure parts from
     * the old and the new, the rest of them (cleaned line) should be the same.
     * And then, we add back those removed closure parts in a correct order.
     *
     * @param string $oldLine the old line
     * @param string $newLine the new line
     *
     * @return null|string string if merge-able, null otherwise
     */
    protected function mergeReplaceLines(string $oldLine, string $newLine): ?string
    {
        $delParts = $this->analyzeClosureParts(
            $oldLine,
            RendererConstant::HTML_CLOSURES_DEL,
            SequenceMatcher::OP_DEL
        );
        $insParts = $this->analyzeClosureParts(
            $newLine,
            RendererConstant::HTML_CLOSURES_INS,
            SequenceMatcher::OP_INS
        );

        // get the cleaned line by a non-regex way (should be faster)
        // i.e., the new line with all "<ins>...</ins>" parts removed;
        // parts are removed back-to-front so earlier offsets stay valid
        $mergedLine = $newLine;
        foreach (ReverseIterator::fromArray($insParts) as $part) {
            $mergedLine = \substr_replace(
                $mergedLine,
                '', // deletion
                $part['offset'],
                \strlen($part['content'])
            );
        }

        // note that $mergedLine is actually a clean line at this point
        if (!$this->isLinesMergeable($oldLine, $newLine, $mergedLine)) {
            return null;
        }

        // before building the $mergedParts, we do some adjustments
        $this->revisePartsForBoundaryNewlines($delParts, RendererConstant::HTML_CLOSURES_DEL);
        $this->revisePartsForBoundaryNewlines($insParts, RendererConstant::HTML_CLOSURES_INS);

        // create a sorted merged parts array
        $mergedParts = \array_merge($delParts, $insParts);
        \usort($mergedParts, static function (array $a, array $b): int {
            // first sort by "offsetClean", "order" then by "type"
            return $a['offsetClean'] <=> $b['offsetClean']
                ?: $a['order'] <=> $b['order']
                ?: ($a['type'] === SequenceMatcher::OP_DEL ? -1 : 1);
        });

        // insert merged parts into the cleaned line; going back-to-front
        // keeps the "offsetClean" of the not-yet-inserted parts valid
        foreach (ReverseIterator::fromArray($mergedParts) as $part) {
            $mergedLine = \substr_replace(
                $mergedLine,
                $part['content'],
                $part['offsetClean'],
                0 // insertion
            );
        }

        return \str_replace("\n", '<br>', $mergedLine);
    }

    /**
     * Analyze and get the closure parts information of the line.
     *
     * Such as
     *     extract information for "<ins>part 1</ins>" and "<ins>part 2</ins>"
     *     from "Hello <ins>part 1</ins>SOME OTHER TEXT<ins>part 2</ins> World"
     *
     * Offsets and lengths are measured in bytes. A left delimiter without a
     * following right delimiter ends the scan.
     *
     * @param string   $line     the line
     * @param string[] $closures the closures, as [left delimiter, right delimiter]
     * @param int      $type     the type
     *
     * @return array[] the closure information, in order of appearance
     */
    protected function analyzeClosureParts(string $line, array $closures, int $type): array
    {
        [$ld, $rd] = $closures;

        $ldLength = \strlen($ld);
        $rdLength = \strlen($rd);

        $parts = [];
        $partStart = $partEnd = 0;
        $partLengthSum = 0;

        // find the next left delimiter
        while (false !== ($partStart = \strpos($line, $ld, $partEnd))) {
            // find the corresponding right delimiter
            if (false === ($partEnd = \strpos($line, $rd, $partStart + $ldLength))) {
                break;
            }

            // make $partEnd point just past the right delimiter
            $partEnd += $rdLength;
            $partLength = $partEnd - $partStart;

            $parts[] = [
                'type' => $type,
                // the sorting order used when both "offsetClean" are the same
                'order' => 0,
                // the offset in the line
                'offset' => $partStart,
                // the offset in the cleaned line (i.e., the line with closure parts removed)
                'offsetClean' => $partStart - $partLengthSum,
                // the content of the part
                'content' => \substr($line, $partStart, $partLength),
            ];

            $partLengthSum += $partLength;
        }

        return $parts;
    }

    /**
     * Mark differences between two "replace" blocks.
     *
     * Each of the returned block (lines) is always only one line.
     *
     * @param string[] $oldBlock The old block
     * @param string[] $newBlock The new block
     *
     * @return string[][] the value of [[$oldLine], [$newLine]]
     */
    protected function markReplaceBlockDiff(array $oldBlock, array $newBlock): array
    {
        // scratch string holders; their content is overwritten via set()
        // on every call so they are safe to reuse across calls
        static $mbOld, $mbNew;

        $mbOld = $mbOld ?? new MbString();
        $mbNew = $mbNew ?? new MbString();

        // the line renderer depends on the options of THIS instance, so it
        // must not live in a function-level static (which PHP shares among
        // all instances of the class)
        $lineRenderer = LineRendererFactory::make(
            $this->options['detailLevel'],
            [], /** @todo is it possible to get the differOptions here? */
            $this->options
        );

        $mbOld->set(\implode("\n", $oldBlock));
        $mbNew->set(\implode("\n", $newBlock));

        $lineRenderer->render($mbOld, $mbNew);

        return [
            [$mbOld->get()], // one-line block for the old
            [$mbNew->get()], // one-line block for the new
        ];
    }

    /**
     * Determine whether the "replace"-type lines are merge-able or not.
     *
     * The changed ratio is computed from byte lengths as
     * (old + new - 2 * clean) / (old + new + 1), where "old" and "new" are
     * the lines with their closure delimiters stripped. The lines are
     * merge-able when the ratio does not exceed the "mergeThreshold" option.
     *
     * @param string $oldLine   the old line
     * @param string $newLine   the new line
     * @param string $cleanLine the clean line
     */
    protected function isLinesMergeable(string $oldLine, string $newLine, string $cleanLine): bool
    {
        $oldLine = \str_replace(RendererConstant::HTML_CLOSURES_DEL, '', $oldLine);
        $newLine = \str_replace(RendererConstant::HTML_CLOSURES_INS, '', $newLine);

        $sumLength = \strlen($oldLine) + \strlen($newLine);

        /** @var float the changed ratio, 0 <= value < 1 */
        $changedRatio = ($sumLength - (\strlen($cleanLine) << 1)) / ($sumLength + 1);

        return $changedRatio <= $this->options['mergeThreshold'];
    }

    /**
     * Extract boundary newlines from parts into new parts.
     *
     * Newlines directly after the left delimiter or directly before the right
     * delimiter are removed from the part and appended to $parts as a part of
     * their own, with "order" -1 (leading) or 1 (trailing) so that they sort
     * before/after the original part at the same "offsetClean".
     *
     * @param array[]  $parts    the parts (modified in place)
     * @param string[] $closures the closures, as [left delimiter, right delimiter]
     *
     * @see https://git.io/JvVXH
     */
    protected function revisePartsForBoundaryNewlines(array &$parts, array $closures): void
    {
        [$ld, $rd] = $closures;

        $ldRegex = \preg_quote($ld, '/');
        $rdRegex = \preg_quote($rd, '/');

        // only the parts which exist at this moment are visited;
        // the parts appended by the callbacks below are not revisited
        for ($i = \count($parts) - 1; $i >= 0; --$i) {
            // a by-value copy which serves as the template of extracted parts
            $template = $parts[$i];
            $content = $template['content'];

            // deal with leading newlines
            $revised = \preg_replace_callback(
                "/(?P<closure>{$ldRegex})(?P<nl>[\r\n]++)/u",
                static function (array $matches) use (&$parts, $template, $ld, $rd): string {
                    // add a new part for the extracted newlines
                    $template['order'] = -1;
                    $template['content'] = "{$ld}{$matches['nl']}{$rd}";
                    $parts[] = $template;

                    return $matches['closure'];
                },
                $content
            );
            // preg_replace_callback() returns null on a PCRE error
            // (e.g., malformed UTF-8); keep the content untouched then
            $content = $revised ?? $content;

            // deal with trailing newlines
            $revised = \preg_replace_callback(
                "/(?P<nl>[\r\n]++)(?P<closure>{$rdRegex})/u",
                static function (array $matches) use (&$parts, $template, $ld, $rd): string {
                    // add a new part for the extracted newlines
                    $template['order'] = 1;
                    $template['content'] = "{$ld}{$matches['nl']}{$rd}";
                    $parts[] = $template;

                    return $matches['closure'];
                },
                $content
            );
            $content = $revised ?? $content;

            $parts[$i]['content'] = $content;
        }
    }

    /**
     * Make lines suitable for HTML output.
     *
     * Nothing is done unless the changes are raw. Otherwise the lines go
     * through the inherited formatLines() and, for delete/insert operations,
     * the generic closures are swapped for the operation-specific ones.
     *
     * @param string[] $lines the lines
     * @param int      $op    the operation
     *
     * @return string[] the formatted lines
     */
    protected function customFormatLines(array $lines, int $op): array
    {
        if (!$this->changesAreRaw) {
            return $lines;
        }

        static $closureMap = [
            SequenceMatcher::OP_DEL => RendererConstant::HTML_CLOSURES_DEL,
            SequenceMatcher::OP_INS => RendererConstant::HTML_CLOSURES_INS,
        ];

        $lines = $this->formatLines($lines);

        $htmlClosures = $closureMap[$op] ?? null;

        // operations without dedicated closures keep the formatted lines as-is
        if (!$htmlClosures) {
            return $lines;
        }

        foreach ($lines as $idx => $line) {
            $lines[$idx] = \str_replace(RendererConstant::HTML_CLOSURES, $htmlClosures, $line);
        }

        return $lines;
    }
}