Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
matrix_diff_highlighter.inc
1 <?php
17 // Text/Diff.php throws PHP 5 warnings.
18 // Instead of overwriting the PEAR package, we suppress the errors.
19 @include_once 'Text/Diff/Renderer.php';
20 include_once 'Text/Diff/Renderer/inline.php';
21 
39 class Matrix_Diff_Highlighter extends Text_Diff_Renderer_Inline
40 {
41 
42 
/**
* Places the change markers around a run of added or deleted content
*
* The supplied lines are joined and re-split into words and tags with
* _splitOnWords(). The markers are then positioned so that they wrap the
* text content rather than leading close tags or trailing open tags, with
* special handling for table cells and list items that appear without
* their enclosing table or list.
*
* @param array	$lines	the changed lines (or words) to mark up
* @param string	$prefix	marker placed before the changed content
*						(_added() and _deleted() pass the ins/del prefix)
* @param string	$suffix	marker placed after the changed content
*
* @return string
* @access private
*/
function _markChanges($lines, $prefix, $suffix)
{
    if (empty($lines)) return '';

    $lines = $this->_splitOnWords(implode(' ', $lines));

    if (count($lines) == 0) {
        // The input contained only whitespace, so there is nothing to put
        // the markers around. Return the bare marker pair explicitly instead
        // of falling through and reading offsets that do not exist.
        return $prefix.$suffix;
    } else if (count($lines) == 1) {
        // A lone tag is returned unmarked; a lone word is simply wrapped
        if (!$this->_isOpenTag($lines[0]) && !$this->_isCloseTag($lines[0])) {
            return $prefix.$lines[0].$suffix;
        } else {
            return $lines[0];
        }
    } else if (count($lines) >= 2) {
        // If every entry is a tag there is no text to mark, so return as is
        $num_lines = 0;
        foreach ($lines as $i => $line) {
            if ($this->_isOpenTag($lines[$i]) || $this->_isCloseTag($lines[$i])) {
                $num_lines++;
            }
        }
        if ($num_lines == count($lines)) return $this->_lines($lines, ' ', FALSE);
    }

    if ($this->_isOpenBlockTag($lines[0]) && $this->_isCloseBlockTag($lines[count($lines)-1])) {
        // simple case - we have one or more block elements
        return $prefix.$this->_lines($lines, ' ', FALSE).$suffix;
    }

    // Look for a list item or table cell that shows up before any list or
    // table has been opened within this change
    $table_opened   = FALSE;
    $list_opened    = FALSE;
    $need_traversal = FALSE;
    foreach ($lines as $i => $line) {
        if (preg_match('/<table([^>]*)>/i', $line)) {
            $table_opened = TRUE;
        }
        if (preg_match('/<(ul|ol|dl)([^>]*)>/i', $line)) {
            $list_opened = TRUE;
        }
        if (preg_match('/<li([^>]*)>/i', $line) && !$list_opened) {
            // list item found
            $need_traversal = TRUE;
            break;
        }
        if (preg_match('/<td([^>]*)>/i', $line) && !$table_opened) {
            // table cell found
            $need_traversal = TRUE;
            break;
        }
    }

    if ($need_traversal) {

        $opened = FALSE;
        foreach ($lines as $i => $line) {
            if (preg_match('/<(td|th|li)([^>]*)>/i', $line)) {
                //lets find the first closing tag (assuming valid html)
                $end_index = $i + 1;
                $pattern   = '/<\/td>/i';
                if (preg_match('/<th([^>]*)>/i', $line)) {
                    $pattern = '/<\/th>/i';
                } else if (preg_match('/<li([^>]*)>/i', $line)) {
                    $pattern = '/<\/li>/i';
                }

                // Must be reset for every cell/item: it used to be read
                // below without ever having been assigned when no closing
                // tag was present
                $found = FALSE;
                while ($end_index < count($lines)) {
                    if (preg_match($pattern, $lines[$end_index])) {
                        $found = TRUE;
                        break;
                    }
                    $end_index++;
                }

                //if we find the closing (td,th,li) tag lets make sure tags such as <p> are nested
                //inside the <ins> and <del> tags which is what is causing tables to look distorted as per Bug #4384
                if ($found) {
                    $begin_index = $i + 1;
                    // bounds are tested first so the element is only read when it exists
                    while (($begin_index < $end_index) && $this->_isOpenTag($lines[$begin_index])) {
                        $begin_index++;
                    }
                    $end_index--;
                    while (($end_index > 0) && $this->_isCloseTag($lines[$end_index])) {
                        $end_index--;
                    }

                    $lines[$begin_index] = $prefix.$lines[$begin_index];
                    $lines[$end_index]  .= $suffix;
                } else {
                    $lines[$i] .= $prefix;
                }

                $opened = TRUE; //tracking if the last tag is an open tag
                if (!isset($first)) { //tracking whether first tag is open or close
                    $first = "open";
                }
            } else if (preg_match('/<\/(td|th|li)([^>]*)>/i', $line)) {
                $opened = FALSE;
                if (!isset($first)) {
                    $first = "closed";
                    //takes care of situations where the FIRST tag in the lines is an open and close tag
                    //in this case we want <ins> or <del> tags AFTER.
                    if ($this->_isOpenTag($lines[0]) || $this->_isCloseTag($lines[0])) {
                        $lines[0] = $lines[0].$prefix;
                    } else {
                        $lines[0] = $prefix.$lines[0];
                    }
                    // NOTE(review): when $i is 0 this index is -1, so the
                    // suffix is written to a new element at the end of the
                    // array. Left unchanged because the intended output for
                    // that case is unclear - confirm before altering.
                    $end_index = $i - 1;
                    while (($end_index > 0) && $this->_isCloseTag($lines[$end_index])) {
                        $end_index--;
                    }
                    $lines[$end_index] .= $suffix;
                }
            }
        }

        if ($opened) {
            //takes care of situations where the LAST tag in the lines is an open and close tag
            //in this case we don't want any <ins> or <del> tags.
            if ($this->_isOpenTag($lines[count($lines)-1]) || $this->_isCloseTag($lines[count($lines)-1])) {
                $lines[count($lines)-2] = str_replace($prefix, '', $lines[count($lines)-2]);
            } else {
                $lines[count($lines)-1] .= $suffix;
            }
        }
        return $this->_lines($lines, ' ', FALSE);

    } else {
        // First get rid of any invalid non matching block tags (bug #5932)
        $this->_removeInvalidBlockTags($lines);

        // simple case, just make sure we put the mod tags in the right place:
        // after any leading close tags and before any trailing open tags
        $begin_index = 0;
        while (($begin_index < count($lines)) && $this->_isCloseTag($lines[$begin_index])) {
            $begin_index++;
        }
        $end_index = count($lines) - 1;
        while (($end_index > 0) && $this->_isOpenTag($lines[$end_index])) {
            $end_index--;
        }

        $lines[$begin_index] = $prefix.$lines[$begin_index];
        $lines[$end_index]  .= $suffix;

        return $this->_lines($lines, ' ', FALSE);
    }

}//end _markChanges()
195 
196 
/**
* Blanks out tags that have no matching partner in the supplied words
*
* Walks the entries in order keeping a stack of open tags. A close tag that
* does not match the most recently opened tag (or that arrives when nothing
* is open) is replaced with an empty string, and so is every open tag that
* is still on the stack once the walk is finished. Tag names are compared
* case-insensitively, in line with the case-insensitive tag patterns used
* by the other helpers in this class.
*
* @param array	&$lines	the words/tags to clean, modified in place
*
* @return void
* @access private
*/
private function _removeInvalidBlockTags(&$lines)
{
    $opened_tags = Array();
    foreach ($lines as $index => $tag) {
        if ($this->_isOpenTag($tag)) {
            // Remember where the tag was opened so it can be blanked later
            // if it never gets closed
            $opened_tags[] = Array(
                              'line' => $index,
                              'tag'  => strtolower(trim(preg_replace('|<([^\s]+).*?>|', '$1', $tag))),
                             );
        } else if ($this->_isCloseTag($tag)) {
            if (!empty($opened_tags)) {
                $previous_tag = $opened_tags[count($opened_tags)-1]['tag'];
                $current_tag  = strtolower(trim(preg_replace('|</([^>]+)>|', '$1', $tag)));
                if ($previous_tag !== $current_tag) {
                    // It does not have a matching opened tag, get rid of it
                    $lines[$index] = '';
                } else {
                    array_pop($opened_tags);
                }
            } else {
                // Nothing is open, so this close tag cannot match anything
                $lines[$index] = '';
            }
        }//end else if
    }//end foreach

    // Also get rid of remaining non-closed opened tags
    foreach ($opened_tags as $opened_tag) {
        $lines[$opened_tag['line']] = '';
    }//end foreach

}//end _removeInvalidBlockTags()
233 
234 
/**
* Marks up content that exists only in the newer version
*
* Hands the lines to _markChanges() together with the insertion markers.
*
* @param array	$lines	the added lines (or words)
*
* @return string
* @access private
*/
function _added($lines)
{
    $open_marker  = $this->_ins_prefix;
    $close_marker = $this->_ins_suffix;

    return $this->_markChanges($lines, $open_marker, $close_marker);

}//end _added()
248 
249 
/**
* Marks up content that exists only in the older version
*
* Hands the lines to _markChanges() together with the deletion markers.
*
* @param array	$lines	the deleted lines (or words)
*
* @return string
* @access private
*/
function _deleted($lines)
{
    $open_marker  = $this->_del_prefix;
    $close_marker = $this->_del_suffix;

    return $this->_markChanges($lines, $open_marker, $close_marker);

}//end _deleted()
263 
264 
/**
* Renders a block that differs between the two versions
*
* At line level both sides are split into words and tags and diffed again
* by a second highlighter running at word level. At word level the leading
* spaces shared by both sides are emitted once, followed by the deleted and
* then the added markup.
*
* @param array	$orig	the lines (or words) from the older version
* @param array	$final	the lines (or words) from the newer version
*
* @return string
* @access private
*/
function _changed($orig, $final)
{
    if ($this->_split_level != 'words') {
        // We want to split on word boundaries while keeping the tags whole,
        // so both sides go through _splitOnWords() before being diffed
        $old_words = $this->_splitOnWords(implode("\n", $orig));
        $new_words = $this->_splitOnWords(implode("\n", $final));
        $word_diff = new Text_Diff($old_words, $new_words);

        // Same parameters as this renderer, but working on words
        $word_params   = array_merge($this->getParams(), Array('split_level' => 'words'));
        $word_renderer = new Matrix_Diff_Highlighter($word_params);
        $rendered      = $word_renderer->render($word_diff);

        // "\0" is the non-printing newline marker
        return str_replace("\0", "\n", $rendered)."\n";
    }

    // Already split on words, so don't try to do so again - just display.
    // Leading spaces common to both sides are pulled out in front first.
    $shared_indent = '';
    while (TRUE) {
        if ($orig[0] === FALSE || $final[0] === FALSE) {
            break;
        }
        if (substr($orig[0], 0, 1) != ' ' || substr($final[0], 0, 1) != ' ') {
            break;
        }
        $shared_indent .= ' ';
        $orig[0]        = substr($orig[0], 1);
        $final[0]       = substr($final[0], 1);
    }

    $deleted_markup = $this->_deleted($orig);
    $added_markup   = $this->_added($final);

    return $shared_indent.$deleted_markup.$added_markup;

}//end _changed()
313 
314 
/**
* Tells whether the supplied text contains a closing tag
*
* @param string	$x	the text to inspect
*
* @return int|false	the preg_match() result: 1 when a closing tag is
*					present, 0 when it is not
* @access private
*/
function _isCloseTag($x)
{
    $closing_tag_pattern = '/<\/[^>]+>/i';
    $match_result        = preg_match($closing_tag_pattern, $x);

    return $match_result;

}//end _isCloseTag()
328 
329 
/**
* Tells whether the supplied text contains an opening tag
*
* br, hr and img are never treated as opening tags. An anchor carrying an
* href attribute always is. Anything else is decided by the generic
* pattern, which does not accept a slash anywhere inside the tag.
*
* @param string	$x	the text to inspect
*
* @return bool|int	FALSE for br/hr/img, TRUE for an anchor with an href,
*					otherwise the preg_match() result (1 or 0)
* @access private
*/
function _isOpenTag($x)
{
    // no empty tags thanks
    $is_empty_element = preg_match('/<(br|hr|img)( ([^>]*))?>/i', $x);
    if ($is_empty_element) {
        return FALSE;
    }

    // anchor tags are open tags
    $is_link = preg_match('/<a[^>]+href=[^>]+>/i', $x);
    if ($is_link) {
        return true;
    }

    $generic_open_pattern = '/<[^\/>]+>/i';
    return preg_match($generic_open_pattern, $x);

}//end _isOpenTag()
351 
352 
/**
* Tells whether the supplied text contains an opening block-level tag
*
* Only bare tags are recognised: the pattern expects the closing angle
* bracket straight after the tag name, so a tag with attributes does not
* match.
*
* @param string	$x	the text to inspect
*
* @return int|false	the preg_match() result (1 or 0)
* @access private
*/
function _isOpenBlockTag($x)
{
    $open_block_pattern = '/<(address|blockcode|blockquote|div|h|h1|h2|h3|h4|h5|h6|hr|p|pre|section)>/i';
    $match_result       = preg_match($open_block_pattern, $x);

    return $match_result;

}//end _isOpenBlockTag()
366 
367 
/**
* Tells whether the supplied text contains a closing block-level tag
*
* @param string	$x	the text to inspect
*
* @return int|false	the preg_match() result (1 or 0)
* @access private
*/
function _isCloseBlockTag($x)
{
    $close_block_pattern = '/<\/(address|blockcode|blockquote|div|h|h1|h2|h3|h4|h5|h6|hr|p|pre|section)>/i';
    $match_result        = preg_match($close_block_pattern, $x);

    return $match_result;

}//end _isCloseBlockTag()
381 
382 
/**
* Splits a string into words and whole tags
*
* Whitespace is forced around every angle bracket so that tags separate
* from the neighbouring text. The string is then split on whitespace, and
* the pieces of a tag that the split broke apart (a tag with attributes)
* are joined back together with single spaces. Every returned entry has
* one trailing space appended.
*
* @param string	$string	the text to split
*
* @return array	the words and tags, indexed from zero
* @access private
*/
function _splitOnWords($string)
{
    // NUL bytes are dropped from the text before splitting
    // (_changed() uses "\0" as its non-printing newline marker)
    $string = str_replace("\0", '', $string);
    $string = str_replace('>', '> ', $string);
    $string = str_replace('<', ' <', $string);

    $bits  = preg_split('/[\s]+/', $string);
    $total = count($bits); // need to copy this now coz it will change

    for ($i = 0; $i < $total; $i++) {
        if (!isset($bits[$i])) {
            // already merged into an earlier tag
            continue;
        }

        if ($bits[$i] === '') {
            unset($bits[$i]);
            continue;
        }

        // Square-bracket offset: the curly-brace form no longer parses on PHP 8
        if ($bits[$i][0] != '<' || strpos($bits[$i], '>') !== FALSE) {
            // a plain word, or a tag that is already complete
            continue;
        }

        // looks like the start of a tag that doesn't end here, so pull in
        // the following pieces until it does. Stopping at the last piece
        // keeps a stray '<' that is never closed from looping forever.
        $j = $i + 1;
        while ($j < $total && strpos($bits[$i], '>') === FALSE) {
            $bits[$i] .= ' '.$bits[$j];
            unset($bits[$j]);
            $j++;
        }
    }

    $words = Array();
    foreach ($bits as $bit) {
        $words[] = $bit.' ';
    }

    return $words;

}//end _splitOnWords()
430 
431 
/**
* Renders the supplied diff as HTML with highlighted changes
*
* The output starts with an inline style block that colours ins and del
* elements, followed by whatever the parent renderer produces for the diff.
* Whitespace is then moved from just inside the ins/del tags to just
* outside them, ins/del elements holding nothing but whitespace are
* removed, and HTML entities in the result are decoded using
* SQ_CONF_DEFAULT_CHARACTER_SET.
*
* @param object	$diff	the diff to render; process() passes a Text_Diff
*
* @return string
* @access public
*/
440  function render($diff)
441  {
442  $out = '
443  <style type="text/css">
444  ins, ins *, ins * * {
445  background: #aaffaa;
446  text-decoration: none;
447  }
448  del, del *, del * * {
449  background: #ffaaaa;
450  text-decoration: line-through;
451  }
452  </style>
453  ';
454 
455  // Text/Diff.php is throwing php5 warnings
456  // instead of overwriting the pear package, we are suppressing the errors
457  $out .= @parent::render($diff);
458 
459  // make sure we have some whitespace around ins/del tags,
460  // but none immediately inside
461  $out = preg_replace('/<ins>(\s*)/', ' <ins>', $out);
462  $out = preg_replace('/<del>(\s*)/', ' <del>', $out);
463  $out = preg_replace('/(\s*)<\/ins>/', '</ins> ', $out);
464  $out = preg_replace('/(\s*)<\/del>/', '</del> ', $out);
465 
466  // clean out empty ins/del elements
467  $out = preg_replace('/<del>(\s*)<\/del>/', '', $out);
468  $out = preg_replace('/<ins>(\s*)<\/ins>/', '', $out);
469 
 // NOTE(review): presumably this reverses entity encoding applied by the
 // parent renderer so the diffed HTML is output as markup - confirm
470  return html_entity_decode($out, ENT_COMPAT, SQ_CONF_DEFAULT_CHARACTER_SET);
471 
472  }//end render()
473 
474 
/**
* Compares two pieces of HTML and returns the highlighted difference
*
* When both versions are identical the second one is returned untouched.
* Otherwise whitespace is forced around every tag, runs of spaces and tabs
* are collapsed, both versions are split into lines and the resulting diff
* is passed to render().
*
* @param string	$first	the older version
* @param string	$second	the newer version
*
* @return string
* @access public
*/
function process($first, $second)
{
    // Compare as strings with a strict check. A loose == treats numeric
    // looking strings such as '1e3' and '1000' as equal, which made real
    // changes come back without any highlighting.
    if ((string) $first === (string) $second) return $second;

    // Make sure tags are separated from words
    $first  = trim(preg_replace('/<([^>]+)>/', ' <$1> ', $first), ' ');
    $second = trim(preg_replace('/<([^>]+)>/', ' <$1> ', $second), ' ');

    // Collapse inline whitespace
    $first  = preg_replace('/([ \t]+)/', ' ', $first);
    $second = preg_replace('/([ \t]+)/', ' ', $second);

    /* Create the Diff object. */
    // Text/Diff.php is throwing php5 warnings
    // instead of overwriting the pear package we are suppressing the errors
    @include_once 'Text/Diff.php';
    @$diff = new Text_Diff(explode("\n", $first), explode("\n", $second));

    return $this->render($diff);

}//end process()
505 
506 
507 }//end class
508 ?>