Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
system_integrity_fix_char_encoding.php
<?php
/**
 * Command-line script that looks for database values which change when converted
 * from the system's old character set to its new one and, unless --report is
 * given, rewrites them and regenerates the affected filesystem content.
 *
 * The database work runs in a forked child process which hands its result to the
 * parent through SYNC_FILE. Run with no arguments to see print_usage().
 */
error_reporting(E_ALL);
if (php_sapi_name() != 'cli') {
    trigger_error("You can only run this script from the command line\n", E_USER_ERROR);
}

$SYSTEM_ROOT = getCLIArg('system');
if (!$SYSTEM_ROOT) {
    echo "ERROR: You need to supply the path to the System Root\n";
    print_usage();
    exit(1);
}

if (!is_dir($SYSTEM_ROOT) || !is_readable($SYSTEM_ROOT.'/core/include/init.inc')) {
    echo "ERROR: Path provided doesn't point to a Matrix installation's System Root. Please provide correct path and try again.\n";
    print_usage();
    exit(1);
}

if (ini_get('memory_limit') != '-1') {
    ini_set('memory_limit', '-1');
}

// Make sure iconv is available. This has to happen before the first call to
// isValidCharset(), which itself relies on iconv().
if (function_exists('iconv') == FALSE) {
    echo "This script requires the php iconv module which isn't available.\n";
    echo "Install that module and try again.\n";
    exit(1);
}

$SYS_OLD_ENCODING = getCLIArg('old');
if (!$SYS_OLD_ENCODING || !isValidCharset($SYS_OLD_ENCODING)) {
    echo "\nERROR: The charset you specified '$SYS_OLD_ENCODING', as system's old encoding is not valid charset type.\n\n";
    print_usage();
    exit(1);
}
define('SYS_OLD_ENCODING', $SYS_OLD_ENCODING);

// --new is optional: only validate it when it was actually supplied
$SYS_NEW_ENCODING = getCLIArg('new');
if ($SYS_NEW_ENCODING !== FALSE && !isValidCharset($SYS_NEW_ENCODING)) {
    echo "\nERROR: The charset you specified '".$SYS_NEW_ENCODING."', as system's new encoding is not valid charset type.\n\n";
    print_usage();
    exit(1);
}

if (!empty($SYS_NEW_ENCODING)) {
    define('SYS_NEW_ENCODING', $SYS_NEW_ENCODING);
} else {
    // Fall back to the default character set configured in main.inc
    $config_path = $SYSTEM_ROOT.'/data/private/conf/main.inc';
    $config_file = is_readable($config_path) ? (string) file_get_contents($config_path) : '';
    $match = array();
    preg_match("|SQ_CONF_DEFAULT_CHARACTER_SET',\s*'(.*?)'\);|", $config_file, $match);
    if (empty($match[1])) {
        echo "\nERROR: The default charset is not specified in the main.inc. Please specify the new charset to convert the system to.\n\n";
        print_usage();
        exit(1);
    }
    define('SYS_NEW_ENCODING', $match[1]);
}

$root_node_id = getCLIArg('rootnode');
$root_node_id = ($root_node_id) ? $root_node_id : 1;

$reportOnly = getCLIArg('report');

// Tables where the values are to be fixed
//
// Array(
//     <db table> => Array(
//         "assetid"   => <fieldname containing the assetid>,
//         "contextid" => <fieldname containing the record's contextid>,
//         "value"     => <fieldname containing the record's value>,
//         "key"       => [<third field as part of the record's primary key>],
//     ),
// )
$tables = array(
    'sq_ast_attr_val' => array(
        'assetid'   => 'assetid',
        'contextid' => 'contextid',
        'value'     => 'custom_val',
        'key'       => 'attrid',
    ),
    'sq_ast_mdata_val' => array(
        'assetid'   => 'assetid',
        'contextid' => 'contextid',
        'value'     => 'value',
        'key'       => 'fieldid',
    ),
    'sq_ast_mdata_dflt_val' => array(
        'assetid'   => 'assetid',
        'contextid' => 'contextid',
        'value'     => 'default_val',
        'key'       => '',
    ),
    'sq_ast_attr_uniq_val' => array(
        'assetid'   => 'assetid',
        'contextid' => 'contextid',
        'value'     => 'custom_val',
        'key'       => 'owning_attrid',
    ),
);

// Charset names are case-insensitive for iconv, so "utf-8" and "UTF-8" are the same encoding
if (strcasecmp(SYS_OLD_ENCODING, SYS_NEW_ENCODING) === 0) {
    echo "\nERROR: The old encoding ('" . SYS_OLD_ENCODING . "') is the same as the current/new character set.\n\n";
    print_usage();
    exit(1);
}

if ($root_node_id == 1) {
    echo "\nWARNING: You are running this script on the whole system.\nThis is fine, but it may take a long time\n";
}

define('SCRIPT_LOG_FILE', $SYSTEM_ROOT.'/data/private/logs/'.basename(__FILE__).'.log');

if (!$reportOnly) {
    echo "\nIMPORTANT: This script will replace all the smart quote chars by their regular counterpart chars. And if value string is still\n";
    echo "invalid in the current system's charset then it performs charset conversion on string from older to current encoding\n";
    echo "YOU MUST BACKUP YOUR SYSTEM BEFORE RUNNING THIS SCRIPT\n";
    echo "Are you sure you want to proceed (Y/N)? \n";

    $yes_no = rtrim((string) fgets(STDIN, 4094));
    if (strtolower($yes_no) != 'y') {
        echo "\nScript aborted. \n";
        exit;
    }
}

// File to communicate between the child and parent process
define('SYNC_FILE', $SYSTEM_ROOT.'/data/temp/system_integrity_fix_char_encoding.data');
// Batch size when processing the asset content file regeneration
define('BATCH_SIZE', 100);

// No turning back now. Start char fixing.
$start_time = microtime(TRUE);

// A sync file left behind by an earlier run must never be mistaken for this run's result
if (is_file(SYNC_FILE)) {
    unlink(SYNC_FILE);
}

$pid = fork();
if (is_null($pid)) {
    // fork() returns NULL when the process could not be forked
    echo "ERROR: Unable to fork the process that updates the database. Nothing was changed.\n";
    exit(1);
}

if (!$pid) {

    // NOTE: Matrix is only initialised inside the child to work around Oracle, forking and CLOBs:
    // if a query is executed that returns more than 1 LOB before a fork occurs,
    // the Oracle DB connection will be lost inside the fork
    require_once $SYSTEM_ROOT.'/core/include/init.inc';

    $summary = fix_db($root_node_id, $tables);
    if (!is_array($summary)) {
        // Nothing was processed; hand a well-formed empty summary to the parent
        $summary = array(
            'error_count'         => 0,
            'records_fixed_count' => 0,
            'affected_assetids'   => array(),
        );
    }

    // Get the list of assetids for which we need to regenerate the filesystem content
    // to reflect the changes made in the db
    $affected_assetids = get_affected_assetids($summary['affected_assetids']);

    // Also get the context ids
    $contextids = array_keys($GLOBALS['SQ_SYSTEM']->getAllContexts());

    file_put_contents(
        SYNC_FILE,
        serialize(array(
            'affected_assetids' => $affected_assetids,
            'db_summary'        => $summary,
            'contextids'        => $contextids,
        ))
    );

    exit();

}//end child process

if (!is_file(SYNC_FILE)) {
    echo "Expected sync file containing the affected assetids not found. Only database was updated\n";
    exit(1);
}

// The sync file is written by this script's own child process (see above)
$summary = unserialize(file_get_contents(SYNC_FILE));
unlink(SYNC_FILE);

if (!is_array($summary) || !isset($summary['db_summary'], $summary['affected_assetids'], $summary['contextids'])) {
    echo "Sync file written by the database update process could not be read. Filesystem content was not regenerated\n";
    exit(1);
}

// Fix the filesystem content to reflect the changes made in the db
if ($reportOnly == FALSE) {
    regenerate_filesystem_content($summary['affected_assetids'], $summary['contextids']);

    echo "Number of db records replaced successfully: ".$summary['db_summary']['records_fixed_count']."\n";
    echo "Total errors recorded: ".$summary['db_summary']['error_count']."\n";
} else {
    echo "Number of db records that need replacing: ".$summary['db_summary']['records_fixed_count']."\n";
}

echo "Total time taken to run the script: ".round(microtime(TRUE) - $start_time, 2)." second(s)\n";

if ($summary['db_summary']['error_count'] > 0) {
    echo "\nPlease check ".SCRIPT_LOG_FILE." file for errors\n\n";
}
echo "\n";

exit();

// End of Main program /////////////////////////////////
208 
209 
/**
 * Looks through the given tables for values that change when converted from
 * SYS_OLD_ENCODING to SYS_NEW_ENCODING and, unless the global $reportOnly flag is
 * set, writes the converted value back to the database one record at a time.
 *
 * A record is only rewritten when its current value is not valid in the new
 * encoding and the converted value is. Records that differ but cannot be fixed are
 * counted as errors and written to the script log.
 *
 * @param mixed $root_node assetid of the root node; the node itself and everything
 *                         getChildren() returns for it are examined
 * @param array $tables    table name => array holding the 'assetid', 'contextid',
 *                         'value' and 'key' column names of that table
 *
 * @return array array(
 *                   'error_count'         => int,
 *                   'records_fixed_count' => int,
 *                   'affected_assetids'   => array(table name => list of assetids),
 *               )
 */
function fix_db($root_node, $tables)
{
    global $reportOnly;

    $target_assetids = array_keys($GLOBALS['SQ_SYSTEM']->am->getChildren($root_node));
    // The root node itself is always examined, so this list is never empty
    array_unshift($target_assetids, $root_node);

    echo "\n\nNumber of assets to look into : ".count($target_assetids)." \n";

    $errors = array();
    $records_fixed_count = 0;
    $invalid_asset_records = array();

    // Assets that will require filesystem content regeneration
    $affected_assetids = array();

    $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('db2');

    // Go through 50 assets at a time.
    $count = 0;
    foreach (array_chunk($target_assetids, 50) as $assetids) {
        foreach ($tables as $table => $fields) {
            // An empty 'key' column leaves a trailing comma behind, hence the trim().
            // NOTE(review): the assetids are placed into the SQL text unescaped.
            $sql = 'SELECT '.trim(implode(',', $fields), ',').' FROM '.$table;
            $sql .= ' WHERE assetid IN (\''.implode('\',\'', $assetids).'\')';

            $results = MatrixDAL::executeSqlAssoc($sql);

            foreach ($results as $record) {
                $count++;
                if ($count % 100 == 0) {
                    echo '.';
                }

                $value = isset($record[$fields['value']]) ? $record[$fields['value']] : NULL;
                $assetid = isset($record[$fields['assetid']]) ? $record[$fields['assetid']] : NULL;
                $key = isset($record[$fields['key']]) ? $record[$fields['key']] : NULL;
                $contextid = isset($record[$fields['contextid']]) ? $record[$fields['contextid']] : NULL;

                if (is_null($value) || is_null($assetid) || is_null($contextid) || ($fields['key'] && is_null($key))) {
                    continue;
                }

                if (empty($value)) {
                    continue;
                }

                // If it's the same in the new and old encodings, that's good.
                // iconv() returns FALSE when the conversion fails outright.
                $converted_value = @iconv(SYS_OLD_ENCODING, SYS_NEW_ENCODING.'//IGNORE', $value);

                if ($value === $converted_value) {
                    continue;
                }

                $invalid_asset_records[] = array(
                    'asset' => $assetid,
                    'table' => $table,
                    'value' => $value,
                );

                // Only replace a value that is invalid in the new encoding, and only with a
                // conversion result that really is a valid string. A failed conversion (FALSE)
                // must never be written to the database.
                $update_required = FALSE;
                if (!isValidValue($value)) {
                    if ($converted_value !== FALSE && $converted_value != $value && isValidValue($converted_value)) {
                        $value = $converted_value;
                        $update_required = TRUE;
                    }
                }

                if ($update_required) {
                    if (!$reportOnly) {
                        $GLOBALS['SQ_SYSTEM']->doTransaction('BEGIN');
                        try {
                            $update_text = 'UPDATE '.$table.
                                ' SET '.$fields['value'].'=:value'.
                                ' WHERE '.$fields['assetid'].'=:assetid'.
                                ' AND '.$fields['contextid'].'=:contextid'.
                                (!is_null($key) ? ' AND '.$fields['key'].'=:key' : '');

                            $update_sql = MatrixDAL::preparePdoQuery($update_text);

                            MatrixDAL::bindValueToPdo($update_sql, 'value', $value);
                            MatrixDAL::bindValueToPdo($update_sql, 'assetid', $assetid);
                            MatrixDAL::bindValueToPdo($update_sql, 'contextid', $contextid);
                            if (!is_null($key)) {
                                MatrixDAL::bindValueToPdo($update_sql, 'key', $key);
                            }

                            $execute = MatrixDAL::executePdoAssoc($update_sql);
                            if (count($execute) > 1) {
                                // Spell the statement out for the log; the value itself stays a placeholder
                                $update_text = str_replace(':assetid', $assetid, $update_text);
                                $update_text = str_replace(':contextid', $contextid, $update_text);
                                if (!is_null($key)) {
                                    $update_text = str_replace(':key', $key, $update_text);
                                }

                                $errors[] = array(
                                    'asset' => $assetid,
                                    'table' => $table,
                                );

                                $msg = "Executing query \"$update_text\" will affect ".count($execute)." (more than 1) records! Ignoring this sql.";
                                log_error_msg($msg);

                                $GLOBALS['SQ_SYSTEM']->doTransaction('ROLLBACK');

                                continue;
                            }

                            $GLOBALS['SQ_SYSTEM']->doTransaction('COMMIT');
                            $records_fixed_count++;
                            $affected_assetids[$table][] = $assetid;

                        } catch (Exception $e) {
                            $errors[] = array(
                                'asset' => $assetid,
                                'table' => $table,
                            );
                            $msg = "Unexpected error occurred while updating database: ".$e->getMessage();
                            log_error_msg($msg);

                            $GLOBALS['SQ_SYSTEM']->doTransaction('ROLLBACK');
                        }
                    } else {
                        $records_fixed_count++;
                        // For reporting purpose only
                        $affected_assetids[$table][] = $assetid;
                    }
                } else {
                    // The value differs after conversion but was not replaced: it is either already
                    // valid in the new encoding or the converted value is still not valid.
                    $errors[] = array(
                        'asset' => $assetid,
                        'table' => $table,
                        'value' => $value,
                    );

                    $msg = "Asset with ".$fields['assetid']."=#$assetid, ".
                        (!is_null($key) ? $fields['key']."=#$key, and " : "and ").
                        $fields['contextid']."=#$contextid in table $table ".
                        "contains invalid char(s), which were not replaced because ".
                        "either those invalid chars were not defined in the replacement array or the charset conversion was not successful".
                        "\nPotentially invalid characters include: ".listProblematicCharacters($value);
                    log_error_msg($msg);
                }

            }//end foreach record
        }//end foreach table
    }//end foreach chunk

    $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();

    unset($target_assetids);

    echo "\n";

    $invalid_count = count($invalid_asset_records);
    echo "Number of db records with invalid char(s): ".$invalid_count."\n";
    if ($invalid_count > 0) {
        foreach ($invalid_asset_records as $details) {
            echo "\tAsset: ".$details['asset']." in table ".$details['table'];
            echo "\tPossibly problematic characters: ".listProblematicCharacters($details['value'])."\n";
        }
        echo "\n";
    }

    return array(
        'error_count'         => count($errors),
        'records_fixed_count' => $records_fixed_count,
        'affected_assetids'   => $affected_assetids,
    );

}//end fix_db()
404 
405 
/**
 * Works out which assets need their filesystem content regenerated after the
 * database update, grouped by the kind of file and split into batches of
 * BATCH_SIZE assetids.
 *
 * @param array $data table name => list of assetids whose records were changed
 *                    (the 'affected_assetids' entry returned by fix_db())
 *
 * @return array array(
 *                   'bodycopy_content_file' => list of assetid batches,
 *                   'metadata_file'         => list of assetid batches,
 *                   'design_file'           => list of assetid batches,
 *               )
 */
function get_affected_assetids($data)
{
    // List of relevant assetids to regenerate the filesystem content
    $affected_assetids = array(
        'bodycopy_content_file' => array(),
        'metadata_file'         => array(),
        'design_file'           => array(),
    );

    echo "Getting the list of assetids that needs content regeneration ...";

    if (!is_array($data)) {
        $data = array();
    }

    foreach ($data as $table_type => $assetids) {
        switch ($table_type) {
            case 'sq_ast_mdata_val':
                $affected_assetids['metadata_file'] = array_merge($affected_assetids['metadata_file'], $assetids);
                echo ".";
            break;

            case 'sq_ast_mdata_dflt_val':
                $mm = $GLOBALS['SQ_SYSTEM']->getMetadataManager();
                foreach ($assetids as $mfield_assetid) {
                    // Get all the asset that has this schema applied
                    // NOTE(review): $schemaid is looked up but never passed on; getSchemaAssetids()
                    // is called without arguments. Confirm against the Metadata Manager API
                    // whether the schema id was meant to be supplied here.
                    $schemaid = array_keys($GLOBALS['SQ_SYSTEM']->am->getParents($mfield_assetid, 'metadata_schema'));
                    $affected_assetids['metadata_file'] = array_merge($affected_assetids['metadata_file'], $mm->getSchemaAssetids());
                    echo ".";
                }//end foreach
            break;

            case 'sq_ast_attr_val':
                // Get list of Design assets that needs to be regenerated
                $affected_assetids['design_file'] = array_keys($GLOBALS['SQ_SYSTEM']->am->getAssetInfo($assetids, array('design', 'design_css'), TRUE));

                echo ".";
                // and list of Bodycopy Container assets
                $content_type_assetids = array_keys($GLOBALS['SQ_SYSTEM']->am->getAssetInfo($assetids, array('content_type'), FALSE));
                foreach ($content_type_assetids as $assetid) {
                    $bodycopy_container_link = $GLOBALS['SQ_SYSTEM']->am->getLinks($assetid, SQ_LINK_TYPE_2, array('bodycopy_container'), FALSE, 'minor');
                    if (isset($bodycopy_container_link[0]['majorid'])) {
                        // This bodycopy content file needs to be generated
                        $affected_assetids['bodycopy_content_file'][] = $bodycopy_container_link[0]['majorid'];
                    }
                    echo ".";
                }//end foreach
            break;
        }//end switch
    }//end foreach

    // Drop duplicate assetids, then split each list into batches of BATCH_SIZE
    $batched_assetids = array();
    foreach ($affected_assetids as $type => $type_assetids) {
        $batched_assetids[$type] = array_chunk(array_unique($type_assetids), (int) BATCH_SIZE);
    }

    unset($affected_assetids);
    echo " done.\n";

    return $batched_assetids;

}//end get_affected_assetids()
485 
486 
/**
 * Regenerates the filesystem content (bodycopy content files, metadata files and
 * design files) of the given assets in every given context.
 *
 * Each batch of assetids is handled in its own forked child process; fork() makes
 * the parent wait for that child before the next batch is started.
 *
 * @param array $assets_data file type ('bodycopy_content_file', 'metadata_file' or
 *                           'design_file') => list of assetid batches, as returned
 *                           by get_affected_assetids()
 * @param array $contextids  ids of the contexts to regenerate the content in
 *
 * @return void
 */
function regenerate_filesystem_content($assets_data, $contextids)
{
    global $SYSTEM_ROOT;

    echo "\n";

    foreach ($assets_data as $type => $assets_batch) {
        if (empty($assets_batch)) {
            continue;
        }

        echo "Regenerating the ".str_replace('_', ' ', $type). " ...";
        foreach ($assets_batch as $assetids) {
            $pid = fork();
            if (is_null($pid)) {
                // fork() failed: do not run the batch (and its exit()) inside the parent
                echo "\nERROR: Unable to fork a child process; skipped a batch of ".count($assetids)." asset(s)\n";
                continue;
            }
            if ($pid) {
                // Parent process: the child has finished, move on to the next batch
                continue;
            }

            // Do the stuff in the child process
            require_once $SYSTEM_ROOT.'/core/include/init.inc';
            $root_user = $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('root_user');
            $GLOBALS['SQ_SYSTEM']->setCurrentUser($root_user);

            $mm = $GLOBALS['SQ_SYSTEM']->getMetadataManager();
            $GLOBALS['SQ_SYSTEM']->setRunLevel(SQ_RUN_LEVEL_FORCED);

            foreach ($contextids as $contextid) {
                $GLOBALS['SQ_SYSTEM']->changeContext($contextid);

                foreach ($assetids as $assetid) {
                    $asset = $GLOBALS['SQ_SYSTEM']->am->getAsset($assetid);
                    if (is_null($asset)) {
                        continue;
                    }

                    if ($type == 'bodycopy_content_file') {
                        // Its a bodycopy container asset
                        $bodycopy_container_edit_fns = $asset->getEditFns();
                        $bodycopy_container_edit_fns->generateContentFile($asset);
                    } else if ($type == 'metadata_file') {
                        // Do not trigger "update asset" event when regenerating metadata
                        $mm->regenerateMetadata($assetid, NULL, FALSE);
                    } else {
                        // If we're not a design for some reason, continue
                        if (!($asset instanceof Design)) {
                            continue;
                        }
                        $design_edit_fns = $asset->getEditFns();
                        // Parse and process the design, if successful generate the design file
                        if (@$design_edit_fns->parseAndProcessFile($asset)) {
                            @$asset->generateDesignFile(false);
                        }
                        // Update respective design customisations from this (parent) design
                        $customisation_links = $GLOBALS['SQ_SYSTEM']->am->getLinks($assetid, SQ_LINK_TYPE_2, 'design_customisation', true, 'major', 'customisation');
                        foreach ($customisation_links as $link) {
                            $customisation = $GLOBALS['SQ_SYSTEM']->am->getAsset($link['minorid'], $link['minor_type_code']);
                            if (is_null($customisation)) {
                                continue;
                            }
                            @$customisation->updateFromParent($asset);
                            $GLOBALS['SQ_SYSTEM']->am->forgetAsset($customisation);
                        }
                    }

                    $GLOBALS['SQ_SYSTEM']->am->forgetAsset($asset);

                    echo ".";
                }//end foreach assetids

                $GLOBALS['SQ_SYSTEM']->restoreContext();
            }//end foreach contexts

            $GLOBALS['SQ_SYSTEM']->restoreRunLevel();
            $GLOBALS['SQ_SYSTEM']->restoreCurrentUser();

            exit();
            //end child process

        }//end foreach asset batch
        echo " done.\n";

    }//end foreach type

}//end regenerate_filesystem_content()
575 
576 
/**
 * Checks whether a value is a valid string in the given character set.
 *
 * The value is valid when converting it from the charset to the same charset
 * leaves it byte-for-byte unchanged. FALSE (e.g. the result of a failed iconv()
 * call), NULL and non-scalar input are never valid.
 *
 * @param mixed  $value   value to check
 * @param string $charset character set to check against, defaults to SYS_NEW_ENCODING
 *
 * @return boolean
 */
function isValidValue($value, $charset=SYS_NEW_ENCODING)
{
    if (!is_scalar($value) || is_bool($value)) {
        return FALSE;
    }

    $value = (string) $value;

    // iconv() raises a notice for illegal input, hence the suppression. Strict comparison
    // keeps a FALSE result from comparing equal to an empty or "0" value.
    return $value === @iconv($charset, $charset."//IGNORE", $value);

}//end isValidValue()
590 
591 
/**
 * Checks whether iconv knows the given character set, by converting a short
 * ASCII string from the charset to itself.
 *
 * @param mixed $charset name of the character set to check
 *
 * @return boolean
 */
function isValidCharset($charset)
{
    // iconv() complains and returns FALSE for an unknown charset, hence the suppression
    return 'test' === @iconv($charset, $charset, 'test');

}//end isValidCharset()
604 
605 
/**
 * Appends a timestamped message to the script's log file (SCRIPT_LOG_FILE).
 *
 * @param string $msg message to log
 *
 * @return void
 */
function log_error_msg($msg)
{
    // 24-hour clock ("H"): a 12-hour value without am/pm would make the timestamp ambiguous
    $msg = date('j-m-y H-i-s').": ".$msg."\n";
    file_put_contents(SCRIPT_LOG_FILE, $msg, FILE_APPEND | LOCK_EX);

}//end log_error_msg()
615 
616 
/**
 * Fork a child process. The parent process will sleep until the child
 * it has just forked exits.
 *
 * @return int|null 0 inside the child process, the child's pid inside the parent
 *                  process (after the child has exited), NULL when forking failed
 */
function fork()
{
    $child_pid = pcntl_fork();

    if ($child_pid === -1) {
        trigger_error("Forking failed!");
        return null;
    }

    if ($child_pid > 0) {
        // Parent process: wait for this particular child rather than for any child
        $status = null;
        pcntl_waitpid($child_pid, $status);
    }

    return $child_pid;

}//end fork()
642 
643 
/**
 * Reads a "--name" or "--name=value" option from the command line arguments.
 *
 * The option name is matched case-insensitively and must match completely, so
 * "--report" is not found in "--reporting". Everything after the first "=" is
 * the value, which may itself contain "=". The first matching argument wins.
 *
 * @param string $arg name of the option, without the leading "--"
 *
 * @return string|boolean the value for "--name=value" (possibly an empty string),
 *                        TRUE for a bare "--name", FALSE when the option is absent
 */
function getCLIArg($arg)
{
    $argv = (isset($_SERVER['argv']) && is_array($_SERVER['argv'])) ? $_SERVER['argv'] : array();
    $pattern = '/^--'.preg_quote($arg, '/').'(?:=(.*))?$/is';

    foreach ($argv as $cli_arg) {
        if (preg_match($pattern, $cli_arg, $reg)) {
            // Group 1 is only present when the argument carried an "=value" part
            return isset($reg[1]) ? $reg[1] : true;
        }
    }

    return false;

}//end getCLIArg()
659 
660 
/**
 * Prints a description of the script and its command line options.
 *
 * @return void
 */
function print_usage()
{
    echo "\nThis script replaces all the non-utf8 smart quotes chars by their respective regular counterpart chars.";
    echo "\nIf string is still invalid in current charset encoding after the replacement then script will perform charset";
    echo "\nconversion on string from previous charset to the current one.\n\n";

    echo "Usage: php ".basename(__FILE__)." --system=<SYSTEM_ROOT> --old=<OLD_CHARSET> [--new=<NEW_CHARSET>] [--rootnode=<ROOT_NODE>] [--report]\n\n";
    echo "\t<SYSTEM_ROOT> : The root directory of Matrix system.\n";
    echo "\t<OLD_CHARSET> : Previous charset of the system. (eg. UTF-8, Windows-1252, etc)\n";
    echo "\t<NEW_CHARSET> : New charset of the system. (eg. UTF-8, Windows-1252, etc)\n";
    echo "\t<ROOT_NODE> : Assetid of the rootnode (all children of the rootnode will be processed by the script).\n";
    echo "\t<--report> : Issue a report only instead of also trying to convert the assets.\n";

    echo "\nWARNING: IT IS STRONGLY RECOMMENDED THAT YOU BACKUP YOUR SYSTEM BEFORE RUNNING THIS SCRIPT\n\n";

}//end print_usage()
682 
683 
/**
 * Replaces every non-ASCII character of a UTF-8 string by its decimal numeric
 * HTML entity ("&#NNN;"); ASCII bytes are copied unchanged.
 *
 * A byte that does not start a complete, well-formed multi-byte sequence (a stray
 * continuation byte, a byte of 248 or above, or a lead byte whose continuation
 * bytes are missing or malformed) is written as the entity of that single byte
 * value, and scanning carries on with the next byte.
 *
 * @param string $str string to convert
 *
 * @return string
 */
function htmlallentities($str)
{
    $str = (string) $str;
    $res = '';
    $strlen = strlen($str);
    $i = 0;

    while ($i < $strlen) {
        $byte = ord($str[$i]);

        if ($byte < 128) {
            // 1-byte char
            $res .= $str[$i];
            $i++;
            continue;
        }

        // Number of continuation bytes expected after the lead byte, and its payload bits
        if ($byte >= 192 && $byte < 224) {
            $extra = 1;
            $code = $byte & 31;
        } elseif ($byte >= 224 && $byte < 240) {
            $extra = 2;
            $code = $byte & 15;
        } elseif ($byte >= 240 && $byte < 248) {
            $extra = 3;
            $code = $byte & 7;
        } else {
            // stray continuation byte (128-191) or a byte that never occurs in utf8 (248-255)
            $extra = 0;
            $code = 0;
        }

        // The sequence must fit inside the string and consist of continuation bytes only
        $valid = ($extra > 0 && ($i + $extra) < $strlen);
        for ($k = 1; $valid && $k <= $extra; $k++) {
            $next = ord($str[$i + $k]);
            if (($next & 192) !== 128) {
                $valid = false;
            } else {
                $code = ($code * 64) + ($next & 63);
            }
        }

        if ($valid) {
            $res .= '&#'.$code.';';
            $i += $extra + 1;
        } else {
            // invalid utf8: report the offending byte on its own
            $res .= '&#'.$byte.';';
            $i++;
        }
    }

    return $res;

}//end htmlallentities()
712 
713 
/**
 * Builds a human readable, comma separated list of the non-ASCII characters found
 * in a value, each followed by its numeric code in brackets.
 *
 * @param string $value value to inspect
 *
 * @return string e.g. "X (233), Y (8217)" where X and Y are whatever
 *                html_entity_decode() returns for the entity; an empty string
 *                when the value contains no non-ASCII characters
 */
function listProblematicCharacters($value)
{
    // Only non-ASCII bytes are of interest. Collapsing every ASCII run to a single space
    // stops literal "&#123;" text in the value from being reported as a character,
    // while still keeping separate byte sequences apart.
    $non_ascii = preg_replace('/[\x00-\x7F]+/', ' ', (string) $value);

    $entified = htmlallentities($non_ascii);
    preg_match_all('/&#([0-9]+);/', $entified, $matches);

    $labels = array();
    foreach (array_unique($matches[1]) as $code) {
        $labels[] = html_entity_decode('&#'.$code.';', ENT_COMPAT, 'utf-8').' ('.$code.')';
    }

    return implode(', ', $labels);

}//end listProblematicCharacters()
734 
735 
736 ?>
737