Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
replace_text.php
1 <?php
// START Configuration Options:

// 1- Asset types to allow. Add/remove type codes in this array.
//    Leave the array empty to allow all asset types.
$type_code = Array (
    'page',
);

// 2- Whether we are finding assets that are just a $type_code, or a
//    $type_code and any of its sub-classes (TRUE or FALSE).
$strict_type_code = FALSE;

// 3- Enable/disable the following clean-up options (1 = enabled, 0 = disabled).
//    Every key must be spelt exactly as shown: process_replace_text() returns
//    FALSE for an enabled key it does not recognise, which aborts the script.
$options = Array (
    // non-extreme options
    'Remove <font> tags' => 1,
    'Remove double spaces' => 1,
    'Remove non-HTML tags' => 1,
    'Change Microsoft Words bullets' => 1,
    'Remove soft hyphens' => 1,
    // extreme options
    'Remove style attribute' => 0,
    'Remove class attribute' => 0,
    'Remove <table> tags' => 0,
    'Remove <span> tags' => 0,
    'Remove all empty tags' => 0,
    'Remove all tags attributes (except HREF and SRC)' => 0,
);

// END Configuration Options:

// ---------------------------------------------------------------------------
// Command-line driver: walks the assets found under the given root nodes and
// runs process_replace_text() over their editable/WYSIWYG content.
// Every failure path exits with a non-zero status so callers (cron, shell
// scripts) can detect that the run did not complete.
// ---------------------------------------------------------------------------

error_reporting(E_ALL);
if ((php_sapi_name() != 'cli')) {
    trigger_error("You can only run this script from the command line\n", E_USER_ERROR);
}

// argv = script name, SYSTEM_ROOT, root ids and (optionally) excluded ids
$args = count($_SERVER['argv']);
if ($args > 4 || $args < 3) {
    echo "This script needs to be run in the following format:\n\n";
    echo "\tphp replace_text.php SYSTEM_ROOT root_node_ids [exclude_root_node_ids]\n\n";
    echo "\tEg. php scripts/replace_text.php . 10,5 7,2\n\n";
    echo "Also note there are 3 configurable options in the script: By default non-extreme options are run on all 'page' types\n\n";
    exit(1);
}

$SYSTEM_ROOT = (isset($_SERVER['argv'][1])) ? $_SERVER['argv'][1] : '';
if (empty($SYSTEM_ROOT)) {
    echo "ERROR: You need to supply the path to the System Root as the first argument\n";
    exit(1);
}

if (!is_dir($SYSTEM_ROOT) || !is_readable($SYSTEM_ROOT.'/core/include/init.inc')) {
    echo "ERROR: Path provided doesn't point to a Matrix installation's System Root. Please provide correct path and try again.\n";
    exit(1);
}

// Both id lists are comma separated. Blank entries (e.g. from a trailing
// comma) are dropped so that an empty id is never handed to the asset manager.
$root_nodes = array_filter(array_map('trim', explode(',', $_SERVER['argv'][2])), 'strlen');
if (empty($root_nodes)) {
    echo "ERROR: You need to supply at least one root node ID as the second argument\n";
    exit(1);
}

$excl_nodes = Array();
if (isset($_SERVER['argv'][3])) {
    $excl_nodes = array_filter(array_map('trim', explode(',', $_SERVER['argv'][3])), 'strlen');
}

require_once $SYSTEM_ROOT.'/core/include/init.inc';

// Run as the root user at the forced run level for the whole walk
$root_user =& $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('root_user');
$GLOBALS['SQ_SYSTEM']->setCurrentUser($root_user);
$GLOBALS['SQ_SYSTEM']->setRunLevel(SQ_RUN_LEVEL_FORCED);

// Collect the ids found underneath every excluded node
// NOTE(review): getChildren() is called without the 4th argument here but
// with FALSE for the root nodes below - confirm the difference is intended.
$excl_ids = Array();
foreach ($excl_nodes as $node) {
    $children = $GLOBALS['SQ_SYSTEM']->am->getChildren($node, $type_code, $strict_type_code);
    foreach ($children as $child_id => $info) {
        $excl_ids[] = $child_id;
    }
}

foreach ($root_nodes as $node) {
    $children = $GLOBALS['SQ_SYSTEM']->am->getChildren($node, $type_code, $strict_type_code, FALSE);
    foreach ($children as $child_id => $info) {
        // Skip excluded nodes themselves and anything found beneath them
        if (in_array($child_id, $excl_nodes) || in_array($child_id, $excl_ids)) {
            continue;
        }

        $contents = $GLOBALS['SQ_SYSTEM']->am->getEditableContents($child_id);
        if ($contents) {
            // The asset exposes editable contents: clean each one and write it back
            foreach ($contents as $id => $edit) {
                echo "Examining wysiwyg content type of Asset ID: $id\n";
                $edited = process_replace_text($edit, $options);
                if ($edited === FALSE) {
                    echo "There is a crazy error in this script. Most likey the options array has been misconfigured\n";
                    exit(1);
                }
                $GLOBALS['SQ_SYSTEM']->am->setEditableContents($id, $edited);
            }
            continue;
        }

        // No editable contents: fall back to the asset type's 'wysiwyg' attributes.
        // The type code is read from $info[0]['type_code']; children whose info
        // does not have that entry are skipped.
        if (!isset($info[0]['type_code'])) {
            continue;
        }
        $type_info = $GLOBALS['SQ_SYSTEM']->am->getAssetTypeAttributes($info[0]['type_code'], Array('name', 'type'));

        foreach ($type_info as $name => $type) {
            if ($type['type'] != 'wysiwyg') {
                continue;
            }
            $asset = $GLOBALS['SQ_SYSTEM']->am->getAsset($child_id);
            $contents = $asset->attr($name);
            echo "Examining wysiwyg contents of attribute '$name' of Asset ID: $child_id\n";
            $edited = process_replace_text($contents, $options);
            if ($edited === FALSE) {
                echo "There is a crazy error in this script. Most likey the options array has been misconfigured\n";
                exit(1);
            }
            $asset->setAttrValue($name, $edited);
            $asset->saveAttributes();
        }
    }
}

$GLOBALS['SQ_SYSTEM']->restoreRunLevel();
$GLOBALS['SQ_SYSTEM']->restoreCurrentUser();

/**
* Cleans up a fragment of (typically Word-pasted) HTML by applying every enabled option
*
* Options are applied in the order they appear in $options. Before each enabled
* option the "Bug #3204" clean-up (local file <link> tags and <w:...> elements)
* is re-applied; it is deliberately kept inside the loop because an earlier
* option (e.g. collapsing double spaces) can turn markup into something that
* clean-up matches on a later pass.
*
* Encoding: when $html is valid UTF-8 the soft hyphen and middle-dot bullet are
* matched as the UTF-8 sequences C2 AD / C2 B7, so bytes that belong to other
* multi-byte characters (e.g. the AD in "í" = C3 AD) are never stripped.
* Otherwise the single bytes AD / B7 are used, as for single-byte charsets.
*
* If PCRE reports an error for one replacement, that replacement is skipped
* (a warning is raised) instead of the whole content being replaced by NULL.
*
* @param string	$html		the HTML to clean
* @param array	$options	option label => 1 (apply) or anything else (skip)
*
* @return mixed	the cleaned HTML string, or FALSE if an enabled option label is not recognised
*/
function process_replace_text($html, $options)
{
    $reg = null;
    $rep = null;
    $curHTML = $html;
    $HTMLtags = "!--|a|abbr|above|acronym|address|applet|array|area|b|base|basefont|bdo|bgsound|big|blink|blockquote|body|box|br|blink|button|caption|center|cite|code|col|colgroup|comment|dd|del|dfn|dir|div|dl|dt|em|embed|fieldset|fig|font|form|frame|frameset|h1|h2|h3|h4|h5|h6|head|hr|html|i|id|iframe|ilayer|img|input|ins|isindex|kbd|label|layer|legend|li|link|listing|map|marquee|menu|meta|multicol|nextid|nobr|noframes|nolayer|note|noscript|object|ol|option|keygen|optgroup|p|param|pre|q|quote|range|root|s|samp|script|select|small|sound|spacer|span|sqrt|strike|strong|style|sub|sup|table|tbody|td|text|textarea|tfoot|th|thead|title|tr|tt|u|ul|var|wbr|xmp";

    // An empty pattern with the 'u' modifier only matches when the subject is valid UTF-8
    $is_utf8 = is_string($html) && preg_match('//u', $html) === 1;
    $bullet = $is_utf8 ? "\xC2\xB7" : urldecode("%B7");
    $shy = $is_utf8 ? "\xC2\xAD" : urldecode("%AD");
    // 'u' makes PCRE treat the two-byte sequences as single characters
    $char_mod = $is_utf8 ? 'iu' : 'i';

    foreach ($options as $key => $value) {
        if ($value != 1) {
            continue;
        }

        // Bug #3204 - Remove Word Document HTML Clipboard Tags, that get pasted through on Firefox 3
        $curHTML = _process_replace_text_apply("%<link rel=\"[^\"]*\" href=\"file[^\"]*\">%i", "", $curHTML);
        $curHTML = _process_replace_text_apply("%<w\:[^>]*>(.*?)<\/w\:[^>]*>%i", "", $curHTML);

        switch ($key) {
            case 'Remove <font> tags':
                $reg = "%<\/?font ?[^>]*>%i";
                $rep = "";
            break;
            case 'Remove double spaces':
                //#3827 -- using regexp literal
                // $1 is the last whitespace item of the run, so the run collapses to it
                $reg = "%(\s|&nbsp;){2,}%i";
                $rep = "$1";
            break;
            case 'Remove non-HTML tags':
                // drops any tag whose name is not in the $HTMLtags whitelist
                $reg = "%<(?!(\/?(".$HTMLtags.")[> ]))([^>]*)>%i";
                $rep = "";
            break;
            case 'Change Microsoft Words bullets':
                // a paragraph starting with a middle dot becomes a list item
                $reg = "%<p[^>]*>(".$bullet."|&middot;)(.*?)<\/p>%".$char_mod;
                $rep = "<li>$2";
            break;
            case 'Remove soft hyphens':
                $reg = "%(&shy;?|".$shy.")%".$char_mod;
                $rep = "";
            break;
            case 'Remove style attribute':
                $reg = "% style=\"?[^\">]*\"?%i";
                $rep = "";
            break;
            case 'Remove class attribute':
                $reg = "% class=\"?[^\">]*[\"]?%i";
                $rep = "";
            break;
            case 'Remove <table> tags':
                // first pass strips the table tags outright ...
                $curHTML = _process_replace_text_apply("%<(table|/table|tr|tbody|/tbody|td|th) ?[^>]*>%i", "", $curHTML);
                // ... second pass turns the row/cell closing tags into line breaks
                $reg = "%<(/tr|/td|/th)>%i";
                $rep = "<br />";
            break;
            case 'Remove <span> tags':
                $reg = "%<\/?span( [^>]*>|>)%i";
                $rep = "";
            break;
            case 'Remove all empty tags':
                $reg = "%<([A-Z][A-Z0-9]*)( [^>]*)?>(&nbsp;| |\n|\t)*<\/\\1>%i";
                $rep = "";
            break;
            case 'Remove all tags attributes (except HREF and SRC)':
                $reg = '%<([^/ >]+)[^>]*?([^>]*?( (src|href)="?[^>"]*"?)[^>]*?)*[^>]*?>%i';
                $rep = "<$1$3>";
            break;
            default :
                // unknown option label: report it to the caller
                return false;
        }

        $protect_anchors = ($key == 'Remove all empty tags');

        // BUG#928 - special condition to allow empty anchor tag:
        // give empty <a name>/<a id> anchors a temporary body so they survive
        if ($protect_anchors) {
            $curHTML = _process_replace_text_apply("%(<A NAME[^>]*?>)(&nbsp;| |\n|\t)*(</A>)%i", "$1matrix_anchor_tmp$3", $curHTML);
            $curHTML = _process_replace_text_apply("%(<A ID[^>]*?>)(&nbsp;| |\n|\t)*(</A>)%i", "$1matrix_anchor_tmp$3", $curHTML);
        }

        $curHTML = _process_replace_text_apply($reg, $rep, $curHTML);

        // ... and take the temporary body out again
        if ($protect_anchors) {
            $curHTML = _process_replace_text_apply("%(<A NAME[^>]*?>)matrix_anchor_tmp(</A>)%i", "$1$2", $curHTML);
            $curHTML = _process_replace_text_apply("%(<A ID[^>]*?>)matrix_anchor_tmp(</A>)%i", "$1$2", $curHTML);
        }
    }

    return $curHTML;

}//end process_replace_text()


/**
* Runs one preg_replace() step for process_replace_text(), keeping the subject on PCRE failure
*
* preg_replace() returns NULL when PCRE fails (e.g. backtrack limit reached).
* Passing that NULL on would discard the content, so the unchanged subject is
* returned instead and a warning is raised.
*
* @param string	$pattern		the regular expression
* @param string	$replacement	the replacement string
* @param string	$subject		the text to work on
*
* @return string
*/
function _process_replace_text_apply($pattern, $replacement, $subject)
{
    $result = preg_replace($pattern, $replacement, $subject);
    if ($result === NULL) {
        trigger_error('process_replace_text(): PCRE error '.preg_last_error().' while applying pattern '.$pattern.' - replacement skipped', E_USER_WARNING);
        return $subject;
    }
    return $result;

}//end _process_replace_text_apply()

233 ?>