Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
regen_metadata_by_root_nodes.php
1 <?php
error_reporting(E_ALL);

// This script forks worker processes, so it may only be run from the command line.
if (php_sapi_name() != 'cli') {
    trigger_error("You can only run this script from the command line\n", E_USER_ERROR);
    // A user-level error only halts the script under the default error handler; make the stop explicit.
    exit(1);
}//end if

// Both the system root and the root node list are mandatory.
if (!isset($_SERVER['argv']) || count($_SERVER['argv']) < 3) {
    echo "Usage: php scripts/regen_metadata_by_root_nodes.php <SYSTEM_ROOT> <ASSETID[, ASSETID]> <MAX_THREAD_NUM> <BATCH_SIZE> <--skip-asset-update> <--direct-children-only> \n";
    exit();
}//end if

// Argument 1: path to the Matrix system root.
$SYSTEM_ROOT = (isset($_SERVER['argv'][1])) ? $_SERVER['argv'][1] : '';
if (empty($SYSTEM_ROOT)) {
    echo "ERROR: You need to supply the path to the System Root as the first argument\n";
    exit();
}

if (!is_dir($SYSTEM_ROOT) || !is_readable($SYSTEM_ROOT.'/core/include/init.inc')) {
    echo "ERROR: Path provided doesn't point to a Matrix installation's System Root. Please provide correct path and try again.\n";
    exit();
}

// Argument 2: comma separated list of root node asset ids.
// Whitespace is stripped before the emptiness check so that a blank
// argument is rejected here instead of being looked up as an asset id.
$assetids = (isset($_SERVER['argv'][2])) ? preg_replace('/\s+/', '', $_SERVER['argv'][2]) : '';
if (empty($assetids)) {
    echo "ERROR: You need to specify the root nodes to regenerate metadata from as the second argument\n";
    exit();
}//end if

// Argument 3: number of simultaneous worker processes.
// Anything that is not a whole number between 1 and 5 falls back to 3.
$max_thread_num = (isset($_SERVER['argv'][3])) ? $_SERVER['argv'][3] : '';
if (!preg_match('/^\d+$/', $max_thread_num) || ((int) $max_thread_num < 1) || ((int) $max_thread_num > 5)) {
    $max_thread_num = 3;
}
$max_thread_num = (int) $max_thread_num;

// Argument 4: number of assets handed to each worker process.
// Anything that is not a whole number of at least 1 falls back to 50.
$batch_size = (isset($_SERVER['argv'][4])) ? $_SERVER['argv'][4] : '';
if (!preg_match('/^\d+$/', $batch_size) || ((int) $batch_size < 1)) {
    $batch_size = 50;
}
$batch_size = (int) $batch_size;

// Optional flags. They are recognised anywhere after the root node list, so
// a flag given in place of an omitted numeric argument is still honoured
// (the numeric argument it replaced has already fallen back to its default).
$update_assets   = TRUE;
$max_asset_depth = NULL;
foreach (array_slice($_SERVER['argv'], 3) as $option) {
    if ($option === '--skip-asset-update') {
        $update_assets = FALSE;
    } else if ($option === '--direct-children-only') {
        $max_asset_depth = 1;
    }//end if
}//end foreach


define('LOG_FILE', $SYSTEM_ROOT.'/data/private/logs/regen_metadata_by_root_nodes.log'); // This is the log file
define('SYNCH_FILE', $SYSTEM_ROOT.'/data/private/logs/regen_metadata_by_root_nodes.assetid'); // Holds the ids of the assets to be regenerated; written by the collector process, read by the parent
define('BATCH_SIZE', $batch_size); // The number of assets processed by one worker process
define('MAX_CONCURRENCY', $max_thread_num); // The maximum number of worker processes running at the same time
79 
// Remove any list left behind by an earlier run. The parent decides whether
// the collector succeeded by looking for this file, so a stale copy would
// make a failed collection look like a successful one.
if (file_exists(SYNCH_FILE)) {
    unlink(SYNCH_FILE);
}//end if

// Collect the assets to process in a separate process. The child writes the
// ids to SYNCH_FILE; the parent only waits for it to finish.
$pid_prepare = pcntl_fork();
switch ($pid_prepare) {
    case -1:
        echo "ERROR: Process failed to fork while collecting the assets under the root nodes\n";
        exit(1);

    case 0:
        require_once $SYSTEM_ROOT.'/core/include/init.inc';

        $root_user = $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('root_user');

        // Workaround for Oracle, forking and CLOBs: if a query that returns
        // more than one LOB is executed before a fork occurs, the Oracle DB
        // connection is lost inside the fork. A user asset has more than one
        // attribute and custom_val in sq_ast_attr_val is a CLOB, so the root
        // user is loaded and logged in here, inside the forked process.
        if (!$GLOBALS['SQ_SYSTEM']->setCurrentUser($root_user)) {
            echo "ERROR: Failed logging in as root user\n";
            exit(1);
        }//end if

        // Split the argument into a list of root node ids (exits if any of them is unknown)
        $rootnodes = getRootNodes($assetids);

        // Append the ids found under every root node, in root node order
        $children = Array();
        foreach ($rootnodes as $rootnode_id) {
            $children = array_merge($children, array_keys($GLOBALS['SQ_SYSTEM']->am->getChildren($rootnode_id, '', TRUE, NULL, NULL, NULL, TRUE, 1, $max_asset_depth)));
        }//end foreach

        // Save the list into a file so the parent process can read it
        if (file_put_contents(SYNCH_FILE, implode(',', $children)) === FALSE) {
            echo "ERROR: Unable to write the asset list to ".SYNCH_FILE."\n";
            exit(1);
        }//end if

        exit(0);

    default:
        // Parent: wait for the collector process to exit
        $status = NULL;
        pcntl_waitpid($pid_prepare, $status);
        break;
}//end switch
124 
// The collector process leaves the asset ids in SYNCH_FILE
if (!file_exists(SYNCH_FILE)) {
    echo "Unable to find Synch File, probably because the root user was not able to log in, or the user executing this script does not have permission to write to this folder.\n";
    exit(0);
}//end if

$children_str = (string) file_get_contents(SYNCH_FILE);

// Drop empty entries: an empty file would otherwise explode into a single
// empty asset id and be processed as if it were a real asset.
$children = array_filter(explode(',', $children_str), 'strlen');

// Metadata is only regenerated once per asset, even if the asset is linked in several places
$children = array_unique($children);

// Chunk them up so each forked worker processes one batch.
// array_chunk() needs an integer size of at least 1.
$chunk_children     = array_chunk($children, max(1, (int) BATCH_SIZE));
$current_child_list = Array();

log_to_file('======================= Start Regenerating Metadata '.date('d-m-Y h:i:s').' =======================', LOG_FILE);
log_to_file("Regenerating for: ".count($children)." assets \n", LOG_FILE);
142 
// Number of worker processes that have been forked and not yet reaped
$fork_num = 0;

while (!empty($chunk_children)) {
    // Each worker gets one batch; the child sees the value assigned here
    $current_child_list = array_pop($chunk_children);
    $pid = pcntl_fork();

    switch ($pid) {
        case -1:
            trigger_error('Process failed to fork while regenerating metadata', E_USER_ERROR);
            exit(1);

        case 0:
            // Worker process: initialise the system inside the fork and run as the root user
            require_once $SYSTEM_ROOT.'/core/include/init.inc';

            $am = $GLOBALS['SQ_SYSTEM']->am;
            if (!$GLOBALS['SQ_SYSTEM']->setCurrentUser($am->getSystemAsset('root_user'))) {
                log_to_file('Failed logging in as root user. Skipping assetids '.implode(',', $current_child_list).'.', LOG_FILE);
                exit(1);
            }//end if

            $mm = $GLOBALS['SQ_SYSTEM']->getMetadataManager();

            foreach ($current_child_list as $child_assetid) {
                $child_asset = $am->getAsset($child_assetid);
                if (!is_object($child_asset)) {
                    // Without this guard the writeAccess() call below would be a fatal error
                    log_to_file('Unable to load assetid ' .$child_assetid.'. Skipping this asset.', LOG_FILE);
                    continue;
                }//end if

                if (!$am->acquireLock($child_assetid, 'metadata')) {
                    log_to_file('Unable to acquire metadata lock for assetid ' .$child_assetid.'. Skipping this asset.', LOG_FILE);
                    $am->forgetAsset($child_asset);
                    unset($child_asset);
                    continue;
                }//end if

                // From here on the lock is held, so every outcome falls through
                // to the release below instead of skipping it with "continue".
                if (!$child_asset->writeAccess('metadata')) {
                    log_to_file('Do not have write access for assetid ' .$child_assetid .'. Skipping this asset.', LOG_FILE);
                } else if (!$mm->regenerateMetadata($child_assetid, NULL, $update_assets)) {
                    log_to_file('Failed regenerating metadata for assetid ' .$child_assetid .'.', LOG_FILE);
                } else {
                    log_to_file('Regenerated Metadata for child assetid '.$child_assetid, LOG_FILE);
                }//end if

                $am->releaseLock($child_assetid, 'metadata');
                $am->forgetAsset($child_asset);
                $child_asset = NULL;
                unset($child_asset);

            }//end foreach

            $GLOBALS['SQ_SYSTEM']->restoreCurrentUser();

            // The worker must stop here so it never runs the parent's loop
            exit(0);

        default:
            // Parent process
            $fork_num++;

            // Only a maximum number of workers may run at once, so if the limit has been reached, wait for one to exit
            if ($fork_num >= MAX_CONCURRENCY) {
                $status = NULL;
                pcntl_waitpid(-1, $status);
                $fork_num--;
            }//end if

            // No batches left: wait for all remaining workers to finish
            if (empty($chunk_children)) {
                while ($fork_num > 0) {
                    $status = NULL;
                    pcntl_waitpid(-1, $status);
                    $fork_num--;
                }//end while
            }//end if

            break;

    }//end switch
}//end while


log_to_file('======================= Finished Regenerating Metadata '.date('d-m-Y h:i:s').' =======================', LOG_FILE);

// The id list is only needed for the duration of one run
if (file_exists(SYNCH_FILE)) {
    unlink(SYNCH_FILE);
}//end if
exit(0);
223 
224 
225 
/**
* Print a prompt and read one line of input from STDIN
*
* @param string	$prompt	text to display before waiting for input
*
* @return string	the entered line with trailing whitespace (including the new line) removed
*/
function get_line($prompt='')
{
    print $prompt;

    // Read at most one line from standard input
    $entered = fgets(STDIN, 4096);

    // Strip the trailing new line before handing the entry back
    return rtrim($entered);

}//end get_line()
241 
242 
/**
* Append one timestamped line to a log file
*
* The line is written as "[dd-mm-yyyy hh:mm:ss] <content>" followed by a new line.
*
* @param string	$content	the message to log
* @param string	$file_name	path of the file to append to
*
* @return void
*/
function log_to_file($content, $file_name="regen_metadata_by_root_nodes.log")
{
    $timestamp = date('d-m-Y h:i:s');
    $entry     = '['.$timestamp.'] '.$content."\n";

    // FILE_APPEND keeps the existing entries and adds this one at the end
    file_put_contents($file_name, $entry, FILE_APPEND);

}//end log_to_file();
252 
253 
/**
* Turn a comma separated list of root node ids into an array
*
* Every id is checked with the asset manager's assetExists(). If any id is
* not reported back, the missing ids are printed and the script exits with
* status 1.
*
* @param string	$action	comma separated asset ids
*
* @return array	the ids, in the order they were given
*/
function getRootNodes($action)
{
    $requested = explode(',', $action);

    // Ask the asset manager which of the requested ids it knows about
    $known = $GLOBALS['SQ_SYSTEM']->am->assetExists($requested);

    $not_exists = array_diff($requested, $known);
    if (empty($not_exists)) {
        return $requested;
    }//end if

    // At least one id is unknown: report the unknown ids and stop
    $list_not_exists = implode(', ', $not_exists);
    echo "These rootnode ids do not exists in the system: $list_not_exists \n";
    exit(1);

}//end getRootNodes()
270 
271 ?>