Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
hipo_job_import_thesaurus_xml.inc
1 <?php
17 require_once SQ_SYSTEM_ROOT.'/core/hipo/hipo_job.inc';
18 
32 {
33 
34 
/**
* Constructor
*
* Sets the uses_trans flag and then hands the code name to the HIPO_Job constructor.
* NOTE(review): uses_trans presumably asks the HIPO framework to wrap the job in a
* transaction - confirm against core/hipo/hipo_job.inc
*
* @param string $code_name  code name passed straight through to HIPO_Job()
*/
function HIPO_Job_Import_Thesaurus_XML($code_name='')
{
    // set ahead of the parent constructor call, same order as before
    $this->uses_trans = TRUE;

    $this->HIPO_Job($code_name);

}//end constructor
46 
47 
/**
* Returns the code name of this job instance
*
* The parent's code name is suffixed with '-' and the 'thesaurus_id' running var,
* so the resulting name differs per thesaurus.
*
* @return string
*/
function getCodeName()
{
    $base_code_name = parent::getCodeName();
    $thesaurus_id   = $this->_running_vars['thesaurus_id'];

    return $base_code_name.'-'.$thesaurus_id;

}//end getCodeName()
59 
60 
/**
* Returns the display name of this HIPO job
*
* @return string  whatever translate() yields for the job-name string code
*/
function getHipoName()
{
    $hipo_name = translate('thesaurus_hipo_name_import_thesaurus_xml');

    return $hipo_name;

}//end getHipoName()
72 
73 
/**
* Returns the definitions of the four steps this job is made of
*
* In order: a start step, a lock-acquisition step delegated to another HIPO job,
* a web-mode confirmation step that erases the thesaurus, and the XML parsing step.
*
* @return array  list of step definition arrays, in execution order
*/
function getInitialStepData()
{
    $steps = Array();

    // step 1: nothing to do but mark the job as started
    $steps[] = Array(
        'name'          => translate('thesaurus_hipo_starting_process'),
        'function_call' => Array(
            'process_function' => 'processStart',
        ),
        'running_mode'  => 'server',
        'auto_step'     => TRUE,
        'allow_cancel'  => FALSE,
        'percent_done'  => 0,
        'complete'      => FALSE,
        'message'       => translate('thesaurus_hipo_setting_up_env'),
    );

    // step 2: sub-job whose details are supplied by getLockAssetDetails()
    $steps[] = Array(
        'name'         => translate('thesaurus_hipo_acquiring_locks'),
        'hipo_job'     => Array(
            'init_details_function' => 'getLockAssetDetails',
        ),
        'running_mode' => 'server',
        'auto_step'    => TRUE,
        'allow_cancel' => TRUE,
        'percent_done' => 0,
        'complete'     => FALSE,
        'message'      => '',
    );

    // step 3: the only non-automatic step; it paints a confirmation form before erasing
    $steps[] = Array(
        'name'          => translate('thesaurus_hipo_erasing_thesaurus'),
        'function_call' => Array(
            'paint_function'   => 'paintEraseContents',
            'process_function' => 'processEraseContents',
        ),
        'running_mode'  => 'web',
        'auto_step'     => FALSE,
        'allow_cancel'  => TRUE,
        'percent_done'  => 0,
        'complete'      => FALSE,
        'message'       => '',
    );

    // step 4: block-wise import of the XML file
    $steps[] = Array(
        'name'          => translate('thesaurus_hipo_parsing_xml'),
        'function_call' => Array(
            'process_function' => 'processParseXML',
        ),
        'running_mode'  => 'server',
        'skip_step'     => FALSE,
        'auto_step'     => TRUE,
        'allow_cancel'  => FALSE,
        'percent_done'  => 0,
        'complete'      => FALSE,
        'message'       => '',
    );

    return $steps;

}//end getInitialStepData()
137 
138 
/**
* Runs the whole job in one go, without stepping through the HIPO interface
*
* Calls the erase step once, then repeats the parse step until it reports completion.
* NOTE(review): processEraseContents() only erases when $_REQUEST['form_submitted'] is
* set; when it is not, this method carries on parsing without erasing - confirm that
* is the intended behaviour for freestyle runs.
*
* @return boolean  FALSE as soon as any step fails, TRUE otherwise
*/
function freestyle()
{
    $step_data = Array();

    $erased_ok = $this->processEraseContents($step_data, '');
    if (!$erased_ok) {
        return FALSE;
    }

    // the erase step may have flagged itself complete; reset before parsing
    $step_data['complete'] = FALSE;
    do {
        $parsed_ok = $this->processParseXml($step_data, '');
        if (!$parsed_ok) {
            return FALSE;
        }
    } while (!$step_data['complete']);

    return TRUE;

}//end freestyle()
161 
162 
/**
* Validates the running vars before the job is allowed to start
*
* The job needs a 'thesaurus_id' running var that resolves to an asset. There is no
* fallback value: a missing id or an unresolvable asset aborts the job.
*
* @return boolean  FALSE (after raising a localised warning) when the thesaurus id is
*                  missing or getThesaurusRef() yields NULL; otherwise whatever
*                  parent::prepare() returns
*/
function prepare()
{
    // isset() is FALSE for both a missing key and a NULL value, so an absent
    // running var is reported via HIPO0061 without an undefined-index notice
    if (!isset($this->_running_vars['thesaurus_id'])) {
        trigger_localised_error('HIPO0061', E_USER_WARNING);
        return FALSE;
    }

    // we have an id, but make sure it actually resolves to an asset
    $asset = $this->getThesaurusRef();
    if (is_null($asset)) {
        trigger_localised_error('HIPO0062', E_USER_WARNING, $this->_running_vars['thesaurus_id']);
        return FALSE;
    }

    return parent::prepare();

}//end prepare()
187 
188 
/**
* Start step: does no work and immediately marks itself as finished
*
* @param array  &$step_data  step data; 'percent_done' and 'complete' are overwritten
* @param string $prefix      unused here
*
* @return boolean  always TRUE
*/
function processStart(&$step_data, $prefix)
{
    $finished_state = Array(
        'percent_done' => 100,
        'complete'     => TRUE,
    );

    foreach ($finished_state as $key => $value) {
        $step_data[$key] = $value;
    }

    return TRUE;

}//end processStart()
206 
207 
/**
* Paints the confirmation form shown before the thesaurus is erased
*
* Prints the confirmation message and adds a hidden 'form_submitted' field, which
* processEraseContents() looks for in the request.
*
* @param array  &$step_data  step data (not modified here)
* @param object &$o          outputter; must provide openSection(), openField(),
*                            addHiddenField(), closeField() and closeSection()
* @param string $prefix      unused here
*
* @return boolean  always TRUE
*/
function paintEraseContents(&$step_data, &$o, $prefix)
{
    $section_title = translate('thesaurus_hipo_erasing_thesaurus');
    $o->openSection($section_title);

        $o->openField('');
            print translate('thesaurus_hipo_confirm_erase');
            $o->addHiddenField('form_submitted', 1);
        $o->closeField();

    $o->closeSection();

    return TRUE;

}//end paintEraseContents()
230 
231 
/**
* Erases the thesaurus once the confirmation form has been submitted
*
* Without 'form_submitted' in the request nothing is erased and the step is left
* incomplete, while still reporting success.
*
* @param array  &$step_data  step data; marked 100% / complete after a successful erase
* @param string $prefix      unused here
*
* @return boolean  FALSE only when the thesaurus' erase() call fails
*/
function processEraseContents(&$step_data, $prefix)
{
    // not confirmed yet - nothing to do, and the step stays incomplete
    if (!isset($_REQUEST['form_submitted'])) {
        return TRUE;
    }

    $thesaurus = $this->getThesaurusRef();
    $erased    = $thesaurus->erase();
    if (!$erased) {
        trigger_localised_error('HIPO0063', E_USER_WARNING);
        return FALSE;
    }

    $step_data['percent_done'] = 100;
    $step_data['complete']     = TRUE;

    return TRUE;

}//end processEraseContents()
258 
259 
/**
* XML parser callback for an opening tag
*
* Maintains two stacks kept in the running vars (both have their top at index 0):
*  - 'stack':    one entry per open <thesaurus>/<term>; each entry holds 'term' (the
*                term asset id, NULL for the <thesaurus> root) and, while a <relation>
*                is open directly beneath it, 'relation' (the relation id)
*  - 'tagstack': the names of all currently open tags
* 'relations' caches relation name => relation id so each relation is added only once.
*
* @param resource $parser  the XML parser invoking the callback (unused)
* @param string   $tag     name of the tag being opened
* @param array    $attrs   attributes of the tag
*
* @return mixed  FALSE on any validation error (the tag is then NOT pushed onto the
*                tag stack); nothing on success
*/
function startElement($parser, $tag, $attrs)
{
    $stack     =& $this->_running_vars['stack'];
    $tagstack  =& $this->_running_vars['tagstack'];
    $relations =& $this->_running_vars['relations'];

    $thesaurus = $this->getThesaurusRef();

    // unknown tags are reported here; they are rejected further down
    if ($tag != 'thesaurus' && $tag != 'term' && $tag != 'relation' && $tag != 'note') {
        trigger_localised_error('HIPO0064', E_USER_WARNING, $tag);
    }

    // everything except the root element needs an enclosing element
    if (empty($stack) && $tag != 'thesaurus') {
        trigger_error('EMPTY STACK');
        return FALSE;
    }

    switch ($tag) {
        case 'thesaurus':
            // the root element may only appear once, at the very top
            if (!empty($stack)) {
                trigger_localised_error('HIPO0067', E_USER_WARNING, $tag);
                return FALSE;
            }
            array_unshift($stack, Array('term' => NULL));

        break;

        case 'relation':
            // relations cannot be nested directly inside each other
            if (isset($stack[0]['relation'])) {
                trigger_localised_error('HIPO0065', E_USER_WARNING, $tag);
                return FALSE;
            }

            $rel_name = array_get_index($attrs, 'name');
            $rel_id   = array_get_index($relations, $rel_name);
            if (is_null($rel_id)) {
                // first time we see this relation name: add it to the thesaurus
                $rel_id = $thesaurus->addRelation($rel_name);
                $relations[$rel_name] = $rel_id;
            }

            $stack[0]['relation'] = $rel_id;

        break;

        case 'term':
            if (empty($attrs['name'])) {
                trigger_localised_error('HIPO0066', E_USER_WARNING);
                return FALSE;
            }

            $rel_id = NULL;

            if (is_null($stack[0]['term'])) {
                // top-level term: linked straight under the thesaurus, no relation
                $parent_asset =& $thesaurus;
            } else {
                // nested term: linked under the enclosing term via the open relation
                $parent_asset = $GLOBALS['SQ_SYSTEM']->am->getAsset($stack[0]['term']);
                $rel_id       = array_get_index($stack[0], 'relation');
                if (is_null($rel_id)) {
                    // no <relation> wrapper: fall back to the unnamed (NULL) relation,
                    // adding it to the thesaurus the first time it is needed
                    $NULL_rel_id = array_get_index($relations, NULL);
                    if (is_null($NULL_rel_id)) {
                        $rel_id = $thesaurus->addRelation(NULL);
                        $relations[NULL] = $rel_id;
                    } else {
                        $rel_id = $NULL_rel_id;
                    }
                }
            }

            $create_link = Array();
            $create_link['asset'] =& $parent_asset;
            $create_link['value'] = $rel_id;

            $term_asset = new Thesaurus_Term();
            $term_asset->setAttrValue('name', $attrs['name']);
            // NOTE(review): the result of create() is not checked, so a failed creation
            // still pushes whatever $term_asset->id holds - confirm whether it can fail here
            $term_asset->create($create_link);

            // the new term becomes the parent of anything nested inside it
            array_unshift($stack, Array('term' => $term_asset->id));

        break;

        case 'note':
            if ($tagstack[0] != 'term') {
                trigger_error('NOTE tag can only be a child of TERM tag', E_USER_WARNING);
                return FALSE;
            }

            if (empty($attrs['name'])) {
                trigger_error('Note must have a Name', E_USER_WARNING);
                return FALSE;
            }

            // the text is collected by cDataHandler() and stored by endElement()
            $this->_running_vars['current_note']['name'] = $attrs['name'];

        break;

        default:
            trigger_localised_error('HIPO0067', E_USER_WARNING, $tag);
            return FALSE;

    }//end switch $tag

    array_unshift($tagstack, $tag);

}//end startElement()
379 
380 
/**
* XML parser callback for character data
*
* Only character data directly inside a <note> element is kept; it is appended to
* the 'text' of the note currently being collected. Everything else is ignored.
*
* @param resource $parser  the XML parser invoking the callback (unused)
* @param string   $data    the chunk of character data
*
* @return void
*/
function cDataHandler($parser, $data)
{
    $tagstack =& $this->_running_vars['tagstack'];

    if (!isset($tagstack[0]) || $tagstack[0] != 'note') {
        return;
    }

    // the parser can deliver one text block in several chunks (e.g. when it contains
    // newlines), so the chunks are accumulated rather than overwritten
    if (!isset($this->_running_vars['current_note']['text'])) {
        $this->_running_vars['current_note']['text'] = '';
    }
    $this->_running_vars['current_note']['text'] .= $data;

}//end cDataHandler()
409 
410 
/**
* XML parser callback for a closing tag
*
* Unwinds the state built up by startElement(): pops the term stack for
* <thesaurus>/<term>, clears the open relation for <relation>, and stores the
* collected note on the enclosing term for <note>.
*
* @param resource $parser  the XML parser invoking the callback (unused)
* @param string   $tag     name of the tag being closed
*
* @return mixed  FALSE on any error (the tag stack is then left untouched);
*                nothing on success
*/
function endElement($parser, $tag)
{
    $stack    =& $this->_running_vars['stack'];
    $tagstack =& $this->_running_vars['tagstack'];

    // unknown tags are reported here; they are rejected further down
    if ($tag != 'thesaurus' && $tag != 'term' && $tag != 'relation' && $tag != 'note') {
        trigger_localised_error('HIPO0064', E_USER_WARNING, $tag);
    }

    if (empty($stack)) {
        trigger_error('EMPTY STACK');
        return FALSE;
    }

    // the tag being closed must be the one opened most recently
    if ($tagstack[0] != $tag) {
        trigger_error('Malformed XML. Cannot close tag <'.$tag.'> while being in <'.$tagstack[0].'>');
        return FALSE;
    }

    switch ($tag) {
        case 'thesaurus':
            // only the root entry (term === NULL) may be closed by </thesaurus>
            if (is_null($stack[0]['term'])) {
                array_shift($stack);
            } else {
                trigger_error('Malformed XML. Unexpected tag: <'.$tag.'>.');
                return FALSE;
            }
        break;

        case 'relation':
            // isset() rather than is_null() so a missing key cannot raise a notice
            if (isset($stack[0]['relation'])) {
                unset($stack[0]['relation']);
            } else {
                trigger_error('Malformed XML. Unexpected tag: <'.$tag.'>.');
                return FALSE;
            }
        break;

        case 'term':
            if (!is_null($stack[0]['term'])) {
                array_shift($stack);
            } else {
                trigger_error('Malformed XML. Unexpected tag: <'.$tag.'>.');
                return FALSE;
            }
        break;

        case 'note':
            $note_name = $this->_running_vars['current_note']['name'];
            // cDataHandler() is never invoked for an empty element such as
            // <note name="x"/>, which leaves 'text' unset - treat that as empty text
            $note_value = '';
            if (isset($this->_running_vars['current_note']['text'])) {
                $note_value = $this->_running_vars['current_note']['text'];
            }

            $term = $GLOBALS['SQ_SYSTEM']->am->getAsset($stack[0]['term']);
            $term->addTermNote($note_name, $note_value);
            unset($this->_running_vars['current_note']);

        break;

        default:
            trigger_localised_error('HIPO0067', E_USER_WARNING, $tag);
            return FALSE;
    }//end switch

    array_shift($tagstack);

}//end endElement()
485 
486 
/**
* Parses the next portion of the thesaurus XML file
*
* Each call handles at most $blocks_per_step blocks of $block_size bytes. Because a
* parser cannot be carried over between calls, every call creates a new one and first
* replays the bytes already consumed with no handlers attached; the handlers are only
* attached for the new portion. The file handle and the parser are released on every
* exit path.
*
* @param array  &$step_data  step data; 'total_read', 'total_parsed' and 'remainder'
*                            carry progress between calls, and 'percent_done',
*                            'complete' and 'message' are updated on success
* @param string $prefix      unused here
*
* @return boolean  FALSE when the file cannot be opened or the XML fails to parse
*/
function processParseXml(&$step_data, $prefix)
{
    $block_size      = 4096;
    $blocks_per_step = 10;

    $GLOBALS['SQ_SYSTEM']->am->includeAsset('thesaurus_term');

    if (!isset($step_data['total_read'])) {
        // assume this is the first step because we must read something on each step
        $step_data['total_parsed'] = 0;
        $step_data['total_read']   = 0;
        $step_data['remainder']    = '';
    }

    // state shared with the element handlers; created once and reused across steps
    foreach (Array('stack', 'tagstack', 'relations') as $var_name) {
        if (!isset($this->_running_vars[$var_name])) {
            $this->_running_vars[$var_name] = Array();
        }
    }

    // initialise the file pointer
    $th_asset    = $this->getThesaurusRef();
    $import_path = $th_asset->getXmlFilePath();

    if (!($fp = fopen($import_path, 'r'))) {
        trigger_localised_error('CORE0257', E_USER_WARNING, $import_path);
        return FALSE;
    }

    // created only once the file is open, so a failed fopen() leaves nothing to free
    $xml_parser = xml_parser_create();
    // stop the parser ucasing everything
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 0);

    $fstat     = fstat($fp);
    $file_size = $fstat['size'];

    $success = TRUE;

    if ($step_data['total_read'] > 0) {
        // replay the already-seen portion with the handlers unset to recreate the parser state
        $data = fread($fp, $step_data['total_read']);
        if (!xml_parse($xml_parser, $data, feof($fp))) {
            trigger_localised_error('CORE0258', E_USER_WARNING, xml_error_string(xml_get_error_code($xml_parser)), htmlentities($data, ENT_COMPAT, SQ_CONF_DEFAULT_CHARACTER_SET));
            $success = FALSE;
        }
    }

    if ($success) {
        xml_set_element_handler($xml_parser, Array(&$this, 'startElement'), Array(&$this, 'endElement'));
        xml_set_character_data_handler($xml_parser, Array(&$this, 'cDataHandler'));

        $blocks = 1;
        while (!feof($fp) && $blocks <= $blocks_per_step) {

            // parse blocks from the file
            $data = fread($fp, $block_size);
            if (!xml_parse($xml_parser, $data, feof($fp))) {
                trigger_localised_error('CORE0258', E_USER_WARNING, xml_error_string(xml_get_error_code($xml_parser)), htmlentities($data, ENT_COMPAT, SQ_CONF_DEFAULT_CHARACTER_SET));
                $success = FALSE;
                break;
            }

            // NOTE(review): xml_get_current_byte_index() is an offset into everything fed
            // to the parser so far, not into $data, so 'remainder' and 'total_parsed' are
            // unlikely to hold what their names suggest. Nothing in this class reads
            // them; kept unchanged in case other code does - confirm before relying on them.
            $parsed = xml_get_current_byte_index($xml_parser);

            $step_data['remainder']    = substr($data, $parsed);
            $step_data['total_read']   = $step_data['total_read'] + strlen($data);
            $step_data['total_parsed'] = $step_data['total_parsed'] + $parsed;
            $blocks++;
        }
    }

    if ($success) {
        // guard the division: a zero-length file has nothing left to do
        $step_data['percent_done'] = ($file_size > 0) ? ($step_data['total_read'] / $file_size) * 100 : 100;
        $step_data['complete']     = feof($fp) ? TRUE : FALSE;
        $step_data['message']      = 'Parsed '.sprintf('%01.2f', $step_data['total_read'] / 1024).' Kb of '.sprintf('%01.2f', $file_size / 1024).' Kb';
    }

    // a new parser is built on every call, so this one is always released here
    xml_parser_free($xml_parser);
    fclose($fp);

    return $success;

}//end processParseXml()
588 
589 
/**
* Supplies the details of the lock-acquisition sub-job
*
* All three arguments are outputs, filled in for the caller.
*
* @param string &$job_type      set to 'hipo_job_acquire_locks'
* @param array  &$running_vars  replaced with the running vars for the sub-job: an
*                               'attributes' lock on the thesaurus asset only
* @param array  &$options       'auto_complete' is switched on
*
* @return void
*/
function getLockAssetDetails(&$job_type, &$running_vars, &$options)
{
    $job_type = 'hipo_job_acquire_locks';

    $options['auto_complete'] = TRUE;

    $lock_vars = Array();
    $lock_vars['assetids']          = Array($this->_running_vars['thesaurus_id']);
    $lock_vars['lock_type']         = 'attributes';
    $lock_vars['dependants_only']   = FALSE;
    $lock_vars['forceably_acquire'] = FALSE;

    $running_vars = $lock_vars;

}//end getLockAssetDetails()
614 
615 
/**
* Returns the thesaurus asset this job operates on
*
* The asset is looked up through the asset manager using the 'thesaurus_id' running var.
*
* @return object  whatever the asset manager's getAsset() yields; prepare() treats
*                 NULL as "no such asset"
*/
function &getThesaurusRef()
{
    $thesaurus_id = $this->_running_vars['thesaurus_id'];
    $thesaurus    = $GLOBALS['SQ_SYSTEM']->am->getAsset($thesaurus_id);

    // returned via a local variable because the method returns by reference
    return $thesaurus;

}//end getThesaurusRef()
628 
629 
630 }//end class
631 ?>