Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
hipo_job_structured_file_import.inc
1 <?php
17 require_once SQ_SYSTEM_ROOT.'/core/hipo/hipo_job.inc';
18 require_once SQ_FUDGE_PATH.'/general/file_system.inc';
19 
35 {
36 
37 
/**
 * Constructor
 *
 * Switches off transaction use for this job and then hands the code name
 * on to the HIPO_Job() initialiser.
 *
 * @param string $code_name  code name passed through to HIPO_Job()
 */
function HIPO_Job_Structured_File_Import($code_name='')
{
    // the flag is assigned first, then initialisation is delegated
    $this->uses_trans = FALSE;

    $this->HIPO_Job($code_name);

}//end constructor
49 
50 
/**
 * Builds the code name for this job from its running vars
 *
 * The name is the fixed job prefix followed by the root asset id and the
 * import file, separated by dashes.
 *
 * @return string
 */
function getCodeName()
{
    $root_assetid = $this->_running_vars['root_assetid'];
    $import_file  = $this->_running_vars['import_file'];

    return 'HIPO_Job_Structured_File_Import-'.$root_assetid.'-'.$import_file;

}//end getCodeName()
62 
63 
/**
 * Returns the display name of this HIPO job
 *
 * @return string
 */
function getHipoName()
{
    $hipo_name = 'Hipo Job Large Document Import';

    return $hipo_name;

}//end getHipoName()
75 
76 
/**
 * Returns the initial definition of every step of this job
 *
 * All six steps share the same settings (server running mode, auto step,
 * cancellable, not started); only the display name and the name of the
 * processing method differ.
 *
 * @return array  list of step definitions, in execution order
 */
function getInitialStepData()
{
    // step display name => method that processes the step, in execution order
    $step_handlers = Array(
        'Tidying Document'                 => 'processTidying',
        'Processing Headings'              => 'processHeadings',
        'Processing Images'                => 'processImages',
        'Scanning Document'                => 'processScanning',
        'Splitting Document'               => 'processSplitting',
        'Performing Additional Operations' => 'processAdditional',
    );

    $steps = Array();
    foreach ($step_handlers as $step_name => $process_function) {
        $steps[] = Array(
            'name'          => $step_name,
            'function_call' => Array(
                'process_function' => $process_function,
            ),
            'running_mode'  => 'server',
            'auto_step'     => TRUE,
            'percent_done'  => 0,
            'complete'      => FALSE,
            'message'       => '',
            'allow_cancel'  => TRUE,
        );
    }

    return $steps;

}//end getInitialStepData()
162 
163 
/**
 * Runs every step of the job back to back, without the stepped interface
 *
 * Each process*() method is called repeatedly until the running vars show
 * that its work is finished. The first step that returns FALSE aborts the
 * whole run.
 *
 * @return boolean  TRUE when every step succeeded, FALSE otherwise
 */
function freestyle()
{
    $code_name = $this->getCodeName();

    if (!$this->processTidying($this->_steps[0], $code_name)) {
        return FALSE;
    }

    // processHeadings() removes the entries it has handled from this list
    while (count($this->_running_vars['headings_to_process']) != 0) {
        if (!$this->processHeadings($this->_steps[1], $code_name)) {
            return FALSE;
        }
    }

    // processImages() is the method that discovers the image files, and
    // processTidying() leaves image_count at 0 and image_names empty.
    // It therefore has to run at least once; a pre-checked loop would never
    // be entered and no image would ever be imported.
    do {
        if (!$this->processImages($this->_steps[2], $code_name)) {
            return FALSE;
        }
    } while ($this->_running_vars['image_count'] != count($this->_running_vars['image_names']));

    // processScanning() resets running_count to 0 once every top level
    // page has been scanned
    do {
        if (!$this->processScanning($this->_steps[3], $code_name)) {
            return FALSE;
        }
    } while ($this->_running_vars['running_count'] != 0);

    while ($this->_running_vars['top_level_count'] != $this->_running_vars['running_count']) {
        if (!$this->processSplitting($this->_steps[4], $code_name)) {
            return FALSE;
        }
    }

    if (!$this->processAdditional($this->_steps[5], $code_name)) {
        return FALSE;
    }

    return TRUE;

}//end freestyle()
208 
209 
/**
 * Step 1: cleans up the imported document and primes the running vars
 *
 * Reads the import file, strips HTML comments, collects <style> blocks and
 * the <title>, normalises some characters, reduces the document to the
 * contents of <body>, optionally removes Word specific markup and runs
 * Tidy over it, records where the configured headings occur, and creates
 * the new Site when one was requested.
 *
 * @param array  &$step_data  step information; 'complete' and 'percent_done' are updated
 * @param string $prefix      not used by this step
 *
 * @return boolean  FALSE when the file cannot be read or the site cannot be created
 */
function processTidying(&$step_data, $prefix)
{
    $this->_running_vars['styles'] = Array();

    $file_data = file_get_contents($this->_running_vars['import_file']);
    if ($file_data === FALSE) {
        // nothing sensible can be imported from an unreadable file
        trigger_error('Failed to read import file: '.$this->_running_vars['import_file'], E_USER_WARNING);
        return FALSE;
    }

    // Remove any HTML comments that exist
    $this->_removeHtmlComment($file_data);

    $style_matches = Array();
    preg_match_all('/<style[^>]*>([^<]*)<\/\s*style>/i', $file_data, $style_matches);

    foreach ($style_matches[1] as $style_data) {
        $this->_running_vars['styles'][] = $style_data;
    }

    $title_matches = Array();
    preg_match('/<title[^>]*>([^<]*)<\/\s*title>/i', $file_data, $title_matches);
    if (!empty($title_matches)) {
        $this->_running_vars['site_title'] = $title_matches[1];
    } else {
        $this->_running_vars['site_title'] = '';
    }

    // Replace every complete style block that was found with an empty string.
    // This is needed in case there are style blocks inside the body that we don't need
    $file_data = str_replace($style_matches[0], '', $file_data);

    // DOS / old Mac newlines. "\r\n" is handled first so that one DOS line
    // break becomes one "\n" rather than two.
    $file_data = str_replace(Array("\r\n", "\r"), "\n", $file_data);

    // Extended Ascii
    $file_data = str_replace(chr(145), '\'', $file_data);
    $file_data = str_replace(chr(146), '\'', $file_data);
    $file_data = str_replace(chr(147), '"', $file_data);
    $file_data = str_replace(chr(148), '"', $file_data);
    $file_data = str_replace(chr(149), '-', $file_data);
    $file_data = str_replace(chr(150), '-', $file_data);

    // NOTE(review): chr(92) is a backslash and chr(97) is a plain 'a' (so the
    // second call is a no-op). These look like mangled character codes but
    // are kept as they were - confirm the intended characters.
    $file_data = str_replace(chr(92), '\'', $file_data);
    $file_data = str_replace(chr(97), 'a', $file_data);
    $file_data = str_replace(chr(160), '&nbsp;', $file_data);

    $body_start_match = Array();
    $body_end_match   = Array();
    preg_match('/<body[^>]*>/i', $file_data, $body_start_match, PREG_OFFSET_CAPTURE);
    preg_match('/<\/\s*body\s*>/i', $file_data, $body_end_match, PREG_OFFSET_CAPTURE);

    if (empty($body_start_match)) {
        $body_start = 0;
    } else {
        $body_start = $body_start_match[0][1];
    }

    if (empty($body_end_match)) {
        $body_end = strlen($file_data);
    } else {
        $body_end = $body_end_match[0][1];
    }
    $body = substr($file_data, $body_start, $body_end - $body_start);
    // Result is the page stripped down to only the content inside the body tags
    if (!empty($body_start_match)) {
        $body = str_replace($body_start_match[0][0], '', $body);
    }

    // Word Stuff
    if ($this->_running_vars['remove_word']) {
        // Fix the problems where the shorttags are not put in quotes
        $body = preg_replace('/class=([a-z]+[a-z\d]*)/i', 'class="\\1"', $body);
        $body = preg_replace('/lang=([a-z]+[a-z\d-]*)/i', 'lang="\\1"', $body);
        $body = preg_replace('/align=[a-z]+\s/i', ' ', $body);
        $body = preg_replace('/<[\/]?st1:[^>]+>/i', ' ', $body);

        // Vector formats
        $body = preg_replace('/<(\/)?v:shapetype[^>]*>/', '', $body);
        $body = preg_replace('/<(\/)?v:shape[^>]*>/', '', $body);
        $body = preg_replace('/<v:stroke[^>]*>/', '', $body);
        $body = preg_replace('/<(\/)?v:formulas[^>]*>/', '', $body);
        $body = preg_replace('/<v:f[^>]*>/', '', $body);
        $body = preg_replace('/<v:path[^>]*>/', '', $body);
        $body = preg_replace('/<v:rect[^>]*>/', '', $body);
        $body = preg_replace('/<v:line[^>]*>/', '', $body);

        $body = preg_replace('/<(\/)?o:p[^>]*>/', '', $body);
        $body = preg_replace('/<!\[if ![a-zA-Z !&]+\]>/', '', $body);
        $body = str_replace('<![endif]>', '', $body);
    }

    // Run HTMLTIDY
    if ($this->_running_vars['run_tidy']) {
        $tmp_fname   = $this->_running_vars['import_file'].'_tmp';
        $tmp_created = FALSE;
        $success     = FALSE;

        if (is_writable($this->_running_vars['import_dir'])) {
            if (($fh = fopen($tmp_fname, 'w')) !== FALSE) {
                $tmp_created = TRUE;
                if (fwrite($fh, $body) !== FALSE) {
                    $success = TRUE;
                }
                // close the handle whether or not the write worked
                fclose($fh);
            }
        }

        if ($success && file_exists(SQ_TOOL_HTML_TIDY_PATH)) {
            // tidy the HTML produced using the PHP5 Tidy
            $tidy = new tidy;

            $config = Array (
                'output-xhtml'      => TRUE,
                'preserve-entities' => TRUE,
                'show-body-only'    => TRUE,
                'wrap'              => FALSE,
                'word-2000'         => TRUE,
                'show-warnings'     => FALSE,
                'show-errors'       => 0,
                'force-output'      => TRUE,
                'quote-marks'       => TRUE,
            );

            // keep the untidied body when the temp file cannot be parsed
            if ($tidy->parseFile($tmp_fname, $config)) {
                $tidy->cleanRepair();
                // store the tidied markup as a plain string, not the tidy
                // object, so file_data is a string for every later step
                $body = tidy_get_output($tidy);
            }
            unset($tidy);
        }

        // never leave the temp file behind, whichever branch was taken
        if ($tmp_created && file_exists($tmp_fname)) {
            unlink($tmp_fname);
        }
    }//end if tidy

    $this->_running_vars['headings_to_process'] = Array();
    // Due to Word inserting newlines in some Heading tags, we need to strip them out, or our regex wont work.
    // Here only the position of every heading is recorded; processHeadings() does the stripping.
    foreach ($this->_running_vars['headings'] as $heading) {
        $tidy_matches = Array();
        preg_match_all('/<\s*'.$heading.'/i', $body, $tidy_matches, PREG_OFFSET_CAPTURE);
        foreach ($tidy_matches[0] as $data) {
            $this->_running_vars['headings_to_process'][] = Array('heading' => $heading, 'data' => $data);
        }
    }

    // Set the filtered data to the file_data var
    $this->_running_vars['file_data']        = $body;
    $this->_running_vars['started_scanning'] = FALSE;

    // Create the new Site if its been selected
    if ($this->_running_vars['create_new_site'] == TRUE) {
        $GLOBALS['SQ_SYSTEM']->am->includeAsset('site');
        $root_folder = $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('root_folder');
        if (!empty($this->_running_vars['root_assetid'])) {
            $root_link_parent = $GLOBALS['SQ_SYSTEM']->am->getAsset($this->_running_vars['root_assetid']);
        } else {
            $root_link_parent =& $root_folder;
        }
        $root_link = Array('asset' => &$root_link_parent, 'link_type' => SQ_LINK_TYPE_1);

        $new_site = new Site();

        $new_site_name = $this->_running_vars['new_site_name'];

        if (trim($new_site_name) == '') {
            if ($this->_running_vars['site_title'] == '') {
                // fall back to the file name without its extension
                $file_name = basename($this->_running_vars['import_file']);
                $dot_pos   = strrpos($file_name, '.');
                if ($dot_pos !== FALSE) {
                    // only cut when there is an extension; a FALSE position
                    // would otherwise produce an empty name
                    $file_name = substr($file_name, 0, $dot_pos);
                }
                $new_site_name = ucwords(str_replace('_', ' ', $file_name));
            } else {
                // The doco has a title specified, so we should use it for the site.
                $new_site_name = $this->_running_vars['site_title'];
            }
        }

        $new_site->setAttrValue('name', $new_site_name);

        if (!$new_site->create($root_link)) {
            // without the site there is no root asset to import under
            trigger_error('Failed to create Site: '.$new_site_name, E_USER_WARNING);
            return FALSE;
        }

        $this->_running_vars['root_assetid']    = $new_site->id;
        $this->_running_vars['create_new_site'] = FALSE;
    }//end if

    // starting values for the image step
    $this->_running_vars['processed_images'] = FALSE;
    $this->_running_vars['image_folder_id']  = '0';
    $this->_running_vars['image_count']      = 0;
    $this->_running_vars['image_names']      = Array();

    $step_data['complete']     = TRUE;
    $step_data['percent_done'] = 100;

    return TRUE;

}//end processTidying()
407 
408 
/**
 * Step 2: removes line breaks from inside heading elements
 *
 * Works through 'headings_to_process' (filled by processTidying()) in
 * batches of at most 100 entries per call. For each entry, the text between
 * the recorded heading offset and its closing tag has "\n" and "\r"
 * replaced by spaces. The replacement keeps the length of the text, so the
 * offsets recorded for the remaining headings stay valid.
 *
 * @param array  &$step_data  step information; 'complete' and 'percent_done' are updated
 * @param string $prefix      not used by this step
 *
 * @return boolean  always TRUE
 */
function processHeadings(&$step_data, $prefix)
{
    if (!isset($this->_running_vars['initial_heading_count'])) {
        $this->_running_vars['initial_heading_count'] = count($this->_running_vars['headings_to_process']);
    }

    $processed = 0;
    foreach ($this->_running_vars['headings_to_process'] as $id => $top_data) {
        $heading = $top_data['heading'];
        $start   = $top_data['data'][1];

        // the headings were located case-insensitively, so the closing tag
        // has to be looked up the same way
        $end = stripos($this->_running_vars['file_data'], '</'.$heading.'>', $start);

        // a heading without a closing tag is left untouched
        if ($end !== FALSE) {
            $length    = $end - $start;
            $segment   = substr($this->_running_vars['file_data'], $start, $length);
            $flattened = str_replace(Array("\n", "\r"), ' ', $segment);
            if ($flattened !== $segment) {
                $this->_running_vars['file_data'] = substr_replace($this->_running_vars['file_data'], $flattened, $start, $length);
            }
        }

        unset($this->_running_vars['headings_to_process'][$id]);

        $processed++;
        if ($processed == 100) break;
    }

    $remaining = count($this->_running_vars['headings_to_process']);
    if ($remaining != 0) {
        $initial = $this->_running_vars['initial_heading_count'];
        // report the share that is finished, not the share that is left
        $step_data['percent_done'] = ($initial > 0) ? floor((($initial - $remaining) / $initial) * 100) : 0;
        $step_data['complete']     = FALSE;
    } else {
        $step_data['percent_done'] = 100;
        $step_data['complete']     = TRUE;
    }

    return TRUE;

}//end processHeadings()
448 
449 
/**
 * Step 3: imports the document's external images, one image per call
 *
 * On the first call the "<import file>_files" directory is scanned for
 * png/jpg/jpeg/gif files. Every call then creates one Image asset inside
 * an "Images" folder under the root asset and rewrites references to that
 * file in the document to "./?a=<asset id>". <v:imagedata> tags are turned
 * into <img> tags along the way.
 *
 * @param array  &$step_data  step information; 'complete' and 'percent_done' are updated
 * @param string $prefix      not used by this step
 *
 * @return boolean  always TRUE
 */
function processImages(&$step_data, $prefix)
{
    $image_dir_name = preg_replace('/\.[a-zA-Z\(\) ]+/i', '_files', $this->_running_vars['import_file']);

    $body_data = $this->_running_vars['file_data'];

    if ($this->_running_vars['processed_images'] == FALSE && is_dir($image_dir_name)) {
        // We have a word external files dir, lets get the images
        $image_names = Array();
        $image_dir   = opendir($image_dir_name);
        if ($image_dir !== FALSE) {
            while (FALSE !== ($filename = readdir($image_dir))) {
                switch (get_file_type($filename)) {
                    case 'png':
                    case 'jpg':
                    case 'jpeg':
                    case 'gif':
                        $image_names[] = $image_dir_name.'/'.$filename;
                    break;
                    default:
                        // Not an image
                    break;
                }
            }
            closedir($image_dir);
        }
        $this->_running_vars['image_names'] = $image_names;
        // remember that the directory was scanned, so the list (and with it
        // the meaning of image_count) cannot change between calls
        $this->_running_vars['processed_images'] = TRUE;
    }

    $image_total = count($this->_running_vars['image_names']);

    // nothing to import, or everything has been imported already
    if ($image_total == 0 || $this->_running_vars['image_count'] >= $image_total) {
        $step_data['complete'] = TRUE;
        return TRUE;
    }

    if ($this->_running_vars['image_folder_id'] == '0') {
        // first image: create the folder that will hold all of them
        $GLOBALS['SQ_SYSTEM']->am->includeAsset('folder');

        $image_folder = new Folder();
        $image_folder->setAttrValue('name', 'Images');

        $root_asset         = $GLOBALS['SQ_SYSTEM']->am->getAsset($this->_running_vars['root_assetid']);
        $folder_import_link = Array('asset' => &$root_asset, 'link_type' => SQ_LINK_TYPE_2);

        $image_folder->create($folder_import_link);
        $this->_running_vars['image_folder_id'] = $image_folder->id;
    } else {
        $image_folder = $GLOBALS['SQ_SYSTEM']->am->getAsset($this->_running_vars['image_folder_id']);
    }

    $filename = $this->_running_vars['image_names'][$this->_running_vars['image_count']];

    $GLOBALS['SQ_SYSTEM']->am->includeAsset('image');

    $import_link = Array('asset' => &$image_folder, 'link_type' => SQ_LINK_TYPE_1);
    $temp_info   = Array('name' => basename($filename), 'tmp_name' => $filename, 'non_uploaded_file' => TRUE);

    $new_file = new Image();
    $new_file->_tmp['uploading_file'] = TRUE;
    $new_file->setAttrValue('name', basename($filename));

    if (!$new_file->create($import_link, $temp_info)) {
        // leave the original reference in the document; there is no asset to point at
        trigger_error('Failed to import Image: '.$filename, E_USER_WARNING);
    } else {
        // Replace all instances of this image with its new asset id.
        // Both path parts are quoted so that characters such as + [ ] in a
        // file name are matched literally.
        $find     = basename($image_dir_name);
        $test_reg = '/[\.\/]?'.preg_quote($find, '/').'\/'.preg_quote(basename($filename), '/').'/i';
        // backslash and dollar are special in a preg replacement string
        $replace_asset = strtr('./?a='.$new_file->id, Array('\\' => '\\\\', '$' => '\\$'));
        $body_data     = preg_replace($test_reg, $replace_asset, $body_data);
    }

    // Handle vector format. Some images have this type of tags, if they were drawn from Drawing tool in MS Word
    // Convert them into <img> tags
    $result = Array();
    preg_match_all('/<v:imagedata\s+src=["\']{1}([^>]+)["\']{1}>/i', $body_data, $result);
    if (!empty($result[1])) {
        foreach ($result[1] as $i => $src) {
            $str = trim($src);
            if (!empty($str)) {
                $body_data = str_replace($result[0][$i], '<img src="'.$src.'" />', $body_data);
            }
        }
    }

    $new_file = NULL;

    $this->_running_vars['file_data'] = $body_data;

    $this->_running_vars['image_count']++;

    $step_data['percent_done'] = round($this->_running_vars['image_count'] / $image_total * 100);
    $step_data['complete']     = ($this->_running_vars['image_count'] == $image_total);

    return TRUE;

}//end processImages()
559 
560 
/**
 * Step 4: builds the page tree ('data_structure') from the document
 *
 * Without 'use_headings' the whole document becomes one page. Otherwise the
 * first call splits the document on the first configured heading into top
 * level pages, and every call (including the first) scans one top level
 * page for nested headings through scanChildren(). When the last page has
 * been scanned 'running_count' is reset to 0 for the splitting step.
 *
 * @param array  &$step_data  step information; 'complete', 'percent_done' and 'message' are updated
 * @param string $prefix      not used by this step
 *
 * @return boolean  always TRUE
 */
function processScanning(&$step_data, $prefix)
{
    if ($this->_running_vars['fix_anchors']) {
        // Setup some variables for storing our anchor gear
        $this->_running_vars['anchor_links'] = Array();
        $this->_running_vars['anchor_names'] = Array();
    }

    if (!$this->_running_vars['use_headings']) {
        // Lets get our stuff setup so we can get out of here quickly
        $single_page_name = $this->_running_vars['site_title'];
        if ($single_page_name == '') {
            // fall back to the file name without its extension
            $file_name = basename($this->_running_vars['import_file']);
            $dot_pos   = strrpos($file_name, '.');
            if ($dot_pos !== FALSE) {
                // only cut when there is an extension; a FALSE position
                // would otherwise produce an empty name
                $file_name = substr($file_name, 0, $dot_pos);
            }
            $single_page_name = ucwords(str_replace('_', ' ', $file_name));
        }

        $this->_running_vars['data_structure'] = Array(
            Array (
                'name'     => $single_page_name,
                'title'    => '',
                'children' => Array(),
                'data'     => $this->_running_vars['file_data'],
            ),
        );
        $step_data['complete']     = TRUE;
        $step_data['percent_done'] = 100;
        $this->_running_vars['running_count']   = 0;
        $this->_running_vars['top_level_count'] = 1;
        return TRUE;
    }

    // First time in here, so we need to setup some vars.
    if ($this->_running_vars['started_scanning'] == FALSE) {
        $file_data = $this->_running_vars['file_data'];

        $this->_running_vars['running_count'] = 0;

        $heading = current($this->_running_vars['headings']);
        if (!empty($this->_running_vars['headings']) && $heading === FALSE) {
            // the internal pointer was past the end of the list; rewind it
            reset($this->_running_vars['headings']);
            $heading = current($this->_running_vars['headings']);
        }

        // With the delimiter captured the result alternates between plain
        // text and heading: [leading text, heading, text, heading, text, ...]
        $e       = '/(<\s*'.$heading.'[^>]*>.*<\/\s*'.$heading.'>)/i';
        $matches = preg_split($e, $file_data, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($matches === FALSE) {
            // the pattern could not be applied; treat the document as leading text only
            $matches = Array($file_data);
        }
        $match_count = count($matches);

        $this->_running_vars['started_scanning'] = TRUE;

        $initial_data = Array();
        $has_leading  = (($match_count % 2) == 1);

        // only create the leading page if it was asked for and there is text for it
        if ($has_leading && $this->_running_vars['leading_create'] == TRUE) {
            $initial_data[] = Array (
                'name'     => $this->_running_vars['leading_text'],
                'title'    => '',
                'children' => Array(),
                'data'     => $matches[0],
            );
        }

        // headings sit at the odd positions when there is a leading chunk
        $counter = $has_leading ? 1 : 0;
        for ($i = $counter; ($i + 1) < $match_count; $i += 2) {
            $initial_data[] = Array (
                'name'     => $this->_cleanHeading($matches[$i]),
                'title'    => $matches[$i],
                'children' => Array(),
                'data'     => $matches[$i + 1],
            );
        }

        $this->_running_vars['data_structure']  = $initial_data;
        $this->_running_vars['top_level_count'] = count($initial_data);
    }//end if not started scanning

    $index = $this->_running_vars['running_count'];

    // Nothing (left) to scan. This also covers a document that produced no
    // top level page at all, where indexing the structure would otherwise
    // create a bogus empty entry.
    if ($index >= $this->_running_vars['top_level_count'] || !isset($this->_running_vars['data_structure'][$index])) {
        $step_data['complete']     = TRUE;
        $step_data['percent_done'] = 100;
        $this->_running_vars['running_count'] = 0;
        return TRUE;
    }

    $page_name = $this->_running_vars['data_structure'][$index]['name'];

    // scanChildren() hands back the child pages plus, under 'parent_data',
    // the text that stays with this page itself
    $children = $this->scanChildren($this->_running_vars['headings'], $this->_running_vars['data_structure'][$index]['data']);

    $this->_running_vars['data_structure'][$index]['data'] = $children['parent_data'];
    unset($children['parent_data']);
    $this->_running_vars['data_structure'][$index]['children'] = $children;

    $this->_running_vars['running_count']++;

    $step_data['message'] = 'Scanning Page: '.$page_name.' and it\'s children';

    if ($this->_running_vars['running_count'] >= $this->_running_vars['top_level_count']) {
        $step_data['complete']     = TRUE;
        // set before the counter is reset, otherwise the finished step reports 0%
        $step_data['percent_done'] = 100;
        $this->_running_vars['running_count'] = 0;
    } else {
        $step_data['percent_done'] = round($this->_running_vars['running_count'] / $this->_running_vars['top_level_count'] * 100);
    }

    return TRUE;

}//end processScanning()
668 
669 
/**
 * Recursively splits a chunk of markup into child pages by heading level
 *
 * The first entry of $headings is the level the caller has already handled
 * and is dropped; the chunk is then split on the next level. The returned
 * array holds one entry per child (name, title, data, children) under
 * numeric keys, plus the text that precedes the first child under the key
 * 'parent_data'. When no heading level is left, only 'parent_data' (the
 * whole chunk) is returned.
 *
 * @param array  $headings  heading tag names, starting with the level already handled
 * @param string $data      markup to split
 *
 * @return array
 */
function scanChildren($headings, $data)
{
    // Move on to the next heading level
    $remaining_levels = array_slice($headings, 1);
    if (empty($remaining_levels)) {
        return Array(
            'parent_data' => $data,
        );
    }

    $tag     = current($remaining_levels);
    $pattern = '/(<\s*'.$tag.'[^>]*>.*<\/\s*'.$tag.'>)/i';
    $pieces  = preg_split($pattern, $data, -1, PREG_SPLIT_DELIM_CAPTURE);
    $total   = count($pieces);

    $result = Array();

    // an odd number of pieces means the first one is text that comes
    // before any heading of this level
    $first = 0;
    if (($total % 2) == 1) {
        $result['parent_data'] = $pieces[0];
        $first = 1;
    }

    for ($idx = $first; $idx < $total; $idx += 2) {
        $title = $pieces[$idx];
        $name  = $this->_cleanHeading($title);

        // the text after the heading is scanned for the levels below;
        // whatever is not claimed by a deeper heading stays with this child
        $grand_children = $this->scanChildren($remaining_levels, $pieces[$idx + 1]);
        $own_data       = $grand_children['parent_data'];
        unset($grand_children['parent_data']);

        $result[] = Array(
            'name'     => $name,
            'title'    => $title,
            'data'     => $own_data,
            'children' => $grand_children,
        );
    }

    return $result;

}//end scanChildren()
718 
719 
/**
 * Step 5: creates the pages for one top level entry of 'data_structure'
 *
 * Each call passes the entry at 'running_count' (and, through splitData(),
 * all of its children) on for page creation and then advances the counter.
 *
 * @param array  &$step_data  step information; 'complete', 'percent_done' and 'message' are updated
 * @param string $prefix      not used by this step
 *
 * @return boolean  always TRUE
 */
function processSplitting(&$step_data, $prefix)
{
    $total = $this->_running_vars['top_level_count'];
    $index = $this->_running_vars['running_count'];

    // Nothing (left) to split: finish without touching the data structure,
    // so that no page is created from a missing entry
    if ($total == 0 || $index >= $total || !isset($this->_running_vars['data_structure'][$index])) {
        $step_data['percent_done'] = 100;
        $step_data['complete']     = TRUE;
        return TRUE;
    }

    $GLOBALS['SQ_SYSTEM']->am->includeAsset('page_standard');

    $data = $this->_running_vars['data_structure'][$index];

    // a NULL parent makes splitData() start from the root asset
    $empty_var = NULL;
    $this->splitData($data, $empty_var);

    $this->_running_vars['running_count']++;

    $step_data['percent_done'] = round($this->_running_vars['running_count'] / $total * 100);
    $step_data['complete']     = ($this->_running_vars['running_count'] >= $total);
    $step_data['message']      = 'Splitting Page: '.$data['name'].' and it\'s children';

    return TRUE;

}//end processSplitting()
754 
755 
/**
 * Creates a Standard Page for one entry of the page tree and recurses into its children
 *
 * The page is linked under $parent_asset (the root asset when NULL is
 * passed) and filled with the entry's markup. With 'fix_anchors' enabled
 * the anchor names defined on the page and the internal "#..." links it
 * uses are recorded in the running vars for processAdditional().
 *
 * @param array  $page_data      entry with 'name', 'data' and 'children'
 * @param object &$parent_asset  asset to create the page under, or NULL for the root asset
 *
 * @return boolean  FALSE when the page could not be created, TRUE otherwise
 */
function splitData($page_data, &$parent_asset)
{
    set_time_limit(0);

    if (empty($page_data)) return TRUE;

    // Get the top page
    if (is_null($parent_asset)) {
        $parent_asset = $GLOBALS['SQ_SYSTEM']->am->getAsset($this->_running_vars['root_assetid']);
    }

    $import_link = Array('asset' => &$parent_asset, 'link_type' => SQ_LINK_TYPE_1);

    // Clean the name first and apply the default afterwards, so that a name
    // made of nothing but tags also ends up as 'Preface'
    $page_name = isset($page_data['name']) ? trim(strip_tags($page_data['name'])) : '';
    if ($page_name == '') {
        $page_name = 'Preface';
    }

    $new_page = new Page_Standard();
    $new_page->setAttrValue('name', $page_name);
    if (!$new_page->create($import_link)) {
        // without the page there is nothing to fill and nowhere to put the children
        trigger_error('Failed to create Page: '.$page_name, E_USER_WARNING);
        return FALSE;
    }

    $content = isset($page_data['data']) ? $page_data['data'] : '';

    if ($this->_running_vars['fix_anchors']) {
        // Lets get any internal links, and store them for later
        $anchor_record = Array();
        $anchor_tags   = Array();
        preg_match_all('/<a[^>]*>/i', $content, $anchor_tags);
        foreach ($anchor_tags[0] as $tag) {
            // an anchor defined on this page
            $name_matches = Array();
            preg_match('/name="([_a-z\d]+)"/i', $tag, $name_matches);
            if (!empty($name_matches)) {
                $this->_running_vars['anchor_names'][$name_matches[1]] = $new_page->id;
            }

            // a link from this page to an anchor somewhere in the document
            $link_matches = Array();
            preg_match('/href="#([_a-z\d]+)"/i', $tag, $link_matches);
            if (!empty($link_matches)) {
                $anchor_record[] = $link_matches[1];
            }
        }
        if (!empty($anchor_record)) {
            $this->_running_vars['anchor_links'][$new_page->id] = $anchor_record;
        }
    }

    $this->setPageContent($new_page, $content);

    // Process the children
    if (!empty($page_data['children'])) {
        foreach ($page_data['children'] as $child_data) {
            $this->splitData($child_data, $new_page);
        }
    }

    return TRUE;

}//end splitData()
824 
825 
/**
 * Writes HTML into the content type of the first container of a page's bodycopy
 *
 * The 'html' attribute is set and saved while the system runs at the
 * forced run level; the previous run level is restored afterwards.
 *
 * @param object &$page    page whose content is replaced
 * @param string $content  HTML to store
 *
 * @return boolean  always TRUE
 */
function setPageContent(&$page, $content)
{
    $am = $GLOBALS['SQ_SYSTEM']->am;

    // first container of the page's bodycopy
    $bodycopy        = $page->getBodycopy();
    $all_containers  = $bodycopy->getContainers();
    $first_container = current($all_containers);

    // first content type linked under that container
    $content_type_links = $am->getLinks($first_container->id, SQ_LINK_TYPE_2, 'content_type', FALSE);
    $first_link         = current($content_type_links);
    $content_type       = $am->getAsset($first_link['minorid']);

    $GLOBALS['SQ_SYSTEM']->setRunLevel(SQ_RUN_LEVEL_FORCED);
    $content_type->setAttrValue('html', $content);
    $content_type->saveAttributes();
    $GLOBALS['SQ_SYSTEM']->restoreRunLevel();

    return TRUE;

}//end setPageContent()
853 
854 
/**
 * Reads the HTML held by the content type of the first container of a page's bodycopy
 *
 * @param object &$page  page to read from
 *
 * @return mixed  value of the content type's 'html' attribute
 */
function getPageContent(&$page)
{
    $am = $GLOBALS['SQ_SYSTEM']->am;

    // first container of the page's bodycopy
    $bodycopy        = $page->getBodycopy();
    $all_containers  = $bodycopy->getContainers();
    $first_container = current($all_containers);

    // first content type linked under that container
    $content_type_links = $am->getLinks($first_container->id, SQ_LINK_TYPE_2, 'content_type', FALSE);
    $first_link         = current($content_type_links);
    $content_type       = $am->getAsset($first_link['minorid']);

    $html = $content_type->attr('html');

    return $html;

}//end getPageContent()
876 
877 
/**
 * Step 6: optional extras once all pages exist
 *
 * Depending on the running vars this
 *  - writes the collected styles to css_file.css in the import directory
 *    and imports that file as a Text File asset ('create_css'),
 *  - rewrites internal "#anchor" links so they point at the page that
 *    holds the anchor ('fix_anchors'),
 *  - creates a "Table Of Contents" Site Map page ('create_site_map').
 *
 * @param array  &$step_data  step information; 'complete' and 'percent_done' are updated
 * @param string $prefix      not used by this step
 *
 * @return boolean  always TRUE
 */
function processAdditional(&$step_data, $prefix)
{
    // Get a link to our parent, as we'll use it a lot
    $parent_asset = $GLOBALS['SQ_SYSTEM']->am->getAsset($this->_running_vars['root_assetid']);

    $import_link = Array('asset' => &$parent_asset, 'link_type' => SQ_LINK_TYPE_2);

    // CSS Styles
    if ($this->_running_vars['create_css']) {
        $filename     = 'css_file.css';
        $import_path  = $this->_running_vars['import_dir'].'/'.$filename;
        $style_output = implode("\n", $this->_running_vars['styles']);

        $css_file = fopen($import_path, 'w');
        if ($css_file === FALSE) {
            // do not write to, or import, a file that could not be opened
            trigger_error('Failed to import CSS File '.$filename, E_USER_WARNING);
        } else {
            fputs($css_file, $style_output);
            fclose($css_file);

            // create an asset under the new parent of the correct type
            $temp_info = Array('name' => $filename, 'tmp_name' => $import_path, 'non_uploaded_file' => TRUE);

            $GLOBALS['SQ_SYSTEM']->am->includeAsset('text_file');

            $new_file = new Text_File();
            $new_file->_tmp['uploading_file'] = TRUE;
            $new_file->setAttrValue('name', $filename);

            if (!$new_file->create($import_link, $temp_info)) {
                trigger_error('Failed to import CSS File '.$filename, E_USER_WARNING);
            }
        }
    }

    // Change anchors to point to correct assets
    if ($this->_running_vars['fix_anchors'] && !empty($this->_running_vars['anchor_links'])) {
        foreach ($this->_running_vars['anchor_links'] as $id => $links) {
            $page = $GLOBALS['SQ_SYSTEM']->am->getAsset($id);
            $html = $this->getPageContent($page);

            // a page may link to the same anchor several times; rewriting
            // once per distinct anchor avoids prefixing a link twice
            foreach (array_unique($links) as $link) {
                if (!isset($this->_running_vars['anchor_names'][$link])) {
                    // the anchor is not defined on any imported page
                    continue;
                }
                $new_asset = './?a='.$this->_running_vars['anchor_names'][$link];

                // Match the complete attribute value so that "#intro" does
                // not also hit "#intro2". Only the attribute name is
                // matched case-insensitively, as it was when the link was
                // recorded in splitData().
                $pattern     = '/((?i:href)=")#'.preg_quote($link, '/').'"/';
                $replacement = strtr($new_asset.'#'.$link, Array('\\' => '\\\\', '$' => '\\$'));
                $html        = preg_replace($pattern, '${1}'.$replacement.'"', $html);
            }
            $this->setPageContent($page, $html);
        }
    }

    // Table of Contents
    if ($this->_running_vars['create_site_map']) {
        $GLOBALS['SQ_SYSTEM']->am->includeAsset('page_site_map');
        $site_map = new Page_Site_Map();

        $import_link['link_type'] = SQ_LINK_TYPE_1;
        $site_map->setAttrValue('name', 'Table Of Contents');
        if (!$site_map->create($import_link)) {
            trigger_error('Failed to Create Site Map', E_USER_WARNING);
        } else {
            // keep the site map out of its own listing
            $exclude_list = Array();
            $exclude_list[$site_map->id] = $site_map->id;

            // NOTE(review): setPageContent() in this class follows
            // setAttrValue() with saveAttributes() under the forced run
            // level; the same is done here so the exclude list is stored
            // and not just set on this object - confirm against Asset.
            $GLOBALS['SQ_SYSTEM']->setRunLevel(SQ_RUN_LEVEL_FORCED);
            $site_map->setAttrValue('exclude_list', $exclude_list);
            $site_map->saveAttributes();
            $GLOBALS['SQ_SYSTEM']->restoreRunLevel();
        }
    }

    $step_data['complete']     = TRUE;
    $step_data['percent_done'] = 100;
    return TRUE;

}//end processAdditional()
960 
961 
/**
 * Strips HTML comments from the passed markup, in place
 *
 * Comments are only removed when the document is well formed in the sense
 * checked here: there are as many "<!--" as "-->", as many "<style>" as
 * "</style>", and every comment closes before the next one opens. If any of
 * these checks fails the markup is left untouched. A comment that lies
 * completely inside a <style>...</style> pair is kept.
 *
 * @param string &$html  markup to clean; modified by reference
 *
 * @return void
 */
function _removeHtmlComment(&$html)
{
    $comment_opens  = Array();
    $comment_closes = Array();
    preg_match_all('/<!--/', $html, $comment_opens, PREG_OFFSET_CAPTURE);
    preg_match_all('/-->/', $html, $comment_closes, PREG_OFFSET_CAPTURE);

    $comment_total = count($comment_opens[0]);
    if ($comment_total != count($comment_closes[0])) {
        // unbalanced comment markers
        return;
    }

    $style_opens  = Array();
    $style_closes = Array();
    preg_match_all('/<style>/', $html, $style_opens, PREG_OFFSET_CAPTURE);
    preg_match_all('/<\/style>/', $html, $style_closes, PREG_OFFSET_CAPTURE);

    $style_total = count($style_opens[0]);
    if ($style_total != count($style_closes[0])) {
        // unbalanced style tags
        return;
    }

    // offsets of every <style> / </style> pair
    $style_ranges = Array();
    for ($s = 0; $s < $style_total; $s++) {
        $style_ranges[] = Array(
            'from' => (int) $style_opens[0][$s][1],
            'to'   => (int) $style_closes[0][$s][1],
        );
    }

    // Pass 1: every comment has to close after it opens, and must not
    // open before the previous comment has closed
    $last_close = 0;
    for ($c = 0; $c < $comment_total; $c++) {
        $start = (int) $comment_opens[0][$c][1];
        $end   = (int) $comment_closes[0][$c][1];

        if ($start > $end || ($last_close && $last_close > $start)) {
            return;
        }
        $last_close = $end;
    }

    // Pass 2: collect the text of every comment that is not wrapped by a style block
    $doomed = Array();
    for ($c = 0; $c < $comment_total; $c++) {
        $start = (int) $comment_opens[0][$c][1];
        $end   = (int) $comment_closes[0][$c][1];

        $inside_style = FALSE;
        foreach ($style_ranges as $range) {
            if ($range['from'] < $start && $range['to'] > $end) {
                $inside_style = TRUE;
                break;
            }
        }
        if ($inside_style) continue;

        // + 3 takes the closing "-->" along
        $doomed[] = substr($html, $start, $end + 3 - $start);
    }

    // Pass 3: remove the collected comments by their text
    foreach ($doomed as $comment) {
        if (strpos($html, $comment) !== FALSE) {
            $html = str_replace($comment, '', $html);
        }
    }

}//end _removeHtmlComment()
1033 
1034 
/**
 * Turns a heading element into plain text usable as a page name
 *
 * Tags are stripped, &nbsp; entities become spaces and runs of spaces are
 * collapsed into a single space. The result is not trimmed.
 *
 * @param string $heading  heading markup
 *
 * @return string
 */
function _cleanHeading($heading)
{
    // preg_replace() applies the patterns one after another, in this order
    $patterns = Array(
        '/(&nbsp;){2,}/',
        '/&nbsp;/',
        '/ {2,}/',
    );
    $replacements = Array(
        '&nbsp;',
        ' ',
        ' ',
    );

    return preg_replace($patterns, $replacements, strip_tags($heading));

}//end _cleanHeading()
1053 
1054 
1055 }//end class
1056 
1057 ?>