Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
search_manager_plugin_oci.inc
1 <?php
18 require_once SQ_FUDGE_PATH.'/general/text.inc';
19 
20 
33 {
34 
35 
52  function processWordSearch(&$sm, $search_term, $data_source, $base_query, $word_logic='AND')
53  {
54  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
55 
56  switch ($data_source['type']) {
57 
58  case 'include_all':
59  $contains_str = $this->_buildContainsQuery($sm, 'ai.value', $search_term, $word_logic);
60  if (!$contains_str) {
61  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
62  return Array();
63  }
64  $base_query['where'][] = $contains_str;
65  break;
66 
67  case 'asset_attrib':
68  $attribute = $GLOBALS['SQ_SYSTEM']->am->getAttribute($data_source['params']['attrid']);
69  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('attr:'.$attribute->name);
70 
71  $contains_str = $this->_buildContainsQuery($sm, 'ai.value', $search_term, $word_logic);
72  if (!$contains_str) {
73  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
74  return Array();
75  }
76  $base_query['where'][] = $contains_str;
77  break;
78 
79  case 'metadata':
80  $metadata_field = $GLOBALS['SQ_SYSTEM']->am->getAsset($data_source['params']['assetid']);
81  $key_types = Array(
82  'metadata_field_hierarchy' => 'selection',
83  'metadata_field_select' => 'selection',
84  'metadata_field_multiple_text' => 'selection',
85  'metadata_field_thesaurus' => 'thesaurus',
86  'metadata_field_wysiwyg' => 'metadata_field_wysiwyg',
87  );
88  $key_type = array_get_index($key_types, get_class_lower($metadata_field), 'text');
89  $base_query['where'][] = 'ai.type = '.MatrixDAL::quote($key_type);
90  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('metadata:'.(int)$data_source['params']['assetid']);
91 
92  if (get_class_lower($metadata_field) == 'metadata_field_select' && $metadata_field->attr('multiple')) {
93  if (strpos($search_term,' AND ') !== FALSE) {
94  $split = ' AND ';
95  $split_type = '&';
96  } else {
97  $split = ' OR ';
98  $split_type = '|';
99  }
100 
101  $searched_terms = explode($split,$search_term);
102  $sub_contains_qry = '';
103  foreach ($searched_terms as $sterm) {
104  // TODO: This needs to be changed to whatever the
105  // TODO: regular search manager used
106  $sub_contains_qry .= '{[option]'.$sterm.'[/option]}'.$split_type;
107  }
108  $sub_contains_qry = trim($sub_contains_qry,$split_type);
109  $contains_str = ' contains(ai.value, '.MatrixDAL::quote($sub_contains_qry).', 1) > 0';
110  } else {
111 
112  $contains_str = $this->_buildContainsQuery($sm, 'ai.value', $search_term, $word_logic, 1, ($key_type != 'text' && $key_type != 'metadata_field_wysiwyg' && get_class_lower($metadata_field) != 'metadata_field_multiple_text' && ($key_type != 'thesaurus')));
113  }
114  if (!$contains_str) {
115  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
116  return Array();
117  }
118  $base_query['where'][] = $contains_str;
119  break;
120 
121  case 'standard':
122  $field = $data_source['params']['field'];
123  if (isset($sm->standard_text_fields[$field])) {
124  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('__'.$field.'__');
125 
126  $contains_str = $this->_buildContainsQuery($sm, 'ai.value', $search_term, $word_logic);
127  if (!$contains_str) {
128  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
129  return Array();
130  }
131  $base_query['where'][] = $contains_str;
132  } else {
133  trigger_error('Invalid standard search field '.$field, E_USER_WARNING);
134  }
135  break;
136 
137  }//end switch
138 
139  $bind_vars = array_get_index($base_query, 'bind_vars', Array());
140  $sql = implode_sql($base_query);
141  $query = MatrixDAL::preparePDOQuery($sql);
142  foreach ($bind_vars as $bind_var => $value) {
143  MatrixDAL::bindValueToPdo($query, $bind_var, $value);
144  }//end foreach
145  if (count($base_query['select']) > 2) {
146  $result = MatrixDAL::executePdoAssoc($query);
147  } else {
148  $result = MatrixDAL::executePdoGroupedAssoc($query);
149  // (3.18) only selecting assetid and search score
150  // use old format and don't worry about result format later
151  $result_old_format = Array();
152  foreach ($result as $assetid => $info) {
153  $result_old_format[$assetid] = $info[0]['search_score'];
154  }
155 
156  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
157 
158  return $result_old_format;
159  }
160 
161  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
162 
163  return $result;
164 
165  }//end processWordSearch()
166 
167 
186  function processNumericSearch(&$sm, $numeric_range, $data_source, $base_query)
187  {
188  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
189 
190  switch ($data_source['type']) {
191 
192  case 'asset_attrib':
193  $attrid = $data_source['params']['attrid'];
194  $attr_info = $GLOBALS['SQ_SYSTEM']->am->getAttributeInfo(Array($attrid));
195  $attr_info = $attr_info[$attrid];
196 
197  $attr_type = $attr_info['type'];
198  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('attr:'.$attr_info['name']);
199 
200  if ($numeric_range['upper'] == $numeric_range['lower']) {
201  $search_compare = '= '.MatrixDAL::quote($numeric_range['upper']);
202  } else if (is_null($numeric_range['upper'])) {
203  $search_compare = '>= '.MatrixDAL::quote($numeric_range['lower']);
204  } else if (is_null($numeric_range['lower'])) {
205  $search_compare = '<= '.MatrixDAL::quote($numeric_range['upper']);
206  } else if ($numeric_range['upper'] > $numeric_range['lower']) {
207  // upper > lower as expected
208  $search_compare = 'BETWEEN '.MatrixDAL::quote($numeric_range['lower']).' AND '.MatrixDAL::quote($numeric_range['upper']);
209  } else {
210  // accept lower > upper but we need to switch values because
211  // some DB engines won't accept BETWEEN upper AND lower
212  $search_compare = 'BETWEEN '.MatrixDAL::quote($numeric_range['upper']).' AND '.MatrixDAL::quote($numeric_range['lower']);
213  }
214 
215  if ($attr_type == 'int') {
216  $base_query['where'][] = 'CAST(CAST(ai.value AS varchar(255)) AS integer) '.$search_compare;
217  } else {
218  $base_query['where'][] = 'CAST(CAST(ai.value AS varchar(255)) AS double precision) '.$search_compare;
219  }
220  break;
221 
222  case 'metadata':
223  // not implemented yet
224  break;
225 
226  case 'standard':
227  // not implemented yet
228  break;
229 
230  }//end switch $data_source['type']
231  $bind_vars = array_get_index($base_query, 'bind_vars', Array());
232  $sql = implode_sql($base_query);
233  $query = MatrixDAL::preparePDOQuery($sql);
234  foreach ($bind_vars as $bind_var => $value) {
235  MatrixDAL::bindValueToPdo($query, $bind_var, $value);
236  }//end foreach
237  if (count($base_query['select']) > 2) {
238  $result = MatrixDAL::executePdoAssoc($query);
239  } else {
240  $result = MatrixDAL::executePdoGroupedAssoc($query);
241  // (3.18) only selecting assetid and search score
242  // use old format and don't worry about result format later
243  $result_old_format = Array();
244  foreach ($result as $assetid => $info) {
245  $result_old_format[$assetid] = $info[0]['search_score'];
246  }
247 
248  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
249 
250  return $result_old_format;
251  }
252 
253  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
254 
255  return $result;
256 
257  }//end processNumericSearch()
258 
259 
278  function processDateSearch(&$sm, $date_range, $data_source, $base_query)
279  {
280  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
281 
282  switch ($data_source['type']) {
283 
284  case 'asset_attrib':
285  // TODO: restrict by asset type here?
286  $attribute = $GLOBALS['SQ_SYSTEM']->am->getAttribute($data_source['params']['attrid']);
287  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('attr:'.$attribute->name);
288  break;
289 
290  case 'metadata':
291  $metadata_field = $GLOBALS['SQ_SYSTEM']->am->getAsset($data_source['params']['assetid']);
292  $key_types = Array(
293  'metadata_field_select' => 'selection',
294  'metadata_field_thesaurus' => 'thesaurus',
295  'metadata_field_date' => 'datetime',
296  );
297  $key_type = array_get_index($key_types, get_class_lower($metadata_field), 'text');
298  $base_query['where'][] = 'ai.type = '.MatrixDAL::quote($key_type);
299  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('metadata:'.(int)$data_source['params']['assetid']);
300  break;
301 
302  case 'standard':
303  $field = $data_source['params']['field'];
304  if (isset($sm->standard_date_fields[$field])) {
305  $base_query['where'][] = 'ai.component = '.MatrixDAL::quote('__'.$field.'__');
306  } else {
307  trigger_error('Invalid standard date search field '.$field, E_USER_WARNING);
308  }
309  break;
310 
311  }
312  if ($date_range['from'] != '---------- --:--:--') {
313  $base_query['where'][] = $this->_getDateCompareWithClobSql('ai.value', MatrixDAL::quote($date_range['from']), '>=');
314  }
315  if ($date_range['to'] != '---------- --:--:--') {
316  $base_query['where'][] = $this->_getDateCompareWithClobSql('ai.value', MatrixDAL::quote($date_range['to']), '<=');
317  }
318  $bind_vars = array_get_index($base_query, 'bind_vars', Array());
319  $sql = implode_sql($base_query);
320  $query = MatrixDAL::preparePDOQuery($sql);
321  foreach ($bind_vars as $bind_var => $value) {
322  MatrixDAL::bindValueToPdo($query, $bind_var, $value);
323  }//end foreach
324  if (count($base_query['select']) > 2) {
325  $result = MatrixDAL::executePdoAssoc($query);
326  } else {
327  $result = MatrixDAL::executePdoGroupedAssoc($query);
328  // (3.18) only selecting assetid and search score
329  // use old format and don't worry about result format later
330  $result_old_format = Array();
331  foreach ($result as $assetid => $info) {
332  $result_old_format[$assetid] = $info[0]['search_score'];
333  }
334 
335  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
336 
337  return $result_old_format;
338  }
339 
340  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
341 
342  return $result;
343 
344  }//end processDateSearch()
345 
346 
360  function processExcludeQuery(&$sm, $search_term, $base_query)
361  {
362  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
363 
364  $words = Array();
365 
366  foreach ($search_term as $exclude_value) {
367  $words = array_merge($words, $this->generateWordList($exclude_value));
368  }
369  $words = array_unique($words);
370  foreach($words as $key => $val) {
371  $words[$key] = strtolower($val);
372  }
373 
374  $new_base = $base_query;
375  $contains_str = $this->_buildContainsQuery($sm, 'ai.value', implode(' or ', $words));
376 
377  if ($contains_str) {
378  $new_base['where'][] = $contains_str;
379  $new_base['select'] = Array('ai.assetid');
380  $new_base['group_by'] = Array('ai.assetid');
381  $bind_vars = array_get_index($new_base, 'bind_vars', Array());
382  $sql = implode_sql($new_base);
383  $query = MatrixDAL::preparePDOQuery($sql);
384  foreach ($bind_vars as $bind_var => $value) {
385  MatrixDAL::bindValueToPdo($query, $bind_var, $value);
386  }//end foreach
387  $result = MatrixDAL::executePdoAssoc($query, 0);
388  } else {
389  $result = Array();
390  }
391 
392  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
393 
394  // Result returned above is in following format:
395  // Array(
396  // '0' => ASSETID1,....
397  // )
398  //
399  // However, the search manager expects the result in following format:
400  // Which is same as that returned by posgre
401  // Array(
402  // ASSETID => Array(), ...
403  // )
404  //
405  // Hence, changing the result format to make it readable by search manager.
406  if (!empty($result)) {
407  $new_result_format = Array();
408  foreach($result as $key => $val) {
409  $new_result_format[$val] = Array();
410  }
411  $result = $new_result_format;
412  }
413 
414  return $result;
415 
416  }//end processExcludeQuery()
417 
418 
432  function constructBaseSearchQuery($search_info)
433  {
434  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
435 
436  $query = Array(
437  'select' => Array(),
438  'from' => Array('sq_ast a'),
439  'where' => Array(),
440  'where_joiner' => 'AND',
441  'order_by' => Array(),
442  );
443 
444  // TREE LOCATIONS
445  if (!empty($search_info['roots'])) {
446  $root_logic = array_get_index($search_info, 'root_logic', 'OR');
447  // get the treeids of our search roots
448  $treeid_sql = 'SELECT l.minorid, t.treeid
449  FROM sq_ast_lnk_tree t
450  JOIN sq_ast_lnk l on t.linkid = l.linkid
451  WHERE l.minorid IN ('.implode(',', $search_info['roots']).')';
452  // getAssoc only gives us the first treeid for each minorid, which is actually just what we want!
453  $root_treeids = MatrixDAL::executeSqlGroupedAssoc($treeid_sql);
454 
455  if ($root_logic == 'AND') {
456  foreach (array_values($search_info['roots']) as $i => $rootid) {
457  $treeid = $root_treeids[$rootid][0]['treeid'];
458  $query['where'][] = ' EXISTS (
459  SELECT 1 FROM
460  sq_ast_lnk l INNER JOIN sq_ast_lnk_tree t ON l.linkid = t.linkid
461  WHERE
462  a.assetid = l.minorid
463  AND substr(t.treeid, 1, '.strlen($treeid).') = '.MatrixDAL::quote($treeid).'
464  )';
465  }
466  } else {
467  $sql = 'EXISTS (
468  SELECT 1 FROM
469  sq_ast_lnk l INNER JOIN sq_ast_lnk_tree t ON l.linkid = t.linkid
470  WHERE
471  a.assetid = l.minorid
472  AND ';
473 
474  $treeids_where = Array();
475  foreach ($root_treeids as $treeid_value) {
476  $treeid = $treeid_value[0]['treeid'];
477  $treeids_where[] = 'substr(t.treeid, 1, '.strlen($treeid).') = '.MatrixDAL::quote($treeid);
478  }
479  $sql .= '('.implode(' OR ', $treeids_where).')';
480  $query['where'][] = $sql.')';
481  }
482  }//end if !empty($search_info['roots']
483 
484  // ACCESS RESTRICTIONS
485  $user_restrictions = (!$GLOBALS['SQ_SYSTEM']->userRoot() && !$GLOBALS['SQ_SYSTEM']->userSystemAdmin());
486  if ($user_restrictions) {
487  $query['from'][] = 'sq_ast_perm ap';
488  $query['where'][] = 'a.assetid = ap.assetid';
489  $userids = array_keys($GLOBALS['SQ_SYSTEM']->am->getParents($GLOBALS['SQ_SYSTEM']->user->id, 'user_group', FALSE));
490  array_push($userids, $GLOBALS['SQ_SYSTEM']->am->getSystemAssetid('public_user'), $GLOBALS['SQ_SYSTEM']->user->id);
491  for (reset($userids); NULL !== ($i = key($userids)); next($userids)) {
492  $userids[$i] = MatrixDAL::quote($userids[$i]);
493  }
494  $query['where'][] = 'ap.userid IN ('.implode(',', $userids).')';
495 
496  }
497 
498  // STATUS RESTRICTIONS
499  if (!empty($search_info['statuses'])) {
500  $statuses = $search_info['statuses'];
501  if (array_sum($statuses) != SQ_SC_STATUS_ALL) {
502  foreach ($statuses as $i => $status) {
503  $statuses[$i] = MatrixDAL::quote($status);
504  }
505  $query['where'][] = 'a.status IN ('.implode(', ', $statuses).')';
506  }
507  } else {
508  // if there are no status restrictions, default to LIVE assets only
509  $query['where'][] = 'a.status >= '.MatrixDAL::quote(SQ_STATUS_LIVE);
510  }
511 
512  // ASSET TYPE RESTRICTIONS
513  if (!empty($search_info['asset_types'])) {
514  $inherited_types = Array();
515  $normal_types = Array();
516  for (reset($search_info['asset_types']); NULL !== ($i = key($search_info['asset_types'])); next($search_info['asset_types'])) {
517  if ($search_info['asset_types'][$i] == 1) {
518  $inherited_types[] = MatrixDAL::quote($i);
519  } else {
520  $normal_types[] = MatrixDAL::quote($i);
521  }
522  }
523 
524  $type_code_cond = Array();
525  // if we have inherited types and/or normal types
526  if (!empty($inherited_types)) {
527  $type_code_cond[] = 'inhd_type_code IN ('.implode(', ', $inherited_types).')';
528  if (!empty($normal_types)) {
529  $type_code_cond[] = 'type_code IN ('.implode(', ', $normal_types).')';
530  }
531  $type_code_cond = implode(' OR ', $type_code_cond);
532  $query['where'][] = 'a.type_code IN (
533  SELECT type_code
534  FROM sq_ast_typ_inhd
535  WHERE '.$type_code_cond.'
536  )';
537  } else {
538  // if we only got normal type instead we are not using the subquery
539  $query['where'][] = 'a.type_code IN ('.implode(', ', $normal_types).')';
540  }
541  }
542 
543  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
544 
545  return $query;
546 
547  }//end constructBaseSearchQuery()
548 
549 
565  function extractKeywords(&$asset, $include_metadata=FALSE, $include_scores=FALSE)
566  {
567  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
568  $sm = $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('search_manager');
569 
570  // The SUBSTR stuff is to turn the CLOB field of value into a VARCHAR
571  // that can be handled by the LOWER() function
572  $sql = 'SELECT
573  value, type, score
574  FROM
575  sq_sch_idx
576  WHERE
577  assetid = '.MatrixDAL::quote($asset->id).
578  ($include_metadata ? '' : ' AND component NOT LIKE '.MatrixDAL::quote('metadata:%'));
579 
580  $result = MatrixDAL::executeSqlAssoc($sql);
581 
582  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
583 
584  // TODO: change it so the words are split up
585  $word_scores = Array();
586 
587  foreach ($result as $result_item) {
588  $word_string = strtolower($result_item['value']);
589  if ($result_item['type'] == 'text') {
590  $words = preg_split('|\s+|', $word_string);
591  } else {
592  $words = Array($word_string);
593  }
594  $words = array_count_values($words);
595  foreach ($words as $word => $score) {
596  $word = trim($word);
597  if ($sm->isWordIndexable($word)) {
598  if (!isset($word_scores[$word])) {
599  $word_scores[$word] = 0;
600  }
601  $word_scores[$word] += $score * $result_item['score'];
602  }
603  }
604  }
605 
606  arsort($word_scores);
607 
608  if ($include_scores) {
609  return $word_scores; // return result with scores
610  } else {
611  return array_keys($word_scores); // just return the keywords
612  }
613 
614  }//end extractKeywords()
615 
616 
629  function generateWordList($words)
630  {
631  if (is_array($words)) {
632  return $words;
633  } else if (is_string($words)) {
634  //convert wildcard symbols to corresponding SQL wildcards
635  $words = strtr($words, '*?', '%_');
636 
637  //invoke remove_silent_chars in text.inc
638  $words = remove_silent_chars($words, TRUE, TRUE);
639 
640  return Array($words);
641  }
642 
643  }//end generateWordList()
644 
645 
664  function splitIndexableContent($contents, $type_code, $data_type, $component, $weighting, $contextid, $use_default)
665  {
666  $index_content = Array();
667 
668  //only use mb_strtolower() function if it is supported
669  if (function_exists('mb_strtolower')) {
670  //use mbstring function because the input contents can be different from ASCII characters
671  $contents = mb_strtolower($contents, SQ_CONF_DEFAULT_CHARACTER_SET);
672  } else {
673  //lowercase the string which has non-ASCII characters by encoding them to HTML entities
674  $contents = htmlentities($contents, ENT_NOQUOTES, SQ_CONF_DEFAULT_CHARACTER_SET);
675  $contents = strtolower($contents);
676  $contents = html_entity_decode($contents, ENT_NOQUOTES, SQ_CONF_DEFAULT_CHARACTER_SET);
677  }
678  $contents = str_replace('&nbsp;', ' ', $contents);
679  $contents = trim($contents);
680 
681  if (($data_type == 'text') || ($data_type == 'wysiwyg')) {
682  //the wysiwyg tool converts special characters to html entities in the HTML editor so they need to be converted back
683  $contents = html_entity_decode($contents, ENT_QUOTES, SQ_CONF_DEFAULT_CHARACTER_SET);
684  }
685 
686  $index_content[] = Array(
687  'value' => $contents,
688  'type_code' => $type_code,
689  'type' => $data_type,
690  'component' => $component,
691  'score' => $weighting,
692  'contextid' => $contextid,
693  'use_default' => $use_default,
694  );
695 
696  return $index_content;
697 
698  }//end splitIndexableContent()
699 
700 
713  {
714  $components = explode('; ', $value);
715  $value = '';
716  foreach ($components as $comp) {
717  $comp = trim($comp);
718  $value .= '[option]'.$comp.'[/option]';
719  }
720  return $value;
721 
722  }//end handleMultipleMetadataSelect()
723 
724 
725  //-- ORACLE TEXT EXTENSION FUNCTIONS --//
726 
727 
739  function _parseBrackets(&$terms)
740  {
741  if ($terms == '') return FALSE;
742  $reparse = FALSE;
743  $prev_terms = $terms;
744 
745  $open_brackets = 0;
746  $close_brackets = 0;
747  $last_close = -1;
748  $last_open = -1;
749 
750  for ($i = 0; $i < strlen($terms); $i++) {
751  if ($terms{$i} == ')') {
752  $last_close = $i;
753  if (($last_open != -1 && $last_close != -1) && $last_open < $last_close) {
754  if ($open_brackets == 0) {
755  $close_brackets++;
756  $last_open = -1;
757  } else {
758  $open_brackets--;
759  }
760  } else {
761  $close_brackets++;
762  }
763  } else if ($terms{$i} == '(') {
764  $open_brackets++;
765  $last_open = $i;
766  }
767  }
768 
769  if ($close_brackets != $open_brackets) {
770  if ($close_brackets > 0) {
771  $terms = str_repeat('(', $close_brackets).$terms;
772  $reparse = TRUE;
773  }
774  if ($open_brackets > 0) {
775  $terms = $terms.str_repeat(')', $open_brackets);
776  $reparse = TRUE;
777  }
778  } else if ($close_brackets > 0) {
779  $terms = '('.$terms.')';
780  }
781 
782  // remove any empty brackets
783  while (substr_count($terms, '()') != 0) {
784  $terms = str_replace('()', '', $terms);
785  }
786 
787  return ($prev_terms != $terms || $reparse);
788 
789  }//end _parseBrackets()
790 
791 
803  function _parseOperators(&$terms)
804  {
805  $prev_terms = $terms;
806  $operators = Array('|', '&', '~');
807 
808  // replace all occurances of 2 operators next to eachother with
809  // the second operator. We need the while loop because if there exists
810  // more than 2 operators next to eachother, we will reduce them by half
811  // and the next foreach iteration may have passed the new set of operators
812  // next to eachother
813 
814  $pattern_found = TRUE;
815  while ($pattern_found) {
816  $pattern_found = FALSE;
817  foreach ($operators as $op1) {
818  foreach ($operators as $op2) {
819  $prev_terms = $terms;
820  $terms = preg_replace('/(\\'.$op1.')(\\'.$op2.')/', '\\2', $terms);
821  if ($prev_terms != $terms) $pattern_found = TRUE;
822  }
823  }
824  }
825 
826  // the % indicates where operators are not permitted
827  $bad_op_patterns = Array(
828  '/^%(.+)/',
829  '/(.+)%$/',
830  '/(\()%(.+)/',
831  '/(.+)%(\))/',
832  '/^%$/',
833  );
834 
835  // strip out any operators that are in the wrong place
836  foreach ($operators as $op) {
837  foreach ($bad_op_patterns as $patt) {
838  $patt = str_replace('%', '\\'.$op, $patt);
839  $terms = preg_replace($patt, '\1\2\3', $terms);
840  if ($prev_terms != $terms) $pattern_found = TRUE;
841  }
842  }
843 
844  return ($prev_terms != $terms);
845 
846  }//end _parseOperators()
847 
848 
861  function _parseTerms(Search_Manager $sm, &$terms, $word_logic)
862  {
863  $prev_terms = $terms;
864  // 1 replace all space seperated words and phrases so that they are 'and'ed together
865  // so that oracle does not perform an exact phrase match on them
866  // 2 make sure that there are operators between brackets, if not add the & operator
867  // 3 all words next to brackets need an operator between them
868  // 4 all phrases next to brackets need an operator between them
869  // 5 all phrases and words need an operator between them
870 
871  $op = ($word_logic == 'OR') ? '|' : '&';
872 
873  $replacements = Array(
874  '/(#)+(#)+/U' => '\1'.$op.'\2',
875  '/(@)+(@)+/U' => '\1'.$op.'\2',
876  '/(\)+)([^\&\|\~\@\#])*(\()/U' => '\1'.$op.'\3',
877  '/(\))(#)/' => '\1'.$op.'\2',
878  '/(#)(\()/' => '\1'.$op.'\2',
879  '/(\))(@)/' => '\1'.$op.'\2',
880  '/(@)(\()/' => '\1'.$op.'\2',
881  '/(#)(@)/' => '\1'.$op.'\2',
882  '/(@)(#)/' => '\1'.$op.'\2',
883  );
884  $terms = preg_replace(array_keys($replacements), $replacements, $terms);
885 
886  return ($terms != $prev_terms);
887 
888  }//end _parseTerms()
889 
890 
901  function _replaceOperators($terms, $clear=FALSE)
902  {
903  $ops = Array(
904  'and' => '&',
905  'not' => '~',
906  'or' => '|',
907  );
908 
909  $sym = ($clear) ? ' ' : '!';
910 
911  $match_chars = '[\r\n\t ]';
912 
913  $pats = Array(
914  '/('.$match_chars.'+)%('.$match_chars.'+)/' => '\1'.$sym.'\2',
915  '/('.$match_chars.'+)%('.$match_chars.'+)/U' => '\1'.$sym.'\2',
916  '/^%('.$match_chars.'+)/U' => $sym.'\1',
917  '/('.$match_chars.'+)%$/U' => '\1'.$sym.'\2',
918  '/^%$/' => $sym,
919  );
920 
921  foreach ($ops as $op => $sym) {
922  foreach ($pats as $pat => $rep) {
923  $pat = str_replace('%', $op, $pat);
924  $rep = str_replace('!', $sym, $rep);
925  $terms = preg_replace($pat, $rep, $terms);
926  }
927  }
928 
929  return $terms;
930 
931  }//end _replaceOperators()
932 
933 
948  function _getWords(&$terms, $remove_stop_words=FALSE, $return_stop_words=FALSE, $replace=FALSE, $ignore_chars=Array())
949  {
950  $preg_ignore_chars = Array();
951  foreach ($ignore_chars as $char) {
952  $preg_ignore_chars[] = preg_quote($char, '/');
953  }
954 
955  // We need to not only match (or NOT match, rather) whitespace as word
956  // breaks, but also the operators themselves. When we replace with
957  // hashes we need to match the hash so it doesn't match with a previous
958  // one
959  $match_chars = '[^\r\n\t \(\)\|\&\~\%]';
960  $replace_chars = '[^\r\n\t \(\)\|\&\~\%#'.implode('', $preg_ignore_chars).']';
961 
962  // replace all words with #
963  if (preg_match_all('/('.$match_chars.'+)/', $terms, $words)) {
964  // remove any # symbols as we are going to replace words
965  // with that symbol
966  if ($replace) {
967  $terms = preg_replace('/#/', '', $terms);
968  }
969 
970  $good_words = Array();
971  $sm = $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('search_manager');
972 
973  foreach ($words[1] as $word) {
974  if (empty($ignore_chars) || !in_array($word, $ignore_chars)) {
975  // If the word is indexable, then replace it with a # symbol
976  // (if we are replacing) and add it to the list of
977  // replacement words we'll put back in later
978  if ($sm->isWordIndexable($word)) {
979  if ($replace) {
980  $terms = preg_replace('/'.$replace_chars.'+/', '#', $terms, 1);
981  }
982 
983  $good_words[] = $word;
984 
985  } else {
986  // We're removing the word (if we are replacing - ie.
987  // we're replacing it with nothing) if it is not
988  // something that we can search on
989  if ($replace) {
990  $terms = preg_replace('/'.$replace_chars.'+/', '', $terms, 1);
991  }
992 
993  }
994  }
995  }
996  return $good_words;
997  }
998  return Array();
999 
1000  }//end _getWords()
1001 
1002 
1022  function _buildContainsQuery(Search_Manager $sm, $column, $terms, $word_logic='AND', $score=1, $exact=FALSE)
1023  {
1024  $phrase_placeholder = '@';
1025  $preg_phrase_placeholder = preg_quote($phrase_placeholder, '/');
1026 
1027  $exact_terms = $terms;
1028  $terms = strtolower($terms);
1029  // we can only process a string 255 chars or less with CONTAINS
1030  // anything else, we have to use LIKE
1031  $use_contains = (strlen($terms) <= 252);
1032 
1033  // remove all empty phrases
1034  $terms = str_replace('""', '', $terms);
1035 
1036  $good_phrases = Array();
1037  // get the phrase matching terms and replace them so that
1038  // we dont override them
1039  if (preg_match_all('/"([^"]+)"/', $terms, $phrases)) {
1040  $terms = preg_replace('/'.$preg_phrase_placeholder.'/', '', $terms);
1041 
1042  foreach ($phrases[1] as $phrase) {
1043  // verify that this phrase does not have any illegal characters.
1044  $phrase = preg_replace('/[^a-zA-Z0-9\s\.]/', ' ', $phrase);
1045  // if after bad character removal, the phrase is empty, remove it
1046  // from the phrase list and replace it in the terms string with a space
1047  // so that anything around it is seperated
1048 
1049  $phrase = trim($phrase);
1050 
1051  if ($phrase == '') {
1052  $replacement = ' ';
1053  } else {
1054  $good_phrases[] = $phrase;
1055  $replacement = $phrase_placeholder;
1056  }
1057  $terms = preg_replace('/"[^"]+"/', $replacement, $terms, 1);
1058  }
1059  }// end if match phrases
1060 
1061  $terms = $this->_replaceOperators($terms);
1062  $terms = preg_replace('/[^a-zA-Z0-9_\&\|\~'.$preg_phrase_placeholder.'\(\)\s]/', '&', $terms);
1063 
1064  // get the words from the terms string and replace them
1065  // with #, and remove non-indexable words
1066  // (make an exception for @, as it's our search phrase placeholder)
1067  $words = $this->_getWords($terms, TRUE, FALSE, TRUE, Array($phrase_placeholder));
1068 
1069  // remove spaces
1070  $terms = preg_replace('/\s+/', '', $terms);
1071 
1072  $reparse = TRUE;
1073  while ($reparse) {
1074  $reparse = FALSE;
1075  $reparse |= $this->_parseBrackets($terms);
1076  $reparse |= $this->_parseOperators($terms);
1077  $reparse |= $this->_parseTerms($sm, $terms, $word_logic);
1078  }
1079 
1080  // re-replace the terms
1081  foreach ($words as $word) {
1082  if ($exact) {
1083  $word = '{'.$word.'}';
1084  } else {
1085  // If using '%' searching, have to escape wildcards with '{ }'
1086  // (we think)
1087  $word = str_replace('_', '{_}', $word);
1088  $word = str_replace('%', '{%}', $word);
1089  }
1090 
1091  $word = $this->_escapeContainReservedChars($word);
1092  $word = $this->_escapeContainReservedWords($word);
1093 
1094  if (!$exact) {
1095  // Naming of this attribute is not good. If TRUE, full searching
1096  // is actually enabled!
1097  if ($sm->attr('enable_begins_with')) {
1098  $word = '%'.$word.'%';
1099  } else {
1100  $word = $word.'%';
1101  }
1102  }
1103 
1104  $terms = preg_replace('/#/', " $word ", $terms, 1);
1105  }
1106 
1107  // some operators might have slipped in if words
1108  // were broken apart and now have joined to create
1109  // a valid operator, so remove them.
1110  // eg an\d could become and after bad character removal
1111  $terms = $this->_replaceOperators($terms, TRUE);
1112 
1113  // re-replace the phrase matching terms
1114  foreach ($good_phrases as $phrase) {
1115  $terms = preg_replace('/'.$preg_phrase_placeholder.'/', '{'.$phrase.'}', $terms, 1);
1116  }
1117 
1118  // if there is nothing to search on
1119  if (!preg_match('/[^\r\n\t ]+/', str_replace('_', '', $terms))) {
1120  return FALSE;
1121  }
1122  if (trim($terms) == '') return FALSE;
1123 
1124  $multiple = FALSE;
1125  if (strpos($terms, '|') !== FALSE) $multiple = TRUE;
1126 
1127  if ($use_contains) {
1128  $sql = 'contains('.$column.', '.MatrixDAL::quote(trim($terms)).', '.$score.') > 0';
1129  } else {
1130  $sql = MatrixDAL::quote(trim($terms)).' like '.MatrixDAL::quote('%').' || '.$column.' || '.MatrixDAL::quote('%');
1131  }
1132 
1133  if ($exact && !$multiple) {
1134  $sql .= ' AND length(ai.value) = '.strlen($exact_terms);
1135  }
1136 
1137  return $sql;
1138 
1139  }//end _buildContainsQuery()
1140 
1141 
1152  function _getDateCompareWithClobSql($date1, $date2, $operator='=')
1153  {
1154  return 'DBMS_LOB.COMPARE('.$date1.', '.$date2.') '.$operator.' 0';
1155 
1156  }//end _getDateCompareWithClobSql()
1157 
1158 
1168  function getAssetidsByWordIntersection($source_id, $type=NULL)
1169  {
1170  // load the asset from source_id
1171  // get all the keywords from this asset
1172  // get all assets (of the supplied type) that contain any of the keywords in the source asset
1173  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
1174 
1175  $asset = $GLOBALS['SQ_SYSTEM']->am->getAsset($source_id);
1176  $keywords = $this->extractKeywords($asset);
1177 
1178  $query_array['select'][] = 'ai.assetid';
1179  $query_array['from'][] = 'sq_sch_idx ai';
1180 
1181  $query_array['where_joiner'] = 'AND';
1182  if ($type) {
1183  $query_array['where'][] = 'ai.type_code = '.MatrixDAL::quote($type);
1184  }
1185 
1186  $terms = implode(' or ', $keywords);
1187 
1188  // get the words from the terms string and remove non-indexable words
1189  $words = $this->_getWords($terms, TRUE, FALSE);
1190 
1191  $words = array_unique($words);
1192  $terms = implode('|', $words);
1193 
1194  $terms_len = strlen($terms);
1195 
1196  $found_ids = Array();
1197 
1198  while ($words) {
1199  $term_string_built = FALSE;
1200  $terms ='';
1201  while (!$term_string_built && !empty($words)) {
1202  $recent_term = array_pop($words);
1203  if (strlen($terms.$recent_term) < 200) {
1204  $escaped_term = $this->_escapeContainReservedChars($recent_term);
1205  $escaped_term = $this->_escapeContainReservedWords($escaped_term);
1206  $terms .= empty($terms) ? $escaped_term : '|'.$escaped_term;
1207  } else {
1208  array_push($words, $recent_term);
1209  $term_string_built = TRUE;
1210  }
1211  }
1212 
1213  $contains_str = 'contains(value, '.MatrixDAL::quote(trim($terms)).', 1) > 0';
1214 
1215  $paginated_query_array = $query_array;
1216  $paginated_query_array['where'][] = $contains_str;
1217  if (!empty($found_ids)) {
1218  $pag_query_array['where'][] = 'ai.assetid not in (\''.implode($found_ids,'\', \'').'\')';
1219  }
1220 
1221  $sql = implode_sql($paginated_query_array);
1222  $result = MatrixDAL::executeSqlAssoc($sql, 0);
1223  $found_ids = array_merge($found_ids, array_unique($result));
1224  }
1225 
1226  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
1227 
1228  return $found_ids;
1229 
1230  }//end getAssetidsByWordIntersection()
1231 
1232 
1239  function getMaxWordLength()
1240  {
1241  return NULL;
1242 
1243  }//end getMaxWordLength()
1244 
1245 
1256  function _escapeContainReservedWords($word)
1257  {
1258  $reserved_words = Array(
1259  'about' => '',
1260  'accum' => '',
1261  'and' => '',
1262  'bt' => '',
1263  'btg' => '',
1264  'bti' => '',
1265  'btp' => '',
1266  'fuzzy' => '',
1267  'haspath' => '',
1268  'inpath' => '',
1269  'minus' => '',
1270  'near' => '',
1271  'not' => '',
1272  'nt' => '',
1273  'ntg' => '',
1274  'nti' => '',
1275  'ntp' => '',
1276  'or' => '',
1277  'pt' => '',
1278  'rt' => '',
1279  'sqe' => '',
1280  'syn' => '',
1281  'tr' => '',
1282  'trsyn' => '',
1283  'tt' => '',
1284  'within' => '',
1285  );
1286  if (isset($reserved_words[$word])) {
1287  return '{'.$word.'}';
1288  } else {
1289  return $word;
1290  }
1291 
1292  }//end _escapeContainReservedWords()
1293 
1294 
1303  function _escapeContainReservedChars($word)
1304  {
1305  $reserved_chars = Array('\\',',','?','&','=','-',';','~','|','$','!','>','*','%','_','(',')','[',']','{','}');
1306  $escaped_chars = Array('\\\\','\,','\?','\&','\=','\-','\;','\~','\|','\$','\!','\>','\*','\%','\_','\(','\)','\[','\]','\{','\}');
1307 
1308  $escaped_word = str_replace($reserved_chars, $escaped_chars, $word);
1309 
1310  return $escaped_word;
1311 
1312 
1313  }//end _escapeContainReservedChars()
1314 
1315 
1325  function getWords($search_string)
1326  {
1327  return $this->_getWords($search_string);
1328 
1329  }//end getWords()
1330 
1331 
1341  function getSearchListAssets(&$search_list, &$search_page)
1342  {
1343  // get information about the search field
1344  $search_page_fields = $search_page->attr('fields');
1345  $search_field_name = $search_list->attr('search_field');
1346  if (empty($search_page_fields) || empty($search_field_name)) {
1347  // if search field has not been setup properly
1348  return Array();
1349  }
1350  $search_field = $search_page_fields[$search_field_name];
1351 
1352  $GLOBALS['SQ_SYSTEM']->changeDatabaseConnection('dbsearch');
1353  $sm = $GLOBALS['SQ_SYSTEM']->am->getSystemAsset('search_manager');
1354  $search_info = $search_page->populateBaseSearchInfo();
1355  $query_comps = $sm->constructBaseSearchQuery($search_info);
1356 
1357  $query_comps['select'][] = 'ai.assetid';
1358 
1359  $data_source_comps = Array();
1360  $data_source_comps_values = Array();
1361 
1362  foreach ($search_field['data_sources'] as $data_source) {
1363  switch ($data_source['type']) {
1364  case 'asset_attrib' :
1365  $type_code = $data_source['params']['asset_type'];
1366  $attrid = $data_source['params']['attrid'];
1367 
1368  $attr_info = $GLOBALS['SQ_SYSTEM']->am->getAttributeInfo(Array($attrid));
1369  $name = $attr_info[$attrid]['name'];
1370  $data_source_comps[] = 'ai.component = '.MatrixDAL::quote('attr:'.$name);
1371  $data_source_comps_values[] = MatrixDAL::quote('attr:'.$name);
1372  break;
1373  case 'metadata' :
1374  $assetid = $data_source['params']['assetid'];
1375  $data_source_comps[] = 'ai.component = '.MatrixDAL::quote('metadata:'.$assetid);
1376  $data_source_comps_values[] = MatrixDAL::quote('metadata:'.$assetid);
1377  break;
1378  case 'standard' :
1379  $param_field = $data_source['params']['field'];
1380  $data_source_comps[] = 'ai.component = '.MatrixDAL::quote('__'.$param_field.'__');
1381  $data_source_comps_values[] = MatrixDAL::quote('__'.$param_field.'__');
1382  break;
1383 
1384  default :
1385  return Array();
1386  break;
1387  }
1388  }
1389 
1390  if (!empty($data_source_comps)) {
1391  $query_comps['where'][] = '('.implode(') OR (', $data_source_comps).')';
1392  }
1393 
1394  $noise_words = array_keys($sm->attr('noise_word_list'));
1395  $white_words = array_keys($sm->attr('white_word_list'));
1396  $where_cond = '('.implode(') AND (',$query_comps['where']).')';
1397 
1398  $min_length = $sm->attr('min_word_length');
1399  $noise_words_str = '"'.implode('", "', $noise_words).'"';
1400  $white_words_str = '"'.implode('", "', $white_words).'"';
1401  $sql = 'SELECT * FROM table(
1402  sq_search_pkg.get_search_list(\''.
1403  str_replace('\'', '"', $where_cond).'\', \''.
1404  $min_length.'\', \''.
1405  $noise_words_str.'\', \''.
1406  $white_words_str.'\'))';
1407 
1408  $results = MatrixDAL::executeSqlAssoc($sql);
1409  $children = Array();
1410  if (!empty($results)) {
1411  foreach ($results as $ele) {
1412  $children[$ele['word']] = Array(
1413  'num_values' => $ele['hits'],
1414  'first_letter' => $ele['word']{0},
1415  'type_code' => '',
1416  );
1417  }
1418  }
1419 
1420  $GLOBALS['SQ_SYSTEM']->restoreDatabaseConnection();
1421 
1422  return $children;
1423 
1424  }//end getSearchListAssets()
1425 
1426 
1427 }//end class