Squiz Matrix  4.12.2
 All Data Structures Namespaces Functions Variables Pages
rss_feed_reader.inc
1 <?php
30 {
31 
// Names of the elements that are currently open: pushed by startElementHandler(), popped by endElementHandler().
37  public $open_tags = Array();
38 
// Element names pushed by startElementHandler() and only conditionally popped by endElementHandler();
// characterHandler() counts how often a name occurs here to pick the index its data is stored under.
44  public $parent_tags = Array();
45 
// Data stored by characterHandler() under ITEM (or Atom ENTRY) elements; returned by getItems().
51  public $item = Array();
52 
// Data stored by characterHandler() under IMAGE elements; returned by getImageInfo().
58  public $image = Array();
59 
// Data stored by characterHandler() under CHANNEL (or Atom FEED) elements; returned by getChannelInfo().
65  public $channel = Array();
66 
// Data stored by characterHandler() under TEXTINPUT elements; returned by getTextInput().
72  public $textinput = Array();
73 
// Attributes of the most recently opened element that had any (set by startElementHandler()).
79  public $attributes = Array();
80 
// Data for elements that have no recognised container among their open ancestors; returned by getUnrecognisedElements().
86  public $unrecognised = Array();
87 
// Name of the most recently opened element (set by startElementHandler(), not reset when the element closes).
93  public $element = '';
94 
// Input source: the handle opened by setInputFile(), or the raw XML string given to setInputString().
100  public $fp = NULL;
101 
// XML parser created in the constructor.
107  public $parser;
108 
// Feed type set by startElementHandler(): 'atom_1.0', 'rss_1.0', or 'rss_' followed by the RSS VERSION attribute.
114  public $type = '';
115 
// Set to TRUE as soon as parse() starts working on a non-empty input, whether or not parsing succeeds.
121  public $parsed = FALSE;
122 
123 
/**
* Constructor
*
* Creates the XML parser and registers this object's element and
* character-data handlers with it.
*
* @access public
*/
public function __construct()
{
    $this->parser = xml_parser_create();

    // Register the handlers as object callables instead of pairing
    // xml_set_object() with bare method-name strings; that combination is
    // deprecated as of PHP 8.4, while callables work on every PHP version.
    xml_set_element_handler(
        $this->parser,
        Array($this, 'startElementHandler'),
        Array($this, 'endElementHandler')
    );
    xml_set_character_data_handler($this->parser, Array($this, 'characterHandler'));

}//end constructor
137 
138 
/**
* Opens the feed found at a local path or a URL so that parse() can read it
*
* The pseudo scheme "feed:" is normalised first:
*   feed:example.com/rss          => http://example.com/rss
*   feed://example.com/rss        => http://example.com/rss
*   feed:http://example.com/rss   => http://example.com/rss
*   feed://https://example.com/   => https://example.com/
*
* http(s) and ftp URLs are opened with open_socket_connection() and need
* the allow_url_fopen ini setting; anything else is opened with fopen().
*
* @param string	$file	path or URL of the feed
*
* @return boolean	TRUE when the input was opened, FALSE otherwise (an error is triggered)
* @access public
*/
public function setInputFile($file='')
{
    if (empty($file)) {
        trigger_error('No File Path Or URL Entered', E_USER_ERROR);
        return FALSE;
    }

    // When "feed:" merely prefixes a real http/https URL, drop the prefix and
    // keep the URL's own scheme. Previously "feed:https://host" was turned
    // into the unusable "http://https://host".
    $file = preg_replace('%^feed:(//)?(?=https?://)%', '', $file);
    // Any remaining "feed:" / "feed://" prefix stands in for plain http
    $file = preg_replace('%^feed:(//)?%', 'http://', $file);

    if (preg_match('%^(http[s]*|ftp)://%', substr($file, 0, 10))) {
        if (!ini_get('allow_url_fopen')) {
            trigger_error('Remote Files Cannot Be Parsed', E_USER_WARNING);
            return FALSE;
        }
        require_once(SQ_INCLUDE_PATH.'/general_occasional.inc');
        // $errno and $errstr are presumably filled in by reference - confirm against open_socket_connection()
        $this->fp = open_socket_connection($file, $errno, $errstr);
    } else {
        $this->fp = fopen($file, 'r');
    }

    if (!$this->fp) {
        trigger_error('Error occured during opening the file '.$file, E_USER_WARNING);
        return FALSE;
    }

    return TRUE;

}//end setInputFile()
181 
182 
/**
* Uses an XML string, rather than a file or URL, as the input for parse()
*
* @param string	$xml	the XML document to parse
*
* @return boolean	TRUE when the string was accepted, FALSE otherwise (an error is triggered)
* @access public
*/
public function setInputString($xml)
{
    // Work out why the input is unusable, if it is; emptiness is tested first
    $problem = NULL;
    if (empty($xml)) {
        $problem = 'Empty Strings Cannot Be Parsed';
    } else if (!is_string($xml)) {
        $problem = 'Only XML Strings Can Be Parsed';
    }

    if ($problem !== NULL) {
        trigger_error($problem, E_USER_ERROR);
        return FALSE;
    }

    // parse() tells string input from a file handle by the type of this value
    $this->fp = $xml;
    return TRUE;

}//end setInputString()
205 
206 
/**
* Parses the input that was set with setInputFile() or setInputString()
*
* Input from a handle is read 4KB at a time. Everything before the start of
* the XML is discarded so that HTTP headers (e.g. when a proxy is used) do
* not reach the parser. The start of the XML is the first "<?xml", or
* failing that the first "<rss", "<rdf" or "<feed" tag (tried in that order,
* matched case-insensitively).
*
* @return mixed	TRUE on success, otherwise a string describing the problem
* @access public
*/
public function parse()
{
    $fp = $this->fp;
    if (!$fp) {
        trigger_error('Invalid file pointer', E_USER_WARNING);
        return 'Invalid file pointer';
    }

    // Flagged before any parsing happens, so it is set even if parsing fails
    $this->parsed = TRUE;

    // NOTE(review): xml_parse() is never told which chunk is the final one, so
    // a truncated document is not reported as an error. Left unchanged because
    // callers may rely on this lenient behaviour.

    // XML handed over by setInputString() is parsed in one go
    if (is_string($fp)) {
        if (!xml_parse($this->parser, $fp)) {
            return 'Error in the feed';
        }
        return TRUE;
    }

    $found_xml = FALSE;
    // Read the XML file 4KB at a time
    while (!feof($fp)) {
        $data = fread($fp, 4096);
        if ($data === FALSE) {
            // a failed read would otherwise never reach EOF and loop forever
            break;
        }

        // find the XML so we can strip out possible http headers (when proxy is used)
        if (!$found_xml) {
            $start = strpos($data, '<?xml');
            if ($start === FALSE) {
                // try to find a valid XML start tag (in this order)
                foreach (Array('rss', 'rdf', 'feed') as $tag) {
                    // Only locate the tag; the data keeps its original case.
                    // Rewriting "<RSS" to "<rss" used to leave the closing
                    // "</RSS>" unmatched and made valid feeds fail.
                    $start = stripos($data, '<'.$tag);
                    if ($start !== FALSE) {
                        break;
                    }
                }
            }

            if ($start !== FALSE) {
                $data = substr($data, $start);
                $found_xml = TRUE;
            }
        }

        // chunks read before the XML starts are skipped
        if ($found_xml && !xml_parse($this->parser, $data)) {
            return 'Error in the feed';
        }
    }

    // still did not find the XML
    if (!$found_xml) {
        return 'Error in the feed';
    }

    return TRUE;

}//end parse()
271 
272 
/**
* Returns the feed type detected while parsing
*
* @return mixed	the type string, or NULL if no input has been set and parsed yet
* @access public
*/
public function getRssType()
{
    if (!$this->_issetFile()) {
        // _issetFile() has already triggered the error
        return NULL;
    }

    return $this->type;

}//end getRssType()
284 
285 
/**
* Returns the data collected for the items of the feed
*
* @return mixed	the item array, or NULL if no input has been set and parsed yet
* @access public
*/
public function getItems()
{
    if (!$this->_issetFile()) {
        // _issetFile() has already triggered the error
        return NULL;
    }

    return $this->item;

}//end getItems()
297 
298 
/**
* Returns the data collected for the text input of the feed
*
* @return mixed	the textinput array, or NULL if no input has been set and parsed yet
* @access public
*/
public function getTextInput()
{
    if (!$this->_issetFile()) {
        // _issetFile() has already triggered the error
        return NULL;
    }

    return $this->textinput;

}//end getTextInput()
310 
311 
/**
* Returns the data collected for the channel of the feed
*
* @return mixed	the channel array, or NULL if no input has been set and parsed yet
* @access public
*/
public function getChannelInfo()
{
    if (!$this->_issetFile()) {
        // _issetFile() has already triggered the error
        return NULL;
    }

    return $this->channel;

}//end getChannelInfo()
323 
324 
/**
* Returns the data collected for the image of the feed
*
* @return mixed	the image array, or NULL if no input has been set and parsed yet
* @access public
*/
public function getImageInfo()
{
    if (!$this->_issetFile()) {
        // _issetFile() has already triggered the error
        return NULL;
    }

    return $this->image;

}//end getImageInfo()
336 
337 
/**
* Returns the data collected for elements that were not recognised
*
* @return mixed	the unrecognised array, or NULL if no input has been set and parsed yet
* @access public
*/
public function getUnrecognisedElements()
{
    if (!$this->_issetFile()) {
        // _issetFile() has already triggered the error
        return NULL;
    }

    return $this->unrecognised;

}//end getUnrecognisedElements()
349 
350 
351 //-- INTERNAL FUNCTIONS FOR HANDLING TAGS AND DATA --//
352 
353 
354  //-- THIS HANDLES OPENING TAGS --//
355 
356 
/**
* Parser callback for an opening tag
*
* Maps the Atom element names onto the RSS ones used by the class
* properties, records the feed type, notes the element as open and hands
* any attributes on to characterHandler().
*
* @param resource	$xp			the XML parser
* @param string	$name		the element name as reported by the parser
* @param array		$attributes	the attributes of the element
*
* @return void
* @access public
*/
public function startElementHandler($xp, $name, $attributes)
{
    if (empty($name)) {
        return;
    }

    // Atom calls the channel "feed" and an item "entry"; rename them so the
    // data ends up in the channel and item properties
    if ($name == 'ENTRY') {
        $name = 'ITEM';
    } else if ($name == 'FEED') {
        $this->type = 'atom_1.0';
        $name = 'CHANNEL';
    } else if ($name == 'RSS') {
        $version = '';
        if (isset($attributes['VERSION'])) {
            $version = $attributes['VERSION'];
        }
        $this->type = 'rss_'.$version;
    } else if ($name == 'RDF') {
        $this->type = 'rss_1.0';
    }

    // The element is recorded in both lists; endElementHandler() prunes
    // them by different rules
    $this->open_tags[]   = $name;
    $this->parent_tags[] = $name;
    $this->element       = $name;

    // attributes are stored through the character handler, flagged as attributes
    if (!empty($attributes)) {
        $this->attributes = $attributes;
        $this->characterHandler($xp, $attributes, TRUE);
    }

}//end startElementHandler()
403 
404 
405  //-- THIS HANDLES CLOSING TAGS --//
406 
407 
/**
* Parser callback for a closing tag
*
* The bookkeeping below is deliberately left exactly as it was:
* characterHandler() counts the names remaining in parent_tags to decide
* which index (e.g. which item) data belongs to.
*
* @param resource	$xp		the XML parser
* @param string	$name	the element name as reported by the parser
*
* @return void
* @access public
*/
public function endElementHandler($xp, $name)
{
    // same Atom => RSS renaming as in startElementHandler()
    if ($name == 'ENTRY') {
        $name = 'ITEM';
    } else if ($name == 'FEED') {
        $name = 'CHANNEL';
    }

    // First position of the name among the open tags. Nothing is done when
    // it is not found (FALSE) or when it sits at position 0.
    $position = array_search($name, $this->open_tags);
    if (!$position) {
        return;
    }

    // only the innermost open tag is ever removed
    if ($position == count($this->open_tags) - 1) {
        array_pop($this->open_tags);
    }

    // parent_tags loses its last entry only when that entry is the most
    // recently opened element
    $last_parent = count($this->parent_tags) - 1;
    if ($this->element == $this->parent_tags[$last_parent]) {
        array_pop($this->parent_tags);
    }

}//end endElementHandler()
440 
441 
442  //-- THIS HANDLES DATA --//
443 
444 
/**
* Parser callback for character data; also used to store attributes
*
* The data is filed under the nearest open element (the current element
* itself or one of its ancestors) whose lower-cased name is a data property
* of this class, e.g. "channel" or "item". When there is none it goes into
* the "unrecognised" property instead.
*
* Resulting layout for recognised data:
*   $this->{container}[index][element]['value' | '_attributes']
*   $this->{container}[index][parent][element]['value' | '_attributes']
* where "parent" is the tag directly enclosing the element when that tag is
* not the container itself.
*
* @param resource	$xp			the XML parser
* @param mixed		$data		the character data, or the attribute array when $attributes is TRUE
* @param boolean	$attributes	TRUE when $data holds the attributes of the current element
*
* @return void
* @access public
*/
public function characterHandler($xp, $data, $attributes=FALSE)
{
    // Nothing to store for an empty attribute list or whitespace-only text
    $is_attribute_list = is_array($data) && !empty($data);
    $is_text           = is_string($data) && trim($data) != '';
    if (!$is_attribute_list && !$is_text) {
        return;
    }

    $element = $this->element;

    // Properties that may act as data containers. The parser's own
    // bookkeeping properties are removed: a feed element that merely shares
    // its name with one of them (e.g. <element>, <parsed>, <open_tags>) used
    // to overwrite that property or cause a fatal error. Such elements are now
    // filed under their enclosing container like any other tag.
    $containers = get_class_vars(get_class($this));
    $internal   = Array('open_tags', 'parent_tags', 'attributes', 'element', 'fp', 'parser', 'type', 'parsed');
    foreach ($internal as $property) {
        unset($containers[$property]);
    }

    // Walk from the current element up through the open tags until a
    // container is found; $key ends up as -1 when there is none.
    // e.g. with open tags Array('CHANNEL', 'ITEM', 'LINK') the data of LINK
    // belongs to the item container.
    $element_key = array_search($element, $this->open_tags);
    $key         = ($element_key === FALSE) ? -1 : $element_key;
    for ( ; $key != -1; $key--) {
        if (isset($containers[strtolower($this->open_tags[$key])])) {
            break;
        }
    }

    // position of the tag directly enclosing the current element
    $enclosing_key = ($element_key === FALSE) ? -1 : $element_key - 1;

    // e.g. with open tags Array('CHANNEL', 'AUTHOR', 'NAME') the container is
    // channel and the parent of NAME is author
    $parent = NULL;
    if ($key != -1) {
        $var = strtolower($this->open_tags[$key]);
        if (isset($this->open_tags[$enclosing_key]) && strcasecmp($this->open_tags[$enclosing_key], $var) != 0) {
            $parent = strtolower($this->open_tags[$enclosing_key]);
        }
    } else {
        $var = 'unrecognised';
        if (isset($this->open_tags[$enclosing_key])) {
            $parent = strtolower($this->open_tags[$enclosing_key]);
        } else {
            $parent = strtolower($element);
        }
    }

    if ($var == 'unrecognised') {
        // Group the data by how often the parent tag is present in
        // parent_tags. A match at position 0 counts as "not found", as before.
        $index = 0;
        if (!empty($this->{$var}) && array_search(strtoupper($parent), $this->parent_tags)) {
            $occurrences = 0;
            foreach ($this->parent_tags as $tag_name) {
                if (strcasecmp($tag_name, $parent) == 0) {
                    $occurrences++;
                }
            }
            $index = $occurrences - 1;
        }

        // the element name keeps the case reported by the parser here
        if (!$attributes) {
            $this->{$var}[$parent][$index][$element]['value'] = $data;
        } else {
            $this->{$var}[$parent][$index][$element]['_attributes'] = $data;
        }
        return;
    }

    // RSS feeds should only have one LINK. But Atom may have multiple, with different REL attributes ('enclosure', 'alternate').
    // Since multiple links are not part of the design here, ensure we only get 'alternate' links if attributes are provided.
    // This is essentially a hack, but provides better Atom support than without it.
    // @see http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.link
    // @see http://bugs.matrix.squiz.net/view_bug.php?bug_id=4700
    if ($element == 'LINK') {
        if (is_array($data) && array_key_exists('REL', $data) && $data['REL'] != 'alternate') {
            return;
        }
    }

    // The index is the number of times the container's tag is present in
    // parent_tags, minus one. It is 0 while the container is still empty or
    // its tag is not found (a match at position 0 counts as not found).
    $index = 0;
    if (!empty($this->{$var}) && array_search(strtoupper($var), $this->parent_tags)) {
        $occurrences = 0;
        foreach ($this->parent_tags as $tag_name) {
            if (strcasecmp($tag_name, $var) == 0) {
                $occurrences++;
            }
        }
        $index = $occurrences - 1;
    }

    if (empty($element)) {
        return;
    }
    $element = strtolower($element);

    if (!$attributes) {
        // Text that arrives in several pieces (this happens when the data
        // has tags in it) is appended to what is already stored
        if (empty($parent)) {
            if (isset($this->{$var}[$index][$element]['value'])) {
                $data = $this->{$var}[$index][$element]['value'].$data;
            }
            $this->{$var}[$index][$element]['value'] = $data;
        } else {
            if (isset($this->{$var}[$index][$parent][$element]['value'])) {
                $data = $this->{$var}[$index][$parent][$element]['value'].$data;
            }
            $this->{$var}[$index][$parent][$element]['value'] = $data;
        }
    } else if ($var != $element) {
        // attributes of an element inside the container
        if (empty($parent)) {
            $this->{$var}[$index][$element]['_attributes'] = $data;
        } else {
            $this->{$var}[$index][$parent][$element]['_attributes'] = $data;
        }
    } else {
        // attributes of the container element itself
        $this->{$var}[$index]['_attributes'] = $data;
    }

}//end characterHandler()
594 
595 
/**
* Checks that an input has been set and that parse() has been run on it
*
* Triggers an error describing the first requirement that is not met.
*
* @return boolean	TRUE when the parsed data may be read, FALSE otherwise
* @access public
*/
public function _issetFile()
{
    $problem = NULL;
    if (!isset($this->fp)) {
        $problem = 'The XML That Has To Be Parsed Is To Be Set First Using setInputFile or setInputString';
    } else if (!$this->parsed) {
        $problem = 'The XML That Has To Be Parsed First Using parse() Function';
    }

    if ($problem !== NULL) {
        trigger_error($problem);
        return FALSE;
    }

    return TRUE;

}//end _issetFile()
615 
616 
617 //-- END OF INTERNAL FUNCTIONS --//
618 
619 
620 }//end class
621 
622 
623 ?>