[ Index ]

PHP Cross Reference of Limb3

title

Body

[close]

/view/lib/XML/ -> HTMLSax3.php (source)

   1  <?php
   2  /* vim: set expandtab tabstop=4 shiftwidth=4: */

   3  //

   4  // +----------------------------------------------------------------------+

   5  // | PHP Version 4                                                        |

   6  // +----------------------------------------------------------------------+

   7  // | Copyright (c) 1997-2002 The PHP Group                                |

   8  // +----------------------------------------------------------------------+

   9  // | This source file is subject to version 2.02 of the PHP license,      |

  10  // | that is bundled with this package in the file LICENSE, and is        |

  11  // | available at through the world-wide-web at                           |

  12  // | http://www.php.net/license/3_0.txt.                                  |

  13  // | If you did not receive a copy of the PHP license and are unable to   |

  14  // | obtain it through the world-wide-web, please send a note to          |

  15  // | license@php.net so we can mail you a copy immediately.               |

  16  // +----------------------------------------------------------------------+

  17  // | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |

  18  // | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more  |

  19  // | Authors: Many @ Sitepointforums Advanced PHP Forums                  |

  20  // +----------------------------------------------------------------------+

  21  //

  22  // $Id: HTMLSax3.php,v 1.1 2004/06/02 14:09:19 hfuecks Exp $

  23  //

  24  /**

  25  * Main parser components

  26  * @package view

  27  * @version $Id: HTMLSax3.php,v 1.1 2004/06/02 14:09:19 hfuecks Exp $

  28  */
  29  /**

  30  * Required classes

  31  */
  32  if (!defined('XML_HTMLSAX3')) {
  33      define('XML_HTMLSAX3', dirname(__FILE__) . '/');
  34  }
  35  require_once (XML_HTMLSAX3 . 'HTMLSax3/States.php');
  36  require_once (XML_HTMLSAX3 . 'HTMLSax3/Decorators.php');
  37  
  38  /**

  39  * Base State Parser

  40  * @package view

  41  * @access protected

  42  * @abstract

  43  */
  44  class XML_HTMLSax3_StateParser {
  45      /**

  46      * Instance of user front end class to be passed to callbacks

  47      * @var XML_HTMLSax3

  48      * @access private

  49      */
  50      var $htmlsax;
  51      /**

  52      * User defined object for handling elements

  53      * @var object

  54      * @access private

  55      */
  56      var $handler_object_element;
  57      /**

  58      * User defined open tag handler method

  59      * @var string

  60      * @access private

  61      */
  62      var $handler_method_opening;
  63      /**

  64      * User defined close tag handler method

  65      * @var string

  66      * @access private

  67      */
  68      var $handler_method_closing;
  69      /**

  70      * User defined object for handling data in elements

  71      * @var object

  72      * @access private

  73      */
  74      var $handler_object_data;
  75      /**

  76      * User defined data handler method

  77      * @var string

  78      * @access private

  79      */
  80      var $handler_method_data;
  81      /**

  82      * User defined object for handling processing instructions

  83      * @var object

  84      * @access private

  85      */
  86      var $handler_object_pi;
  87      /**

  88      * User defined processing instruction handler method

  89      * @var string

  90      * @access private

  91      */
  92      var $handler_method_pi;
  93      /**

  94      * User defined object for handling JSP/ASP tags

  95      * @var object

  96      * @access private

  97      */
  98      var $handler_object_jasp;
  99      /**

 100      * User defined JSP/ASP handler method

 101      * @var string

 102      * @access private

 103      */
 104      var $handler_method_jasp;
 105      /**

 106      * User defined object for handling XML escapes

 107      * @var object

 108      * @access private

 109      */
 110      var $handler_object_escape;
 111      /**

 112      * User defined XML escape handler method

 113      * @var string

 114      * @access private

 115      */
 116      var $handler_method_escape;
 117      /**

 118      * User defined handler object or NullHandler

 119      * @var object

 120      * @access private

 121      */
 122      var $handler_default;
 123      /**

 124      * Parser options determining parsing behavior

 125      * @var array

 126      * @access private

 127      */
 128      var $parser_options = array();
 129      /**

 130      * XML document being parsed

 131      * @var string

 132      * @access private

 133      */
 134      var $rawtext;
 135      /**

 136      * Position in XML document relative to start (0)

 137      * @var int

 138      * @access private

 139      */
 140      var $position;
 141      /**

 142      * Length of the XML document in characters

 143      * @var int

 144      * @access private

 145      */
 146      var $length;
 147      /**

 148      * Array of state objects

 149      * @var array

 150      * @access private

 151      */
 152      var $State = array();
 153  
 154      /**

 155      * Constructs XML_HTMLSax3_StateParser setting up states

 156      * @var XML_HTMLSax3 instance of user front end class

 157      * @access protected

 158      */
 159      function XML_HTMLSax3_StateParser (& $htmlsax) {
 160          $this->htmlsax = & $htmlsax;
 161          $this->State[XML_HTMLSAX3_STATE_START] =& new XML_HTMLSax3_StartingState();
 162  
 163          $this->State[XML_HTMLSAX3_STATE_CLOSING_TAG] =& new XML_HTMLSax3_ClosingTagState();
 164          $this->State[XML_HTMLSAX3_STATE_TAG] =& new XML_HTMLSax3_TagState();
 165          $this->State[XML_HTMLSAX3_STATE_OPENING_TAG] =& new XML_HTMLSax3_OpeningTagState();
 166  
 167          $this->State[XML_HTMLSAX3_STATE_PI] =& new XML_HTMLSax3_PiState();
 168          $this->State[XML_HTMLSAX3_STATE_JASP] =& new XML_HTMLSax3_JaspState();
 169          $this->State[XML_HTMLSAX3_STATE_ESCAPE] =& new XML_HTMLSax3_EscapeState();
 170      }
 171  
 172      /**

 173      * Moves the position back one character

 174      * @access protected

 175      * @return void

 176      */
 177      function unscanCharacter() {
 178          $this->position -= 1;
 179      }
 180  
 181      /**

 182      * Moves the position forward one character

 183      * @access protected

 184      * @return void

 185      */
 186      function ignoreCharacter() {
 187          $this->position += 1;
 188      }
 189  
 190      /**

 191      * Returns the next character from the XML document or void if at end

 192      * @access protected

 193      * @return mixed

 194      */
 195      function scanCharacter() {
 196          if ($this->position < $this->length) {
 197              return $this->rawtext{$this->position++};
 198          }
 199      }
 200  
 201      /**

 202      * Returns a string from the current position to the next occurance

 203      * of the supplied string

 204      * @param string string to search until

 205      * @access protected

 206      * @return string

 207      */
 208      function scanUntilString($string) {
 209          $start = $this->position;
 210          $this->position = strpos($this->rawtext, $string, $start);
 211          if ($this->position === FALSE) {
 212              $this->position = $this->length;
 213          }
 214          return substr($this->rawtext, $start, $this->position - $start);
 215      }
 216  
 217      /**

 218      * Returns a string from the current position until the first instance of

 219      * one of the characters in the supplied string argument

 220      * @param string string to search until

 221      * @access protected

 222      * @return string

 223      * @abstract

 224      */
 225      function scanUntilCharacters($string) {}
 226  
 227      /**

 228      * Moves the position forward past any whitespace characters

 229      * @access protected

 230      * @return void

 231      * @abstract

 232      */
 233      function ignoreWhitespace() {}
 234  
 235      /**

 236      * Begins the parsing operation, setting up any decorators, depending on

 237      * parse options invoking _parse() to execute parsing

 238      * @param string XML document to parse

 239      * @access protected

 240      * @return void

 241      */
 242      function parse($data) {
 243          if ($this->parser_options['XML_OPTION_TRIM_DATA_NODES']==1) {
 244              $decorator =& new XML_HTMLSax3_Trim(
 245                  $this->handler_object_data,
 246                  $this->handler_method_data);
 247              $this->handler_object_data =& $decorator;
 248              $this->handler_method_data = 'trimData';
 249          }
 250          if ($this->parser_options['XML_OPTION_CASE_FOLDING']==1) {
 251              $open_decor =& new XML_HTMLSax3_CaseFolding(
 252                  $this->handler_object_element,
 253                  $this->handler_method_opening,
 254                  $this->handler_method_closing);
 255              $this->handler_object_element =& $open_decor;
 256              $this->handler_method_opening ='foldOpen';
 257              $this->handler_method_closing ='foldClose';
 258          }
 259          if ($this->parser_options['XML_OPTION_LINEFEED_BREAK']==1) {
 260              $decorator =& new XML_HTMLSax3_Linefeed(
 261                  $this->handler_object_data,
 262                  $this->handler_method_data);
 263              $this->handler_object_data =& $decorator;
 264              $this->handler_method_data = 'breakData';
 265          }
 266          if ($this->parser_options['XML_OPTION_TAB_BREAK']==1) {
 267              $decorator =& new XML_HTMLSax3_Tab(
 268                  $this->handler_object_data,
 269                  $this->handler_method_data);
 270              $this->handler_object_data =& $decorator;
 271              $this->handler_method_data = 'breakData';
 272          }
 273          if ($this->parser_options['XML_OPTION_ENTITIES_UNPARSED']==1) {
 274              $decorator =& new XML_HTMLSax3_Entities_Unparsed(
 275                  $this->handler_object_data,
 276                  $this->handler_method_data);
 277              $this->handler_object_data =& $decorator;
 278              $this->handler_method_data = 'breakData';
 279          }
 280          if ($this->parser_options['XML_OPTION_ENTITIES_PARSED']==1) {
 281              $decorator =& new XML_HTMLSax3_Entities_Parsed(
 282                  $this->handler_object_data,
 283                  $this->handler_method_data);
 284              $this->handler_object_data =& $decorator;
 285              $this->handler_method_data = 'breakData';
 286          }
 287          // Note switched on by default

 288          if ($this->parser_options['XML_OPTION_STRIP_ESCAPES']==1) {
 289              $decorator =& new XML_HTMLSax3_Escape_Stripper(
 290                  $this->handler_object_escape,
 291                  $this->handler_method_escape);
 292              $this->handler_object_escape =& $decorator;
 293              $this->handler_method_escape = 'strip';
 294          }
 295          $this->rawtext = $data;
 296          $this->length = strlen($data);
 297          $this->position = 0;
 298          $this->_parse();
 299      }
 300  
 301      /**

 302      * Performs the parsing itself, delegating calls to a specific parser

 303      * state

 304      * @param constant state object to parse with

 305      * @access protected

 306      * @return void

 307      */
 308      function _parse($state = XML_HTMLSAX3_STATE_START) {
 309          do {
 310              $state = $this->State[$state]->parse($this);
 311          } while ($state != XML_HTMLSAX3_STATE_STOP &&
 312                      $this->position < $this->length);
 313      }
 314  }
 315  
 316  /**

 317  * Parser for PHP Versions below 4.3.0. Uses a slower parsing mechanism than

 318  * the equivalent PHP 4.3.0+  subclass of StateParser

 319  * @package view

 320  * @access protected

 321  * @see XML_HTMLSax3_StateParser_Gtet430

 322  */
 323  class XML_HTMLSax3_StateParser_Lt430 extends XML_HTMLSax3_StateParser {
 324      /**

 325      * Constructs XML_HTMLSax3_StateParser_Lt430 defining available

 326      * parser options

 327      * @var XML_HTMLSax3 instance of user front end class

 328      * @access protected

 329      */
 330      function XML_HTMLSax3_StateParser_Lt430(& $htmlsax) {
 331          parent::XML_HTMLSax3_StateParser($htmlsax);
 332          $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0;
 333          $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0;
 334          $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0;
 335          $this->parser_options['XML_OPTION_TAB_BREAK'] = 0;
 336          $this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0;
 337          $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0;
 338          $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0;
 339      }
 340  
 341      /**

 342      * Returns a string from the current position until the first instance of

 343      * one of the characters in the supplied string argument

 344      * @param string string to search until

 345      * @access protected

 346      * @return string

 347      */
 348      function scanUntilCharacters($string) {
 349          $startpos = $this->position;
 350          while ($this->position < $this->length && strpos($string, $this->rawtext{$this->position}) === FALSE) {
 351              $this->position++;
 352          }
 353          return substr($this->rawtext, $startpos, $this->position - $startpos);
 354      }
 355  
 356      /**

 357      * Moves the position forward past any whitespace characters

 358      * @access protected

 359      * @return void

 360      */
 361      function ignoreWhitespace() {
 362          while ($this->position < $this->length &&
 363              strpos(" \n\r\t", $this->rawtext{$this->position}) !== FALSE) {
 364              $this->position++;
 365          }
 366      }
 367  
 368      /**

 369      * Begins the parsing operation, setting up the unparsed XML entities

 370      * decorator if necessary then delegating further work to parent

 371      * @param string XML document to parse

 372      * @access protected

 373      * @return void

 374      */
 375      function parse($data) {
 376          parent::parse($data);
 377      }
 378  }
 379  
 380  /**

 381  * Parser for PHP Versions equal to or greater than 4.3.0. Uses a faster

 382  * parsing mechanism than the equivalent PHP < 4.3.0 subclass of StateParser

 383  * @package view

 384  * @access protected

 385  * @see XML_HTMLSax3_StateParser_Lt430

 386  */
 387  class XML_HTMLSax3_StateParser_Gtet430 extends XML_HTMLSax3_StateParser {
 388      /**

 389      * Constructs XML_HTMLSax3_StateParser_Gtet430 defining available

 390      * parser options

 391      * @var XML_HTMLSax3 instance of user front end class

 392      * @access protected

 393      */
 394      function XML_HTMLSax3_StateParser_Gtet430(& $htmlsax) {
 395          parent::XML_HTMLSax3_StateParser($htmlsax);
 396          $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0;
 397          $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0;
 398          $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0;
 399          $this->parser_options['XML_OPTION_TAB_BREAK'] = 0;
 400          $this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0;
 401          $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0;
 402          $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0;
 403      }
 404      /**

 405      * Returns a string from the current position until the first instance of

 406      * one of the characters in the supplied string argument.

 407      * @param string string to search until

 408      * @access protected

 409      * @return string

 410      */
 411      function scanUntilCharacters($string) {
 412          $startpos = $this->position;
 413          $length = strcspn($this->rawtext, $string, $startpos);
 414          $this->position += $length;
 415          return substr($this->rawtext, $startpos, $length);
 416      }
 417  
 418      /**

 419      * Moves the position forward past any whitespace characters

 420      * @access protected

 421      * @return void

 422      */
 423      function ignoreWhitespace() {
 424          $this->position += strspn($this->rawtext, " \n\r\t", $this->position);
 425      }
 426  
 427      /**

 428      * Begins the parsing operation, setting up the parsed and unparsed

 429      * XML entity decorators if necessary then delegating further work

 430      * to parent

 431      * @param string XML document to parse

 432      * @access protected

 433      * @return void

 434      */
 435      function parse($data) {
 436          parent::parse($data);
 437      }
 438  }
 439  
 440  /**

 441  * Default NullHandler for methods which were not set by user

 442  * @package view

 443  * @access protected

 444  */
 445  class XML_HTMLSax3_NullHandler {
 446      /**

 447      * Generic handler method which does nothing

 448      * @access protected

 449      * @return void

 450      */
 451      function DoNothing() {
 452      }
 453  }
 454  
 455  /**

 456  * User interface class. All user calls should only be made to this class

 457  * @package view

 458  * @access public

 459  */
 460  class XML_HTMLSax3 {
 461      /**

 462      * Instance of concrete subclass of XML_HTMLSax3_StateParser

 463      * @var XML_HTMLSax3_StateParser

 464      * @access private

 465      */
 466      var $state_parser;
 467  
 468      /**

 469      * Constructs XML_HTMLSax3 selecting concrete StateParser subclass

 470      * depending on PHP version being used as well as setting the default

 471      * NullHandler for all callbacks<br />

 472      * <b>Example:</b>

 473      * <pre>

 474      * $myHandler = & new MyHandler();

 475      * $parser = new XML_HTMLSax3();

 476      * $parser->set_object($myHandler);

 477      * $parser->set_option('XML_OPTION_CASE_FOLDING');

 478      * $parser->set_element_handler('myOpenHandler','myCloseHandler');

 479      * $parser->set_data_handler('myDataHandler');

 480      * $parser->parser($xml);

 481      * </pre>

 482      * @access public

 483      */
 484      function XML_HTMLSax3() {
 485          if (version_compare(phpversion(), '4.3', 'ge')) {
 486              $this->state_parser =& new XML_HTMLSax3_StateParser_Gtet430($this);
 487          } else {
 488              $this->state_parser =& new XML_HTMLSax3_StateParser_Lt430($this);
 489          }
 490          $nullhandler =& new XML_HTMLSax3_NullHandler();
 491          $this->set_object($nullhandler);
 492          $this->set_element_handler('DoNothing', 'DoNothing');
 493          $this->set_data_handler('DoNothing');
 494          $this->set_pi_handler('DoNothing');
 495          $this->set_jasp_handler('DoNothing');
 496          $this->set_escape_handler('DoNothing');
 497      }
 498  
 499      /**

 500      * Sets the user defined handler object. Returns a PEAR Error

 501      * if supplied argument is not an object.

 502      * @param object handler object containing SAX callback methods

 503      * @access public

 504      * @return mixed

 505      */
 506      function set_object(&$object) {
 507          if ( is_object($object) ) {
 508              $this->state_parser->handler_default =& $object;
 509              return true;
 510          } else {
 511              require_once('PEAR.php');
 512              PEAR::raiseError('XML_HTMLSax3::set_object requires '.
 513                  'an object instance');
 514          }
 515      }
 516  
 517      /**

 518      * Sets a parser option. By default all options are switched off.

 519      * Returns a PEAR Error if option is invalid<br />

 520      * <b>Available options:</b>

 521      * <ul>

 522      * <li>XML_OPTION_TRIM_DATA_NODES: trim whitespace off the beginning

 523      * and end of data passed to the data handler</li>

 524      * <li>XML_OPTION_LINEFEED_BREAK: linefeeds result in additional data

 525      * handler calls</li>

 526      * <li>XML_OPTION_TAB_BREAK: tabs result in additional data handler

 527      * calls</li>

 528      * <li>XML_OPTION_ENTITIES_UNPARSED: XML entities are returned as

 529      * separate data handler calls in unparsed form</li>

 530      * <li>XML_OPTION_ENTITIES_PARSED: (PHP 4.3.0+ only) XML entities are

 531      * returned as separate data handler calls and are parsed with

 532      * PHP's html_entity_decode() function</li>

 533      * <li>XML_OPTION_STRIP_ESCAPES: strips out the -- -- comment markers

 534      * or CDATA markup inside an XML escape, if found.</li>

 535      * </ul>

 536      * To get HTMLSax to behave in the same way as the native PHP SAX parser,

 537      * using its default state, you need to switch on XML_OPTION_LINEFEED_BREAK,

 538      * XML_OPTION_ENTITIES_PARSED and XML_OPTION_CASE_FOLDING

 539      * @param string name of parser option

 540      * @param int (optional) 1 to switch on, 0 for off

 541      * @access public

 542      * @return boolean

 543      */
 544      function set_option($name, $value=1) {
 545          if ( array_key_exists($name,$this->state_parser->parser_options) ) {
 546              $this->state_parser->parser_options[$name] = $value;
 547              return true;
 548          } else {
 549              require_once('PEAR.php');
 550              PEAR::raiseError('XML_HTMLSax3::set_option('.$name.') illegal');
 551          }
 552      }
 553  
 554      /**

 555      * Sets the data handler method which deals with the contents of XML

 556      * elements.<br />

 557      * The handler method must accept two arguments, the first being an

 558      * instance of XML_HTMLSax3 and the second being the contents of an

 559      * XML element e.g.

 560      * <pre>

 561      * function myDataHandler(& $parser,$data){}

 562      * </pre>

 563      * @param string name of method

 564      * @access public

 565      * @return void

 566      * @see set_object

 567      */
 568      function set_data_handler($data_method) {
 569          $this->state_parser->handler_object_data =& $this->state_parser->handler_default;
 570          $this->state_parser->handler_method_data = $data_method;
 571      }
 572  
 573      /**

 574      * Sets the open and close tag handlers

 575      * <br />The open handler method must accept three arguments; the parser,

 576      * the tag name and an array of attributes e.g.

 577      * <pre>

 578      * function myOpenHander(& $parser,$tagname,$attrs=array()){}

 579      * </pre>

 580      * The close handler method must accept two arguments; the parser and

 581      * the tag name e.g.

 582      * <pre>

 583      * function myCloseHander(& $parser,$tagname){}

 584      * </pre>

 585      * @param string name of open method

 586      * @param string name of close method

 587      * @access public

 588      * @return void

 589      * @see set_object

 590      */
 591      function set_element_handler($opening_method, $closing_method) {
 592          $this->state_parser->handler_object_element =& $this->