| [ Index ] |
PHP Cross Reference of Limb3 |
[Summary view] [Print] [Text view]
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
//
// +----------------------------------------------------------------------+
// | PHP Version 4                                                        |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2002 The PHP Group                                |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.02 of the PHP license,      |
// | that is bundled with this package in the file LICENSE, and is        |
// | available at through the world-wide-web at                           |
// | http://www.php.net/license/3_0.txt.                                  |
// | If you did not receive a copy of the PHP license and are unable to   |
// | obtain it through the world-wide-web, please send a note to          |
// | license@php.net so we can mail you a copy immediately.               |
// +----------------------------------------------------------------------+
// | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |
// | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more  |
// | Authors: Many @ Sitepointforums Advanced PHP Forums                  |
// +----------------------------------------------------------------------+
//
// $Id: HTMLSax3.php,v 1.1 2004/06/02 14:09:19 hfuecks Exp $
//
/**
 * Main parser components
 * @package view
 * @version $Id: HTMLSax3.php,v 1.1 2004/06/02 14:09:19 hfuecks Exp $
 */
/**
 * Required classes
 */
if (!defined('XML_HTMLSAX3')) {
    define('XML_HTMLSAX3', dirname(__FILE__) . '/');
}
require_once (XML_HTMLSAX3 . 'HTMLSax3/States.php');
require_once (XML_HTMLSAX3 . 'HTMLSax3/Decorators.php');

/**
 * Base State Parser
 *
 * Holds the document text, the current read position, the user supplied
 * handler objects / method names and the table of state objects. The
 * scanning helpers scanUntilCharacters() and ignoreWhitespace() are empty
 * here and are implemented by the concrete subclasses.
 * @package view
 * @access protected
 * @abstract
 */
class XML_HTMLSax3_StateParser {
    /**
     * Instance of user front end class to be passed to callbacks
     * @var XML_HTMLSax3
     * @access private
     */
    var $htmlsax;
    /**
     * User defined object for handling elements
     * @var object
     * @access private
     */
    var $handler_object_element;
    /**
     * User defined open tag handler method
     * @var string
     * @access private
     */
    var $handler_method_opening;
    /**
     * User defined close tag handler method
     * @var string
     * @access private
     */
    var $handler_method_closing;
    /**
     * User defined object for handling data in elements
     * @var object
     * @access private
     */
    var $handler_object_data;
    /**
     * User defined data handler method
     * @var string
     * @access private
     */
    var $handler_method_data;
    /**
     * User defined object for handling processing instructions
     * @var object
     * @access private
     */
    var $handler_object_pi;
    /**
     * User defined processing instruction handler method
     * @var string
     * @access private
     */
    var $handler_method_pi;
    /**
     * User defined object for handling JSP/ASP tags
     * @var object
     * @access private
     */
    var $handler_object_jasp;
    /**
     * User defined JSP/ASP handler method
     * @var string
     * @access private
     */
    var $handler_method_jasp;
    /**
     * User defined object for handling XML escapes
     * @var object
     * @access private
     */
    var $handler_object_escape;
    /**
     * User defined XML escape handler method
     * @var string
     * @access private
     */
    var $handler_method_escape;
    /**
     * User defined handler object or NullHandler
     * @var object
     * @access private
     */
    var $handler_default;
    /**
     * Parser options determining parsing behavior
     * @var array
     * @access private
     */
    var $parser_options = array();
    /**
     * XML document being parsed
     * @var string
     * @access private
     */
    var $rawtext;
    /**
     * Position in XML document relative to start (0)
     * @var int
     * @access private
     */
    var $position;
    /**
     * Length of the XML document as reported by strlen()
     * @var int
     * @access private
     */
    var $length;
    /**
     * Array of state objects, keyed by the XML_HTMLSAX3_STATE_* constants
     * @var array
     * @access private
     */
    var $State = array();

    /**
     * Constructs XML_HTMLSax3_StateParser setting up states
     *
     * Thin entry point for PHP versions that only recognise __construct();
     * the work is done by the class-named method so that subclasses calling
     * parent::XML_HTMLSax3_StateParser() keep working unchanged.
     * @param XML_HTMLSax3 instance of user front end class
     * @access protected
     */
    function __construct(& $htmlsax) {
        $this->XML_HTMLSax3_StateParser($htmlsax);
    }

    /**
     * Stores the front end instance and creates one state object per
     * parser state.
     * @param XML_HTMLSax3 instance of user front end class
     * @access protected
     */
    function XML_HTMLSax3_StateParser(& $htmlsax) {
        $this->htmlsax = & $htmlsax;

        // Plain assignment: "=& new" is rejected by the parser from PHP 7.0.
        $this->State[XML_HTMLSAX3_STATE_START] = new XML_HTMLSax3_StartingState();

        $this->State[XML_HTMLSAX3_STATE_CLOSING_TAG] = new XML_HTMLSax3_ClosingTagState();
        $this->State[XML_HTMLSAX3_STATE_TAG] = new XML_HTMLSax3_TagState();
        $this->State[XML_HTMLSAX3_STATE_OPENING_TAG] = new XML_HTMLSax3_OpeningTagState();

        $this->State[XML_HTMLSAX3_STATE_PI] = new XML_HTMLSax3_PiState();
        $this->State[XML_HTMLSAX3_STATE_JASP] = new XML_HTMLSax3_JaspState();
        $this->State[XML_HTMLSAX3_STATE_ESCAPE] = new XML_HTMLSax3_EscapeState();
    }

    /**
     * Moves the position back one character
     * @access protected
     * @return void
     */
    function unscanCharacter() {
        $this->position -= 1;
    }

    /**
     * Moves the position forward one character
     * @access protected
     * @return void
     */
    function ignoreCharacter() {
        $this->position += 1;
    }

    /**
     * Returns the character at the current position and advances by one,
     * or NULL when the position is already at the end of the document
     * @access protected
     * @return mixed
     */
    function scanCharacter() {
        if ($this->position < $this->length) {
            // Square brackets: the {} string offset form was removed in PHP 8.0.
            return $this->rawtext[$this->position++];
        }
        return null;
    }

    /**
     * Returns a string from the current position to the next occurrence
     * of the supplied string. The position is left at the start of the
     * match, or at the end of the document if there is no match.
     * @param string string to search until
     * @access protected
     * @return string
     */
    function scanUntilString($string) {
        $start = $this->position;
        $found = strpos($this->rawtext, $string, $start);
        // No match: consume the remainder of the document.
        $this->position = ($found === FALSE) ? $this->length : $found;
        return substr($this->rawtext, $start, $this->position - $start);
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument
     * @param string string to search until
     * @access protected
     * @return string
     * @abstract
     */
    function scanUntilCharacters($string) {}

    /**
     * Moves the position forward past any whitespace characters
     * @access protected
     * @return void
     * @abstract
     */
    function ignoreWhitespace() {}

    /**
     * Begins the parsing operation: wraps the configured handlers in
     * decorators for every parser option set to 1, stores the document,
     * resets the position to 0 and invokes _parse().
     *
     * The handler properties are replaced in place and are not restored
     * by this method.
     * @param string XML document to parse
     * @access protected
     * @return void
     */
    function parse($data) {
        // Each decorator gets its own local variable and the handler
        // property is re-bound by reference (=&) instead of assigned.
        // The decorator constructors are handed the handler property
        // itself. NOTE(review): they presumably keep a reference to it
        // (Decorators.php is not visible here); if so, a plain assignment,
        // or re-using one variable already bound to the property, would
        // overwrite the wrapped handler with the decorator itself.
        if ($this->parser_options['XML_OPTION_TRIM_DATA_NODES']==1) {
            $trimmer = new XML_HTMLSax3_Trim(
                $this->handler_object_data,
                $this->handler_method_data);
            $this->handler_object_data =& $trimmer;
            $this->handler_method_data = 'trimData';
        }
        if ($this->parser_options['XML_OPTION_CASE_FOLDING']==1) {
            $folder = new XML_HTMLSax3_CaseFolding(
                $this->handler_object_element,
                $this->handler_method_opening,
                $this->handler_method_closing);
            $this->handler_object_element =& $folder;
            $this->handler_method_opening ='foldOpen';
            $this->handler_method_closing ='foldClose';
        }
        if ($this->parser_options['XML_OPTION_LINEFEED_BREAK']==1) {
            $linefeed = new XML_HTMLSax3_Linefeed(
                $this->handler_object_data,
                $this->handler_method_data);
            $this->handler_object_data =& $linefeed;
            $this->handler_method_data = 'breakData';
        }
        if ($this->parser_options['XML_OPTION_TAB_BREAK']==1) {
            $tab = new XML_HTMLSax3_Tab(
                $this->handler_object_data,
                $this->handler_method_data);
            $this->handler_object_data =& $tab;
            $this->handler_method_data = 'breakData';
        }
        if ($this->parser_options['XML_OPTION_ENTITIES_UNPARSED']==1) {
            $unparsed = new XML_HTMLSax3_Entities_Unparsed(
                $this->handler_object_data,
                $this->handler_method_data);
            $this->handler_object_data =& $unparsed;
            $this->handler_method_data = 'breakData';
        }
        if ($this->parser_options['XML_OPTION_ENTITIES_PARSED']==1) {
            $parsed = new XML_HTMLSax3_Entities_Parsed(
                $this->handler_object_data,
                $this->handler_method_data);
            $this->handler_object_data =& $parsed;
            $this->handler_method_data = 'breakData';
        }
        // NOTE(review): this spot used to carry the remark "switched on by
        // default", but both concrete parsers in this file initialise
        // XML_OPTION_STRIP_ESCAPES to 0.
        if ($this->parser_options['XML_OPTION_STRIP_ESCAPES']==1) {
            $stripper = new XML_HTMLSax3_Escape_Stripper(
                $this->handler_object_escape,
                $this->handler_method_escape);
            $this->handler_object_escape =& $stripper;
            $this->handler_method_escape = 'strip';
        }
        $this->rawtext = $data;
        $this->length = strlen($data);
        $this->position = 0;
        $this->_parse();
    }

    /**
     * Performs the parsing itself, delegating calls to a specific parser
     * state. Each state's parse() returns the key of the next state; the
     * loop runs at least once and ends when a state returns
     * XML_HTMLSAX3_STATE_STOP or the position reaches the document length.
     * @param constant key of the state object to start parsing with
     * @access protected
     * @return void
     */
    function _parse($state = XML_HTMLSAX3_STATE_START) {
        do {
            $state = $this->State[$state]->parse($this);
        } while ($state != XML_HTMLSAX3_STATE_STOP &&
                    $this->position < $this->length);
    }
}

/**
 * Parser for PHP Versions below 4.3.0.
Uses a slower parsing mechanism than
 * the equivalent PHP 4.3.0+ subclass of StateParser
 * @package view
 * @access protected
 * @see XML_HTMLSax3_StateParser_Gtet430
 */
class XML_HTMLSax3_StateParser_Lt430 extends XML_HTMLSax3_StateParser {
    /**
     * Constructs XML_HTMLSax3_StateParser_Lt430 defining available
     * parser options
     *
     * Entry point for PHP versions that only recognise __construct();
     * forwards to the class-named method, which does the work.
     * @param XML_HTMLSax3 instance of user front end class
     * @access protected
     */
    function __construct(& $htmlsax) {
        $this->XML_HTMLSax3_StateParser_Lt430($htmlsax);
    }

    /**
     * Runs the parent set-up, then registers every known parser option
     * with a value of 0 (off).
     * @param XML_HTMLSax3 instance of user front end class
     * @access protected
     */
    function XML_HTMLSax3_StateParser_Lt430(& $htmlsax) {
        parent::XML_HTMLSax3_StateParser($htmlsax);
        $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0;
        $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0;
        $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0;
        $this->parser_options['XML_OPTION_TAB_BREAK'] = 0;
        $this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0;
        $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0;
        $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0;
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument, advancing the
     * position one character at a time
     * @param string string to search until
     * @access protected
     * @return string
     */
    function scanUntilCharacters($string) {
        $startpos = $this->position;
        // Square brackets: the {} string offset form was removed in PHP 8.0.
        while ($this->position < $this->length &&
                strpos($string, $this->rawtext[$this->position]) === FALSE) {
            $this->position++;
        }
        return substr($this->rawtext, $startpos, $this->position - $startpos);
    }

    /**
     * Moves the position forward past any whitespace characters
     * (space, linefeed, carriage return, tab)
     * @access protected
     * @return void
     */
    function ignoreWhitespace() {
        while ($this->position < $this->length &&
                strpos(" \n\r\t", $this->rawtext[$this->position]) !== FALSE) {
            $this->position++;
        }
    }

    /**
     * Begins the parsing operation. Adds nothing of its own: the call is
     * passed straight to the parent implementation.
     * @param string XML document to parse
     * @access protected
     * @return void
     */
    function parse($data) {
        parent::parse($data);
    }
}

/**
 * Parser for PHP Versions equal to or greater than 4.3.0. Uses a faster
 * parsing mechanism than the equivalent PHP < 4.3.0 subclass of StateParser
 * @package view
 * @access protected
 * @see XML_HTMLSax3_StateParser_Lt430
 */
class XML_HTMLSax3_StateParser_Gtet430 extends XML_HTMLSax3_StateParser {
    /**
     * Constructs XML_HTMLSax3_StateParser_Gtet430 defining available
     * parser options
     *
     * Entry point for PHP versions that only recognise __construct();
     * forwards to the class-named method, which does the work.
     * @param XML_HTMLSax3 instance of user front end class
     * @access protected
     */
    function __construct(& $htmlsax) {
        $this->XML_HTMLSax3_StateParser_Gtet430($htmlsax);
    }

    /**
     * Runs the parent set-up, then registers every known parser option
     * with a value of 0 (off).
     * @param XML_HTMLSax3 instance of user front end class
     * @access protected
     */
    function XML_HTMLSax3_StateParser_Gtet430(& $htmlsax) {
        parent::XML_HTMLSax3_StateParser($htmlsax);
        $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0;
        $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0;
        $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0;
        $this->parser_options['XML_OPTION_TAB_BREAK'] = 0;
        $this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0;
        $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0;
        $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0;
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument, using
     * strcspn() with a start offset.
     * @param string string to search until
     * @access protected
     * @return string
     */
    function scanUntilCharacters($string) {
        $startpos = $this->position;
        $length = strcspn($this->rawtext, $string, $startpos);
        $this->position += $length;
        return substr($this->rawtext, $startpos, $length);
    }

    /**
     * Moves the position forward past any whitespace characters
     * (space, linefeed, carriage return, tab), using strspn()
     * @access protected
     * @return void
     */
    function ignoreWhitespace() {
        $this->position += strspn($this->rawtext, " \n\r\t", $this->position);
    }

    /**
     * Begins the parsing operation. Adds nothing of its own: the call is
     * passed straight to the parent implementation.
     * @param string XML document to parse
     * @access protected
     * @return void
     */
    function parse($data) {
        parent::parse($data);
    }
}

/**
 * Default NullHandler for methods which were not set by user
 * @package view
 * @access protected
 */
class XML_HTMLSax3_NullHandler {
    /**
     * Generic handler method which does nothing
     * @access protected
     * @return void
     */
    function DoNothing() {
    }
}

/**
 * User interface class.
All user calls should only be made to this class
 * @package view
 * @access public
 */
class XML_HTMLSax3 {
    /**
     * Instance of concrete subclass of XML_HTMLSax3_StateParser
     * @var XML_HTMLSax3_StateParser
     * @access private
     */
    var $state_parser;

    /**
     * Constructs XML_HTMLSax3 selecting concrete StateParser subclass
     * depending on PHP version being used as well as setting the default
     * NullHandler for all callbacks<br />
     * <b>Example:</b>
     * <pre>
     * $myHandler = new MyHandler();
     * $parser = new XML_HTMLSax3();
     * $parser->set_object($myHandler);
     * $parser->set_option('XML_OPTION_CASE_FOLDING');
     * $parser->set_element_handler('myOpenHandler','myCloseHandler');
     * $parser->set_data_handler('myDataHandler');
     * $parser->parser($xml);
     * </pre>
     * NOTE(review): the last example line probably means parse() rather
     * than parser(); the method is outside this excerpt, please confirm.
     *
     * Entry point for PHP versions that only recognise __construct();
     * forwards to the class-named method, which does the work.
     * @access public
     */
    function __construct() {
        $this->XML_HTMLSax3();
    }

    /**
     * Creates the state parser matching the running PHP version and points
     * every callback at the DoNothing method of a NullHandler instance.
     * @access public
     */
    function XML_HTMLSax3() {
        // Plain assignment: "=& new" is rejected by the parser from PHP 7.0.
        if (version_compare(phpversion(), '4.3', 'ge')) {
            $this->state_parser = new XML_HTMLSax3_StateParser_Gtet430($this);
        } else {
            $this->state_parser = new XML_HTMLSax3_StateParser_Lt430($this);
        }
        $nullhandler = new XML_HTMLSax3_NullHandler();
        $this->set_object($nullhandler);
        $this->set_element_handler('DoNothing', 'DoNothing');
        $this->set_data_handler('DoNothing');
        $this->set_pi_handler('DoNothing');
        $this->set_jasp_handler('DoNothing');
        $this->set_escape_handler('DoNothing');
    }

    /**
     * Sets the user defined handler object. Returns a PEAR Error
     * if supplied argument is not an object; test the result with
     * PEAR::isError() rather than for truthiness.
     * @param object handler object containing SAX callback methods
     * @access public
     * @return mixed true on success, otherwise the PEAR Error
     */
    function set_object(&$object) {
        if ( !is_object($object) ) {
            require_once('PEAR.php');
            // Hand the error back to the caller; it used to be raised and
            // then dropped, so the method returned nothing on failure.
            return PEAR::raiseError('XML_HTMLSax3::set_object requires '.
                'an object instance');
        }
        $this->state_parser->handler_default =& $object;
        return true;
    }

    /**
     * Sets a parser option. By default all options are switched off.
     * Returns a PEAR Error if option is invalid; test the result with
     * PEAR::isError() rather than for truthiness.<br />
     * <b>Available options:</b>
     * <ul>
     * <li>XML_OPTION_TRIM_DATA_NODES: trim whitespace off the beginning
     * and end of data passed to the data handler</li>
     * <li>XML_OPTION_LINEFEED_BREAK: linefeeds result in additional data
     * handler calls</li>
     * <li>XML_OPTION_TAB_BREAK: tabs result in additional data handler
     * calls</li>
     * <li>XML_OPTION_ENTITIES_UNPARSED: XML entities are returned as
     * separate data handler calls in unparsed form</li>
     * <li>XML_OPTION_ENTITIES_PARSED: (PHP 4.3.0+ only) XML entities are
     * returned as separate data handler calls and are parsed with
     * PHP's html_entity_decode() function</li>
     * <li>XML_OPTION_STRIP_ESCAPES: strips out the -- -- comment markers
     * or CDATA markup inside an XML escape, if found.</li>
     * </ul>
     * To get HTMLSax to behave in the same way as the native PHP SAX parser,
     * using its default state, you need to switch on XML_OPTION_LINEFEED_BREAK,
     * XML_OPTION_ENTITIES_PARSED and XML_OPTION_CASE_FOLDING
     * @param string name of parser option
     * @param int (optional) 1 to switch on, 0 for off
     * @access public
     * @return mixed true on success, otherwise the PEAR Error
     */
    function set_option($name, $value=1) {
        // Only names already registered by the state parser are accepted.
        if ( !array_key_exists($name,$this->state_parser->parser_options) ) {
            require_once('PEAR.php');
            // Hand the error back to the caller; it used to be raised and
            // then dropped, so the method returned nothing on failure.
            return PEAR::raiseError('XML_HTMLSax3::set_option('.$name.') illegal');
        }
        $this->state_parser->parser_options[$name] = $value;
        return true;
    }

    /**
     * Sets the data handler method which deals with the contents of XML
     * elements.<br />
     * The handler method must accept two arguments, the first being an
     * instance of XML_HTMLSax3 and the second being the contents of an
     * XML element e.g.
560 * <pre> 561 * function myDataHander(& $parser,$data){} 562 * </pre> 563 * @param string name of method 564 * @access public 565 * @return void 566 * @see set_object 567 */ 568 function set_data_handler($data_method) { 569 $this->state_parser->handler_object_data =& $this->state_parser->handler_default; 570 $this->state_parser->handler_method_data = $data_method; 571 } 572 573 /** 574 * Sets the open and close tag handlers 575 * <br />The open handler method must accept three arguments; the parser, 576 * the tag name and an array of attributes e.g. 577 * <pre> 578 * function myOpenHander(& $parser,$tagname,$attrs=array()){} 579 * </pre> 580 * The close handler method must accept two arguments; the parser and 581 * the tag name e.g. 582 * <pre> 583 * function myCloseHander(& $parser,$tagname){} 584 * </pre> 585 * @param string name of open method 586 * @param string name of close method 587 * @access public 588 * @return void 589 * @see set_object 590 */ 591 function set_element_handler($opening_method, $closing_method) { 592 $this->state_parser->handler_object_element =& $this->