[ Index ]

PHP Cross Reference of Limb3

title

Body

[close]

/tests_runner/lib/simpletest/ -> parser.php (source)

   1  <?php
   2      /**

   3       *    base include file for SimpleTest

   4       *    @package    SimpleTest

   5       *    @subpackage    MockObjects

   6       *    @version    $Id: parser.php 5999 2007-06-18 13:13:08Z pachanga $

   7       */
   8  
   9      /**#@+

  10       * Lexer mode stack constants

  11       */
  12      if (! defined('LEXER_ENTER')) {
  13          define('LEXER_ENTER', 1);
  14      }
  15      if (! defined('LEXER_MATCHED')) {
  16          define('LEXER_MATCHED', 2);
  17      }
  18      if (! defined('LEXER_UNMATCHED')) {
  19          define('LEXER_UNMATCHED', 3);
  20      }
  21      if (! defined('LEXER_EXIT')) {
  22          define('LEXER_EXIT', 4);
  23      }
  24      if (! defined('LEXER_SPECIAL')) {
  25          define('LEXER_SPECIAL', 5);
  26      }
  27      /**#@-*/

  28      
  29      /**

  30       *    Compounded regular expression. Any of

  31       *    the contained patterns could match and

  32       *    when one does, its label is returned.

  33       *    @package SimpleTest

  34       *    @subpackage WebTester

  35       */
  36      class ParallelRegex {
  37          var $_patterns;
  38          var $_labels;
  39          var $_regex;
  40          var $_case;
  41          
  42          /**

  43           *    Constructor. Starts with no patterns.

  44           *    @param boolean $case    True for case sensitive, false

  45           *                            for insensitive.

  46           *    @access public

  47           */
  48          function ParallelRegex($case) {
  49              $this->_case = $case;
  50              $this->_patterns = array();
  51              $this->_labels = array();
  52              $this->_regex = null;
  53          }
  54          
  55          /**

  56           *    Adds a pattern with an optional label.

  57           *    @param string $pattern      Perl style regex, but ( and )

  58           *                                lose the usual meaning.

  59           *    @param string $label        Label of regex to be returned

  60           *                                on a match.

  61           *    @access public

  62           */
  63          function addPattern($pattern, $label = true) {
  64              $count = count($this->_patterns);
  65              $this->_patterns[$count] = $pattern;
  66              $this->_labels[$count] = $label;
  67              $this->_regex = null;
  68          }
  69          
  70          /**

  71           *    Attempts to match all patterns at once against

  72           *    a string.

  73           *    @param string $subject      String to match against.

  74           *    @param string $match        First matched portion of

  75           *                                subject.

  76           *    @return boolean             True on success.

  77           *    @access public

  78           */
  79          function match($subject, &$match) {
  80              if (count($this->_patterns) == 0) {
  81                  return false;
  82              }
  83              if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
  84                  $match = '';
  85                  return false;
  86              }
  87              $match = $matches[0];
  88              for ($i = 1; $i < count($matches); $i++) {
  89                  if ($matches[$i]) {
  90                      return $this->_labels[$i - 1];
  91                  }
  92              }
  93              return true;
  94          }
  95          
  96          /**

  97           *    Compounds the patterns into a single

  98           *    regular expression separated with the

  99           *    "or" operator. Caches the regex.

 100           *    Will automatically escape (, ) and / tokens.

 101           *    @param array $patterns    List of patterns in order.

 102           *    @access private

 103           */
 104          function _getCompoundedRegex() {
 105              if ($this->_regex == null) {
 106                  for ($i = 0, $count = count($this->_patterns); $i < $count; $i++) {
 107                      $this->_patterns[$i] = '(' . str_replace(
 108                              array('/', '(', ')'),
 109                              array('\/', '\(', '\)'),
 110                              $this->_patterns[$i]) . ')';
 111                  }
 112                  $this->_regex = "/" . implode("|", $this->_patterns) . "/" . $this->_getPerlMatchingFlags();
 113              }
 114              return $this->_regex;
 115          }
 116          
 117          /**

 118           *    Accessor for perl regex mode flags to use.

 119           *    @return string       Perl regex flags.

 120           *    @access private

 121           */
 122          function _getPerlMatchingFlags() {
 123              return ($this->_case ? "msS" : "msSi");
 124          }
 125      }
 126      
 127      /**

 128       *    States for a stack machine.

 129       *    @package SimpleTest

 130       *    @subpackage WebTester

 131       */
 132      class SimpleStateStack {
 133          var $_stack;
 134          
 135          /**

 136           *    Constructor. Starts in named state.

 137           *    @param string $start        Starting state name.

 138           *    @access public

 139           */
 140          function SimpleStateStack($start) {
 141              $this->_stack = array($start);
 142          }
 143          
 144          /**

 145           *    Accessor for current state.

 146           *    @return string       State.

 147           *    @access public

 148           */
 149          function getCurrent() {
 150              return $this->_stack[count($this->_stack) - 1];
 151          }
 152          
 153          /**

 154           *    Adds a state to the stack and sets it

 155           *    to be the current state.

 156           *    @param string $state        New state.

 157           *    @access public

 158           */
 159          function enter($state) {
 160              array_push($this->_stack, $state);
 161          }
 162          
 163          /**

 164           *    Leaves the current state and reverts

 165           *    to the previous one.

 166           *    @return boolean    False if we drop off

 167           *                       the bottom of the list.

 168           *    @access public

 169           */
 170          function leave() {
 171              if (count($this->_stack) == 1) {
 172                  return false;
 173              }
 174              array_pop($this->_stack);
 175              return true;
 176          }
 177      }
 178      
 179      /**

 180       *    Accepts text and breaks it into tokens.

 181       *    Some optimisation to make sure the

 182       *    content is only scanned by the PHP regex

 183       *    parser once. Lexer modes must not start

 184       *    with leading underscores.

 185       *    @package SimpleTest

 186       *    @subpackage WebTester

 187       */
 188      class SimpleLexer {
 189          var $_regexes;
 190          var $_parser;
 191          var $_mode;
 192          var $_mode_handlers;
 193          var $_case;
 194          
 195          /**

 196           *    Sets up the lexer in case insensitive matching

 197           *    by default.

 198           *    @param SimpleSaxParser $parser  Handling strategy by

 199           *                                    reference.

 200           *    @param string $start            Starting handler.

 201           *    @param boolean $case            True for case sensitive.

 202           *    @access public

 203           */
 204          function SimpleLexer(&$parser, $start = "accept", $case = false) {
 205              $this->_case = $case;
 206              $this->_regexes = array();
 207              $this->_parser = &$parser;
 208              $this->_mode = &new SimpleStateStack($start);
 209              $this->_mode_handlers = array($start => $start);
 210          }
 211          
 212          /**

 213           *    Adds a token search pattern for a particular

 214           *    parsing mode. The pattern does not change the

 215           *    current mode.

 216           *    @param string $pattern      Perl style regex, but ( and )

 217           *                                lose the usual meaning.

 218           *    @param string $mode         Should only apply this

 219           *                                pattern when dealing with

 220           *                                this type of input.

 221           *    @access public

 222           */
 223          function addPattern($pattern, $mode = "accept") {
 224              if (! isset($this->_regexes[$mode])) {
 225                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 226              }
 227              $this->_regexes[$mode]->addPattern($pattern);
 228              if (! isset($this->_mode_handlers[$mode])) {
 229                  $this->_mode_handlers[$mode] = $mode;
 230              }
 231          }
 232          
 233          /**

 234           *    Adds a pattern that will enter a new parsing

 235           *    mode. Useful for entering parenthesis, strings,

 236           *    tags, etc.

 237           *    @param string $pattern      Perl style regex, but ( and )

 238           *                                lose the usual meaning.

 239           *    @param string $mode         Should only apply this

 240           *                                pattern when dealing with

 241           *                                this type of input.

 242           *    @param string $new_mode     Change parsing to this new

 243           *                                nested mode.

 244           *    @access public

 245           */
 246          function addEntryPattern($pattern, $mode, $new_mode) {
 247              if (! isset($this->_regexes[$mode])) {
 248                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 249              }
 250              $this->_regexes[$mode]->addPattern($pattern, $new_mode);
 251              if (! isset($this->_mode_handlers[$new_mode])) {
 252                  $this->_mode_handlers[$new_mode] = $new_mode;
 253              }
 254          }
 255          
 256          /**

 257           *    Adds a pattern that will exit the current mode

 258           *    and re-enter the previous one.

 259           *    @param string $pattern      Perl style regex, but ( and )

 260           *                                lose the usual meaning.

 261           *    @param string $mode         Mode to leave.

 262           *    @access public

 263           */
 264          function addExitPattern($pattern, $mode) {
 265              if (! isset($this->_regexes[$mode])) {
 266                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 267              }
 268              $this->_regexes[$mode]->addPattern($pattern, "__exit");
 269              if (! isset($this->_mode_handlers[$mode])) {
 270                  $this->_mode_handlers[$mode] = $mode;
 271              }
 272          }
 273          
 274          /**

 275           *    Adds a pattern that has a special mode. Acts as an entry

 276           *    and exit pattern in one go, effectively calling a special

 277           *    parser handler for this token only.

 278           *    @param string $pattern      Perl style regex, but ( and )

 279           *                                lose the usual meaning.

 280           *    @param string $mode         Should only apply this

 281           *                                pattern when dealing with

 282           *                                this type of input.

 283           *    @param string $special      Use this mode for this one token.

 284           *    @access public

 285           */
 286          function addSpecialPattern($pattern, $mode, $special) {
 287              if (! isset($this->_regexes[$mode])) {
 288                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 289              }
 290              $this->_regexes[$mode]->addPattern($pattern, "_$special");
 291              if (! isset($this->_mode_handlers[$special])) {
 292                  $this->_mode_handlers[$special] = $special;
 293              }
 294          }
 295          
 296          /**

 297           *    Adds a mapping from a mode to another handler.

 298           *    @param string $mode        Mode to be remapped.

 299           *    @param string $handler     New target handler.

 300           *    @access public

 301           */
 302          function mapHandler($mode, $handler) {
 303              $this->_mode_handlers[$mode] = $handler;
 304          }
 305          
 306          /**

 307           *    Splits the page text into tokens. Will fail

 308           *    if the handlers report an error or if no

 309           *    content is consumed. If successful then each

 310           *    unparsed and parsed token invokes a call to the

 311           *    held listener.

 312           *    @param string $raw        Raw HTML text.

 313           *    @return boolean           True on success, else false.

 314           *    @access public

 315           */
 316          function parse($raw) {
 317              if (! isset($this->_parser)) {
 318                  return false;
 319              }
 320              $length = strlen($raw);
 321              while (is_array($parsed = $this->_reduce($raw))) {
 322                  list($raw, $unmatched, $matched, $mode) = $parsed;
 323                  if (! $this->_dispatchTokens($unmatched, $matched, $mode)) {
 324                      return false;
 325                  }
 326                  if ($raw === '') {
 327                      return true;
 328                  }
 329                  if (strlen($raw) == $length) {
 330                      return false;
 331                  }
 332                  $length = strlen($raw);
 333              }
 334              if (! $parsed) {
 335                  return false;
 336              }
 337              return $this->_invokeParser($raw, LEXER_UNMATCHED);
 338          }
 339          
 340          /**

 341           *    Sends the matched token and any leading unmatched

 342           *    text to the parser changing the lexer to a new

 343           *    mode if one is listed.

 344           *    @param string $unmatched    Unmatched leading portion.

 345           *    @param string $matched      Actual token match.

 346           *    @param string $mode         Mode after match. A boolean

 347           *                                false mode causes no change.

 348           *    @return boolean             False if there was any error

 349           *                                from the parser.

 350           *    @access private

 351           */
 352          function _dispatchTokens($unmatched, $matched, $mode = false) {
 353              if (! $this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
 354                  return false;
 355              }
 356              if (is_bool($mode)) {
 357                  return $this->_invokeParser($matched, LEXER_MATCHED);
 358              }
 359              if ($this->_isModeEnd($mode)) {
 360                  if (! $this->_invokeParser($matched, LEXER_EXIT)) {
 361                      return false;
 362                  }
 363                  return $this->_mode->leave();
 364              }
 365              if ($this->_isSpecialMode($mode)) {
 366                  $this->_mode->enter($this->_decodeSpecial($mode));
 367                  if (! $this->_invokeParser($matched, LEXER_SPECIAL)) {
 368                      return false;
 369                  }
 370                  return $this->_mode->leave();
 371              }
 372              $this->_mode->enter($mode);
 373              return $this->_invokeParser($matched, LEXER_ENTER);
 374          }
 375          
 376          /**

 377           *    Tests to see if the new mode is actually to leave

 378           *    the current mode and pop an item from the matching

 379           *    mode stack.

 380           *    @param string $mode    Mode to test.

 381           *    @return boolean        True if this is the exit mode.

 382           *    @access private

 383           */
 384          function _isModeEnd($mode) {
 385              return ($mode === "__exit");
 386          }
 387          
 388          /**

 389           *    Test to see if the mode is one where this mode

 390           *    is entered for this token only and automatically

 391           *    leaves immediately afterwoods.

 392           *    @param string $mode    Mode to test.

 393           *    @return boolean        True if this is the exit mode.

 394           *    @access private

 395           */
 396          function _isSpecialMode($mode) {
 397              return (strncmp($mode, "_", 1) == 0);
 398          }
 399          
 400          /**

 401           *    Strips the magic underscore marking single token

 402           *    modes.

 403           *    @param string $mode    Mode to decode.

 404           *    @return string         Underlying mode name.

 405           *    @access private

 406           */
 407          function _decodeSpecial($mode) {
 408              return substr($mode, 1);
 409          }
 410          
 411          /**

 412           *    Calls the parser method named after the current

 413           *    mode. Empty content will be ignored. The lexer

 414           *    has a parser handler for each mode in the lexer.

 415           *    @param string $content        Text parsed.

 416           *    @param boolean $is_match      Token is recognised rather

 417           *                                  than unparsed data.

 418           *    @access private

 419           */
 420          function _invokeParser($content, $is_match) {
 421              if (($content === '') || ($content === false)) {
 422                  return true;
 423              }
 424              $handler = $this->_mode_handlers[$this->_mode->getCurrent()];
 425              return $this->_parser->$handler($content, $is_match);
 426          }
 427          
 428          /**

 429           *    Tries to match a chunk of text and if successful

 430           *    removes the recognised chunk and any leading

 431           *    unparsed data. Empty strings will not be matched.

 432           *    @param string $raw         The subject to parse. This is the

 433           *                               content that will be eaten.

 434           *    @return array/boolean      Three item list of unparsed

 435           *                               content followed by the

 436           *                               recognised token and finally the

 437           *                               action the parser is to take.

 438           *                               True if no match, false if there

 439           *                               is a parsing error.

 440           *    @access private

 441           */
 442          function _reduce($raw) {
 443              if ($action = $this->_regexes[$this->_mode->getCurrent()]->match($raw, $match)) {
 444                  $unparsed_character_count = strpos($raw, $match);
 445                  $unparsed = substr($raw, 0, $unparsed_character_count);
 446                  $raw = substr($raw, $unparsed_character_count + strlen($match));
 447                  return array($raw, $unparsed, $match, $action);
 448              }
 449              return true;
 450          }
 451      }
 452      
 453      /**

 454       *    Breaks HTML into SAX events.

 455       *    @package SimpleTest

 456       *    @subpackage WebTester

 457       */
 458      class SimpleHtmlLexer extends SimpleLexer {
 459          
 460          /**

 461           *    Sets up the lexer with case insensitive matching

 462           *    and adds the HTML handlers.

 463           *    @param SimpleSaxParser $parser  Handling strategy by

 464           *                                    reference.

 465           *    @access public

 466           */
 467          function SimpleHtmlLexer(&$parser) {
 468              $this->SimpleLexer($parser, 'text');
 469              $this->mapHandler('text', 'acceptTextToken');
 470              $this->_addSkipping();
 471              foreach ($this->_getParsedTags() as $tag) {
 472                  $this->_addTag($tag);
 473              }
 474              $this->_addInTagTokens();
 475          }
 476          
 477          /**

 478           *    List of parsed tags. Others are ignored.

 479           *    @return array        List of searched for tags.

 480           *    @access private

 481           */
 482          function _getParsedTags() {
 483              return array('a', 'title', 'form', 'input', 'button', 'textarea', 'select',
 484                      'option', 'frameset', 'frame', 'label');
 485          }
 486          
 487          /**

 488           *    The lexer has to skip certain sections such

 489           *    as server code, client code and styles.

 490           *    @access private

 491           */
 492          function _addSkipping() {
 493              $this->mapHandler('css', 'ignore');
 494              $this->addEntryPattern('<style', 'text', 'css');
 495              $this->addExitPattern('</style>', 'css');
 496              $this->mapHandler('js', 'ignore');
 497              $this->addEntryPattern('<script', 'text', 'js');
 498              $this->addExitPattern('</script>', 'js');
 499              $this->mapHandler('comment', 'ignore');
 500              $this->addEntryPattern('<!--', 'text', 'comment');
 501              $this->addExitPattern('-->', 'comment');
 502          }
 503          
 504          /**

 505           *    Pattern matches to start and end a tag.

 506           *    @param string $tag          Name of tag to scan for.

 507           *    @access private

 508           */
 509          function _addTag($tag) {
 510              $this->addSpecialPattern("</$tag>", 'text', 'acceptEndToken');
 511              $this->addEntryPattern("<$tag", 'text', 'tag');
 512          }
 513          
 514          /**

 515           *    Pattern matches to parse the inside of a tag

 516           *    including the attributes and their quoting.

 517           *    @access private

 518           */
 519          function _addInTagTokens() {
 520              $this->mapHandler('tag', 'acceptStartToken');
 521              $this->addSpecialPattern('\s+', 'tag', 'ignore');
 522              $this->_addAttributeTokens();
 523              $this->addExitPattern('/>', 'tag');
 524              $this->addExitPattern('>', 'tag');
 525          }
 526          
 527          /*