[ Index ]

PHP Cross Reference of Limb3

title

Body

[close]

/wact/src/compiler/parser/ -> WactHTMLParser.class.php (source)

   1  <?php
   2  /*
   3   * Limb PHP Framework
   4   *
   5   * @link http://limb-project.com 
   6   * @copyright  Copyright &copy; 2004-2007 BIT(http://bit-creative.com)
   7   * @license    LGPL http://www.gnu.org/copyleft/lesser.html 
   8   */
   9  
  /**
   * HTML/XHTML/XML parser
   *
   * Scans a markup string once, left to right, and reports what it finds to an
   * observer object through the following callbacks (all receive a
   * WactSourceLocation as their last argument):
   *
   *  - characters($text, $location)            literal text, comments, doctype-like
   *                                            "<!...", "<%..." and stray "<" runs
   *  - startTag($tag, $attributes, $location)  "<tag ...>"
   *  - emptyTag($tag, $attributes, $location)  "<tag ... />"
   *  - endTag($tag, $location)                 "</tag>"
   *  - instruction($type, $code, $location)    "<?type code ?>"
   *
   * Markup that is cut off by the end of the input is handed to characters()
   * verbatim, starting at the "<" that opened the unfinished element.
   *
   * @package wact
   * @version $Id: WactHTMLParser.class.php 5945 2007-06-06 08:31:43Z pachanga $
   */
  class WactHTMLParser
  {
    /**
     * Name of the file being parsed, only used for source locations
     * @var string|null
     */
    protected $file_name;
    /**
     * Parser listener
     * @var WactHTMLParserListener
     */
    protected $observer;
    /**
     * XML document being parsed
     * @var string
     */
    protected $rawtext;
    /**
     * Position in XML document relative to start (0)
     * @var int
     */
    protected $position;
    /**
     * Length of the XML document in bytes (as returned by strlen())
     * @var int
     */
    protected $length;
    /**
     * Byte offset of the first character after the "<" of the element that is
     * currently being parsed, i.e. the "<" itself sits at element_pos - 1
     * @var int
     */
    protected $element_pos;

    /**
     * @param WactHTMLParserListener $observer object receiving the parse events
     */
    public function __construct($observer)
    {
      $this->observer = $observer;
    }

    /**
     * Calculates the line number from the byte index
     * @return int the current (1-based) line number
     */
    public function getLineNumber()
    {
      return 1 + substr_count(substr($this->rawtext, 0, $this->position), "\n");
    }

    /**
     * @return string|null file name passed to parse()
     */
    public function getFile()
    {
      return $this->file_name;
    }

    /**
     * @return WactSourceLocation file and line of the current parser position
     */
    public function getCurrentLocation()
    {
      return new WactSourceLocation($this->getFile(), $this->getLineNumber());
    }

    /**
     * Moves the position forward past any whitespace characters
     * @return void
     */
    public function ignoreWhitespace()
    {
      while ($this->position < $this->length &&
             strpos(" \n\r\t", $this->rawtext[$this->position]) !== false)
      {
        $this->position++;
      }
    }

    /**
     * Parses the document and notifies the observer about every piece found
     *
     * @param string $data XML document to parse
     * @param string|null $file_name file name reported in source locations
     * @return void
     * @throws WactException if a quoted attribute value is not followed by
     *         whitespace, "/" or ">", or if "/" inside a tag is not followed by ">"
     */
    public function parse($data, $file_name = null)
    {
      $this->rawtext = (string) $data;
      $this->length = strlen($this->rawtext);
      $this->position = 0;
      $this->element_pos = 0;   // do not inherit state from a previous parse() call
      $this->file_name = $file_name;

      do
      {
        $start = $this->position;

        // keep the strpos() result in a local: $this->position must stay an
        // integer, otherwise getLineNumber() would always report line 1
        $next = strpos($this->rawtext, '<', $start);
        if ($next === false)
        {
          // no more markup, the rest of the document is plain text
          $this->position = $this->length;
          if ($start < $this->length)
            $this->observer->characters(substr($this->rawtext, $start), $this->getCurrentLocation());
          return;
        }
        $this->position = $next;

        // any text before < considered as characters
        if ($this->position > $start)
        {
          $characters = substr($this->rawtext, $start, $this->position - $start);
          $this->observer->characters($characters, $this->getCurrentLocation());
        }

        $this->position += 1;   // ignore '<' character

        // remember the element start *before* the end-of-file check, which
        // uses it to decide what still has to be emitted as characters
        $this->element_pos = $this->position;

        if ($this->_reachedEndOfFile())
          return;

        $this->position += 1;

        switch ($this->rawtext[$this->element_pos])
        {
          // </tag> cases
          case '/':
            $start = $this->position;
            while ($this->position < $this->length && $this->rawtext[$this->position] != '>')
              $this->position++;

            if ($this->_reachedEndOfFile())
              return;

            $tag = substr($this->rawtext, $start, $this->position - $start);

            $this->observer->endTag($tag, $this->getCurrentLocation());
            $this->position += 1;   // ignore '>' character
            break;

          // <?php cases
          case '?':
            $start = $this->position;

            // search instruction type
            while ($this->position < $this->length &&
                   strpos(" \n\r\t", $this->rawtext[$this->position]) === false)
            {
              $this->position++;
            }

            if ($this->_reachedEndOfFile())
              return;

            $instruction_type = substr($this->rawtext, $start, $this->position - $start);

            $this->ignoreWhitespace();

            // search instruction end and thus the instruction code
            $start = $this->position;
            $end = strpos($this->rawtext, '?>', $start);

            if ($end === false)
            {
              // unterminated instruction: emit it verbatim as characters
              $this->position = $this->length;
              $this->_reachedEndOfFile();
              return;
            }

            $this->position = $end;
            $code = substr($this->rawtext, $start, $this->position - $start);
            $this->observer->instruction($instruction_type, $code, $this->getCurrentLocation());

            $this->position += 2;   // ignore closing instruction marker
            break;

          // <!-- and other <! cases
          case '!':
            $start = $this->element_pos - 1;   // offset of the '<'

            if (substr($this->rawtext, $start, 4) == "<!--")
            {
              $comment_end = strpos($this->rawtext, '-->', $start);
              if ($comment_end !== false)
              {
                // a complete comment is passed through untouched
                $raw_text = substr($this->rawtext, $start, $comment_end - $start + 3);
                $this->observer->characters($raw_text, $this->getCurrentLocation());
                $this->position = $comment_end + 3;
                break;
              }
            }

            $this->_passThroughUntilNextTag();
            break;

          // <% case and any "<" that can not start a tag
          // (e.g. compare operator in javascript block)
          case '%':
          case ' ':
          case "\n":
          case "\r":
          case "\t":
          case "=":
            $this->_passThroughUntilNextTag();
            break;

          // <tag ...> and <tag ... /> cases
          default:
            while ($this->position < $this->length &&
                   strpos("/> \n\r\t", $this->rawtext[$this->position]) === false)
            {
              $this->position++;
            }

            if ($this->_reachedEndOfFile())
              return;

            $tag = substr($this->rawtext, $this->element_pos, $this->position - $this->element_pos);
            $attributes = array();

            $this->ignoreWhitespace();

            // collect attributes until the end of the tag
            while ($this->position < $this->length &&
                   $this->rawtext[$this->position] != '/' &&
                   $this->rawtext[$this->position] != '>')
            {
              $start = $this->position;
              while ($this->position < $this->length &&
                     strpos("/>= \n\r\t", $this->rawtext[$this->position]) === false)
              {
                $this->position++;
              }

              if ($this->_reachedEndOfFile())
                return;

              $attributename = substr($this->rawtext, $start, $this->position - $start);
              $attributevalue = null;   // attribute without "=" keeps a NULL value

              $this->ignoreWhitespace();

              if ($this->_reachedEndOfFile())
                return;

              if ($this->rawtext[$this->position] == '=')
              {
                $attributevalue = "";

                $this->position++;
                $this->ignoreWhitespace();

                if ($this->_reachedEndOfFile())
                  return;

                $quote = $this->rawtext[$this->position];
                if ($quote == '"' || $quote == "'")
                {
                  $start = $this->position + 1;
                  $closing = strpos($this->rawtext, $quote, $start);

                  if ($closing === false)
                  {
                    // unterminated quoted value: emit the whole tag verbatim
                    $this->position = $this->length;
                    $this->_reachedEndOfFile();
                    return;
                  }

                  $this->position = $closing;
                  $attributevalue = substr($this->rawtext, $start, $this->position - $start);

                  $this->position++;

                  if ($this->_reachedEndOfFile())
                    return;

                  if (strpos("/> \n\r\t", $this->rawtext[$this->position]) === false)
                    throw new WactException('Invalid tag attribute syntax', array('file' => $this->getFile(),
                                                                                  'line' => $this->getLineNumber()));
                }
                else
                {
                  // unquoted value runs up to whitespace, "/" or ">"
                  $start = $this->position;
                  while ($this->position < $this->length &&
                         strpos("/> \n\r\t", $this->rawtext[$this->position]) === false)
                  {
                    $this->position++;
                  }

                  if ($this->_reachedEndOfFile())
                    return;

                  $attributevalue = substr($this->rawtext, $start, $this->position - $start);
                }
              }

              $attributes[$attributename] = $attributevalue;

              $this->ignoreWhitespace();
            }

            if ($this->_reachedEndOfFile())
              return;

            if ($this->rawtext[$this->position] == '/')
            {
              $this->position += 1;

              if ($this->_reachedEndOfFile())
                return;

              if ($this->rawtext[$this->position] != '>')
              {
                throw new WactException('Invalid tag syntax', array('file' => $this->getFile(),
                                                                    'line' => $this->getLineNumber()));
              }

              $this->observer->emptyTag($tag, $attributes, $this->getCurrentLocation());
            }
            else
            {
              $this->observer->startTag($tag, $attributes, $this->getCurrentLocation());
            }
            $this->position += 1;   // ignore '>' character
            break;
        }
      }
      while ($this->position < $this->length);
    }

    /**
     * Emits everything from the "<" of the current element up to (but not
     * including) the next "<" - or the end of the document - as characters
     * and leaves the position right there
     * @return void
     */
    protected function _passThroughUntilNextTag()
    {
      $start = $this->element_pos - 1;

      $next = strpos($this->rawtext, '<', $this->position);
      $this->position = ($next === false) ? $this->length : $next;

      $characters = substr($this->rawtext, $start, $this->position - $start);
      $this->observer->characters($characters, $this->getCurrentLocation());
    }

    /**
     * Checks whether the position ran past the end of the document; if so the
     * unfinished element (starting at its "<") is emitted as characters
     * @return bool true if the end of the document was reached
     */
    protected function _reachedEndOfFile()
    {
      if ($this->position >= $this->length)
      {
        $this->observer->characters(substr($this->rawtext, $this->element_pos - 1), $this->getCurrentLocation());
        return true;
      }

      return false;
    }

  }
 341  ?>