| [ Index ] |
PHP Cross Reference of Limb3 |
[Summary view] [Print] [Text view]
<?php
/*
 * Limb PHP Framework
 *
 * @link http://limb-project.com
 * @copyright  Copyright © 2004-2007 BIT(http://bit-creative.com)
 * @license    LGPL http://www.gnu.org/copyleft/lesser.html
 */

/**
 * HTML/XHTML/XML parser
 *
 * Walks a markup string once and reports what it finds to an observer object
 * through these callbacks:
 *  - characters($text, $location)
 *  - startTag($tag, $attributes, $location)
 *  - endTag($tag, $location)
 *  - emptyTag($tag, $attributes, $location)
 *  - instruction($type, $code, $location)
 *
 * $location is always a WactSourceLocation built from the file name passed to
 * parse() and the line number of the current parsing position.
 *
 * @package wact
 * @version $Id: WactHTMLParser.class.php 5945 2007-06-06 08:31:43Z pachanga $
 */
class WactHTMLParser
{
  /**
   * File name passed to parse(), used only for locations and error reports
   * @var string|null
   */
  protected $file_name;
  /**
   * Parser listener
   * @var WactHTMLParserListener
   */
  protected $observer;
  /**
   * XML document being parsed
   * @var string
   */
  protected $rawtext;
  /**
   * Position in XML document relative to start (0)
   * @var int
   */
  protected $position;
  /**
   * Length of the XML document as returned by strlen()
   * @var int
   */
  protected $length;
  /**
   * Index of the first character after the '<' of the element being parsed
   * @var int
   */
  protected $element_pos;

  /**
   * @param object $observer listener that receives the parsing events
   */
  public function __construct($observer)
  {
    $this->observer = $observer;
  }

  /**
   * Calculates the line number from the current byte position
   * @return int the current line number (first line is 1)
   */
  public function getLineNumber()
  {
    return 1 + substr_count(substr($this->rawtext, 0, $this->position), "\n");
  }

  /**
   * @return string|null the file name given to parse()
   */
  public function getFile()
  {
    return $this->file_name;
  }

  /**
   * @return WactSourceLocation file name and line number of the current position
   */
  public function getCurrentLocation()
  {
    return new WactSourceLocation($this->getFile(), $this->getLineNumber());
  }

  /**
   * Moves the position forward past any whitespace characters
   * (space, line feed, carriage return, tab)
   * @return void
   */
  public function ignoreWhitespace()
  {
    while ($this->position < $this->length &&
           strpos(" \n\r\t", $this->rawtext[$this->position]) !== false)
    {
      $this->position++;
    }
  }

  /**
   * Parses the document and reports every piece of it to the observer.
   *
   * Markup that is cut off by the end of the document (unterminated tag,
   * instruction or quoted attribute value) is not an error: everything from
   * the opening '<' of that element is reported as characters and parsing stops.
   *
   * @param string $data document to parse
   * @param string|null $file_name name reported in locations and exceptions
   * @return void
   * @throws WactException when a quoted attribute value is followed by an
   *         unexpected character, or '/' inside a tag is not followed by '>'
   */
  public function parse($data, $file_name = NULL)
  {
    $this->rawtext = $data;
    $this->length = strlen($data);
    $this->position = 0;
    $this->file_name = $file_name;

    do
    {
      $start = $this->position;

      // keep the strpos() result in a local: storing FALSE in $this->position
      // would make every location computed afterwards point at line 1
      $tag_open = strpos($this->rawtext, '<', $start);
      if ($tag_open === false)
      {
        if ($start < $this->length)
        {
          $this->position = $this->length;
          $this->observer->characters(substr($this->rawtext, $start), $this->getCurrentLocation());
        }
        return;
      }
      $this->position = $tag_open;

      // any text before < considered as characters
      if ($this->position > $start)
      {
        $characters = substr($this->rawtext, $start, $this->position - $start);
        $this->observer->characters($characters, $this->getCurrentLocation());
      }

      $this->position += 1; // ignore '<' character

      // must be set before the end-of-file check below, which reports the
      // text starting one character before element_pos; a stale value from
      // the previous element would re-emit already reported markup
      $this->element_pos = $this->position;

      if ($this->_reachedEndOfFile())
        return;

      $this->position += 1;

      switch ($this->rawtext[$this->element_pos])
      {
        // </tag> cases
        case '/':
          $start = $this->position;
          while ($this->position < $this->length && $this->rawtext[$this->position] !== '>')
            $this->position++;

          if ($this->_reachedEndOfFile())
            return;

          $tag = substr($this->rawtext, $start, $this->position - $start);

          $this->observer->endTag($tag, $this->getCurrentLocation());
          $this->position += 1; // ignore '>' character
          break;

        // processing instruction cases
        case '?':
          $start = $this->position;

          // search instruction type
          while ($this->position < $this->length &&
                 strpos(" \n\r\t", $this->rawtext[$this->position]) === false)
            $this->position++;

          if ($this->_reachedEndOfFile())
            return;

          $instruction_type = substr($this->rawtext, $start, $this->position - $start);

          $this->ignoreWhitespace();

          // search instruction end and thus the instruction code
          $start = $this->position;
          $instruction_end = strpos($this->rawtext, '?' . '>', $start);

          if ($instruction_end === false)
          {
            $this->_emitUnparsedRemainder();
            return;
          }
          $this->position = $instruction_end;

          $code = substr($this->rawtext, $start, $this->position - $start);
          $this->observer->instruction($instruction_type, $code, $this->getCurrentLocation());

          $this->position += 2; // ignore the two closing characters
          break;

        // <!-- and other <! cases
        case '!':
          $start = $this->element_pos - 1; // index of the '<'

          if (substr($this->rawtext, $start, 4) === "<!--")
          {
            $comment_end = strpos($this->rawtext, '-->', $start);
            if ($comment_end !== false)
            {
              // a complete comment is passed through untouched as characters
              $raw_text = substr($this->rawtext, $start, $comment_end - $start + 3);
              $this->observer->characters($raw_text, $this->getCurrentLocation());
              $this->position = $comment_end + 3;
              break;
            }
          }

          $this->_emitTextUpToNextTag($start);
          break;

        // <% case, and any '<' that does not start a tag
        // (e.g. compare operator in javascript block)
        case '%':
        case ' ':
        case "\n":
        case "\r":
        case "\t":
        case '=':
          $this->_emitTextUpToNextTag($this->element_pos - 1);
          break;

        // <tag ...> and <tag ... /> cases
        default:
          while ($this->position < $this->length &&
                 strpos("/> \n\r\t", $this->rawtext[$this->position]) === false)
          {
            $this->position++;
          }

          if ($this->_reachedEndOfFile())
            return;

          $tag = substr($this->rawtext, $this->element_pos, $this->position - $this->element_pos);
          $Attributes = array();

          $this->ignoreWhitespace();

          // collect attributes until the end of the tag
          while ($this->position < $this->length &&
                 $this->rawtext[$this->position] !== '/' &&
                 $this->rawtext[$this->position] !== '>')
          {
            $start = $this->position;
            while ($this->position < $this->length &&
                   strpos("/>= \n\r\t", $this->rawtext[$this->position]) === false)
            {
              $this->position++;
            }

            if ($this->_reachedEndOfFile())
              return;

            $attributename = substr($this->rawtext, $start, $this->position - $start);
            // an attribute without '=' keeps the value NULL
            $attributevalue = NULL;

            $this->ignoreWhitespace();

            if ($this->_reachedEndOfFile())
              return;

            if ($this->rawtext[$this->position] === '=')
            {
              $attributevalue = "";

              $this->position++;
              $this->ignoreWhitespace();

              if ($this->_reachedEndOfFile())
                return;

              $quote = $this->rawtext[$this->position];
              if ($quote === '"' || $quote === "'")
              {
                $start = $this->position + 1;
                $quote_end = strpos($this->rawtext, $quote, $start);

                if ($quote_end === false)
                {
                  $this->_emitUnparsedRemainder();
                  return;
                }
                $this->position = $quote_end;

                $attributevalue = substr($this->rawtext, $start, $this->position - $start);

                $this->position++;

                if ($this->_reachedEndOfFile())
                  return;

                // the closing quote must be followed by whitespace or the tag end
                if (strpos("/> \n\r\t", $this->rawtext[$this->position]) === false)
                  throw new WactException('Invalid tag attribute syntax', array('file' => $this->getFile(),
                                                                                  'line' => $this->getLineNumber()));
              }
              else
              {
                // unquoted value runs up to whitespace or the tag end
                $start = $this->position;
                while ($this->position < $this->length &&
                       strpos("/> \n\r\t", $this->rawtext[$this->position]) === false)
                {
                  $this->position++;
                }

                if ($this->_reachedEndOfFile())
                  return;

                $attributevalue = substr($this->rawtext, $start, $this->position - $start);
              }
            }

            $Attributes[$attributename] = $attributevalue;

            $this->ignoreWhitespace();
          }

          if ($this->_reachedEndOfFile())
            return;

          if ($this->rawtext[$this->position] === '/')
          {
            $this->position += 1;

            if ($this->_reachedEndOfFile())
              return;

            if ($this->rawtext[$this->position] !== '>')
            {
              throw new WactException('Invalid tag syntax', array('file' => $this->getFile(),
                                                                  'line' => $this->getLineNumber()));
            }

            $this->observer->emptyTag($tag, $Attributes, $this->getCurrentLocation());
          }
          else
          {
            $this->observer->startTag($tag, $Attributes, $this->getCurrentLocation());
          }
          $this->position += 1; // ignore '>' character

          break;
      }
    }
    while ($this->position < $this->length);
  }

  /**
   * Advances to the next '<' (or the end of the document) and reports the
   * text between $start and that point as characters.
   * @param int $start index of the first character to report
   * @return void
   */
  protected function _emitTextUpToNextTag($start)
  {
    while ($this->position < $this->length && $this->rawtext[$this->position] !== '<')
      $this->position++;

    $characters = substr($this->rawtext, $start, $this->position - $start);
    $this->observer->characters($characters, $this->getCurrentLocation());
  }

  /**
   * Reports everything from the '<' of the current element to the end of the
   * document as characters, located at the end of the document.
   * @return void
   */
  protected function _emitUnparsedRemainder()
  {
    $this->position = $this->length;
    $this->observer->characters(substr($this->rawtext, $this->element_pos - 1), $this->getCurrentLocation());
  }

  /**
   * Checks whether the position ran past the last character. If so, the
   * unfinished element is reported to the observer as characters.
   * @return bool true when the end of the document has been reached
   */
  protected function _reachedEndOfFile()
  {
    if ($this->position >= $this->length)
    {
      $this->observer->characters(substr($this->rawtext, $this->element_pos - 1), $this->getCurrentLocation());
      return true;
    }

    return false;
  }
}