[ Index ]

PHP Cross Reference of Limb3

title

Body

[close]

/web_spider/src/ -> lmbUriExtractor.class.php (source)

   1  <?php
   2  /*
   3   * Limb PHP Framework
   4   *
   5   * @link http://limb-project.com 
   6   * @copyright  Copyright &copy; 2004-2007 BIT(http://bit-creative.com)
   7   * @license    LGPL http://www.gnu.org/copyleft/lesser.html 
   8   */
   9  lmb_require('limb/net/src/lmbUri.class.php');
  10  
  11  /**

  12   * class lmbUriExtractor.

  13   *

  14   * @package web_spider

  15   * @version $Id: lmbUriExtractor.class.php 5945 2007-06-06 08:31:43Z pachanga $

  16   */
  17  class lmbUriExtractor
  18  {
  19    protected function _defineUriRegex()
  20    {
  21      return '/(<a.*?href=(?:"|\'|)([^"\'>\s]+)(?:"|\'|).*?>)(.*?)<\/a>/s';
  22    }
  23  
  24    protected function _defineRegexMatchNumber()
  25    {
  26      return 2;
  27    }
  28  
  29    function &extract($content)
  30    {
  31      preg_match_all($this->_defineUriRegex(),
  32                     $content,
  33                     $matches,
  34                     PREG_SET_ORDER);
  35  
  36      $uris = array();
  37  
  38      $match_number = $this->_defineRegexMatchNumber();
  39  
  40      for ($i=0; $i < sizeof($matches); $i++)
  41        $uris[] = new lmbUri($matches[$i][$match_number]);
  42  
  43      return $uris;
  44    }
  45  }
  46  
  47  ?>


Generated: Mon Dec 1 03:56:46 2008 Cross-referenced by PHPXref 0.7