[ Index ]

PHP Cross Reference of Limb3

title

Body

[close]

/web_spider/tests/cases/ -> lmbWebSpiderTest.class.php (source)

   1  <?php
   2  /*
   3   * Limb PHP Framework
   4   *
   5   * @link http://limb-project.com 
   6   * @copyright  Copyright &copy; 2004-2007 BIT(http://bit-creative.com)
   7   * @license    LGPL http://www.gnu.org/copyleft/lesser.html 
   8   */
   9  lmb_require('limb/net/src/lmbUri.class.php');
  10  lmb_require('limb/web_spider/src/lmbWebSpider.class.php');
  11  lmb_require('limb/web_spider/src/lmbContentTypeFilter.class.php');
  12  lmb_require('limb/web_spider/src/lmbUriFilter.class.php');
  13  lmb_require('limb/web_spider/src/lmbUriExtractor.class.php');
  14  lmb_require('limb/web_spider/src/lmbUriNormalizer.class.php');
  15  lmb_require('limb/web_spider/src/lmbUriContentReader.class.php');
  16  
  17  class TestingSpiderObserver
  18  {
  19    function notify($reader){}
  20  }
  21  
  22  Mock :: generate('TestingSpiderObserver', 'MockWebSpiderObserver');
  23  Mock :: generate('lmbUriExtractor', 'MockUriExtractor');
  24  Mock :: generate('lmbUriNormalizer', 'MockUriNormalizer');
  25  Mock :: generate('lmbUriFilter', 'MockUriFilter');
  26  Mock :: generate('lmbContentTypeFilter', 'MockContentTypeFilter');
  27  Mock :: generate('lmbUriContentReader', 'MockUriContentReader');
  28  
  29  class lmbWebSpiderTest extends UnitTestCase
  30  {
  31    var $spider;
  32    var $observer;
  33    var $extractor;
  34    var $uri_filter;
  35    var $content_type_filter;
  36    var $normalizer;
  37    var $reader;
  38  
  39    function setUp()
  40    {
  41      $this->observer = new MockWebSpiderObserver();
  42      $this->extractor = new MockUriExtractor();
  43      $this->uri_filter = new MockUriFilter();
  44      $this->content_type_filter = new MockContentTypeFilter();
  45      $this->normalizer = new MockUriNormalizer();
  46      $this->reader = new MockUriContentReader();
  47  
  48      $this->spider = new lmbWebSpider();
  49      $this->spider->registerObserver($this->observer);
  50      $this->spider->setUriExtractor($this->extractor);
  51      $this->spider->setUriFilter($this->uri_filter);
  52      $this->spider->setContentTypeFilter($this->content_type_filter);
  53      $this->spider->setUriNormalizer($this->normalizer);
  54      $this->spider->setUriContentReader($this->reader);
  55    }
  56  
  57    function tearDown()
  58    {
  59    }
  60  
  61    function testContentTypeFiltering()
  62    {
  63      $uri = new lmbUri('http://some.host/whatever.html');
  64  
  65      $this->uri_filter->expectOnce('canPass');
  66      $this->uri_filter->setReturnValue('canPass', true, array($uri));
  67      $this->normalizer->expectOnce('process');
  68  
  69      $this->reader->expectOnce('open', array($uri));
  70      $this->reader->setReturnValue('getContentType', $content_type = 'whatever');
  71      $this->reader->expectNever('getContent');
  72  
  73      $this->content_type_filter->expectOnce('canPass', array($content_type));
  74      $this->content_type_filter->setReturnValue('canPass', false, array($content_type));
  75  
  76      $this->extractor->expectNever('extract');
  77      $this->observer->expectNever('notify');
  78  
  79      $this->spider->crawl($uri);
  80    }
  81  
  82    function testNotifyObservers()
  83    {
  84      $one_more_observer = new MockWebSpiderObserver($this);
  85      $this->spider->registerObserver($one_more_observer);
  86  
  87      $uri = new lmbUri('http://some.host/whatever.html');
  88  
  89      $this->uri_filter->expectOnce('canPass');
  90      $this->uri_filter->setReturnValue('canPass', true, array($uri));
  91      $this->normalizer->expectOnce('process');
  92  
  93      $this->reader->expectOnce('open', array($uri));
  94      $this->reader->setReturnValue('getContent', $content = 'whatever');
  95      $this->reader->setReturnValue('getContentType', $content_type = 'whatever');
  96  
  97      $this->content_type_filter->expectOnce('canPass', array($content_type));
  98      $this->content_type_filter->setReturnValue('canPass', true, array($content_type));
  99  
 100      $this->extractor->expectOnce('extract', array($content));
 101      $this->extractor->setReturnValue('extract', array(), array($content));
 102  
 103      $this->observer->expectOnce('notify', array(new IsAExpectation('MockUriContentReader')));
 104      $one_more_observer->expectOnce('notify', array(new IsAExpectation('MockUriContentReader')));
 105  
 106      $this->spider->crawl($uri);
 107  
 108      $one_more_observer->tally();
 109    }
 110  
 111    function testCrawlCacheHitComplexUrl()
 112    {
 113      $this->observer->expectCallCount('notify', 2);
 114  
 115      $uri = new lmbUri('http://example.com/index.html');
 116      $uri_normalized_by_spider = new lmbUri('http://example.com/level1/page1.html');
 117  
 118      $this->uri_filter->expectCallCount('canPass', 2);
 119      $this->uri_filter->setReturnValueAt(0, 'canPass', true, array($uri));
 120      $this->uri_filter->setReturnValueAt(1, 'canPass', true, array($uri_normalized_by_spider));
 121  
 122      $this->normalizer->expectCallCount('process', 5);
 123      $this->normalizer->expectArgumentsAt(0, 'process', array($uri));
 124      $this->normalizer->expectArgumentsAt(1, 'process', array($uri));
 125      $this->normalizer->expectArgumentsAt(2, 'process', array($uri_normalized_by_spider));
 126      $this->normalizer->expectArgumentsAt(3, 'process', array($uri));
 127      $this->normalizer->expectArgumentsAt(4, 'process', array($uri_normalized_by_spider));
 128  
 129      $this->reader->expectCallCount('open', 2);
 130      $this->reader->expectArgumentsAt(0, 'open', array($uri));
 131      $this->reader->expectArgumentsAt(1, 'open', array($uri_normalized_by_spider));
 132  
 133      $this->reader->expectCallCount('getContent', 2);
 134      $this->reader->setReturnValueAt(0, 'getContent', $content1 = 'whatever1');
 135      $this->reader->setReturnValueAt(0, 'getContentType', $content_type1 = 'type1');
 136      $this->reader->setReturnValueAt(1, 'getContent', $content2 = 'whatever2');
 137      $this->reader->setReturnValueAt(1, 'getContentType', $content_type2 = 'type2');
 138  
 139      $this->content_type_filter->expectCallCount('canPass', 2);
 140      $this->content_type_filter->setReturnValueAt(0 ,'canPass', true, array($content_type1));
 141      $this->content_type_filter->setReturnValueAt(1 ,'canPass', true, array($content_type2));
 142  
 143      $links1 = array(new lmbUri('index.html'), new lmbUri('level1/page1.html#anchor'));
 144      $links2 = array(new lmbUri('../index.html'), new lmbUri('page1.html'));
 145  
 146      $this->extractor->expectCallCount('extract', 2);
 147      $this->extractor->setReturnValue('extract', $links1, array($content1));
 148      $this->extractor->setReturnValue('extract', $links2, array($content2));
 149      $this->spider->crawl($uri);
 150    }
 151  }
 152  
 153  ?>


Generated: Mon Dec 1 03:56:46 2008 Cross-referenced by PHPXref 0.7