| [ Index ] |
PHP Cross Reference of Limb3 |
[Summary view] [Print] [Text view]
<?php
/*
 * Limb PHP Framework
 *
 * @link http://limb-project.com
 * @copyright  Copyright © 2004-2007 BIT(http://bit-creative.com)
 * @license    LGPL http://www.gnu.org/copyleft/lesser.html
 */
lmb_require('limb/net/src/lmbUri.class.php');
lmb_require('limb/web_spider/src/lmbWebSpider.class.php');
lmb_require('limb/web_spider/src/lmbContentTypeFilter.class.php');
lmb_require('limb/web_spider/src/lmbUriFilter.class.php');
lmb_require('limb/web_spider/src/lmbUriExtractor.class.php');
lmb_require('limb/web_spider/src/lmbUriNormalizer.class.php');
lmb_require('limb/web_spider/src/lmbUriContentReader.class.php');

/**
 * Bare observer with a single notify() method. It exists only so that
 * Mock::generate() below has a class to derive MockWebSpiderObserver from.
 */
class TestingSpiderObserver
{
  function notify($reader){}
}

// Mock counterparts for every collaborator that gets plugged into the spider.
Mock::generate('TestingSpiderObserver', 'MockWebSpiderObserver');
Mock::generate('lmbUriExtractor', 'MockUriExtractor');
Mock::generate('lmbUriNormalizer', 'MockUriNormalizer');
Mock::generate('lmbUriFilter', 'MockUriFilter');
Mock::generate('lmbContentTypeFilter', 'MockContentTypeFilter');
Mock::generate('lmbUriContentReader', 'MockUriContentReader');

/**
 * Tests lmbWebSpider::crawl() with every collaborator (observer, extractor,
 * filters, normalizer, reader) replaced by a mock.
 */
class lmbWebSpiderTest extends UnitTestCase
{
  // Spider under test.
  var $spider;
  // Mocked collaborators, re-created for every test in setUp().
  var $observer;
  var $extractor;
  var $uri_filter;
  var $content_type_filter;
  var $normalizer;
  var $reader;

  /**
   * Creates a fresh spider and wires one mock of each collaborator into it.
   */
  function setUp()
  {
    $this->spider = new lmbWebSpider();

    $this->observer = new MockWebSpiderObserver();
    $this->spider->registerObserver($this->observer);

    $this->extractor = new MockUriExtractor();
    $this->spider->setUriExtractor($this->extractor);

    $this->uri_filter = new MockUriFilter();
    $this->spider->setUriFilter($this->uri_filter);

    $this->content_type_filter = new MockContentTypeFilter();
    $this->spider->setContentTypeFilter($this->content_type_filter);

    $this->normalizer = new MockUriNormalizer();
    $this->spider->setUriNormalizer($this->normalizer);

    $this->reader = new MockUriContentReader();
    $this->spider->setUriContentReader($this->reader);
  }

  /**
   * Nothing to clean up; setUp() rebuilds all fixtures.
   */
  function tearDown()
  {
  }

  /**
   * When the content type filter rejects the type reported by the reader,
   * the content must not be fetched, no links may be extracted and the
   * observer must not be notified.
   */
  function testContentTypeFiltering()
  {
    $start_uri = new lmbUri('http://some.host/whatever.html');
    $content_type = 'whatever';

    // The URI itself is allowed through.
    $this->uri_filter->expectOnce('canPass');
    $this->uri_filter->setReturnValue('canPass', true, array($start_uri));
    $this->normalizer->expectOnce('process');

    // The reader is opened and asked for the type, but never for the body.
    $this->reader->expectOnce('open', array($start_uri));
    $this->reader->setReturnValue('getContentType', $content_type);
    $this->reader->expectNever('getContent');

    // The content type is rejected ...
    $this->content_type_filter->expectOnce('canPass', array($content_type));
    $this->content_type_filter->setReturnValue('canPass', false, array($content_type));

    // ... so neither extraction nor notification may happen.
    $this->extractor->expectNever('extract');
    $this->observer->expectNever('notify');

    $this->spider->crawl($start_uri);
  }

  /**
   * Every registered observer is notified exactly once, with the reader,
   * for a page that passes both filters.
   */
  function testNotifyObservers()
  {
    // NOTE(review): setUp() builds the same mock class without a constructor
    // argument and no other test calls tally() - confirm whether either is
    // still required by the SimpleTest version in use.
    $second_observer = new MockWebSpiderObserver($this);
    $this->spider->registerObserver($second_observer);

    $start_uri = new lmbUri('http://some.host/whatever.html');
    $content = 'whatever';
    $content_type = 'whatever';
    $reader_class = 'MockUriContentReader';

    $this->uri_filter->expectOnce('canPass');
    $this->uri_filter->setReturnValue('canPass', true, array($start_uri));
    $this->normalizer->expectOnce('process');

    $this->reader->expectOnce('open', array($start_uri));
    $this->reader->setReturnValue('getContent', $content);
    $this->reader->setReturnValue('getContentType', $content_type);

    $this->content_type_filter->expectOnce('canPass', array($content_type));
    $this->content_type_filter->setReturnValue('canPass', true, array($content_type));

    // The page contains no links, so crawling stops after this page.
    $this->extractor->expectOnce('extract', array($content));
    $this->extractor->setReturnValue('extract', array(), array($content));

    $this->observer->expectOnce('notify', array(new IsAExpectation($reader_class)));
    $second_observer->expectOnce('notify', array(new IsAExpectation($reader_class)));

    $this->spider->crawl($start_uri);

    $second_observer->tally();
  }

  /**
   * Two pages whose extracted links point at each other and at themselves:
   * the test expects exactly two opens, two extractions and two
   * notifications in total.
   *
   * NOTE(review): the extracted links are relative; presumably the spider
   * resolves them against the page being crawled before normalizing -
   * confirm against lmbWebSpider.
   */
  function testCrawlCacheHitComplexUrl()
  {
    $this->observer->expectCallCount('notify', 2);

    $index_uri = new lmbUri('http://example.com/index.html');
    $page_uri = new lmbUri('http://example.com/level1/page1.html');

    $this->uri_filter->expectCallCount('canPass', 2);
    $this->uri_filter->setReturnValueAt(0, 'canPass', true, array($index_uri));
    $this->uri_filter->setReturnValueAt(1, 'canPass', true, array($page_uri));

    // Expected argument of each successive normalizer call.
    $normalized_in_order = array($index_uri, $index_uri, $page_uri, $index_uri, $page_uri);
    $this->normalizer->expectCallCount('process', 5);
    foreach($normalized_in_order as $timing => $expected_uri)
    {
      $this->normalizer->expectArgumentsAt($timing, 'process', array($expected_uri));
    }

    // Each of the two pages is opened once, index page first.
    $opened_in_order = array($index_uri, $page_uri);
    $this->reader->expectCallCount('open', 2);
    foreach($opened_in_order as $timing => $expected_uri)
    {
      $this->reader->expectArgumentsAt($timing, 'open', array($expected_uri));
    }

    $content1 = 'whatever1';
    $content_type1 = 'type1';
    $content2 = 'whatever2';
    $content_type2 = 'type2';

    $this->reader->expectCallCount('getContent', 2);
    $this->reader->setReturnValueAt(0, 'getContent', $content1);
    $this->reader->setReturnValueAt(0, 'getContentType', $content_type1);
    $this->reader->setReturnValueAt(1, 'getContent', $content2);
    $this->reader->setReturnValueAt(1, 'getContentType', $content_type2);

    $this->content_type_filter->expectCallCount('canPass', 2);
    $this->content_type_filter->setReturnValueAt(0, 'canPass', true, array($content_type1));
    $this->content_type_filter->setReturnValueAt(1, 'canPass', true, array($content_type2));

    // Links handed back by the extractor for the first and second page.
    $index_links = array(new lmbUri('index.html'), new lmbUri('level1/page1.html#anchor'));
    $page_links = array(new lmbUri('../index.html'), new lmbUri('page1.html'));

    $this->extractor->expectCallCount('extract', 2);
    $this->extractor->setReturnValue('extract', $index_links, array($content1));
    $this->extractor->setReturnValue('extract', $page_links, array($content2));

    $this->spider->crawl($index_uri);
  }
}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Mon Dec 1 03:56:46 2008 | Cross-referenced by PHPXref 0.7 |