[ Index ]

PHP Cross Reference of Limb3

title

Body

[close]

/search/src/indexer/ -> lmbSearchTextNormalizer.class.php (source)

   1  <?php
   2  /*
   3   * Limb PHP Framework
   4   *
   5   * @link http://limb-project.com 
   6   * @copyright  Copyright &copy; 2004-2007 BIT(http://bit-creative.com)
   7   * @license    LGPL http://www.gnu.org/copyleft/lesser.html 
   8   */
   9  
  10  /**

  11   * class lmbSearchTextNormalizer.

  12   *

  13   * @package search

  14   * @version $Id: lmbSearchTextNormalizer.class.php 5945 2007-06-06 08:31:43Z pachanga $

  15   */
  16  class lmbSearchTextNormalizer
  17  {
  18    function process($content)
  19    {
  20      $content = lmb_strtolower($content);
  21  
  22      $content = str_replace("\n", ' ', $content );
  23      $content = str_replace("\t", ' ', $content );
  24      $content = str_replace("\r", ' ', $content );
  25  
  26      $search = array (
  27                  "'<script[^>]*?>.*?</script>'siu",      // Strip out javascript
  28                  "'<[\/\!]*?[^<>]*?>'siu",               // Strip out html tags
  29                  "'([\r\n])[\s]+'u"                       // Strip out white space
  30                );
  31  
  32      $replace = array ('',
  33                       ' ',
  34                       ' ');
  35  
  36      $content = preg_replace ($search, $replace, $content);
  37  
  38      $content = preg_replace("#(\.){2,}#", ' ', $content );

  39      $content = preg_replace("#^\.#", ' ', $content);
  40      $content = preg_replace("#\s\.#", ' ', $content );
  41      $content = preg_replace("#\.\s#", ' ', $content);
  42      $content = preg_replace("#\.$#", ' ', $content);
  43  
  44      $content = preg_replace( "#(\s|^)(\"|'|`)(\w)#", '\\1\\3', $content);
  45      $content = preg_replace( "#(\w)(\"|'|`)(\s|$)#", '\\1\\3', $content);
  46  
  47      $content = str_replace("&nbsp;", ' ', $content );
  48      $content = str_replace(":", ' ', $content );
  49      $content = str_replace(",", ' ', $content );
  50      $content = str_replace(";", ' ', $content );
  51      $content = str_replace("(", ' ', $content );
  52      $content = str_replace(")", ' ', $content );
  53      $content = str_replace("-", ' ', $content );
  54      $content = str_replace("+", ' ', $content );
  55      $content = str_replace("/", ' ', $content );
  56      $content = str_replace("!", ' ', $content );
  57      $content = str_replace("?", ' ', $content );
  58      $content = str_replace("[", ' ', $content );
  59      $content = str_replace("]", ' ', $content );
  60      $content = str_replace("$", ' ', $content );
  61      $content = str_replace("\\", ' ', $content );
  62      $content = str_replace("<", ' ', $content );
  63      $content = str_replace(">", ' ', $content );
  64      $content = str_replace("*", ' ', $content );
  65  
  66      $content = trim(preg_replace("(\s+)", ' ', $content));
  67  
  68      return $content;
  69    }
  70  }
  71  
  72  
  73  ?>


Generated: Sat Nov 22 03:48:54 2008 Cross-referenced by PHPXref 0.7