| [ Index ] |
PHP Cross Reference of Limb3 |
[Summary view] [Print] [Text view]
<?php
/*
 * Limb PHP Framework
 *
 * @link http://limb-project.com
 * @copyright Copyright © 2004-2007 BIT(http://bit-creative.com)
 * @license LGPL http://www.gnu.org/copyleft/lesser.html
 */

// This class is based on Harry Fuecks' phputf8 library code(http://sourceforge.net/projects/phputf8)
// and original ideas taken from http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php

/**
 * class lmbUTF8BaseDriver.
 *
 * Pure-PHP (PCRE based) implementations of common string functions for
 * UTF-8 encoded text.
 *
 * @package i18n
 * @version $Id: lmbUTF8BaseDriver.class.php 5945 2007-06-06 08:31:43Z pachanga $
 */
class lmbUTF8BaseDriver {
  /**
   * URL-Encode a filename to allow unicode characters
   *
   * Slashes are not encoded
   *
   * When the second parameter is true the string will
   * be encoded only if non ASCII characters are detected -
   * This makes it safe to run it multiple times on the
   * same string (default is true)
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see urlencode
   * @param string $file filename to encode
   * @param bool $safe skip encoding when $file only holds [a-zA-Z0-9/_-.%]
   * @return string
   */
  function UTF8EncodeFN($file, $safe = true) {
    if ($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#', $file))
      return $file;

    $file = urlencode($file);
    // urlencode() also encodes the path separator; restore it
    $file = str_replace('%2F', '/', $file);
    return $file;
  }

  /**
   * URL-Decode a filename
   *
   * This is just a wrapper around urldecode
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see urldecode
   * @param string $file
   * @return string
   */
  function UTF8DecodeFN($file) {
    $file = urldecode($file);
    return $file;
  }

  /**
   * Checks if a string contains 7bit ASCII only
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @param string $str
   * @return bool true when no byte above 0x7F is present (also for '')
   */
  function isASCII($str) {
    // Byte-wise match (no /u modifier): any byte >= 0x80 means non-ASCII.
    // Replaces the former $str{$i} loop; curly-brace offsets are a fatal
    // error on PHP 8.
    return !preg_match('/[\x80-\xFF]/', (string)$str);
  }

  /**
   * Strips all highbyte chars
   *
   * Returns a pure ASCII7 string
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @param string $str
   * @return string $str with every byte >= 0x80 removed
   */
  function UTF8Strip($str) {
    // Byte-wise removal (no /u modifier), same result as the former
    // $str{$i} loop without the removed curly-brace offset syntax.
    return preg_replace('/[\x80-\xFF]/', '', (string)$str);
  }

  /**
   * Tries to detect if a string is in utf8 encoding
   *
   * Only the lead-byte / continuation-byte structure is checked. The
   * legacy 5 and 6 byte lead bytes (0xF8.., 0xFC..) are accepted as well.
   *
   * @author <bmorel@ssi.fr>
   * @link http://www.php.net/manual/en/function.utf8-encode.php
   * @param string $str
   * @return bool
   */
  function UTF8Check($str) {
    $len = strlen($str);
    for($i = 0; $i < $len; $i++) {
      $byte = ord($str[$i]);
      if ($byte < 0x80) continue; # 0bbbbbbb
      elseif (($byte & 0xE0) == 0xC0) $n = 1; # 110bbbbb
      elseif (($byte & 0xF0) == 0xE0) $n = 2; # 1110bbbb
      elseif (($byte & 0xF8) == 0xF0) $n = 3; # 11110bbb
      elseif (($byte & 0xFC) == 0xF8) $n = 4; # 111110bb
      elseif (($byte & 0xFE) == 0xFC) $n = 5; # 1111110b
      else return false; # Does not match any model

      // n bytes matching 10bbbbbb follow ?
      for($j = 0; $j < $n; $j++) {
        if ((++$i == $len) || ((ord($str[$i]) & 0xC0) != 0x80))
          return false;
      }
    }
    return true;
  }

  /**
   * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
   *
   * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
   * letters. Default is to deaccent both cases ($case = 0)
   *
   * Reads the globals $UTF8_LOWER_ACCENTS / $UTF8_UPPER_ACCENTS (declared
   * outside this class); their keys are searched for and replaced by the
   * corresponding values.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @param string $string
   * @param int $case -1 lower only, 1 upper only, 0 both
   * @return string
   */
  function UTF8Deaccent($string, $case = 0) {
    if ($case <= 0) {
      global $UTF8_LOWER_ACCENTS;
      $string = str_replace(array_keys($UTF8_LOWER_ACCENTS), array_values($UTF8_LOWER_ACCENTS), $string);
    }
    if ($case >= 0) {
      global $UTF8_UPPER_ACCENTS;
      $string = str_replace(array_keys($UTF8_UPPER_ACCENTS), array_values($UTF8_UPPER_ACCENTS), $string);
    }
    return $string;
  }

  /**
   * Removes special characters (nonalphanumeric) from a UTF-8 string
   *
   * Be sure to specify all specialchars you give in $repl in $keep, too
   * or it won't work.
   *
   * This function adds the controlchars 0x00 to 0x19 to the array of
   * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @param string $string The UTF8 string to strip of special chars
   * @param string $repl Replace special with this string
   * @param string $keep Special chars to keep (in UTF8)
   * @return string
   */
  function UTF8StripSpecials($string, $repl = '', $keep = '') {
    global $UTF8_SPECIAL_CHARS;
    if ($keep != '')
      $specials = array_diff($UTF8_SPECIAL_CHARS, $this->toUnicode($keep));
    else
      $specials = $UTF8_SPECIAL_CHARS;

    // Turn the remaining code points back into a UTF-8 string that can be
    // dropped into a character class.
    $specials = $this->toUTF8($specials);
    $specials = preg_quote($specials, '/');

    return preg_replace('/[\x00-\x19' . $specials . ']/u', $repl, $string);
  }

  /**
   * UTF8 aware replacement for strlen()
   *
   * utf8_decode() converts characters that are not in ISO-8859-1
   * to '?', which, for the purpose of counting, is alright - It's
   * even faster than mb_strlen.
   *
   * NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept
   * here so the counting behaviour stays exactly as before.
   *
   * @author <chernyshevsky at hotmail dot com>
   * @see strlen
   * @see utf8_decode
   * @param string $string
   * @return int number of characters
   */
  function _strlen($string) {
    return strlen(utf8_decode($string));
  }

  /**
   * UTF8 aware replacement for substr()
   *
   * Builds a /us regex that skips $start characters and captures the
   * requested number of characters.
   *
   * @todo Handle negative positions etc.
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @see substr
   * @param string $str
   * @param int $start character offset; negative counts from the end
   * @param int|null $length character count; null means "to the end"
   * @return string|false the substring, or false when the pattern does not match
   */
  function _substr($str, $start, $length=null) {
    $start = (int)$start;
    if (!is_null($length)) $length = (int)$length;

    $strlen = $this->_strlen($str);

    // Clamp an over-long length to the string length (keeping its sign)
    if (!is_null($length) && abs($length) > $strlen)
      $length = ($length > 0) ? $strlen : -1 * $strlen;

    if ($start < 0)
      $start = $strlen + $start;

    // Negative length: stop that many characters before the end
    if ($length < 0)
      $length = $strlen + $length - $start;

    if (is_null($length) || $length >= $strlen)
      $length = '*';
    else
      $length = '{0,' . $length . '}';

    $pattern = '/^.{' . $start . '}(.' . $length . ')/us';
    preg_match($pattern, $str, $matches);

    if (isset($matches[1]))
      return $matches[1];

    return false;
  }

  /**
   * UTF8 aware replacement for str_replace()
   *
   * Implemented with preg_replace(), so `$n` / `\n` sequences inside $r
   * are interpreted as back references.
   *
   * @todo support PHP5 count (fourth arg)
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @see str_replace();
   * @param string|array $s needle(s), taken literally
   * @param string|array $r replacement(s)
   * @param string|array $str subject
   * @return string|array
   */
  function _str_replace($s, $r, $str) {
    if (!is_array($s)) {
      $s = '!' . preg_quote($s, '!') . '!u';
    } else {
      foreach ($s as $k => $v)
        // Quote the '!' delimiter too, exactly like the scalar branch,
        // otherwise a needle containing '!' yields an invalid pattern.
        $s[$k] = '!' . preg_quote($v, '!') . '!u';
    }
    return preg_replace($s, $r, $str);
  }

  /**
   * UTF8 aware replacement for ltrim()
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see ltrim
   * @param string $str
   * @param string $charlist characters to strip; '' falls back to ltrim()
   * @return string
   */
  function _ltrim($str, $charlist = '') {
    if ($charlist == '')
      return ltrim($str);

    $chars = preg_split('//u', $charlist, -1, PREG_SPLIT_NO_EMPTY);
    // Escape each character including the '/' delimiter used below
    foreach ($chars as $k => $char)
      $chars[$k] = preg_quote($char, '/');
    $regex = '(' . implode('|', $chars) . ')';

    return preg_replace('/^' . $regex . '+/u', '', $str);
  }

  /**
   * UTF8 aware replacement for rtrim()
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see rtrim
   * @param string $str
   * @param string $charlist characters to strip; '' falls back to rtrim()
   * @return string
   */
  function _rtrim($str, $charlist = '') {
    if ($charlist == '')
      return rtrim($str);

    $chars = preg_split('//u', $charlist, -1, PREG_SPLIT_NO_EMPTY);
    // Escape each character including the '/' delimiter used below
    foreach ($chars as $k => $char)
      $chars[$k] = preg_quote($char, '/');
    $regex = '(' . implode('|', $chars) . ')';

    return preg_replace('/' . $regex . '+$/u', '', $str);
  }

  /**
   * UTF8 aware replacement for trim()
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see trim
   * @param string $str
   * @param string $charlist characters to strip; '' falls back to trim()
   * @return string
   */
  function _trim($str, $charlist = '') {
    if ($charlist == '')
      return trim($str);

    return $this->_ltrim($this->_rtrim($str, $charlist), $charlist);
  }

  /**
   * This is a unicode aware replacement for strtolower()
   *
   * Maps every code point through the global $UTF8_UPPER_TO_LOWER table
   * (declared outside this class); unmapped code points are kept.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see strtolower
   * @param string $string
   * @return string
   */
  function _strtolower($string) {
    global $UTF8_UPPER_TO_LOWER;
    $uni = $this->toUnicode($string);
    $total = count($uni);
    for($i = 0; $i < $total; $i++) {
      if (isset($UTF8_UPPER_TO_LOWER[$uni[$i]]))
        $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
    }
    return $this->toUTF8($uni);
  }

  /**
   * This is a unicode aware replacement for strtoupper()
   *
   * Maps every code point through the global $UTF8_LOWER_TO_UPPER table
   * (declared outside this class); unmapped code points are kept.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @see strtoupper
   * @param string $string
   * @return string
   */
  function _strtoupper($string) {
    global $UTF8_LOWER_TO_UPPER;
    $uni = $this->toUnicode($string);
    $total = count($uni);
    for($i = 0; $i < $total; $i++) {
      if (isset($UTF8_LOWER_TO_UPPER[$uni[$i]]))
        $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
    }
    return $this->toUTF8($uni);
  }

  /**
   * This is an UTF8 aware replacement for strpos
   *
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @see strpos
   * @param string $haystack
   * @param string $needle
   * @param int|false $offset character offset to start searching at
   * @return int|false character position of the first match, or false
   */
  function _strpos($haystack, $needle, $offset=false) {
    if ($offset === false) {
      // Everything before the first occurrence ends up in $ar[0]
      $ar = explode($needle, $haystack);
      if (count($ar) > 1)
        return $this->_strlen($ar[0]);

      return false;
    } else {
      if (!is_int($offset)) {
        trigger_error('Offset must be an integer', E_USER_WARNING);
        return false;
      }

      $haystack = $this->_substr($haystack, $offset);

      if (false !== ($pos = $this->_strpos($haystack, $needle)))
        return $pos + $offset;

      return false;
    }
  }

  /**
   * This is an UTF-8 aware alternative to strrpos
   *
   * Find position of last occurrence of a char in a string
   * Note: This will get a lot slower if offset is used
   *
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @param string $str
   * @param string $needle
   * @param int|false $offset character offset to start searching at
   * @return int|false character position of the last match, or false
   */
  function _strrpos($str, $needle, $offset=false) {
    if ($offset === false) {
      $ar = explode($needle, $str);
      if ( count($ar) > 1 ) {
        // Pop off the end of the string where the last match was made
        array_pop($ar);
        $str = join($needle,$ar);
        return $this->_strlen($str);
      }
      return false;
    } else {
      if ( !is_int($offset) ) {
        trigger_error('_strrpos: Offset must be an integer', E_USER_ERROR);
        return false;
      }
      $str = $this->_substr($str, $offset);
      if ( false !== ( $pos = $this->_strrpos($str, $needle) ) ) {
        return $pos + $offset;
      }
      return false;
    }
  }

  /**
   * This is UTF-8 aware alternative to ucfirst
   *
   * Make a string's first character uppercase
   *
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @param string $str
   * @return string $str unchanged when it is empty or does not match
   */
  function _ucfirst($str) {
    //the regex below doesn't work :(
    //preg_match('/^(\w{1})(.*)$/us', $str, $matches);
    preg_match('/^(.)(.*)$/us', $str, $matches);

    if ( isset($matches[1]) && isset($matches[2]) ) {
      return $this->_strtoupper($matches[1]) . $matches[2];
    } else {
      return $str;
    }
  }

  /**
   * UTF-8 aware alternative to strcasecmp
   * A case insensitive string comparison
   *
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @param string $strX
   * @param string $strY
   * @return int result of strcmp() on the lower-cased strings
   */
  function _strcasecmp($strX, $strY) {
    return strcmp($this->_strtolower($strX),
                  $this->_strtolower($strY));
  }

  /**
   * UTF-8 aware alternative to substr_count
   *
   * @param string $haystack
   * @param string $needle taken literally
   * @return int number of non-overlapping matches
   */
  function _substr_count($haystack, $needle) {
    // Quote the '/' delimiter as well so needles such as 'a/b' stay valid
    if(preg_match_all('/(' . preg_quote($needle, '/') . ')/u', $haystack, $matches)) {
      return sizeof($matches[1]);
    }
    return 0;
  }

  /**
   * UTF-8 aware alternative to str_split
   * Convert a string to an array
   *
   * @author Harry Fuecks <hfuecks@gmail.com>
   * @param string $str
   * @param int $split_len chunk length in characters (>= 1)
   * @return array|false list of chunks, or false for an invalid $split_len
   */
  function _str_split($str, $split_len=1) {
    $split_len = (int)$split_len;
    // After the cast only the range needs checking
    if ( $split_len < 1 ) {
      return false;
    }

    $len = $this->_strlen($str);
    if ( $len <= $split_len ) {
      return array($str);
    }

    // Full-size chunks first, then whatever shorter remainder is left
    preg_match_all('/.{'.$split_len.'}|[^\x00]{1,'.$split_len.'}$/us', $str, $ar);
    return $ar[0];
  }

  /**
   * This is UTF-8 aware alternative to preg_match
   *
   * Appends the 'u' modifier to $pattern, so $pattern must be a string
   * ending in its closing delimiter (plus optional modifiers).
   *
   * @param string $pattern
   * @param string $subject
   * @param array $matches filled by preg_match()
   * @param int|null $flags passed through when given
   * @param int|null $offset passed through when given
   * @return int|false result of preg_match()
   */
  function _preg_match($pattern, $subject, &$matches, $flags=null, $offset=null) {
    if (!is_null($offset)) {
      // An offset needs a flags argument; fall back to "no flags"
      return preg_match($pattern . 'u', $subject, $matches, is_null($flags) ? 0 : $flags, $offset);
    } elseif (!is_null($flags)) {
      return preg_match($pattern . 'u', $subject, $matches, $flags);
    } else {
      return preg_match($pattern . 'u', $subject, $matches);
    }
  }

  /**
   * This is UTF-8 aware alternative to preg_match_all
   *
   * Appends the 'u' modifier to $pattern, so $pattern must be a string
   * ending in its closing delimiter (plus optional modifiers).
   *
   * @param string $pattern
   * @param string $subject
   * @param array $matches filled by preg_match_all()
   * @param int|null $flags passed through when given
   * @param int|null $offset passed through when given
   * @return int|false result of preg_match_all()
   */
  function _preg_match_all($pattern, $subject, &$matches, $flags=null, $offset=null) {
    if (!is_null($offset)) {
      // An offset needs a flags argument; PREG_PATTERN_ORDER is the default
      return preg_match_all($pattern . 'u', $subject, $matches, is_null($flags) ? PREG_PATTERN_ORDER : $flags, $offset);
    } elseif (!is_null($flags)) {
      return preg_match_all($pattern . 'u', $subject, $matches, $flags);
    } else {
      return preg_match_all($pattern . 'u', $subject, $matches);
    }
  }

  /**
   * This is UTF-8 aware alternative to preg_replace
   *
   * Appends the 'u' modifier to $pattern, so $pattern must be a string.
   *
   * @param string $pattern
   * @param string|array $replacement
   * @param string|array $subject
   * @param int|null $limit passed through when given
   * @return string|array|null result of preg_replace()
   */
  function _preg_replace($pattern, $replacement, $subject, $limit=null) {
    if(!is_null($limit)) {
      return preg_replace($pattern .'u', $replacement, $subject, $limit);
    } else {
      return preg_replace($pattern .'u', $replacement, $subject);
    }
  }

  /**
   * This is UTF-8 aware alternative to preg_replace_callback
   *
   * Appends the 'u' modifier to $pattern, so $pattern must be a string.
   *
   * @param string $pattern
   * @param callable $callback
   * @param string|array $subject
   * @param int|null $limit passed through when given
   * @return string|array|null result of preg_replace_callback()
   */
  function _preg_replace_callback($pattern, $callback, $subject, $limit=null) {
    if(!is_null($limit)) {
      return preg_replace_callback($pattern .'u', $callback, $subject, $limit);
    } else {
      return preg_replace_callback($pattern .'u', $callback, $subject);
    }
  }

  /**
   * This is UTF-8 aware alternative to preg_split
   *
   * Appends the 'u' modifier to $pattern, so $pattern must be a string.
   *
   * @param string $pattern
   * @param string $subject
   * @param int|null $limit passed through when given
   * @param int|null $flags passed through when given
   * @return array|false result of preg_split()
   */
  function _preg_split($pattern, $subject, $limit=null, $flags=null) {
    if (!is_null($flags)) {
      // Flags need a limit argument; -1 means "no limit"
      return preg_split($pattern . 'u', $subject, is_null($limit) ? -1 : $limit, $flags);
    } elseif (!is_null($limit)) {
      return preg_split($pattern .'u', $subject, $limit);
    } else {
      return preg_split($pattern . 'u', $subject);
    }
  }

  /**
   * This function returns any UTF-8 encoded text as a list of
   * Unicode values:
   *
   * @author Scott Michael Reynen <scott@randomchaos.com>
   * @link http://www.randomchaos.com/document.php?source=php_and_unicode