 2f8bb3395b
			
		
	
	
		2f8bb3395b
		
	
	
	
	
		
			
			Added the ability to turn UTF-8 encoding checks off to increase performance Separated encoding checks into its own set of classes Encoding checks now use mbstring or iconv depending on availability
		
			
				
	
	
		
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
namespace Ratchet\WebSocket\Encoding;

/**
 * Validates that a string is well-formed in a given character encoding.
 *
 * UTF-8 input is always run through a built-in state machine (after an
 * optional pre-check by mbstring or iconv). Any other encoding is delegated
 * to mbstring or iconv, depending on which extension is loaded.
 */
class Validator {
    /** DFA state: the bytes consumed so far form complete, valid sequences. */
    const UTF8_ACCEPT = 0;

    /** DFA state: an invalid byte was seen; the input can never become valid. */
    const UTF8_REJECT = 1;

    /**
     * Lookup table for the UTF-8 validating state machine.
     *
     * Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
     * Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
     *
     * Layout:
     *  - entries 0..255 map a byte value to its character class;
     *  - entries from 256 on hold the transitions, addressed as
     *    256 + (state * 16) + characterClass.
     *
     * @var int[]
     */
    protected static $dfa = array(
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
        0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
        0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
        0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
        1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
        1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
        1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
    );

    /**
     * Whether the mbstring extension is loaded
     * @var bool
     */
    private $hasMbString = false;

    /**
     * Whether the iconv extension is loaded
     * @var bool
     */
    private $hasIconv = false;

    /**
     * Detects once, up front, which encoding extensions can be used.
     */
    public function __construct() {
        $this->hasMbString = extension_loaded('mbstring');
        $this->hasIconv    = extension_loaded('iconv');
    }

    /**
     * Check whether a string is valid in the given encoding.
     *
     * 'UTF-8' is handled by isUtf8(). Every other encoding is checked with
     * mbstring when loaded, otherwise with an iconv round-trip. If neither
     * extension is loaded a non-UTF-8 encoding cannot be verified and the
     * string is reported as valid.
     *
     * @param string The value to check the encoding
     * @param string The type of encoding to check against
     * @return bool
     */
    public function checkEncoding($str, $against) {
        if ('UTF-8' === $against) {
            return $this->isUtf8($str);
        }

        if ($this->hasMbString) {
            return mb_check_encoding($str, $against);
        }

        if ($this->hasIconv) {
            // With //IGNORE, iconv drops bytes it cannot convert, so the
            // round-trip only equals the input when nothing was invalid.
            return ($str == iconv($against, "{$against}//IGNORE", $str));
        }

        return true;
    }

    /**
     * Check whether a string is complete, well-formed UTF-8.
     *
     * mbstring or iconv (whichever is loaded) is used as a first pass; the
     * string is then always run through the DFA, which is the only check
     * when neither extension is available.
     *
     * @param string The value to validate
     * @return bool
     */
    protected function isUtf8($str) {
        if ($this->hasMbString) {
            if (false === mb_check_encoding($str, 'UTF-8')) {
                return false;
            }
        } elseif ($this->hasIconv) {
            if ($str != iconv('UTF-8', 'UTF-8//IGNORE', $str)) {
                return false;
            }
        }

        $state = static::UTF8_ACCEPT;

        for ($i = 0, $len = strlen($str); $i < $len; $i++) {
            $charClass = static::$dfa[ord($str[$i])];
            $state     = static::$dfa[256 + ($state << 4) + $charClass];

            if (static::UTF8_REJECT === $state) {
                return false;
            }
        }

        // Any state other than ACCEPT here means the input stopped in the
        // middle of a multi-byte sequence, which is not valid UTF-8.
        return static::UTF8_ACCEPT === $state;
    }
}