[WebSocket] Encoding

Added the ability to turn UTF-8 encoding checks off to increase performance
Separated encoding checks into its own set of classes
Encoding checks now use mbstring or iconv depending on availability
This commit is contained in:
Chris Boden 2012-07-14 16:19:16 -04:00
parent bb38fbb14b
commit 2f8bb3395b
6 changed files with 172 additions and 82 deletions

View File

@ -0,0 +1,31 @@
<?php
namespace Ratchet\WebSocket\Encoding;
class ToggleableValidator implements ValidatorInterface {
    /**
     * Switch for the encoding check: while false, checkEncoding() accepts every input
     * @var bool
     */
    public $on;

    /**
     * Validator the check is delegated to while the switch is on
     * @var Validator
     */
    private $validator;

    /**
     * @param bool $on Initial state of the switch (cast to bool)
     */
    public function __construct($on = true) {
        $this->on        = (bool)$on;
        $this->validator = new Validator;
    }

    /**
     * {@inheritdoc}
     */
    public function checkEncoding($str, $encoding) {
        // $on is public and may have been overwritten with a non-bool, so cast on read
        return (bool)$this->on
            ? $this->validator->checkEncoding($str, $encoding)
            : true;
    }
}

93
Encoding/Validator.php Normal file
View File

@ -0,0 +1,93 @@
<?php
namespace Ratchet\WebSocket\Encoding;
/**
* This class handles encoding validation
*/
class Validator {
    const UTF8_ACCEPT = 0;
    const UTF8_REJECT = 1;

    /**
     * Incremental UTF-8 validator with constant memory consumption (minimal state).
     *
     * Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
     * Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
     *
     * Layout: the first 256 entries map a byte value to its character class;
     * the entries from offset 256 onwards form the transition table, addressed
     * as 256 + (state * 16) + class.
     */
    protected static $dfa = array(
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
        0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
        0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
        0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
        1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
        1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
        1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
    );

    /**
     * Lookup if mbstring is available
     * @var bool
     */
    private $hasMbString = false;

    /**
     * Lookup if iconv is available
     * @var bool
     */
    private $hasIconv = false;

    /**
     * Record once which of the optional extensions (mbstring, iconv) are loaded
     */
    public function __construct() {
        $this->hasMbString = extension_loaded('mbstring');
        $this->hasIconv    = extension_loaded('iconv');
    }

    /**
     * Check that a string is valid in the given encoding.
     *
     * UTF-8 (matched case-insensitively) goes through isUtf8(). Any other
     * encoding is checked with mbstring when loaded, otherwise with iconv.
     * When neither extension is loaded a non-UTF-8 encoding cannot be
     * verified and the string is accepted.
     *
     * @param string $str     The value to check the encoding
     * @param string $against The type of encoding to check against
     * @return bool
     */
    public function checkEncoding($str, $against) {
        // Encoding names are case-insensitive; 'utf-8' must get the same check as 'UTF-8'
        if (is_string($against) && 0 === strcasecmp('UTF-8', $against)) {
            return $this->isUtf8($str);
        }

        if ($this->hasMbString) {
            return mb_check_encoding($str, $against);
        }

        if ($this->hasIconv) {
            return $this->survivesIconv($str, $against);
        }

        return true;
    }

    /**
     * Determine if a string is valid UTF-8.
     *
     * An extension check (mbstring, else iconv) runs first when available;
     * the DFA is then always walked over the bytes as well.
     *
     * @param string $str
     * @return bool
     */
    protected function isUtf8($str) {
        if ($this->hasMbString) {
            if (false === mb_check_encoding($str, 'UTF-8')) {
                return false;
            }
        } elseif ($this->hasIconv) {
            if (!$this->survivesIconv($str, 'UTF-8')) {
                return false;
            }
        }

        $dfa   = static::$dfa; // hoisted: avoids a static lookup per byte
        $state = static::UTF8_ACCEPT;

        for ($i = 0, $len = strlen($str); $i < $len; $i++) {
            $state = $dfa[256 + ($state << 4) + $dfa[ord($str[$i])]];

            if (static::UTF8_REJECT === $state) {
                return false;
            }
        }

        // Ending in any state other than ACCEPT means the input stopped in the
        // middle of a multi-byte sequence, which is not valid UTF-8.
        return static::UTF8_ACCEPT === $state;
    }

    /**
     * Check that converting a string from an encoding to itself with iconv
     * (using //IGNORE) leaves it unchanged.
     *
     * The comparison is strict, so a false return from iconv() counts as invalid.
     *
     * @param string $str
     * @param string $encoding
     * @return bool
     */
    private function survivesIconv($str, $encoding) {
        $converted = iconv($encoding, "{$encoding}//IGNORE", $str);

        return (string)$str === $converted;
    }
}

View File

@ -0,0 +1,12 @@
<?php
namespace Ratchet\WebSocket\Encoding;
interface ValidatorInterface {
    /**
     * Check a string against a character encoding
     * @param string $str      The string to check
     * @param string $encoding The encoding type to check against
     * @return bool
     */
    public function checkEncoding($str, $encoding);
}

View File

@ -11,19 +11,4 @@ class HyBi10 extends RFC6455 {
public function getVersionNumber() { public function getVersionNumber() {
return 6; return 6;
} }
/**
* @return HyBi10\Message
* /
public function newMessage() {
return new HyBi10\Message;
}
/**
* @return HyBi10\Frame
* /
public function newFrame() {
return new HyBi10\Frame;
}
/**/
} }

View File

@ -6,6 +6,8 @@ use Ratchet\WebSocket\Version\RFC6455\HandshakeVerifier;
use Ratchet\WebSocket\Version\RFC6455\Message; use Ratchet\WebSocket\Version\RFC6455\Message;
use Ratchet\WebSocket\Version\RFC6455\Frame; use Ratchet\WebSocket\Version\RFC6455\Frame;
use Ratchet\WebSocket\Version\RFC6455\Connection; use Ratchet\WebSocket\Version\RFC6455\Connection;
use Ratchet\WebSocket\Encoding\ValidatorInterface;
use Ratchet\WebSocket\Encoding\Validator;
use Guzzle\Http\Message\RequestInterface; use Guzzle\Http\Message\RequestInterface;
use Guzzle\Http\Message\Response; use Guzzle\Http\Message\Response;
@ -16,32 +18,6 @@ use Guzzle\Http\Message\Response;
class RFC6455 implements VersionInterface { class RFC6455 implements VersionInterface {
const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'; const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
const UTF8_ACCEPT = 0;
const UTF8_REJECT = 1;
/**
* Incremental UTF-8 validator with constant memory consumption (minimal state).
*
* Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
* Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
*/
public static $dfa = array(
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
);
/** /**
* @var RFC6455\HandshakeVerifier * @var RFC6455\HandshakeVerifier
*/ */
@ -54,16 +30,19 @@ class RFC6455 implements VersionInterface {
private $closeCodes = array(); private $closeCodes = array();
/** /**
* Lookup if mbstring is available * @var Ratchet\WebSocket\Encoding\ValidatorInterface
* @var bool
*/ */
private $hasMbString = false; protected $validator;
public function __construct() { public function __construct(ValidatorInterface $validator = null) {
$this->_verifier = new HandshakeVerifier; $this->_verifier = new HandshakeVerifier;
$this->setCloseCodes(); $this->setCloseCodes();
$this->hasMbString = extension_loaded('mbstring'); if (null === $validator) {
$validator = new Validator;
}
$this->validator = $validator;
} }
/** /**
@ -169,7 +148,7 @@ class RFC6455 implements VersionInterface {
return $from->close($frame::CLOSE_PROTOCOL); return $from->close($frame::CLOSE_PROTOCOL);
} }
if (!$this->isUtf8(substr($bin, 2))) { if (!$this->validator->checkEncoding(substr($bin, 2), 'UTF-8')) {
return $from->close($frame::CLOSE_BAD_PAYLOAD); return $from->close($frame::CLOSE_BAD_PAYLOAD);
} }
@ -214,7 +193,7 @@ class RFC6455 implements VersionInterface {
$parsed = $from->WebSocket->message->getPayload(); $parsed = $from->WebSocket->message->getPayload();
unset($from->WebSocket->message); unset($from->WebSocket->message);
if (!$this->isUtf8($parsed)) { if (!$this->validator->checkEncoding($parsed, 'UTF-8')) {
return $from->close(Frame::CLOSE_BAD_PAYLOAD); return $from->close(Frame::CLOSE_BAD_PAYLOAD);
} }
@ -236,7 +215,7 @@ class RFC6455 implements VersionInterface {
/** /**
* @return RFC6455\Frame * @return RFC6455\Frame
*/ */
public function newFrame($payload = null, $final = true, $opcode = 1) { public function newFrame($payload = null, $final = null, $opcode = null) {
return new Frame($payload, $final, $opcode); return new Frame($payload, $final, $opcode);
} }
@ -284,35 +263,4 @@ class RFC6455 implements VersionInterface {
$this->closeCodes[Frame::CLOSE_SRV_ERR] = true; $this->closeCodes[Frame::CLOSE_SRV_ERR] = true;
//$this->closeCodes[Frame::CLOSE_TLS] = true; //$this->closeCodes[Frame::CLOSE_TLS] = true;
} }
/**
* Determine if a string is a valid UTF-8 string
* @param string
* @return bool
*/
function isUtf8($str) {
if ($this->hasMbString && false === mb_check_encoding($str, 'UTF-8')) {
return false;
}
$len = strlen($str);
// The secondary method of checking is painfully slow
// If the message is more than 10kb, skip UTF-8 checks
if ($len > 10000) {
return true;
}
$state = static::UTF8_ACCEPT;
for ($i = 0; $i < $len; $i++) {
$state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]];
if (static::UTF8_REJECT === $state) {
return false;
}
}
return true;
}
} }

View File

@ -3,6 +3,7 @@ namespace Ratchet\WebSocket;
use Ratchet\MessageComponentInterface; use Ratchet\MessageComponentInterface;
use Ratchet\ConnectionInterface; use Ratchet\ConnectionInterface;
use Ratchet\WebSocket\Version; use Ratchet\WebSocket\Version;
use Ratchet\WebSocket\Encoding\ToggleableValidator;
use Guzzle\Http\Message\Response; use Guzzle\Http\Message\Response;
/** /**
@ -44,6 +45,11 @@ class WsServer implements MessageComponentInterface {
*/ */
protected $acceptedSubProtocols = array(); protected $acceptedSubProtocols = array();
/**
* @var Ratchet\WebSocket\Encoding\ValidatorInterface
*/
protected $validator;
/** /**
* Flag if we have checked the decorated component for sub-protocols * Flag if we have checked the decorated component for sub-protocols
* @var boolean * @var boolean
@ -56,10 +62,11 @@ class WsServer implements MessageComponentInterface {
public function __construct(MessageComponentInterface $component) { public function __construct(MessageComponentInterface $component) {
$this->reqParser = new HttpRequestParser; $this->reqParser = new HttpRequestParser;
$this->versioner = new VersionManager; $this->versioner = new VersionManager;
$this->validator = new ToggleableValidator;
$this->versioner $this->versioner
->enableVersion(new Version\RFC6455($component)) ->enableVersion(new Version\RFC6455($this->validator))
->enableVersion(new Version\HyBi10($component)) ->enableVersion(new Version\HyBi10($this->validator))
->enableVersion(new Version\Hixie76) ->enableVersion(new Version\Hixie76)
; ;
@ -147,9 +154,23 @@ class WsServer implements MessageComponentInterface {
/** /**
* Disable a specific version of the WebSocket protocol * Disable a specific version of the WebSocket protocol
* @param int Version ID to disable * @param int Version ID to disable
* @return WsServer
*/ */
public function disableVersion($versionId) { public function disableVersion($versionId) {
$this->versioner->disableVersion($versionId); $this->versioner->disableVersion($versionId);
return $this;
}
/**
* Toggle whether to check encoding of incoming messages
* @param bool
* @return WsServer
*/
public function setEncodingChecks($opt) {
$this->validator->on = (boolean)$opt;
return $this;
} }
/** /**