[WebSocket] Encoding
Added the ability to turn UTF-8 encoding checks off to increase performance. Separated encoding checks into their own set of classes. Encoding checks now use mbstring or iconv, depending on availability.
This commit is contained in:
parent
bb38fbb14b
commit
2f8bb3395b
31
Encoding/ToggleableValidator.php
Normal file
31
Encoding/ToggleableValidator.php
Normal file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
namespace Ratchet\WebSocket\Encoding;
|
||||
|
||||
/**
 * Wrapper around Validator whose encoding checks can be switched on and off.
 *
 * While $on is false, checkEncoding() reports every string as valid without
 * inspecting it; while $on is true, the call is forwarded to a Validator.
 */
class ToggleableValidator implements ValidatorInterface {
    /**
     * Toggle if checkEncoding checks the encoding or not.
     * Public, so it may be reassigned after construction.
     * @var bool
     */
    public $on;

    /**
     * Performs the actual check whenever $on is true
     * @var Validator
     */
    private $validator;

    /**
     * @param bool $on Initial state of the toggle (cast to bool); checks are enabled by default
     */
    public function __construct($on = true) {
        $this->validator = new Validator;
        $this->on        = (bool)$on;
    }

    /**
     * {@inheritdoc}
     */
    public function checkEncoding($str, $encoding) {
        // $on is public and could hold a non-boolean; plain truthiness gives the
        // same result as casting it first.
        if (!$this->on) {
            return true;
        }

        return $this->validator->checkEncoding($str, $encoding);
    }
}
|
93
Encoding/Validator.php
Normal file
93
Encoding/Validator.php
Normal file
@ -0,0 +1,93 @@
|
||||
<?php
|
||||
namespace Ratchet\WebSocket\Encoding;
|
||||
|
||||
/**
 * This class handles encoding validation.
 *
 * UTF-8 input is pre-checked with mbstring or iconv (when one of them is
 * loaded) and then always run through a table-driven state machine. Any other
 * encoding is delegated to mbstring or iconv; when neither extension is loaded
 * such input cannot be checked and is reported as valid.
 */
class Validator {
    const UTF8_ACCEPT = 0;
    const UTF8_REJECT = 1;

    /**
     * Incremental UTF-8 validator with constant memory consumption (minimal state).
     *
     * Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
     * Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
     *
     * The first 256 entries map a byte value to a character class; the entries
     * from offset 256 onwards are the transition table, indexed by
     * (state << 4) + character class.
     */
    protected static $dfa = array(
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
        0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
        0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
        0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
        1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
        1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
        1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
    );

    /**
     * Lookup if mbstring is available
     * @var bool
     */
    private $hasMbString = false;

    /**
     * Lookup if iconv is available
     * @var bool
     */
    private $hasIconv = false;

    /**
     * Record once which of the optional extensions are loaded.
     */
    public function __construct() {
        $this->hasMbString = extension_loaded('mbstring');
        $this->hasIconv    = extension_loaded('iconv');
    }

    /**
     * Check whether a string is valid in the given encoding.
     *
     * @param string $str     The value to check the encoding
     * @param string $against The type of encoding to check against
     * @return bool False when the string is found to be invalid. True when it
     *              is valid, or when $against is not 'UTF-8' and neither
     *              mbstring nor iconv is loaded to verify it.
     */
    public function checkEncoding($str, $against) {
        if ('UTF-8' === $against) {
            return $this->isUtf8($str);
        }

        if ($this->hasMbString) {
            return mb_check_encoding($str, $against);
        } elseif ($this->hasIconv) {
            return $this->roundTripsThroughIconv($str, $against);
        }

        return true;
    }

    /**
     * Determine if a string is valid, complete UTF-8.
     *
     * @param string $str
     * @return bool
     */
    protected function isUtf8($str) {
        // Let an extension reject bad input first when one is loaded; the
        // state machine below runs in every case.
        if ($this->hasMbString) {
            if (false === mb_check_encoding($str, 'UTF-8')) {
                return false;
            }
        } elseif ($this->hasIconv) {
            if (!$this->roundTripsThroughIconv($str, 'UTF-8')) {
                return false;
            }
        }

        $state = static::UTF8_ACCEPT;

        for ($i = 0, $len = strlen($str); $i < $len; $i++) {
            $state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]];

            if (static::UTF8_REJECT === $state) {
                return false;
            }
        }

        // Any state other than ACCEPT here means the input stopped in the
        // middle of a multi-byte sequence, which is not valid UTF-8 either.
        return static::UTF8_ACCEPT === $state;
    }

    /**
     * Check a string by converting it from an encoding to that same encoding with iconv.
     *
     * With //IGNORE iconv drops what it cannot convert, so a result that
     * differs from the input means the input held invalid data. A false return
     * (iconv failed) is treated as invalid instead of being loosely compared
     * to the input, where it would equal "" or "0".
     *
     * @param string $str      The value to check
     * @param string $encoding Encoding name understood by iconv
     * @return bool
     */
    private function roundTripsThroughIconv($str, $encoding) {
        $converted = iconv($encoding, "{$encoding}//IGNORE", $str);

        // Cast so a non-string input still compares by its string form, as the
        // previous loose comparison allowed.
        return false !== $converted && (string)$str === $converted;
    }
}
|
12
Encoding/ValidatorInterface.php
Normal file
12
Encoding/ValidatorInterface.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
namespace Ratchet\WebSocket\Encoding;
|
||||
|
||||
/**
 * Contract for objects that can verify a string against a character encoding.
 */
interface ValidatorInterface {
    /**
     * Verify a string matches the encoding type
     *
     * @param string $str      The string to check
     * @param string $encoding The encoding type to check against
     * @return bool
     */
    public function checkEncoding($str, $encoding);
}
|
@ -11,19 +11,4 @@ class HyBi10 extends RFC6455 {
|
||||
public function getVersionNumber() {
|
||||
return 6;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return HyBi10\Message
|
||||
* /
|
||||
public function newMessage() {
|
||||
return new HyBi10\Message;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return HyBi10\Frame
|
||||
* /
|
||||
public function newFrame() {
|
||||
return new HyBi10\Frame;
|
||||
}
|
||||
/**/
|
||||
}
|
@ -6,6 +6,8 @@ use Ratchet\WebSocket\Version\RFC6455\HandshakeVerifier;
|
||||
use Ratchet\WebSocket\Version\RFC6455\Message;
|
||||
use Ratchet\WebSocket\Version\RFC6455\Frame;
|
||||
use Ratchet\WebSocket\Version\RFC6455\Connection;
|
||||
use Ratchet\WebSocket\Encoding\ValidatorInterface;
|
||||
use Ratchet\WebSocket\Encoding\Validator;
|
||||
use Guzzle\Http\Message\RequestInterface;
|
||||
use Guzzle\Http\Message\Response;
|
||||
|
||||
@ -16,32 +18,6 @@ use Guzzle\Http\Message\Response;
|
||||
class RFC6455 implements VersionInterface {
|
||||
const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
|
||||
|
||||
const UTF8_ACCEPT = 0;
|
||||
const UTF8_REJECT = 1;
|
||||
|
||||
/**
|
||||
* Incremental UTF-8 validator with constant memory consumption (minimal state).
|
||||
*
|
||||
* Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
|
||||
* Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
|
||||
*/
|
||||
public static $dfa = array(
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
|
||||
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
|
||||
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
|
||||
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
|
||||
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
|
||||
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
|
||||
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
|
||||
);
|
||||
|
||||
/**
|
||||
* @var RFC6455\HandshakeVerifier
|
||||
*/
|
||||
@ -54,16 +30,19 @@ class RFC6455 implements VersionInterface {
|
||||
private $closeCodes = array();
|
||||
|
||||
/**
|
||||
* Lookup if mbstring is available
|
||||
* @var bool
|
||||
* @var Ratchet\WebSocket\Encoding\ValidatorInterface
|
||||
*/
|
||||
private $hasMbString = false;
|
||||
protected $validator;
|
||||
|
||||
public function __construct() {
|
||||
public function __construct(ValidatorInterface $validator = null) {
|
||||
$this->_verifier = new HandshakeVerifier;
|
||||
$this->setCloseCodes();
|
||||
|
||||
$this->hasMbString = extension_loaded('mbstring');
|
||||
if (null === $validator) {
|
||||
$validator = new Validator;
|
||||
}
|
||||
|
||||
$this->validator = $validator;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -169,7 +148,7 @@ class RFC6455 implements VersionInterface {
|
||||
return $from->close($frame::CLOSE_PROTOCOL);
|
||||
}
|
||||
|
||||
if (!$this->isUtf8(substr($bin, 2))) {
|
||||
if (!$this->validator->checkEncoding(substr($bin, 2), 'UTF-8')) {
|
||||
return $from->close($frame::CLOSE_BAD_PAYLOAD);
|
||||
}
|
||||
|
||||
@ -214,7 +193,7 @@ class RFC6455 implements VersionInterface {
|
||||
$parsed = $from->WebSocket->message->getPayload();
|
||||
unset($from->WebSocket->message);
|
||||
|
||||
if (!$this->isUtf8($parsed)) {
|
||||
if (!$this->validator->checkEncoding($parsed, 'UTF-8')) {
|
||||
return $from->close(Frame::CLOSE_BAD_PAYLOAD);
|
||||
}
|
||||
|
||||
@ -236,7 +215,7 @@ class RFC6455 implements VersionInterface {
|
||||
/**
|
||||
* @return RFC6455\Frame
|
||||
*/
|
||||
public function newFrame($payload = null, $final = true, $opcode = 1) {
|
||||
public function newFrame($payload = null, $final = null, $opcode = null) {
|
||||
return new Frame($payload, $final, $opcode);
|
||||
}
|
||||
|
||||
@ -284,35 +263,4 @@ class RFC6455 implements VersionInterface {
|
||||
$this->closeCodes[Frame::CLOSE_SRV_ERR] = true;
|
||||
//$this->closeCodes[Frame::CLOSE_TLS] = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if a string is a valid UTF-8 string
|
||||
* @param string
|
||||
* @return bool
|
||||
*/
|
||||
function isUtf8($str) {
|
||||
if ($this->hasMbString && false === mb_check_encoding($str, 'UTF-8')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$len = strlen($str);
|
||||
|
||||
// The secondary method of checking is painfully slow
|
||||
// If the message is more than 10kb, skip UTF-8 checks
|
||||
if ($len > 10000) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$state = static::UTF8_ACCEPT;
|
||||
|
||||
for ($i = 0; $i < $len; $i++) {
|
||||
$state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]];
|
||||
|
||||
if (static::UTF8_REJECT === $state) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
25
WsServer.php
25
WsServer.php
@ -3,6 +3,7 @@ namespace Ratchet\WebSocket;
|
||||
use Ratchet\MessageComponentInterface;
|
||||
use Ratchet\ConnectionInterface;
|
||||
use Ratchet\WebSocket\Version;
|
||||
use Ratchet\WebSocket\Encoding\ToggleableValidator;
|
||||
use Guzzle\Http\Message\Response;
|
||||
|
||||
/**
|
||||
@ -44,6 +45,11 @@ class WsServer implements MessageComponentInterface {
|
||||
*/
|
||||
protected $acceptedSubProtocols = array();
|
||||
|
||||
/**
|
||||
* @var Ratchet\WebSocket\Encoding\ValidatorInterface
|
||||
*/
|
||||
protected $validator;
|
||||
|
||||
/**
|
||||
* Flag if we have checked the decorated component for sub-protocols
|
||||
* @var boolean
|
||||
@ -56,10 +62,11 @@ class WsServer implements MessageComponentInterface {
|
||||
public function __construct(MessageComponentInterface $component) {
|
||||
$this->reqParser = new HttpRequestParser;
|
||||
$this->versioner = new VersionManager;
|
||||
$this->validator = new ToggleableValidator;
|
||||
|
||||
$this->versioner
|
||||
->enableVersion(new Version\RFC6455($component))
|
||||
->enableVersion(new Version\HyBi10($component))
|
||||
->enableVersion(new Version\RFC6455($this->validator))
|
||||
->enableVersion(new Version\HyBi10($this->validator))
|
||||
->enableVersion(new Version\Hixie76)
|
||||
;
|
||||
|
||||
@ -147,9 +154,23 @@ class WsServer implements MessageComponentInterface {
|
||||
/**
|
||||
* Disable a specific version of the WebSocket protocol
|
||||
* @param int Version ID to disable
|
||||
* @return WsServer
|
||||
*/
|
||||
public function disableVersion($versionId) {
    // Delegate to the version manager held by this server
    $versions = $this->versioner;
    $versions->disableVersion($versionId);

    // Return this server so further calls can be chained
    return $this;
}
|
||||
|
||||
/**
|
||||
* Toggle whether to check the encoding of incoming messages
|
||||
* @param bool
|
||||
* @return WsServer
|
||||
*/
|
||||
public function setEncodingChecks($opt) {
    // Store a real boolean on the validator's toggle, whatever the caller passed
    $this->validator->on = (bool)$opt;

    // Return this server so further calls can be chained
    return $this;
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user