[WebSocket] UTF-8 checks
New method to check UTF-8 All non-binary AB tests passing
This commit is contained in:
parent
fd9e03bd99
commit
89c24bd637
@ -16,6 +16,32 @@ use Guzzle\Http\Message\Response;
|
||||
class RFC6455 implements VersionInterface {
|
||||
const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
|
||||
|
||||
const UTF8_ACCEPT = 0;
|
||||
const UTF8_REJECT = 1;
|
||||
|
||||
/**
|
||||
* Incremental UTF-8 validator with constant memory consumption (minimal state).
|
||||
*
|
||||
* Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
|
||||
* Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
|
||||
*/
|
||||
public static $dfa = array(
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
|
||||
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
|
||||
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
|
||||
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
|
||||
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
|
||||
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
|
||||
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
|
||||
);
|
||||
|
||||
/**
|
||||
* @var RFC6455\HandshakeVerifier
|
||||
*/
|
||||
@ -135,7 +161,7 @@ class RFC6455 implements VersionInterface {
|
||||
return $from->close($frame::CLOSE_PROTOCOL);
|
||||
}
|
||||
|
||||
if (!mb_check_encoding(substr($bin, 2), 'UTF-8')) {
|
||||
if (!$this->isUtf8(substr($bin, 2))) {
|
||||
return $from->close($frame::CLOSE_BAD_PAYLOAD);
|
||||
}
|
||||
|
||||
@ -180,7 +206,7 @@ class RFC6455 implements VersionInterface {
|
||||
$parsed = $from->WebSocket->message->getPayload();
|
||||
unset($from->WebSocket->message);
|
||||
|
||||
if (!mb_check_encoding($parsed, 'UTF-8')) {
|
||||
if (!$this->isUtf8($parsed)) {
|
||||
return $from->close(Frame::CLOSE_BAD_PAYLOAD);
|
||||
}
|
||||
|
||||
@ -254,10 +280,37 @@ class RFC6455 implements VersionInterface {
|
||||
$this->closeCodes[Frame::CLOSE_GOING_AWAY] = true;
|
||||
$this->closeCodes[Frame::CLOSE_PROTOCOL] = true;
|
||||
$this->closeCodes[Frame::CLOSE_BAD_DATA] = true;
|
||||
//$this->closeCodes[Frame::CLOSE_NO_STATUS] = true;
|
||||
//$this->closeCodes[Frame::CLOSE_ABNORMAL] = true;
|
||||
$this->closeCodes[Frame::CLOSE_BAD_PAYLOAD] = true;
|
||||
$this->closeCodes[Frame::CLOSE_POLICY] = true;
|
||||
$this->closeCodes[Frame::CLOSE_TOO_BIG] = true;
|
||||
$this->closeCodes[Frame::CLOSE_MAND_EXT] = true;
|
||||
$this->closeCodes[Frame::CLOSE_SRV_ERR] = true;
|
||||
//$this->closeCodes[Frame::CLOSE_TLS] = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if a string is a valid UTF-8 string
|
||||
* @param string
|
||||
* @return bool
|
||||
*/
|
||||
function isUtf8($str) {
|
||||
if (isset($str[100000])) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$state = static::UTF8_ACCEPT;
|
||||
$len = strlen($str);
|
||||
|
||||
for ($i = 0; $i < $len; $i++) {
|
||||
$state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]];
|
||||
|
||||
if (static::UTF8_REJECT === $state) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return mb_check_encoding($str, 'UTF-8');
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user