[WebSocket] UTF-8 checks
New method to check UTF-8. All non-binary AB tests passing.
This commit is contained in:
parent
fd9e03bd99
commit
89c24bd637
@ -16,6 +16,32 @@ use Guzzle\Http\Message\Response;
|
|||||||
class RFC6455 implements VersionInterface {
|
class RFC6455 implements VersionInterface {
|
||||||
const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
|
const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
|
||||||
|
|
||||||
|
const UTF8_ACCEPT = 0;
const UTF8_REJECT = 1;

/**
 * Lookup table for an incremental UTF-8 validator that needs only a single
 * integer of state (constant memory consumption).
 *
 * This is the table from Bjoern Hoehrmann's "Flexible and Economical UTF-8
 * Decoder" (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
 *
 * Layout:
 *  - entries 0..255 map a byte value to its character class;
 *  - entries from 256 onward hold the state transitions, 16 slots per state,
 *    looked up as 256 + (state << 4) + class.
 *
 * UTF8_ACCEPT (0) is the starting state and UTF8_REJECT (1) is the error state.
 */
public static $dfa = array(
    // Byte value -> character class
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
    0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
    0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff

    // (state, character class) -> next state
    0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
    1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
    1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
    1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var RFC6455\HandshakeVerifier
|
* @var RFC6455\HandshakeVerifier
|
||||||
*/
|
*/
|
||||||
@ -135,7 +161,7 @@ class RFC6455 implements VersionInterface {
|
|||||||
return $from->close($frame::CLOSE_PROTOCOL);
|
return $from->close($frame::CLOSE_PROTOCOL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mb_check_encoding(substr($bin, 2), 'UTF-8')) {
|
if (!$this->isUtf8(substr($bin, 2))) {
|
||||||
return $from->close($frame::CLOSE_BAD_PAYLOAD);
|
return $from->close($frame::CLOSE_BAD_PAYLOAD);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,7 +206,7 @@ class RFC6455 implements VersionInterface {
|
|||||||
$parsed = $from->WebSocket->message->getPayload();
|
$parsed = $from->WebSocket->message->getPayload();
|
||||||
unset($from->WebSocket->message);
|
unset($from->WebSocket->message);
|
||||||
|
|
||||||
if (!mb_check_encoding($parsed, 'UTF-8')) {
|
if (!$this->isUtf8($parsed)) {
|
||||||
return $from->close(Frame::CLOSE_BAD_PAYLOAD);
|
return $from->close(Frame::CLOSE_BAD_PAYLOAD);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,10 +280,37 @@ class RFC6455 implements VersionInterface {
|
|||||||
$this->closeCodes[Frame::CLOSE_GOING_AWAY] = true;
|
$this->closeCodes[Frame::CLOSE_GOING_AWAY] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_PROTOCOL] = true;
|
$this->closeCodes[Frame::CLOSE_PROTOCOL] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_BAD_DATA] = true;
|
$this->closeCodes[Frame::CLOSE_BAD_DATA] = true;
|
||||||
|
//$this->closeCodes[Frame::CLOSE_NO_STATUS] = true;
|
||||||
|
//$this->closeCodes[Frame::CLOSE_ABNORMAL] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_BAD_PAYLOAD] = true;
|
$this->closeCodes[Frame::CLOSE_BAD_PAYLOAD] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_POLICY] = true;
|
$this->closeCodes[Frame::CLOSE_POLICY] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_TOO_BIG] = true;
|
$this->closeCodes[Frame::CLOSE_TOO_BIG] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_MAND_EXT] = true;
|
$this->closeCodes[Frame::CLOSE_MAND_EXT] = true;
|
||||||
$this->closeCodes[Frame::CLOSE_SRV_ERR] = true;
|
$this->closeCodes[Frame::CLOSE_SRV_ERR] = true;
|
||||||
|
//$this->closeCodes[Frame::CLOSE_TLS] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Determine whether a string is well-formed UTF-8.
 *
 * Strings of up to 100000 bytes are walked byte-by-byte through the DFA in
 * static::$dfa. Longer strings are validated with mb_check_encoding() instead,
 * so that large payloads are still checked without running a PHP-level loop
 * over every byte.
 *
 * @param string $str Raw bytes to validate
 * @return bool True if $str is valid UTF-8, false otherwise
 */
public function isUtf8($str) {
    // Offset 100000 is only set when the string is longer than 100000 bytes.
    // Such payloads used to be reported as valid without being inspected at all.
    if (isset($str[100000])) {
        return mb_check_encoding($str, 'UTF-8');
    }

    $state = static::UTF8_ACCEPT;
    $len   = strlen($str);

    for ($i = 0; $i < $len; $i++) {
        // Inner lookup: byte -> character class. Outer lookup: (state, class) -> next state.
        $state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]];

        if (static::UTF8_REJECT === $state) {
            return false;
        }
    }

    // Any state other than ACCEPT means the input stopped in the middle of a
    // multi-byte sequence, which is not valid UTF-8.
    return static::UTF8_ACCEPT === $state;
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user