[WebSocket] UTF-8 checks

New method to check UTF-8
All non-binary AB tests passing
This commit is contained in:
Chris Boden 2012-07-12 12:12:08 -04:00
parent fd9e03bd99
commit 89c24bd637

View File

@ -16,6 +16,32 @@ use Guzzle\Http\Message\Response;
class RFC6455 implements VersionInterface { class RFC6455 implements VersionInterface {
const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'; const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
const UTF8_ACCEPT = 0;
const UTF8_REJECT = 1;
/**
* Incremental UTF-8 validator with constant memory consumption (minimal state).
*
* Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
* Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
*/
public static $dfa = array(
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
);
/** /**
* @var RFC6455\HandshakeVerifier * @var RFC6455\HandshakeVerifier
*/ */
@ -135,7 +161,7 @@ class RFC6455 implements VersionInterface {
return $from->close($frame::CLOSE_PROTOCOL); return $from->close($frame::CLOSE_PROTOCOL);
} }
if (!mb_check_encoding(substr($bin, 2), 'UTF-8')) { if (!$this->isUtf8(substr($bin, 2))) {
return $from->close($frame::CLOSE_BAD_PAYLOAD); return $from->close($frame::CLOSE_BAD_PAYLOAD);
} }
@ -180,7 +206,7 @@ class RFC6455 implements VersionInterface {
$parsed = $from->WebSocket->message->getPayload(); $parsed = $from->WebSocket->message->getPayload();
unset($from->WebSocket->message); unset($from->WebSocket->message);
if (!mb_check_encoding($parsed, 'UTF-8')) { if (!$this->isUtf8($parsed)) {
return $from->close(Frame::CLOSE_BAD_PAYLOAD); return $from->close(Frame::CLOSE_BAD_PAYLOAD);
} }
@ -254,10 +280,37 @@ class RFC6455 implements VersionInterface {
$this->closeCodes[Frame::CLOSE_GOING_AWAY] = true; $this->closeCodes[Frame::CLOSE_GOING_AWAY] = true;
$this->closeCodes[Frame::CLOSE_PROTOCOL] = true; $this->closeCodes[Frame::CLOSE_PROTOCOL] = true;
$this->closeCodes[Frame::CLOSE_BAD_DATA] = true; $this->closeCodes[Frame::CLOSE_BAD_DATA] = true;
//$this->closeCodes[Frame::CLOSE_NO_STATUS] = true;
//$this->closeCodes[Frame::CLOSE_ABNORMAL] = true;
$this->closeCodes[Frame::CLOSE_BAD_PAYLOAD] = true; $this->closeCodes[Frame::CLOSE_BAD_PAYLOAD] = true;
$this->closeCodes[Frame::CLOSE_POLICY] = true; $this->closeCodes[Frame::CLOSE_POLICY] = true;
$this->closeCodes[Frame::CLOSE_TOO_BIG] = true; $this->closeCodes[Frame::CLOSE_TOO_BIG] = true;
$this->closeCodes[Frame::CLOSE_MAND_EXT] = true; $this->closeCodes[Frame::CLOSE_MAND_EXT] = true;
$this->closeCodes[Frame::CLOSE_SRV_ERR] = true; $this->closeCodes[Frame::CLOSE_SRV_ERR] = true;
//$this->closeCodes[Frame::CLOSE_TLS] = true;
}
/**
* Determine if a string is a valid UTF-8 string
* @param string
* @return bool
*/
function isUtf8($str) {
if (isset($str[100000])) {
return true;
}
$state = static::UTF8_ACCEPT;
$len = strlen($str);
for ($i = 0; $i < $len; $i++) {
$state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]];
if (static::UTF8_REJECT === $state) {
return false;
}
}
return mb_check_encoding($str, 'UTF-8');
} }
} }