From 89c24bd6377fe6354e151ecfab5f974235b200ef Mon Sep 17 00:00:00 2001 From: Chris Boden Date: Thu, 12 Jul 2012 12:12:08 -0400 Subject: [PATCH] [WebSocket] UTF-8 checks New method to check UTF-8 All non-binary AB tests passing --- src/Ratchet/WebSocket/Version/RFC6455.php | 57 ++++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/src/Ratchet/WebSocket/Version/RFC6455.php b/src/Ratchet/WebSocket/Version/RFC6455.php index 5552742..06c32c3 100644 --- a/src/Ratchet/WebSocket/Version/RFC6455.php +++ b/src/Ratchet/WebSocket/Version/RFC6455.php @@ -16,6 +16,32 @@ use Guzzle\Http\Message\Response; class RFC6455 implements VersionInterface { const GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'; + const UTF8_ACCEPT = 0; + const UTF8_REJECT = 1; + + /** + * Incremental UTF-8 validator with constant memory consumption (minimal state). + * + * Implements the algorithm "Flexible and Economical UTF-8 Decoder" by + * Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/). + */ + public static $dfa = array( + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2 + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4 + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6 + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8 + ); + /** * @var RFC6455\HandshakeVerifier */ @@ -135,7 +161,7 @@ class RFC6455 implements VersionInterface { return $from->close($frame::CLOSE_PROTOCOL); } - if (!mb_check_encoding(substr($bin, 2), 'UTF-8')) { + if (!$this->isUtf8(substr($bin, 2))) { return $from->close($frame::CLOSE_BAD_PAYLOAD); } @@ -180,7 +206,7 @@ class RFC6455 implements VersionInterface { $parsed = $from->WebSocket->message->getPayload(); unset($from->WebSocket->message); - if (!mb_check_encoding($parsed, 'UTF-8')) { + if (!$this->isUtf8($parsed)) { return $from->close(Frame::CLOSE_BAD_PAYLOAD); } @@ -254,10 +280,37 @@ class RFC6455 implements VersionInterface { $this->closeCodes[Frame::CLOSE_GOING_AWAY] = true; $this->closeCodes[Frame::CLOSE_PROTOCOL] = true; $this->closeCodes[Frame::CLOSE_BAD_DATA] = true; + //$this->closeCodes[Frame::CLOSE_NO_STATUS] = true; + //$this->closeCodes[Frame::CLOSE_ABNORMAL] = true; $this->closeCodes[Frame::CLOSE_BAD_PAYLOAD] = true; $this->closeCodes[Frame::CLOSE_POLICY] = true; $this->closeCodes[Frame::CLOSE_TOO_BIG] = true; $this->closeCodes[Frame::CLOSE_MAND_EXT] = true; $this->closeCodes[Frame::CLOSE_SRV_ERR] = true; + //$this->closeCodes[Frame::CLOSE_TLS] = true; + } + + /** + * Determine if a string is a valid UTF-8 string + * @param string + * @return bool + */ + function isUtf8($str) { + if (isset($str[100000])) { + return true; + } + + $state = static::UTF8_ACCEPT; + $len = strlen($str); + + for ($i = 0; $i < $len; $i++) { + $state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]]; + + if (static::UTF8_REJECT === $state) { + return false; + } + } + + return mb_check_encoding($str, 'UTF-8'); } } \ No newline at end of file