From c15c3244244453a78980f607ba9c487c2687b06a Mon Sep 17 00:00:00 2001 From: Chris Boden Date: Thu, 12 Jul 2012 13:29:12 -0400 Subject: [PATCH] [WebSocket] UTF-8 checking: run mb_check_encoding first, as it's much faster (fail fast). Only do the other check if the message is no longer than 10,000 bytes; large payloads crawl otherwise. --- src/Ratchet/WebSocket/Version/RFC6455.php | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Ratchet/WebSocket/Version/RFC6455.php b/src/Ratchet/WebSocket/Version/RFC6455.php index 06c32c3..1252666 100644 --- a/src/Ratchet/WebSocket/Version/RFC6455.php +++ b/src/Ratchet/WebSocket/Version/RFC6455.php @@ -266,10 +266,6 @@ class RFC6455 implements VersionInterface { } return false; - - if (empty($val)) { - return false; - } } /** @@ -296,12 +292,19 @@ class RFC6455 implements VersionInterface { * @return bool */ function isUtf8($str) { - if (isset($str[100000])) { + if (false === mb_check_encoding($str, 'UTF-8')) { + return false; + } + + $len = strlen($str); + + // The secondary method of checking is painfully slow + // If the message is more than 10kb, skip UTF-8 checks + if ($len > 10000) { return true; } $state = static::UTF8_ACCEPT; - $len = strlen($str); for ($i = 0; $i < $len; $i++) { $state = static::$dfa[256 + ($state << 4) + static::$dfa[ord($str[$i])]]; @@ -311,6 +314,6 @@ class RFC6455 implements VersionInterface { } } - return mb_check_encoding($str, 'UTF-8'); + return true; } } \ No newline at end of file