From 5dafcbd90d164da5012004917ca0f4301bfbfb19 Mon Sep 17 00:00:00 2001
From: Sebastian Krupinski
Date: Fri, 20 Feb 2026 23:34:30 -0500
Subject: [PATCH] feat: speed improvements

Signed-off-by: Sebastian Krupinski
---
 .../Protocol/Response/Parser/Parser.php | 38 ++++++++-
 lib/Client/Protocol/ResponseHandler.php | 84 +++++++++++++++++---
 2 files changed, 108 insertions(+), 14 deletions(-)

diff --git a/lib/Client/Protocol/Response/Parser/Parser.php b/lib/Client/Protocol/Response/Parser/Parser.php
index bc6e6c5..f1b6cc9 100644
--- a/lib/Client/Protocol/Response/Parser/Parser.php
+++ b/lib/Client/Protocol/Response/Parser/Parser.php
@@ -33,22 +33,39 @@ use Gricob\IMAP\Protocol\Response\Line\Status\Code\UnseenCode;
 use Gricob\IMAP\Protocol\Response\Line\Status\Status;
 use Gricob\IMAP\Protocol\Response\Line\Status\StatusType;
 
-readonly class Parser
+class Parser
 {
     private Lexer $lexer;
 
+    /**
+     * Preloaded literal streams for the current parse() call.
+     * Populated by parse() when ResponseHandler has already read large
+     * literals into php://temp resources to keep them out of the lexer.
+     *
+     * @var list<resource>
+     */
+    private array $literalStreams = [];
+
+    /** Sequential index into $literalStreams consumed by literal(). */
+    private int $nextLiteralIndex = 0;
+
     public function __construct()
     {
         $this->lexer = new Lexer();
     }
 
     /**
+     * @param list<resource> $literalStreams Pre-extracted large literal streams
+     *                                       (see ResponseHandler::readNextRaw).
      * @throws ParseError
      */
-    public function parse(string $raw): Line
+    public function parse(string $raw, array $literalStreams = []): Line
     {
         $raw = $this->sanitizeInvalidEncoding($raw);
 
+        $this->literalStreams = $literalStreams;
+        $this->nextLiteralIndex = 0;
+
         $this->lexer->setInput($raw);
         $this->lexer->moveNext();
 
@@ -1076,6 +1093,23 @@ readonly class Parser
         $this->getToken(TokenType::CLOSE_BRACES);
         $this->getToken(TokenType::CRLF);
 
+        // ResponseHandler streams large literals into php://temp resources
+        // and leaves only the "{N}\r\n" header in the raw input, while
+        // smaller literals of the same response stay inline. A preloaded
+        // stream is therefore consumed only by the literal whose announced
+        // size equals the stream length; otherwise an inline literal that
+        // precedes a large one would be handed the large literal's bytes.
+        $resource = $this->literalStreams[$this->nextLiteralIndex] ?? null;
+        if ($resource !== null) {
+            $stat = fstat($resource);
+            if ($stat !== false && $stat['size'] === (int) $size) {
+                $this->nextLiteralIndex++;
+                rewind($resource);
+
+                return (string) stream_get_contents($resource);
+            }
+        }
+
         $value = '';
         while (strlen($value) < $size) {
             $value .= $this->getToken()->value;
diff --git a/lib/Client/Protocol/ResponseHandler.php b/lib/Client/Protocol/ResponseHandler.php
index 9940fae..2a6aecf 100644
--- a/lib/Client/Protocol/ResponseHandler.php
+++ b/lib/Client/Protocol/ResponseHandler.php
@@ -17,21 +17,85 @@ use RuntimeException;
 
 readonly class ResponseHandler
 {
+    /**
+     * Literals of at least this size (in bytes) are streamed into a
+     * php://temp stream instead of being held as a PHP string. This prevents
+     * the Doctrine Lexer from running preg_split() over multi-megabyte bodies,
+     * which is the root cause of OOM errors on large mailboxes.
+     */
+    private const LARGE_LITERAL_THRESHOLD = 524288; // 512 KB
+
     public function __construct(private Parser $parser)
     {
     }
 
+    /**
+     * Reads the next complete IMAP response line from $stream.
+     *
+     * Large literals (>= LARGE_LITERAL_THRESHOLD bytes) are read in 8 KB
+     * chunks into php://temp resources instead of being appended to $raw,
+     * so the body content never reaches the lexer as a plain string.
+     *
+     * @return array{string, list<resource>} [$raw, $preloadedLiterals]
+     *
+     * @throws RuntimeException if a temporary stream cannot be opened or
+     *                          written, or $stream yields no bytes mid-literal
+     */
+    private function readNextRaw(ResponseStream $stream): array
+    {
+        $raw = $stream->readLine();
+        $preloaded = [];
+
+        while (preg_match('/\{(?<bytes>\d+)}\r\n$/', $raw, $matches)) {
+            $literalSize = (int) $matches['bytes'];
+
+            if ($literalSize >= self::LARGE_LITERAL_THRESHOLD) {
+                // Stream into a temp file to avoid holding a huge string in
+                // memory. php://temp uses RAM up to 2 MB then spills to disk.
+                $tmp = fopen('php://temp', 'r+');
+                if ($tmp === false) {
+                    throw new RuntimeException('Unable to open a php://temp stream for an IMAP literal');
+                }
+
+                $remaining = $literalSize;
+                while ($remaining > 0) {
+                    $chunk = $stream->read(min(8192, $remaining));
+                    if ($chunk === '') {
+                        // An empty read never shrinks $remaining, so fail
+                        // here instead of looping forever on a dead stream.
+                        throw new RuntimeException(sprintf(
+                            'IMAP stream ended with %d of %d literal bytes unread',
+                            $remaining,
+                            $literalSize,
+                        ));
+                    }
+                    if (fwrite($tmp, $chunk) === false) {
+                        throw new RuntimeException('Unable to buffer an IMAP literal in a php://temp stream');
+                    }
+                    $remaining -= strlen($chunk);
+                }
+                rewind($tmp);
+                $preloaded[] = $tmp;
+                // Keep the {N}\r\n header in $raw so the parser can read the
+                // literal size, but do NOT append the N bytes — the parser
+                // will pull them from the preloaded resource instead.
+            } else {
+                $raw .= $stream->read($literalSize);
+            }
+
+            $raw .= $stream->readLine();
+        }
+
+        return [$raw, $preloaded];
+    }
+
     public function handle(string $statusTag, ResponseStream $stream, ContinuationHandler $continuationHandler): Response
     {
         $responseBuilder = new ResponseBuilder($statusTag);
 
         do {
-            $raw = $stream->readLine();
-            while (preg_match('/\{(?<bytes>\d+)}\r\n$/', $raw, $matches)) {
-                $raw .= $stream->read((int) $matches['bytes']);
-                $raw .= $stream->readLine();
-            }
-            $line = $this->parser->parse($raw);
+            [$raw, $preloaded] = $this->readNextRaw($stream);
+            $line = $this->parser->parse($raw, $preloaded);
 
             if ($line instanceof CommandContinuation) {
                 $continuationHandler->continue();
@@ -59,12 +123,8 @@ readonly class ResponseHandler
         $status = null;
 
         do {
-            $raw = $stream->readLine();
-            while (preg_match('/\{(?<bytes>\d+)}\r\n$/', $raw, $matches)) {
-                $raw .= $stream->read((int) $matches['bytes']);
-                $raw .= $stream->readLine();
-            }
-            $line = $this->parser->parse($raw);
+            [$raw, $preloaded] = $this->readNextRaw($stream);
+            $line = $this->parser->parse($raw, $preloaded);
 
             if ($line instanceof CommandContinuation) {
                 $continuationHandler->continue();
-- 
2.39.5