feat: speed improvements

Signed-off-by: Sebastian Krupinski <root@LAPTOP-7DVOR6NC>
This commit is contained in:
Sebastian Krupinski
2026-02-20 23:34:30 -05:00
committed by Sebastian Krupinski
parent e51c65bf19
commit 6fac63b7d2
38 changed files with 730 additions and 1100 deletions

View File

@@ -16,6 +16,7 @@ use Gricob\IMAP\Mime\Part\MultiPart;
use Gricob\IMAP\Mime\Part\Part;
use Gricob\IMAP\Mime\Part\SinglePart;
use Gricob\IMAP\Protocol\Command\AppendCommand;
use Gricob\IMAP\Protocol\Command\Argument\QuotedString;
use Gricob\IMAP\Protocol\Command\Argument\Search\Criteria;
use Gricob\IMAP\Protocol\Command\Argument\SequenceSet;
use Gricob\IMAP\Protocol\Command\Argument\Store\Flags;
@@ -27,6 +28,7 @@ use Gricob\IMAP\Protocol\Command\ExpungeCommand;
use Gricob\IMAP\Protocol\Command\FetchCommand;
use Gricob\IMAP\Protocol\Command\ListCommand;
use Gricob\IMAP\Protocol\Command\LogInCommand;
use Gricob\IMAP\Protocol\Command\SearchCommand;
use Gricob\IMAP\Protocol\Command\SelectCommand;
use Gricob\IMAP\Protocol\Command\StoreCommand;
use Gricob\IMAP\Protocol\Imap;
@@ -238,6 +240,43 @@ class Client
}
}
/**
 * Select $mailbox and stream a FETCH over the whole mailbox (sequence set
 * 1:*), yielding each FetchData response as it is produced by the
 * streaming send.
 *
 * Keys are message UIDs when the client is configured with useUid, and the
 * FetchData id otherwise. Response lines that are not FetchData are skipped.
 *
 * @param string $mailbox Mailbox to select before fetching
 * @param string[] $items IMAP FETCH data items
 * @return Generator<int, FetchData>
 * @throws RuntimeException when useUid is enabled and a response carries no uid
 */
public function streamAll(
    string $mailbox,
    array $items = ['FLAGS', 'ENVELOPE', 'INTERNALDATE', 'RFC822.SIZE', 'BODYSTRUCTURE', 'UID'],
): Generator {
    $this->select($mailbox);

    $fetchEverything = new FetchCommand(
        $this->configuration->useUid,
        SequenceSet::all(),
        $items,
    );

    foreach ($this->imap->sendStreaming($fetchEverything) as $response) {
        if ($response instanceof FetchData) {
            $sequenceId = $response->id;
            $key = $this->configuration->useUid
                ? $response->uid ?? throw new RuntimeException('Unable to get uid from message ' . $response->id)
                : $sequenceId;

            yield $key => $response;
        }
    }
}
/**
* Stream messages from a sequence range as a Generator, yielding each
* LazyMessage as soon as its FETCH response line arrives off the socket —
@@ -378,6 +417,75 @@ class Client
$this->send(new CreateCommand($name));
}
/**
 * Send a DELETE command for the mailbox called $name.
 *
 * The name is passed as a QuotedString argument.
 */
public function deleteMailbox(string $name): void
{
    $target = new QuotedString($name);

    $this->send(new Command('DELETE', $target));
}
/**
 * Send a RENAME command that renames mailbox $oldName to $newName.
 *
 * Both names are passed as QuotedString arguments.
 */
public function renameMailbox(string $oldName, string $newName): void
{
    $from = new QuotedString($oldName);
    $to = new QuotedString($newName);

    $this->send(new Command('RENAME', $from, $to));
}
/**
 * Copy messages from $mailbox to a destination mailbox.
 *
 * Selects $mailbox, then sends one `UID COPY` command. The command name is
 * fixed, independent of the configured useUid setting, so $uids must be
 * UIDs.
 *
 * @param string $mailbox Source mailbox, selected before copying
 * @param int[] $uids UIDs of the messages to copy
 * @param string $destination Destination mailbox name (sent as a quoted string)
 */
public function copyMessages(string $mailbox, array $uids, string $destination): void
{
    $this->select($mailbox);

    // SequenceSet::list() is the factory that accepts a flat id array; it
    // de-duplicates, sorts and collapses consecutive ids into n:m ranges.
    $this->send(new Command('UID COPY', SequenceSet::list($uids), new QuotedString($destination)));
}
/**
 * Set, add, or remove flags on a set of messages in a single round-trip.
 *
 * Selects $mailbox, then sends one STORE command. The ids are interpreted
 * as UIDs when the client is configured with useUid, otherwise as sequence
 * numbers. An empty $uids list is forwarded unchanged; no guard is applied
 * here.
 *
 * @param string $mailbox Mailbox to select before storing
 * @param int[] $uids Message ids the flags apply to
 * @param string $action '+' to add, '-' to remove, '' to replace
 * @param string[] $flags e.g. ['\\Seen', '\\Flagged']
 */
public function storeFlags(string $mailbox, array $uids, string $action, array $flags): void
{
    $this->select($mailbox);

    // SequenceSet::list() is the factory that accepts a flat id array; it
    // de-duplicates, sorts and collapses consecutive ids into n:m ranges.
    $this->send(new StoreCommand(
        $this->configuration->useUid,
        SequenceSet::list($uids),
        new Flags($flags, $action),
    ));
}
/**
 * Permanently delete messages: marks them \\Deleted via storeFlags(), then
 * sends EXPUNGE.
 *
 * The ids are UIDs only when the client is configured with useUid, because
 * storeFlags() follows that setting. An empty $uids list returns
 * immediately, so that a call with nothing to delete never reaches EXPUNGE.
 *
 * NOTE(review): a plain EXPUNGE removes every message flagged \\Deleted in
 * the selected mailbox (RFC 3501 section 6.4.3), not only the ids passed
 * here. Confirm callers accept that.
 *
 * @param string $mailbox Mailbox holding the messages
 * @param int[] $uids Ids of the messages to delete
 */
public function deleteMessages(string $mailbox, array $uids): void
{
    if ($uids === []) {
        return;
    }

    $this->storeFlags($mailbox, $uids, '+', ['\\Deleted']);
    $this->send(new ExpungeCommand());
}
/**
 * Select $mailbox, run a SEARCH with the given criteria and collect the
 * numbers from every SearchData item in the response.
 *
 * The numbers are UIDs when the client is configured with useUid, sequence
 * numbers otherwise.
 *
 * @param string $mailbox Mailbox to select before searching
 * @param Criteria[] $criteria Spread into SearchCommand as-is. An empty list
 *                             is intended to match ALL messages (presumably
 *                             handled by SearchCommand; confirm).
 * @return int[]
 */
public function searchMessages(string $mailbox, array $criteria = []): array
{
    $this->select($mailbox);

    $search = new SearchCommand($this->configuration->useUid, ...$criteria);

    $matches = [];
    foreach ($this->send($search)->getData(SearchData::class) as $result) {
        foreach ($result->numbers as $number) {
            $matches[] = $number;
        }
    }

    return $matches;
}
/**
* @param list<string>|null $flags
*/
@@ -412,7 +520,7 @@ class Client
public function doSearch(array $criteria, ?PreFetchOptions $preFetchOptions = null): array
{
$response = $this->send(
new Protocol\Command\SearchCommand(
new SearchCommand(
$this->configuration->useUid,
...$criteria
)

View File

@@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Search criterion rendered as `BODY "<value>"`, with the value written as
 * a quoted string.
 */
final readonly class Body implements Criteria
{
    /**
     * @param string $value Text to search for; escaped when rendered.
     */
    public function __construct(private string $value) {}

    /**
     * Render the criterion with `\` and `"` in the value backslash-escaped.
     */
    public function __toString(): string
    {
        // strtr() with a map replaces in a single pass. A two-pair
        // str_replace() that handles '"' first would go on to double the
        // backslash it just inserted, leaving the quote unescaped.
        return 'BODY "' . strtr($this->value, ['\\' => '\\\\', '"' => '\\"']) . '"';
    }
}

View File

@@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Argument-less search criterion that renders as the keyword `FLAGGED`.
 */
final readonly class Flagged implements Criteria
{
    private const KEYWORD = 'FLAGGED';

    public function __toString(): string
    {
        return self::KEYWORD;
    }
}

View File

@@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Search criterion rendered as `FROM "<value>"`, with the value written as
 * a quoted string.
 */
final readonly class From implements Criteria
{
    /**
     * @param string $value Text to search for; escaped when rendered.
     */
    public function __construct(private string $value) {}

    /**
     * Render the criterion with `\` and `"` in the value backslash-escaped.
     */
    public function __toString(): string
    {
        // strtr() with a map replaces in a single pass. A two-pair
        // str_replace() that handles '"' first would go on to double the
        // backslash it just inserted, leaving the quote unescaped.
        return 'FROM "' . strtr($this->value, ['\\' => '\\\\', '"' => '\\"']) . '"';
    }
}

View File

@@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Search criterion rendered as `LARGER <size>`.
 */
final readonly class Larger implements Criteria
{
    /**
     * @param int $size Size operand, written out in decimal.
     */
    public function __construct(private int $size) {}

    public function __toString(): string
    {
        return sprintf('LARGER %d', $this->size);
    }
}

View File

@@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Argument-less search criterion that renders as the keyword `SEEN`.
 */
final readonly class Seen implements Criteria
{
    private const KEYWORD = 'SEEN';

    public function __toString(): string
    {
        return self::KEYWORD;
    }
}

View File

@@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Search criterion rendered as `SMALLER <size>`.
 */
final readonly class Smaller implements Criteria
{
    /**
     * @param int $size Size operand, written out in decimal.
     */
    public function __construct(private int $size) {}

    public function __toString(): string
    {
        return sprintf('SMALLER %d', $this->size);
    }
}

View File

@@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Search criterion rendered as `SUBJECT "<value>"`, with the value written
 * as a quoted string.
 */
final readonly class Subject implements Criteria
{
    /**
     * @param string $value Text to search for; escaped when rendered.
     */
    public function __construct(private string $value) {}

    /**
     * Render the criterion with `\` and `"` in the value backslash-escaped.
     */
    public function __toString(): string
    {
        // strtr() with a map replaces in a single pass. A two-pair
        // str_replace() that handles '"' first would go on to double the
        // backslash it just inserted, leaving the quote unescaped.
        return 'SUBJECT "' . strtr($this->value, ['\\' => '\\\\', '"' => '\\"']) . '"';
    }
}

View File

@@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Search criterion rendered as `TO "<value>"`, with the value written as a
 * quoted string.
 */
final readonly class To implements Criteria
{
    /**
     * @param string $value Text to search for; escaped when rendered.
     */
    public function __construct(private string $value) {}

    /**
     * Render the criterion with `\` and `"` in the value backslash-escaped.
     */
    public function __toString(): string
    {
        // strtr() with a map replaces in a single pass. A two-pair
        // str_replace() that handles '"' first would go on to double the
        // backslash it just inserted, leaving the quote unescaped.
        return 'TO "' . strtr($this->value, ['\\' => '\\\\', '"' => '\\"']) . '"';
    }
}

View File

@@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Argument-less search criterion that renders as the keyword `UNFLAGGED`.
 */
final readonly class Unflagged implements Criteria
{
    private const KEYWORD = 'UNFLAGGED';

    public function __toString(): string
    {
        return self::KEYWORD;
    }
}

View File

@@ -0,0 +1,13 @@
<?php
declare(strict_types=1);
namespace Gricob\IMAP\Protocol\Command\Argument\Search;
/**
 * Argument-less search criterion that renders as the keyword `UNSEEN`.
 */
final readonly class Unseen implements Criteria
{
    private const KEYWORD = 'UNSEEN';

    public function __toString(): string
    {
        return self::KEYWORD;
    }
}

View File

@@ -25,6 +25,56 @@ final class SequenceSet implements Argument
return $set;
}
/**
 * Create a SequenceSet whose range is the literal "1:*", i.e. every message
 * in the mailbox.
 */
public static function all(): self
{
    $everything = new self();
    $everything->range = '1:*';

    return $everything;
}
/**
 * Create a SequenceSet from an unordered array of ids.
 *
 * Duplicates are removed, the ids are sorted ascending, and each run of
 * consecutive values is written as "first:last"; isolated values are
 * written on their own. The pieces are joined with commas.
 *
 *   [1, 2, 3, 5, 6, 10] → "1:3,5:6,10"
 *   [42]                → "42"
 *   [7, 3, 4, 5]        → "3:5,7"
 *
 * An empty array yields a SequenceSet with no range assigned.
 *
 * @param int[] $uids
 */
public static function list(array $uids): self
{
    $set = new self();
    if (empty($uids)) {
        return $set;
    }

    $sorted = array_unique($uids);
    sort($sorted);

    // Seed the first run with the smallest id, then walk the remainder.
    $runStart = $runEnd = array_shift($sorted);
    $segments = [];

    foreach ($sorted as $uid) {
        if ($uid !== null && $uid === $runEnd + 1) {
            // Still consecutive: extend the current run.
            $runEnd = $uid;
            continue;
        }

        // Gap found: emit the finished run and start a new one.
        $segments[] = $runStart === $runEnd ? (string) $runStart : $runStart . ':' . $runEnd;
        if ($uid !== null) {
            $runStart = $runEnd = $uid;
        }
    }

    // Emit the run that was still open when the input ended.
    $segments[] = $runStart === $runEnd ? (string) $runStart : $runStart . ':' . $runEnd;

    $set->range = implode(',', $segments);

    return $set;
}
public function __toString(): string
{
if ($this->range !== null) {

View File

@@ -4,6 +4,7 @@ namespace Gricob\IMAP\Protocol\Response\Parser;
use DateTimeImmutable;
use Doctrine\Common\Lexer\Token;
use Gricob\IMAP\Mime\Part\Body;
use Gricob\IMAP\Protocol\Response\Line\CommandContinuation;
use Gricob\IMAP\Protocol\Response\Line\Data\CapabilityData;
use Gricob\IMAP\Protocol\Response\Line\Data\ExistsData;
@@ -11,6 +12,9 @@ use Gricob\IMAP\Protocol\Response\Line\Data\ExpungeData;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\Address;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodySection;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure\MultiPart;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure\Part;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure\SinglePart;
use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\Envelope;
use Gricob\IMAP\Protocol\Response\Line\Data\FetchData;
use Gricob\IMAP\Protocol\Response\Line\Data\FlagsData;
@@ -29,22 +33,39 @@ use Gricob\IMAP\Protocol\Response\Line\Status\Code\UnseenCode;
use Gricob\IMAP\Protocol\Response\Line\Status\Status;
use Gricob\IMAP\Protocol\Response\Line\Status\StatusType;
readonly class Parser
class Parser
{
private Lexer $lexer;
/**
* Preloaded literal streams for the current parse() call.
* Populated by parse() when ResponseHandler has already read large
* literals into php://temp resources to keep them out of the lexer.
*
* @var list<resource>
*/
private array $literalStreams = [];
/** Sequential index into $literalStreams consumed by literal(). */
private int $nextLiteralIndex = 0;
/**
 * Create the parser with its own Lexer instance, which is reused for every
 * parse() call.
 */
public function __construct()
{
$this->lexer = new Lexer();
}
/**
* @param list<resource> $literalStreams Pre-extracted large literal streams
* (see ResponseHandler::readNextRaw).
* @throws ParseError
*/
public function parse(string $raw): Line
public function parse(string $raw, array $literalStreams = []): Line
{
$raw = $this->sanitizeInvalidEncoding($raw);
$this->literalStreams = $literalStreams;
$this->nextLiteralIndex = 0;
$this->lexer->setInput($raw);
$this->lexer->moveNext();
@@ -353,7 +374,7 @@ readonly class Parser
$this->space();
$text = $this->literal();
$bodySections[] = new BodySection($section, $text);
$bodySections = $this->fetchBody($bodyStructure, $text);
}
break;
case TokenType::ENVELOPE:
@@ -384,6 +405,145 @@ readonly class Parser
);
}
/**
 * Turn a fetched body literal into BodySection objects, using the message's
 * body structure to decide how to split it.
 *
 * When no body structure is available ($node is null) the result is an
 * empty list, because fetchBodyNode() returns [] for a null part.
 *
 * NOTE(review): in that case the literal is discarded. Confirm that callers
 * always have the BODYSTRUCTURE parsed before the body literal.
 *
 * @param BodyStructure|null $node Structure of the message, if known
 * @param string $data Raw body literal
 * @return BodySection[]
 */
private function fetchBody(?BodyStructure $node, string $data): array
{
    // Nullsafe access: $node is declared nullable, and a plain "->" on null
    // raises a warning before yielding null anyway.
    return $this->fetchBodyNode($node?->part, $data);
}
/**
 * Dispatch on the kind of structure node: a MultiPart is split into its
 * children, a SinglePart becomes exactly one section, and anything else
 * (including null) produces no sections.
 *
 * @param Part|null $node Structure node describing $data
 * @param string $data Raw bytes belonging to this node
 * @param string $partId Part id of this node; '' for the root
 * @return BodySection[]
 */
private function fetchBodyNode(?Part $node, string $data, string $partId = ''): array
{
    return match (true) {
        $node instanceof MultiPart => $this->fetchBodyMultipart($node, $data, $partId),
        $node instanceof SinglePart => [$this->fetchBodySinglePart($data, $partId)],
        default => [],
    };
}
/**
 * Wrap the bytes of a single (leaf) part in a BodySection.
 *
 * @param string $data Raw bytes of the part
 * @param string $partId Part id; an empty id is replaced by '1'
 * @return BodySection
 */
private function fetchBodySinglePart(string $data, string $partId = ''): BodySection
{
    // "?:" falls back for the same values empty() treats as empty on a
    // string: '' and '0'.
    return new BodySection($partId ?: '1', $data);
}
/**
 * Split a multipart body into its children and collect the sections of
 * every child, recursing through fetchBodyNode().
 *
 * The boundary is read from the structure's attributes (key compared
 * case-insensitively). Child ids are 1-based and dot-separated below
 * $partId. A child with no matching chunk in the data is given an empty
 * string.
 *
 * @param MultiPart $structure Structure node describing $data
 * @param string $data Raw bytes of the multipart body
 * @param string $partId Part id of this node; '' for the root
 * @return BodySection[]
 * @throws \RuntimeException when the structure has no boundary attribute
 */
private function fetchBodyMultipart(MultiPart $structure, string $data, string $partId = ''): array
{
    $boundary = null;
    foreach ($structure->attributes as $name => $attribute) {
        if (strtolower($name) !== 'boundary') {
            continue;
        }
        $boundary = $attribute;
        break;
    }

    if ($boundary === null) {
        throw new \RuntimeException('Multipart missing boundary attribute');
    }

    $segments = $this->splitOnBoundary($data, $boundary);
    $idPrefix = empty($partId) ? '' : $partId . '.';

    $sections = [];
    foreach ($structure->parts as $index => $child) {
        // The part's own MIME headers are dropped before descending.
        $payload = $this->stripPartHeaders($segments[$index] ?? '');
        $childId = $idPrefix . ($index + 1);

        foreach ($this->fetchBodyNode($child, $payload, $childId) as $section) {
            $sections[] = $section;
        }
    }

    return $sections;
}
/**
 * Split $raw on MIME boundary delimiter lines, returning one string per
 * body part, in order of appearance.
 *
 * The preamble (before the first delimiter) and epilogue (after the close
 * delimiter) are discarded. Trailing whitespace on a line is ignored when
 * matching delimiters. Lines within a part are re-joined with CRLF, and
 * trailing CR/LF characters are stripped from each part. If the close
 * delimiter is missing, whatever was collected for the last part is still
 * returned.
 *
 * @param string $raw Raw multipart body
 * @param string $boundary Boundary value, without the leading "--"
 * @return string[]
 */
private function splitOnBoundary(string $raw, string $boundary): array
{
    $delimiter = '--' . $boundary;
    $closeDelimiter = $delimiter . '--';

    $parts = [];
    // null while in the preamble and again once the close delimiter is seen
    $current = null;

    // Handle both CRLF and bare-LF line endings
    foreach (preg_split('/\r?\n/', $raw) ?: [] as $line) {
        $trimmed = rtrim($line);
        $isClose = $trimmed === $closeDelimiter;

        if ($isClose || $trimmed === $delimiter) {
            if ($current !== null) {
                $parts[] = rtrim($current, "\r\n");
            }
            if ($isClose) {
                // Mark the last part as already flushed so the fallback
                // below cannot add it a second time.
                $current = null;
                break;
            }
            $current = '';
            continue;
        }

        if ($current !== null) {
            $current .= $line . "\r\n";
        }
        // Lines before the first delimiter are preamble and are ignored
    }

    // Close delimiter absent: flush the part that was still being read.
    // No de-duplication against earlier parts, because two parts may
    // legitimately have identical content.
    if ($current !== null && $current !== '') {
        $parts[] = rtrim($current, "\r\n");
    }

    return $parts;
}
/**
 * Strip MIME part headers from a body chunk and return the body bytes.
 *
 * A chunk is expected to be its headers, a blank line, then the body. The
 * cut is made at the first blank line, whether CRLF or bare LF. A chunk
 * that starts with the blank line has no headers at all, so only that line
 * break is removed. A chunk with no blank line is returned unchanged.
 *
 * NOTE(review): a chunk that consists only of headers (empty body, blank
 * line already trimmed away) is therefore returned as-is. Verify whether
 * callers need that case handled.
 *
 * @param string $raw One part chunk, starting right after its boundary line
 * @return string Body of the part without its headers
 */
private function stripPartHeaders(string $raw): string
{
    // Header-less part: the chunk opens with the separator line itself.
    // Searching for a blank line here would cut inside the body instead.
    if (str_starts_with($raw, "\r\n")) {
        return substr($raw, 2);
    }
    if (str_starts_with($raw, "\n")) {
        return substr($raw, 1);
    }

    // Use whichever blank-line form (CRLF or bare LF) occurs first
    $crlfPos = strpos($raw, "\r\n\r\n");
    $lfPos = strpos($raw, "\n\n");

    if ($crlfPos !== false && ($lfPos === false || $crlfPos <= $lfPos)) {
        return substr($raw, $crlfPos + 4);
    }

    if ($lfPos !== false) {
        return substr($raw, $lfPos + 2);
    }

    return $raw;
}
/**
* @throws ParseError
*/
@@ -933,6 +1093,15 @@ readonly class Parser
$this->getToken(TokenType::CLOSE_BRACES);
$this->getToken(TokenType::CRLF);
// If ResponseHandler preloaded this literal (because it was too large
// to tokenise safely), consume from the php://temp resource instead
// of reading token-by-token through the lexer.
if (isset($this->literalStreams[$this->nextLiteralIndex])) {
$resource = $this->literalStreams[$this->nextLiteralIndex++];
rewind($resource);
return (string) stream_get_contents($resource);
}
$value = '';
while (strlen($value) < $size) {
$value .= $this->getToken()->value;
@@ -1005,13 +1174,48 @@ readonly class Parser
return $raw;
}
for ($i = 0; $i < strlen($raw); $i++) {
$character = $raw[$i];
if (!mb_check_encoding($character, 'US-ASCII')) {
$raw[$i] = ' ';
$result = '';
$pos = 0;
$len = strlen($raw);
while ($pos < $len) {
if (preg_match('/\{(\d+)\}\r\n/', $raw, $m, PREG_OFFSET_CAPTURE, $pos)) {
$braceOff = (int) $m[0][1];
$literalLen = (int) $m[1][0];
$headerLen = strlen($m[0][0]);
// Sanitize structural text that precedes this literal
$result .= $this->sanitizeChunk(substr($raw, $pos, $braceOff - $pos));
// Preserve the {N}\r\n marker verbatim
$result .= $m[0][0];
// Preserve the literal body bytes verbatim (may be UTF-8 / 8-bit)
$result .= substr($raw, $braceOff + $headerLen, $literalLen);
$pos = $braceOff + $headerLen + $literalLen;
} else {
// No more literals — sanitize the remainder
$result .= $this->sanitizeChunk(substr($raw, $pos));
break;
}
}
return $raw;
return $result;
}
/**
 * Replace every byte outside 7-bit ASCII (0x80-0xFF) in $chunk with a
 * single space.
 *
 * The replacement is byte-for-byte, so the string keeps its length and all
 * offsets into it stay valid.
 *
 * @param string $chunk Text to sanitize
 * @return string The chunk with non-ASCII bytes blanked out
 */
private function sanitizeChunk(string $chunk): string
{
    // The 128 high byte values, built once and reused on later calls.
    static $highBytes = null;
    $highBytes ??= implode('', array_map(chr(...), range(0x80, 0xFF)));

    // Three-argument strtr() translates byte by byte in one native pass,
    // replacing a PHP-level loop that validated each byte separately.
    return strtr($chunk, $highBytes, str_repeat(' ', 128));
}
}

View File

@@ -17,21 +17,67 @@ use RuntimeException;
readonly class ResponseHandler
{
/**
 * Size threshold in bytes. Literals of at least this size (readNextRaw()
 * compares with >=) are streamed into a php://temp resource instead of
 * being held in the raw response string. This prevents the Doctrine Lexer
 * from running preg_split() over multi-megabyte bodies, which is the root
 * cause of OOM errors on large mailboxes.
 */
private const LARGE_LITERAL_THRESHOLD = 524288; // 512 KB
/**
 * @param Parser $parser Parser used to turn each raw response line, plus any
 *                       preloaded literal streams, into a Line object.
 */
public function __construct(private Parser $parser)
{
}
/**
 * Reads the next complete IMAP response line from $stream, following
 * literal markers ({N}\r\n at the end of a line) until the line is complete.
 *
 * Large literals (>= LARGE_LITERAL_THRESHOLD bytes) are read in 8 KB
 * chunks into php://temp resources instead of being appended to $raw, so
 * the body content never reaches the lexer as a plain string. Their {N}
 * marker stays in $raw. Smaller literals are appended to $raw inline.
 *
 * NOTE(review): Parser::literal() hands out preloaded streams strictly in
 * order to whichever literal it parses next. Verify that a small inline
 * literal preceding a large one in the same line cannot receive the large
 * literal's stream.
 *
 * @return array{string, list<resource>} [$raw, $preloadedLiterals]
 * @throws RuntimeException when the temporary stream cannot be opened, or
 *                          when the stream stops delivering data before a
 *                          large literal is complete
 */
private function readNextRaw(ResponseStream $stream): array
{
    $raw = $stream->readLine();
    $preloaded = [];

    while (preg_match('/\{(?<bytes>\d+)}\r\n$/', $raw, $matches)) {
        $literalSize = (int) $matches['bytes'];

        if ($literalSize >= self::LARGE_LITERAL_THRESHOLD) {
            // Stream into a temp resource to avoid holding a huge string in
            // memory. php://temp uses RAM up to 2 MB then spills to disk.
            $tmp = fopen('php://temp', 'r+');
            if ($tmp === false) {
                throw new RuntimeException('Unable to open a temporary stream for an IMAP literal');
            }

            $remaining = $literalSize;
            while ($remaining > 0) {
                $chunk = $stream->read(min(8192, $remaining));
                if (!is_string($chunk) || $chunk === '') {
                    // No progress is possible; without this check the loop
                    // would spin forever on a closed or stalled connection.
                    fclose($tmp);
                    throw new RuntimeException(
                        'Unexpected end of stream with ' . $remaining . ' of ' . $literalSize . ' literal bytes unread'
                    );
                }
                fwrite($tmp, $chunk);
                $remaining -= strlen($chunk);
            }
            rewind($tmp);
            $preloaded[] = $tmp;
            // Keep the {N}\r\n header in $raw so the parser can read the
            // literal size, but do NOT append the N bytes: the parser will
            // pull them from the preloaded resource instead.
        } else {
            $raw .= $stream->read($literalSize);
        }

        // The rest of the response line follows the literal.
        $raw .= $stream->readLine();
    }

    return [$raw, $preloaded];
}
public function handle(string $statusTag, ResponseStream $stream, ContinuationHandler $continuationHandler): Response
{
$responseBuilder = new ResponseBuilder($statusTag);
do {
$raw = $stream->readLine();
while (preg_match('/\{(?<bytes>\d+)}\r\n$/', $raw, $matches)) {
$raw .= $stream->read((int) $matches['bytes']);
$raw .= $stream->readLine();
}
$line = $this->parser->parse($raw);
[$raw, $preloaded] = $this->readNextRaw($stream);
$line = $this->parser->parse($raw, $preloaded);
if ($line instanceof CommandContinuation) {
$continuationHandler->continue();
@@ -59,12 +105,8 @@ readonly class ResponseHandler
$status = null;
do {
$raw = $stream->readLine();
while (preg_match('/\{(?<bytes>\d+)}\r\n$/', $raw, $matches)) {
$raw .= $stream->read((int) $matches['bytes']);
$raw .= $stream->readLine();
}
$line = $this->parser->parse($raw);
[$raw, $preloaded] = $this->readNextRaw($stream);
$line = $this->parser->parse($raw, $preloaded);
if ($line instanceof CommandContinuation) {
$continuationHandler->continue();