feat: speed improvements

Signed-off-by: Sebastian Krupinski <root@LAPTOP-7DVOR6NC>
2026-02-20 23:34:30 -05:00
parent e51c65bf19
commit 7446edced3
37 changed files with 648 additions and 1086 deletions
--- a/lib/Client/Protocol/Response/Parser/Parser.php
+++ b/lib/Client/Protocol/Response/Parser/Parser.php
@@ -4,6 +4,7 @@ namespace Gricob\IMAP\Protocol\Response\Parser;

 use DateTimeImmutable;
 use Doctrine\Common\Lexer\Token;
+use Gricob\IMAP\Mime\Part\Body;
 use Gricob\IMAP\Protocol\Response\Line\CommandContinuation;
 use Gricob\IMAP\Protocol\Response\Line\Data\CapabilityData;
 use Gricob\IMAP\Protocol\Response\Line\Data\ExistsData;
@@ -11,6 +12,9 @@ use Gricob\IMAP\Protocol\Response\Line\Data\ExpungeData;
 use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\Address;
 use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodySection;
 use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure;
+use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure\MultiPart;
+use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure\Part;
+use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\BodyStructure\SinglePart;
 use Gricob\IMAP\Protocol\Response\Line\Data\Fetch\Envelope;
 use Gricob\IMAP\Protocol\Response\Line\Data\FetchData;
 use Gricob\IMAP\Protocol\Response\Line\Data\FlagsData;
@@ -353,7 +357,7 @@ readonly class Parser
                        $this->space();
                        $text = $this->literal();

-                        $bodySections[] = new BodySection($section, $text);
+                        $bodySections = $this->fetchBody($bodyStructure, $text);
                    }
                    break;
                case TokenType::ENVELOPE:
@@ -384,6 +388,145 @@ readonly class Parser
        );
    }

+    /**
+     * @return BodySection[] Sections split out of $data by walking $node's part tree; empty when $node is null.
+     */
+    private function fetchBody(?BodyStructure $node, string $data): array {
+        return $this->fetchBodyNode($node?->part, $data);
+    }
+
+    /**
+     * Turn one BODYSTRUCTURE node into body sections: a multipart node is split into
+     * its children, a single part becomes one section, any other node yields none.
+     *
+     * @param string $partId dotted part number assigned to $node ('' for the root)
+     * @return BodySection[]
+     */
+    private function fetchBodyNode(?Part $node, string $data, string $partId = ''): array {
+        return match (true) {
+            $node instanceof MultiPart  => $this->fetchBodyMultipart($node, $data, $partId),
+            $node instanceof SinglePart => [$this->fetchBodySinglePart($data, $partId)],
+            default                     => [],
+        };
+    }
+    
+    /**
+     * Wrap the data of a single (non-multipart) part in a BodySection.
+     * A falsy $partId ('' or '0') is replaced by '1' before the section is built.
+     */
+    private function fetchBodySinglePart(string $data, string $partId = ''): BodySection
+    {
+        return new BodySection($partId ?: '1', $data);
+    }
+
+    /**
+     * Split a multipart body on its boundary and recurse into every child part.
+     * Child n (1-based) is numbered "$partId.n", or just "n" when $partId is empty.
+     *
+     * @return BodySection[]
+     * @throws \RuntimeException if the structure has no boundary attribute
+     */
+    private function fetchBodyMultipart(MultiPart $structure, string $data, string $partId = ''): array
+    {
+        $boundary = null;
+        foreach ($structure->attributes as $name => $attribute) {
+            if (strtolower($name) === 'boundary') {
+                $boundary = $attribute;
+                break;
+            }
+        }
+        if ($boundary === null) {
+            throw new \RuntimeException('Multipart missing boundary attribute');
+        }
+
+        $segments = $this->splitOnBoundary($data, $boundary);
+        $prefix   = empty($partId) ? '' : $partId . '.';
+        $sections = [];
+        foreach ($structure->parts as $index => $child) {
+            $body       = $this->stripPartHeaders($segments[$index] ?? '');
+            $sections[] = $this->fetchBodyNode($child, $body, $prefix . ($index + 1));
+        }
+        return array_merge(...$sections);
+    }
+
+        /**
+     * Split $raw on MIME boundary delimiter lines, returning one string per
+     * body part.  The preamble (before the first delimiter) and epilogue
+     * (after the close delimiter) are discarded.
+     *
+     * Both CRLF and bare-LF input are accepted; lines inside a part are re-joined
+     * with CRLF and trailing line breaks are trimmed from every part. When the
+     * close delimiter is missing, the unterminated last part is still returned,
+     * even if its content equals an earlier part.
+     *
+     * @param string $raw      multipart body text to split
+     * @param string $boundary boundary parameter value, without the leading "--"
+     *
+     * @return string[]
+     */
+    private function splitOnBoundary(string $raw, string $boundary): array
+    {
+        $delimiter      = '--' . $boundary;
+        $closeDelimiter = $delimiter . '--';
+
+        $parts   = [];
+        $current = null; // null while outside a part (preamble or epilogue)
+
+        foreach (preg_split('/\r?\n/', $raw) as $line) {
+            // Delimiter lines may carry trailing whitespace padding.
+            $trimmed = rtrim($line);
+
+            if ($trimmed === $closeDelimiter || $trimmed === $delimiter) {
+                if ($current !== null) {
+                    $parts[] = rtrim($current, "\r\n");
+                }
+                if ($trimmed === $closeDelimiter) {
+                    // Mark the part as flushed so the fallback below cannot add it twice.
+                    $current = null;
+                    break;
+                }
+                $current = '';
+                continue;
+            }
+
+            if ($current !== null) {
+                $current .= $line . "\r\n";
+            }
+        }
+
+        // Close delimiter absent: flush whatever is still buffered.
+        if ($current !== null && $current !== '') {
+            $parts[] = rtrim($current, "\r\n");
+        }
+
+        return $parts;
+    }
+
+    /**
+     * Strip MIME part headers from a body chunk and return the raw body bytes.
+     *
+     * Headers end at the first blank line, written with CRLF or bare LF line
+     * endings. A chunk that starts with a blank line has no headers at all
+     * (RFC 2046 permits this), so only that leading line break is removed.
+     * A chunk without any blank line is returned unchanged.
+     */
+    private function stripPartHeaders(string $raw): string
+    {
+        // Header-less part: without this guard the search below would cut the
+        // body at its first empty line and silently drop everything before it.
+        if (preg_match('/^\r?\n/', $raw, $lineBreak) === 1) {
+            return substr($raw, strlen($lineBreak[0]));
+        }
+
+        $crlfPos = strpos($raw, "\r\n\r\n");
+        $lfPos   = strpos($raw, "\n\n");
+        if ($crlfPos !== false && ($lfPos === false || $crlfPos <= $lfPos)) {
+            return substr($raw, $crlfPos + 4);
+        }
+
+        return $lfPos !== false ? substr($raw, $lfPos + 2) : $raw;
+    }
+
    /**
     * @throws ParseError
     */
@@ -1005,13 +1148,48 @@ readonly class Parser
            return $raw;
        }

-        for ($i = 0; $i < strlen($raw); $i++) {
-            $character = $raw[$i];
-            if (!mb_check_encoding($character, 'US-ASCII')) {
-                $raw[$i] = ' ';
+        $result = '';
+        $pos    = 0;
+        $len    = strlen($raw);
+
+        while ($pos < $len) {
+            if (preg_match('/\{(\d+)\}\r\n/', $raw, $m, PREG_OFFSET_CAPTURE, $pos)) {
+                $braceOff   = (int) $m[0][1];
+                $literalLen = (int) $m[1][0];
+                $headerLen  = strlen($m[0][0]);
+
+                // Sanitize structural text that precedes this literal
+                $result .= $this->sanitizeChunk(substr($raw, $pos, $braceOff - $pos));
+
+                // Preserve the {N}\r\n marker verbatim
+                $result .= $m[0][0];
+
+                // Preserve the literal body bytes verbatim (may be UTF-8 / 8-bit)
+                $result .= substr($raw, $braceOff + $headerLen, $literalLen);
+
+                $pos = $braceOff + $headerLen + $literalLen;
+            } else {
+                // No more literals — sanitize the remainder
+                $result .= $this->sanitizeChunk(substr($raw, $pos));
+                break;
            }
        }

-        return $raw;
+        return $result;
+    }
+
+    /**
+     * Replace every non-ASCII byte of $chunk with a single space.
+     *
+     * Works byte-wise (no /u modifier), so a multi-byte UTF-8 character becomes
+     * one space per byte and the string length never changes.
+     *
+     * @param string $chunk text to sanitize
+     * @return string the chunk with all bytes >= 0x80 blanked
+     */
+    private function sanitizeChunk(string $chunk): string
+    {
+        // Same result as testing each byte with mb_check_encoding(…, 'US-ASCII').
+        return preg_replace('/[^\x00-\x7F]/', ' ', $chunk) ?? $chunk;
     }
 }