Files
provider_imap/lib/Client/MessageParser.php
2026-05-08 00:16:43 -04:00

665 lines
20 KiB
PHP

<?php
declare(strict_types=1);
namespace KTXM\ProviderImap\Client;
/**
 * Stateless parser that turns one raw IMAP `FETCH` response into a Message.
 *
 * The parser tokenises the parenthesised attribute list of the response
 * (atoms, quoted strings, literals and nested lists), interprets the
 * ENVELOPE and BODYSTRUCTURE attributes positionally (RFC 3501 field order)
 * and collects fetched `BODY[...]` sections, decoding textual sections to
 * UTF-8 when the body structure describes their encoding and charset.
 */
final class MessageParser
{
    /**
     * Reports whether the payload contains a FETCH attribute list opener.
     *
     * This is a cheap, case-insensitive substring check, not a full parse.
     */
    public static function isFetchMessage(string $payload): bool
    {
        return str_contains(strtoupper($payload), 'FETCH (');
    }

    /**
     * Parses a complete untagged FETCH response (`* <seq> FETCH (...)`).
     *
     * @param string $raw The full response including any literal data.
     *
     * @throws ImapException When the response is not a FETCH response, has no
     *                       numeric UID, or its attribute list is malformed.
     */
    public static function parse(string $raw): Message
    {
        if (!preg_match('/^\*\s+(\d+)\s+FETCH\s+\((.*)\)$/is', $raw, $matches)) {
            throw new ImapException('Unable to parse FETCH response: ' . $raw);
        }

        $sequence = (int) $matches[1];
        $attributes = self::parseAttributes($matches[2]);
        $uid = self::toInt($attributes['UID'] ?? null, 'FETCH response is missing UID: ' . $raw);
        $envelope = is_array($attributes['ENVELOPE'] ?? null) ? $attributes['ENVELOPE'] : null;
        $bodyStructure = isset($attributes['BODYSTRUCTURE'])
            ? self::parseBodyPart($attributes['BODYSTRUCTURE'], '')
            : null;
        $bodySections = self::parseBodySections($attributes, $bodyStructure);

        // Envelope indices follow RFC 3501: 0 date, 1 subject, 2 from,
        // 3 sender, 4 reply-to, 5 to, 6 cc, 7 bcc, 8 in-reply-to, 9 message-id.
        return new Message(
            $sequence,
            $uid,
            self::toOptionalInt($attributes['RFC822.SIZE'] ?? null) ?? 0,
            self::toNullableString($attributes['INTERNALDATE'] ?? null),
            self::parseFlags($attributes['FLAGS'] ?? null),
            self::decodeMimeHeader(self::envelopeString($envelope, 1)),
            self::envelopeString($envelope, 0),
            self::trimAngles(self::envelopeString($envelope, 9)),
            self::envelopeString($envelope, 8),
            self::parseAddressList($envelope[2] ?? null),
            self::parseAddressList($envelope[3] ?? null),
            self::parseAddressList($envelope[4] ?? null),
            self::parseAddressList($envelope[5] ?? null),
            self::parseAddressList($envelope[6] ?? null),
            self::parseAddressList($envelope[7] ?? null),
            $bodyStructure,
            $bodySections,
        );
    }

    /**
     * Splits the inside of the FETCH parentheses into name => value pairs.
     *
     * Names are upper-cased; values are whatever parseToken() yields
     * (string, null for NIL, or a nested list).
     *
     * @return array<string, mixed>
     *
     * @throws ImapException When a name or value cannot be tokenised.
     */
    private static function parseAttributes(string $payload): array
    {
        $attributes = [];
        $offset = 0;
        $length = strlen($payload);

        while ($offset < $length) {
            self::skipWhitespace($payload, $offset);
            if ($offset >= $length) {
                break;
            }

            $name = self::parseAttributeName($payload, $offset);
            if ($name === '') {
                throw new ImapException('Unable to parse FETCH attribute name: ' . $payload);
            }

            self::skipWhitespace($payload, $offset);
            $attributes[strtoupper($name)] = self::parseToken($payload, $offset);
        }

        return $attributes;
    }

    /**
     * Reads one attribute name starting at $offset and advances $offset past it.
     *
     * `BODY[...]` / `BODY.PEEK[...]` names are read up to the matching `]`
     * because the section spec may itself contain spaces and parentheses.
     * A partial-fetch origin (`<n>`) directly after the `]` is consumed but
     * not made part of the returned name.
     *
     * @throws ImapException When the section bracket is never closed or the
     *                       name is not a plain string token.
     */
    private static function parseAttributeName(string $payload, int &$offset): string
    {
        self::skipWhitespace($payload, $offset);

        if (preg_match('/\GBODY(?:\.PEEK)?\[/Ai', $payload, $matches, 0, $offset) === 1) {
            $start = $offset;
            $offset += strlen($matches[0]);
            $depth = 1;
            $length = strlen($payload);

            while ($offset < $length) {
                $char = $payload[$offset];
                if ($char === '[') {
                    $depth++;
                } elseif ($char === ']') {
                    $depth--;
                    if ($depth === 0) {
                        $offset++;
                        $name = substr($payload, $start, $offset - $start);

                        // RFC 3501: a partial fetch is answered as
                        // `BODY[section]<origin>`. Skip the origin so that
                        // the next token read is the section data itself.
                        if (preg_match('/<\d+>/A', $payload, $origin, 0, $offset) === 1) {
                            $offset += strlen($origin[0]);
                        }

                        return $name;
                    }
                }
                $offset++;
            }

            throw new ImapException('Unterminated FETCH BODY section attribute.');
        }

        $name = self::parseToken($payload, $offset);
        if (!is_string($name)) {
            throw new ImapException('Invalid FETCH attribute name.');
        }

        return $name;
    }

    /**
     * Reads one IMAP token at $offset and advances $offset past it.
     *
     * @return mixed A list (array) for `( ... )`, a string for quoted
     *               strings, literals and atoms, or null for the atom NIL.
     *
     * @throws ImapException On premature end of input or an unterminated list.
     */
    private static function parseToken(string $payload, int &$offset): mixed
    {
        self::skipWhitespace($payload, $offset);
        $length = strlen($payload);
        if ($offset >= $length) {
            throw new ImapException('Unexpected end of FETCH response.');
        }

        $char = $payload[$offset];

        if ($char === '(') {
            $offset++;
            $items = [];
            while (true) {
                self::skipWhitespace($payload, $offset);
                if ($offset >= $length) {
                    throw new ImapException('Unterminated FETCH list response.');
                }
                if ($payload[$offset] === ')') {
                    $offset++;

                    return $items;
                }
                $items[] = self::parseToken($payload, $offset);
            }
        }

        if ($char === '"') {
            return self::parseQuotedString($payload, $offset);
        }

        if ($char === '{') {
            return self::parseLiteral($payload, $offset);
        }

        // Plain atom: runs until whitespace or a list delimiter.
        $start = $offset;
        while (
            $offset < $length
            && !ctype_space($payload[$offset])
            && $payload[$offset] !== '('
            && $payload[$offset] !== ')'
        ) {
            $offset++;
        }

        $atom = substr($payload, $start, $offset - $start);

        return strtoupper($atom) === 'NIL' ? null : $atom;
    }

    /**
     * Reads a `{n}` literal (marker, line break, then exactly n bytes).
     *
     * Both CRLF and bare LF are accepted after the marker.
     *
     * @throws ImapException When the marker is malformed or fewer than n
     *                       bytes remain in the payload.
     */
    private static function parseLiteral(string $payload, int &$offset): string
    {
        if (preg_match('/\G\{(\d+)\}\r?\n/A', $payload, $matches, 0, $offset) !== 1) {
            throw new ImapException('Invalid FETCH literal marker.');
        }

        $offset += strlen($matches[0]);
        $length = (int) $matches[1];
        $literal = substr($payload, $offset, $length);
        if (strlen($literal) !== $length) {
            throw new ImapException('FETCH literal length does not match payload.');
        }

        $offset += $length;

        return $literal;
    }

    /**
     * Reads a double-quoted string; a backslash escapes the following byte.
     *
     * Expects $offset to point at the opening quote.
     *
     * @throws ImapException When the closing quote is missing.
     */
    private static function parseQuotedString(string $payload, int &$offset): string
    {
        $offset++; // step over the opening quote
        $length = strlen($payload);
        $value = '';

        while ($offset < $length) {
            $char = $payload[$offset];

            if ($char === '\\') {
                $offset++;
                if ($offset >= $length) {
                    break;
                }
                $value .= $payload[$offset];
                $offset++;
                continue;
            }

            if ($char === '"') {
                $offset++;

                return $value;
            }

            $value .= $char;
            $offset++;
        }

        throw new ImapException('Unterminated quoted FETCH string.');
    }

    /**
     * Advances $offset past any run of whitespace bytes.
     */
    private static function skipWhitespace(string $payload, int &$offset): void
    {
        $length = strlen($payload);
        while ($offset < $length && ctype_space($payload[$offset])) {
            $offset++;
        }
    }

    /**
     * Converts a digits-only token to int or throws with the given message.
     *
     * @throws ImapException When the value is not a non-negative integer token.
     */
    private static function toInt(mixed $value, string $message): int
    {
        return self::toOptionalInt($value) ?? throw new ImapException($message);
    }

    /**
     * Converts a digits-only token to int, or returns null for anything else.
     *
     * Non-scalar tokens (NIL, nested lists) are rejected before the string
     * cast so that a list never triggers an "Array to string conversion"
     * warning.
     */
    private static function toOptionalInt(mixed $value): ?int
    {
        if (!is_string($value) && !is_int($value)) {
            return null;
        }

        return preg_match('/^\d+$/', (string) $value) === 1 ? (int) $value : null;
    }

    /**
     * Extracts the non-empty string entries of a FLAGS list.
     *
     * @return list<string>
     */
    private static function parseFlags(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        // Explicit predicate: a bare array_filter() would also drop the
        // valid atom "0" because it is falsy.
        return array_values(array_filter(
            $value,
            static fn (mixed $flag): bool => is_string($flag) && $flag !== '',
        ));
    }

    /**
     * Returns the value when it is a non-empty string, otherwise null.
     */
    private static function toNullableString(mixed $value): ?string
    {
        return is_string($value) && $value !== '' ? $value : null;
    }

    /**
     * Returns the envelope field at $index as a non-empty string, or null.
     */
    private static function envelopeString(?array $envelope, int $index): ?string
    {
        if ($envelope === null) {
            return null;
        }

        return self::toNullableString($envelope[$index] ?? null);
    }

    /**
     * Decodes MIME encoded-words in a header value when mbstring is available.
     *
     * Null and empty input are returned unchanged.
     */
    private static function decodeMimeHeader(?string $value): ?string
    {
        if ($value === null || $value === '') {
            return $value;
        }

        return function_exists('mb_decode_mimeheader') ? mb_decode_mimeheader($value) : $value;
    }

    /**
     * Strips leading and trailing angle brackets (e.g. from a message id).
     */
    private static function trimAngles(?string $value): ?string
    {
        if ($value === null) {
            return null;
        }

        return trim($value, '<>');
    }

    /**
     * Converts an envelope address list into MessageAddress objects.
     *
     * Each address is read positionally: index 0 (display name, MIME-decoded),
     * index 2 and index 3 (mailbox and host in RFC 3501 order). Index 1 is
     * not used. Entries that are not lists are skipped.
     *
     * @return list<MessageAddress>
     */
    private static function parseAddressList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $addresses = [];
        foreach ($value as $address) {
            if (!is_array($address)) {
                continue;
            }

            $addresses[] = new MessageAddress(
                self::decodeMimeHeader(self::toNullableString($address[0] ?? null)),
                self::toNullableString($address[2] ?? null),
                self::toNullableString($address[3] ?? null),
            );
        }

        return $addresses;
    }

    /**
     * Converts a parsed BODYSTRUCTURE list into a MessagePart tree.
     *
     * @param mixed  $value  The tokenised body structure (nested lists).
     * @param string $partId Dotted part number of this node; '' for the root.
     *                       A non-multipart root is numbered '1'.
     *
     * @return MessagePart|null Null when $value is not a non-empty list.
     */
    private static function parseBodyPart(mixed $value, string $partId): ?MessagePart
    {
        if (!is_array($value) || $value === []) {
            return null;
        }

        // A multipart body starts with one nested list per child part.
        if (is_array($value[0] ?? null)) {
            $parts = [];
            $index = 0;
            while (isset($value[$index]) && is_array($value[$index])) {
                $childPartId = $partId === '' ? (string) ($index + 1) : $partId . '.' . ($index + 1);
                $child = self::parseBodyPart($value[$index], $childPartId);
                if ($child !== null) {
                    $parts[] = $child;
                }
                $index++;
            }

            // After the children: subtype, parameters, disposition, language, location.
            $subtype = strtolower(self::toNullableString($value[$index] ?? null) ?? 'mixed');
            $parameters = self::parsePairs($value[$index + 1] ?? null);
            [$disposition, $dispositionParameters] = self::parseDisposition($value[$index + 2] ?? null);
            $language = self::parseStringList($value[$index + 3] ?? null);
            $location = self::toNullableString($value[$index + 4] ?? null);

            return new MessagePart(
                $partId,
                'multipart/' . $subtype,
                $parameters,
                null,
                null,
                null,
                null,
                $disposition,
                $dispositionParameters,
                $language,
                $location,
                null,
                $parts,
            );
        }

        $type = strtolower(self::toNullableString($value[0] ?? null) ?? 'application');
        $subtype = strtolower(self::toNullableString($value[1] ?? null) ?? 'octet-stream');
        $parameters = self::parsePairs($value[2] ?? null);
        $contentId = self::trimAngles(self::toNullableString($value[3] ?? null));
        $description = self::toNullableString($value[4] ?? null);
        $encoding = self::toNullableString($value[5] ?? null);
        $size = self::toOptionalInt($value[6] ?? null);

        // $tailOffset is the index of the MD5 extension field; disposition,
        // language and location follow it. Per RFC 3501 the basic fields end
        // at index 6, then:
        //  - text/*          adds a line count              -> MD5 at 8
        //  - message/rfc822  adds envelope, body, line count -> MD5 at 10
        //  - everything else (including other message/* subtypes) -> MD5 at 7
        // The envelope check guards against servers that describe an
        // rfc822 attachment with the basic layout only.
        $tailOffset = match (true) {
            $type === 'text' => 8,
            $type === 'message'
                && in_array($subtype, ['rfc822', 'global'], true)
                && is_array($value[7] ?? null) => 10,
            default => 7,
        };

        [$disposition, $dispositionParameters] = self::parseDisposition($value[$tailOffset + 1] ?? null);
        $language = self::parseStringList($value[$tailOffset + 2] ?? null);
        $location = self::toNullableString($value[$tailOffset + 3] ?? null);

        return new MessagePart(
            $partId === '' ? '1' : $partId,
            $type . '/' . $subtype,
            $parameters,
            $contentId,
            $description,
            $encoding,
            $size,
            $disposition,
            $dispositionParameters,
            $language,
            $location,
            null,
            [],
        );
    }

    /**
     * Collects fetched `BODY[section]` payloads keyed by section.
     *
     * Section names are upper-cased, `BODY[]` is ignored and `N.TEXT` is
     * keyed as `N`. When a body structure is available, a fetched `TEXT`
     * section is mapped onto part ids (split along MIME boundaries for
     * multipart messages) and textual parts are decoded to UTF-8.
     *
     * @param array<string, mixed> $attributes
     *
     * @return array<string, string>
     */
    private static function parseBodySections(array $attributes, ?MessagePart $bodyStructure = null): array
    {
        $sections = [];
        foreach ($attributes as $name => $value) {
            if (!preg_match('/^BODY(?:\.PEEK)?\[(.*)\]$/i', $name, $matches)) {
                continue;
            }
            if (!is_string($value)) {
                continue;
            }

            $section = strtoupper(trim($matches[1]));
            if ($section === '') {
                continue;
            }
            if (preg_match('/^(\d+(?:\.\d+)*)\.TEXT$/', $section, $partMatches) === 1) {
                $section = $partMatches[1];
            }

            $sections[$section] = $value;
        }

        // Without a structure there is nothing to map or decode against.
        if ($bodyStructure === null) {
            return $sections;
        }

        if (isset($sections['TEXT'])) {
            if ($bodyStructure->isMultipart()) {
                $derivedSections = self::sectionsFromBodyText($sections['TEXT'], $bodyStructure);
                unset($sections['TEXT']);
                // Explicitly fetched part sections win over derived ones.
                foreach ($derivedSections as $section => $content) {
                    $sections[$section] ??= $content;
                }
            } elseif (str_starts_with($bodyStructure->mimeType(), 'text/')) {
                $sections[$bodyStructure->partId()] ??= $sections['TEXT'];
                unset($sections['TEXT']);
            }
        }

        return self::decodeSections($sections, $bodyStructure);
    }

    /**
     * Decodes every section that maps to a `text/*` part; others pass through.
     *
     * The charset defaults to us-ascii when the part declares none.
     *
     * @param array<string, string> $sections
     *
     * @return array<string, string>
     */
    private static function decodeSections(array $sections, MessagePart $bodyStructure): array
    {
        $decodedSections = [];
        foreach ($sections as $section => $content) {
            // Numeric section ids become int array keys, hence the cast.
            $part = self::findBodyPart($bodyStructure, (string) $section);
            if ($part === null || !str_starts_with($part->mimeType(), 'text/')) {
                $decodedSections[$section] = $content;
                continue;
            }

            $decodedSections[$section] = self::decodeSectionContent(
                $content,
                $part->encoding(),
                $part->parameters()['charset'] ?? 'us-ascii',
            );
        }

        return $decodedSections;
    }

    /**
     * Derives per-part text sections from a body (without its own headers).
     *
     * Multipart bodies are split at the part's boundary parameter and each
     * segment is matched to the child part at the same position. Non-text
     * leaf parts produce no section.
     *
     * @return array<string, string>
     */
    private static function sectionsFromBodyText(string $content, MessagePart $part): array
    {
        if ($part->isMultipart()) {
            $boundary = $part->parameters()['boundary'] ?? '';
            if ($boundary === '') {
                return [];
            }

            $sections = [];
            $segments = self::splitMultipartBody($content, $boundary);
            foreach ($part->parts() as $index => $childPart) {
                if (!isset($segments[$index])) {
                    break;
                }
                foreach (self::sectionsFromMimeEntity($segments[$index], $childPart) as $section => $childContent) {
                    $sections[$section] = $childContent;
                }
            }

            return $sections;
        }

        if (!str_starts_with($part->mimeType(), 'text/')) {
            return [];
        }

        return [$part->partId() => $content];
    }

    /**
     * Like sectionsFromBodyText(), but for an entity that still carries its
     * MIME headers; the headers are stripped first.
     *
     * @return array<string, string>
     */
    private static function sectionsFromMimeEntity(string $content, MessagePart $part): array
    {
        [, $body] = self::splitMimeEntity($content);

        if ($part->isMultipart()) {
            return self::sectionsFromBodyText($body, $part);
        }

        if (!str_starts_with($part->mimeType(), 'text/')) {
            return [];
        }

        return [$part->partId() => $body];
    }

    /**
     * Depth-first search of the part tree for the part with the given id.
     */
    private static function findBodyPart(MessagePart $part, string $section): ?MessagePart
    {
        if ($part->partId() === $section) {
            return $part;
        }

        foreach ($part->parts() as $childPart) {
            $match = self::findBodyPart($childPart, $section);
            if ($match !== null) {
                return $match;
            }
        }

        return null;
    }

    /**
     * Splits a multipart body into the raw entities between its delimiters.
     *
     * The preamble (before the first delimiter) and the epilogue (after the
     * closing `--boundary--`) are discarded. When the closing delimiter is
     * missing, e.g. because the body was truncated, the text after the last
     * delimiter is still returned as the final segment.
     *
     * @return list<string>
     */
    private static function splitMultipartBody(string $content, string $boundary): array
    {
        $pattern = '/(?:^|\r\n|\n)--' . preg_quote($boundary, '/') . '(--)?[ \t]*(?:\r\n|\n|$)/';
        if (preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE) < 1) {
            return [];
        }

        $segments = [];
        $segmentStart = null;
        foreach ($matches[0] as $index => [$match, $offset]) {
            if ($segmentStart !== null) {
                $segments[] = substr($content, $segmentStart, $offset - $segmentStart);
            }

            // Group 1 captures the trailing "--" of the closing delimiter.
            $isClosing = isset($matches[1][$index][1])
                && $matches[1][$index][1] !== -1
                && $matches[1][$index][0] === '--';
            if ($isClosing) {
                return $segments;
            }

            $segmentStart = $offset + strlen($match);
        }

        // No closing delimiter seen: keep the trailing part instead of
        // silently dropping it.
        if ($segmentStart !== null && $segmentStart < strlen($content)) {
            $segments[] = substr($content, $segmentStart);
        }

        return $segments;
    }

    /**
     * Splits a MIME entity into its header block and body.
     *
     * The split happens at the first blank line, whichever line-ending style
     * occurs first. An entity that begins with a line break has no headers
     * (RFC 2046 allows this for body parts), so everything after that line
     * break is body. Without any blank line the whole content is the body.
     *
     * @return array{0: string, 1: string}
     */
    private static function splitMimeEntity(string $content): array
    {
        foreach (["\r\n", "\n"] as $lineBreak) {
            if (str_starts_with($content, $lineBreak)) {
                return ['', substr($content, strlen($lineBreak))];
            }
        }

        $splitAt = null;
        $separatorLength = 0;
        foreach (["\r\n\r\n", "\n\n"] as $separator) {
            $position = strpos($content, $separator);
            if ($position !== false && ($splitAt === null || $position < $splitAt)) {
                $splitAt = $position;
                $separatorLength = strlen($separator);
            }
        }

        if ($splitAt === null) {
            return ['', $content];
        }

        return [
            substr($content, 0, $splitAt),
            substr($content, $splitAt + $separatorLength),
        ];
    }

    /**
     * Undoes the transfer encoding of a text section and converts it to UTF-8.
     *
     * @param string      $content  Raw section bytes.
     * @param string|null $encoding Content-Transfer-Encoding; null means 7bit.
     *                              Only quoted-printable and base64 are decoded.
     * @param string      $charset  Source charset; '' is treated like UTF-8.
     *
     * @return string UTF-8 text. Invalid base64 yields an empty string. If
     *                neither mbstring nor iconv knows the charset, the bytes
     *                are only scrubbed as UTF-8.
     */
    private static function decodeSectionContent(string $content, ?string $encoding, string $charset): string
    {
        $decoded = match (strtolower($encoding ?? '7bit')) {
            'quoted-printable' => quoted_printable_decode($content),
            'base64' => self::decodeBase64($content),
            default => $content,
        };

        if ($charset === '' || in_array(strtolower($charset), ['utf-8', 'utf8'], true)) {
            // Same-charset conversion replaces invalid byte sequences.
            return mb_convert_encoding($decoded, 'UTF-8', 'UTF-8');
        }

        try {
            $converted = mb_convert_encoding($decoded, 'UTF-8', $charset);
            if ($converted !== false) {
                return $converted;
            }
        } catch (\ValueError) {
            // mbstring does not know this charset; fall through to iconv.
        }

        // Best effort: iconv warns on unknown charsets, which is expected here.
        $converted = @iconv($charset, 'UTF-8//TRANSLIT//IGNORE', $decoded);
        $decoded = $converted !== false ? $converted : $decoded;

        return mb_convert_encoding($decoded, 'UTF-8', 'UTF-8');
    }

    /**
     * Strictly decodes base64, returning '' only when decoding fails.
     *
     * The explicit false check keeps falsy-but-valid results such as "0".
     */
    private static function decodeBase64(string $content): string
    {
        $binary = base64_decode($content, true);

        return $binary === false ? '' : $binary;
    }

    /**
     * Converts a flat `(name value name value ...)` list into a map.
     *
     * Names are lower-cased; pairs without a string name are skipped and
     * missing or non-string values become ''.
     *
     * @return array<string, string>
     */
    private static function parsePairs(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $pairs = [];
        $count = count($value);
        for ($index = 0; $index < $count; $index += 2) {
            $name = self::toNullableString($value[$index] ?? null);
            if ($name === null) {
                continue;
            }

            $pairs[strtolower($name)] = self::toNullableString($value[$index + 1] ?? null) ?? '';
        }

        return $pairs;
    }

    /**
     * Parses a body disposition `(type (params...))`.
     *
     * @return array{0: ?string, 1: array<string, string>} Lower-cased type
     *         ('' when the list carries no type; null when $value is not a
     *         list) and its parameters.
     */
    private static function parseDisposition(mixed $value): array
    {
        if (!is_array($value)) {
            return [null, []];
        }

        return [
            strtolower(self::toNullableString($value[0] ?? null) ?? ''),
            self::parsePairs($value[1] ?? null),
        ];
    }

    /**
     * Normalises a value that may be NIL, a single string or a list of
     * strings into a list of non-empty strings.
     *
     * @return list<string>
     */
    private static function parseStringList(mixed $value): array
    {
        if ($value === null) {
            return [];
        }
        if (is_string($value)) {
            return [$value];
        }
        if (!is_array($value)) {
            return [];
        }

        // Explicit predicate so that the string "0" is not filtered out.
        return array_values(array_filter(
            $value,
            static fn (mixed $item): bool => is_string($item) && $item !== '',
        ));
    }
}