* SPDX-License-Identifier: AGPL-3.0-or-later */

namespace KTXF\Blob;

use finfo;

/**
 * Signature - Analyzes binary content to determine MIME type and format
 *
 * Detection works on the leading bytes of the content only, so callers can
 * pass a short header read from a stream, a chunked upload, or a remote
 * storage backend instead of the full file.
 *
 * PHP's finfo extension (libmagic) is tried first. When it is unavailable,
 * or yields no usable format, a small built-in table of magic byte
 * signatures is consulted, followed by a plain-text heuristic.
 */
class Signature
{
    /** Number of leading bytes read from a stream for detection */
    public const HEADER_SIZE = 256;

    /**
     * Fallback magic byte signatures for when finfo is unavailable.
     *
     * Entries are checked in order and the first match wins. 'offset' is the
     * byte offset into the header and 'bytes' is the signature as hex digits.
     */
    private const SIGNATURES = [
        ['offset' => 0, 'bytes' => 'FFD8FF', 'format' => 'jpeg'],
        ['offset' => 0, 'bytes' => '89504E470D0A1A0A', 'format' => 'png'],
        ['offset' => 0, 'bytes' => '47494638', 'format' => 'gif'],
        ['offset' => 0, 'bytes' => '25504446', 'format' => 'pdf'],
        ['offset' => 0, 'bytes' => '504B0304', 'format' => 'zip'],
        ['offset' => 0, 'bytes' => '1F8B08', 'format' => 'gzip'],
        ['offset' => 4, 'bytes' => '66747970', 'format' => 'mp4'],
        ['offset' => 0, 'bytes' => '494433', 'format' => 'mp3'],
        ['offset' => 0, 'bytes' => 'FFFB', 'format' => 'mp3'],
        ['offset' => 0, 'bytes' => '52494646', 'format' => 'riff'], // WAV/AVI/WEBP
    ];

    /** Cached finfo instance, created on first use by detectMimeType() */
    private static ?finfo $finfo = null;

    /**
     * Detect both MIME type and format from content bytes in a single operation
     *
     * @param string $headerBytes First bytes of the file content (256 recommended)
     * @return array{mime: string, format: string} Array with 'mime' and 'format' keys
     */
    public static function detect(string $headerBytes): array
    {
        if ($headerBytes === '') {
            return ['mime' => MimeTypes::MIME_BINARY, 'format' => MimeTypes::FORMAT_BINARY];
        }

        $format = null;

        // detectMimeType() already returns null when fileinfo is not loaded,
        // so no separate extension check is needed here.
        $mime = self::detectMimeType($headerBytes);
        if ($mime !== null) {
            $format = MimeTypes::toFormat($mime);
            if ($format === null && $mime !== MimeTypes::MIME_BINARY) {
                // No direct mapping for this MIME type: let MimeTypes derive one.
                $format = MimeTypes::parseFormat($mime);
            }
        }

        // Fall back to the built-in signature table if finfo gave no format.
        $format ??= self::detectFromMagicBytes($headerBytes);

        // A missing or generic-binary MIME type is replaced by whatever the
        // detected format maps to, defaulting to generic binary.
        if ($mime === null || $mime === MimeTypes::MIME_BINARY) {
            $mime = MimeTypes::toMime($format) ?? MimeTypes::MIME_BINARY;
        }

        return ['mime' => $mime, 'format' => $format];
    }

    /**
     * Detect both MIME type and format from a stream in a single operation
     *
     * Reads up to HEADER_SIZE bytes from the current position and then seeks
     * back to that position.
     *
     * @param resource $stream File stream
     * @return array{mime: string, format: string} Array with 'mime' and 'format' keys
     */
    public static function detectFromStream($stream): array
    {
        $headerBytes = self::readHeader($stream);
        if ($headerBytes === null) {
            return ['mime' => MimeTypes::MIME_BINARY, 'format' => MimeTypes::FORMAT_BINARY];
        }

        return self::detect($headerBytes);
    }

    /**
     * Detect file format from content bytes
     *
     * @param string $headerBytes First bytes of the file content (256 recommended)
     * @return string Detected format (e.g., 'jpeg', 'png', 'pdf') or 'binary' if unknown
     */
    public static function detectFormat(string $headerBytes): string
    {
        return self::detect($headerBytes)['format'];
    }

    /**
     * Detect MIME type from content bytes using finfo
     *
     * @param string $headerBytes Content bytes
     * @return string|null MIME type, or null when fileinfo is not loaded or detection fails
     */
    public static function detectMimeType(string $headerBytes): ?string
    {
        if (!extension_loaded('fileinfo')) {
            return null;
        }

        // Create the finfo handle once and reuse it for later calls.
        self::$finfo ??= new finfo(FILEINFO_MIME_TYPE);

        $mime = self::$finfo->buffer($headerBytes);

        return $mime !== false ? $mime : null;
    }

    /**
     * Detect file format from a stream
     *
     * Reads up to HEADER_SIZE bytes from the current position, detects the
     * format, and seeks back to the position the stream was at on entry.
     *
     * @param resource $stream File stream
     * @return string Detected format, or the generic binary format if nothing could be read
     */
    public static function detectFormatFromStream($stream): string
    {
        $headerBytes = self::readHeader($stream);

        return $headerBytes === null
            ? MimeTypes::FORMAT_BINARY
            : self::detectFormat($headerBytes);
    }

    /**
     * Detect MIME type from a stream
     *
     * Reads up to HEADER_SIZE bytes from the current position and then seeks
     * back to that position.
     *
     * @param resource $stream File stream
     * @return string|null MIME type, or null if nothing could be read or detection fails
     */
    public static function detectMimeTypeFromStream($stream): ?string
    {
        $headerBytes = self::readHeader($stream);

        return $headerBytes === null
            ? null
            : self::detectMimeType($headerBytes);
    }

    /**
     * Read up to HEADER_SIZE bytes from a stream and restore its position
     *
     * @param resource $stream File stream
     * @return string|null Header bytes, or null when the read failed or returned nothing
     */
    private static function readHeader($stream)
    {
        $position = ftell($stream);
        $headerBytes = fread($stream, self::HEADER_SIZE);

        // Only seek back when the starting offset is known. Passing ftell()'s
        // false on to fseek() would be coerced to offset 0 (or rejected under
        // strict types) instead of restoring the real position.
        if ($position !== false) {
            fseek($stream, $position);
        }

        if ($headerBytes === false || $headerBytes === '') {
            return null;
        }

        return $headerBytes;
    }

    /**
     * Fallback detection using magic bytes
     *
     * Checks each SIGNATURES entry in order against the raw header bytes,
     * then falls back to the plain-text heuristic.
     *
     * @param string $headerBytes Content bytes
     * @return string Detected format, 'text', or the generic binary format
     */
    private static function detectFromMagicBytes(string $headerBytes): string
    {
        foreach (self::SIGNATURES as $sig) {
            $needle = hex2bin($sig['bytes']);

            // substr() yields a shorter (or empty) string when the header is
            // too short for this signature, which simply fails the comparison.
            if (substr($headerBytes, $sig['offset'], strlen($needle)) === $needle) {
                return $sig['format'];
            }
        }

        if (self::isLikelyText($headerBytes)) {
            return 'text';
        }

        return MimeTypes::FORMAT_BINARY;
    }

    /**
     * Check if content appears to be text
     *
     * Content starting with a UTF-8 BOM is always treated as text. Otherwise
     * the first 256 bytes (at most) are inspected and the content counts as
     * text when more than 90% of them are printable ASCII, tab, LF, CR, or
     * bytes in the 128-247 range.
     *
     * @param string $bytes Content bytes
     * @return bool True if the content looks like text; false for empty input
     */
    private static function isLikelyText(string $bytes): bool
    {
        // Check for UTF-8 BOM
        if (str_starts_with($bytes, "\xEF\xBB\xBF")) {
            return true;
        }

        $length = min(strlen($bytes), 256);
        if ($length === 0) {
            // Nothing to inspect; also avoids dividing by zero below.
            return false;
        }

        $printableCount = 0;
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($bytes[$i]);

            $isAsciiText = ($byte >= 32 && $byte <= 126) || $byte === 9 || $byte === 10 || $byte === 13;
            $isHighByte = $byte >= 128 && $byte <= 247; // treated as UTF-8 bytes

            if ($isAsciiText || $isHighByte) {
                $printableCount++;
            }
        }

        return ($printableCount / $length) > 0.9;
    }
}