Initial Version
This commit is contained in:
219
shared/lib/Blob/MimeTypes.php
Normal file
219
shared/lib/Blob/MimeTypes.php
Normal file
@@ -0,0 +1,219 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* SPDX-FileCopyrightText: Sebastian Krupinski <krupinski01@gmail.com>
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
namespace KTXF\Blob;
|
||||
|
||||
/**
 * MimeTypes - MIME type and format resolution utility
 *
 * Provides bidirectional mapping between MIME types and file format identifiers.
 *
 * MIME type arguments are normalized before lookup: any ";parameter" suffix is
 * dropped, surrounding whitespace is trimmed and the value is lower-cased, so
 * "Text/HTML; charset=utf-8" resolves the same way as "text/html".
 */
class MimeTypes {

    /** Default MIME type for unknown/binary content */
    public const MIME_BINARY = 'application/octet-stream';

    /** Default format for unknown/binary content */
    public const FORMAT_BINARY = 'binary';

    /**
     * MIME type to format mapping
     *
     * Keys must be lower-case; several MIME types may share one format.
     */
    private const MIME_TO_FORMAT = [
        // Images
        'image/jpeg' => 'jpeg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/webp' => 'webp',
        'image/bmp' => 'bmp',
        'image/x-ms-bmp' => 'bmp',
        'image/tiff' => 'tiff',
        'image/x-icon' => 'ico',
        'image/vnd.microsoft.icon' => 'ico',
        'image/svg+xml' => 'svg',
        'image/heic' => 'heic',
        'image/heif' => 'heif',
        'image/avif' => 'avif',

        // Documents
        'application/pdf' => 'pdf',
        'application/rtf' => 'rtf',
        'text/rtf' => 'rtf',
        'application/msword' => 'doc',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
        'application/vnd.ms-excel' => 'xls',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx',
        'application/vnd.ms-powerpoint' => 'ppt',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx',
        'application/vnd.oasis.opendocument.text' => 'odt',
        'application/vnd.oasis.opendocument.spreadsheet' => 'ods',
        'application/vnd.oasis.opendocument.presentation' => 'odp',

        // Archives
        'application/zip' => 'zip',
        'application/x-zip-compressed' => 'zip',
        'application/gzip' => 'gzip',
        'application/x-gzip' => 'gzip',
        'application/x-bzip2' => 'bzip2',
        'application/x-xz' => 'xz',
        'application/x-rar-compressed' => 'rar',
        'application/vnd.rar' => 'rar',
        'application/x-7z-compressed' => '7z',
        'application/x-tar' => 'tar',

        // Audio
        'audio/mpeg' => 'mp3',
        'audio/mp3' => 'mp3',
        'audio/ogg' => 'ogg',
        'audio/flac' => 'flac',
        'audio/x-flac' => 'flac',
        'audio/wav' => 'wav',
        'audio/x-wav' => 'wav',
        'audio/aac' => 'aac',
        'audio/mp4' => 'm4a',
        'audio/x-m4a' => 'm4a',
        'audio/webm' => 'webm',

        // Video
        'video/mp4' => 'mp4',
        'video/webm' => 'webm',
        'video/x-msvideo' => 'avi',
        'video/mpeg' => 'mpeg',
        'video/quicktime' => 'mov',
        'video/x-matroska' => 'mkv',
        'video/x-flv' => 'flv',
        'video/3gpp' => '3gp',

        // Fonts
        'font/woff' => 'woff',
        'font/woff2' => 'woff2',
        'font/ttf' => 'ttf',
        'font/otf' => 'otf',
        'application/font-woff' => 'woff',
        'application/font-woff2' => 'woff2',
        'application/x-font-ttf' => 'ttf',
        'application/x-font-otf' => 'otf',

        // Text/Code
        'text/plain' => 'text',
        'text/html' => 'html',
        'text/css' => 'css',
        'text/csv' => 'csv',
        'text/xml' => 'xml',
        'application/xml' => 'xml',
        'application/json' => 'json',
        'application/javascript' => 'js',
        'text/javascript' => 'js',
        'application/x-httpd-php' => 'php',
        'text/x-php' => 'php',
        'text/markdown' => 'md',
        'text/x-python' => 'py',
        'application/x-python-code' => 'py',

        // Other
        'application/epub+zip' => 'epub',
        'application/x-sqlite3' => 'sqlite',
        'application/wasm' => 'wasm',
        'application/octet-stream' => 'binary',
    ];

    /**
     * Formats whose preferred MIME type is not the first matching table entry
     *
     * The table lists audio before video, so without this override "webm"
     * would resolve to "audio/webm" rather than "video/webm".
     */
    private const PREFERRED_MIME = [
        'webm' => 'video/webm',
    ];

    /** Cached reverse mapping (format -> mime), built on first use */
    private static ?array $formatToMime = null;

    /**
     * Get format from MIME type
     *
     * @param string $mime MIME type (case-insensitive, parameters ignored)
     * @return string|null Format or null if not found
     */
    public static function toFormat(string $mime): ?string {
        return self::MIME_TO_FORMAT[self::normalize($mime)] ?? null;
    }

    /**
     * Get MIME type from format
     *
     * When several MIME types share a format, the PREFERRED_MIME entry wins,
     * otherwise the first table entry for that format is returned.
     *
     * @param string $format Format identifier
     * @return string|null MIME type or null if not found
     */
    public static function toMime(string $format): ?string {
        return self::reverseMap()[$format] ?? null;
    }

    /**
     * Extract format from MIME type string (with fallback parsing)
     *
     * Known MIME types resolve through the table. Unknown ones fall back to
     * the subtype with a leading "x-" and any "+suffix" removed, provided the
     * result is 1 to 10 characters long (e.g. "application/x-foo+xml" -> "foo").
     *
     * @param string $mime MIME type (case-insensitive, parameters ignored)
     * @return string|null Format or null
     */
    public static function parseFormat(string $mime): ?string {
        $mime = self::normalize($mime);

        // Check direct mapping first
        if (isset(self::MIME_TO_FORMAT[$mime])) {
            return self::MIME_TO_FORMAT[$mime];
        }

        $parts = explode('/', $mime, 2);
        if (count($parts) !== 2) {
            return null;
        }

        $subtype = $parts[1];
        if (strncmp($subtype, 'x-', 2) === 0) {
            $subtype = (string) substr($subtype, 2);
        }
        $subtype = explode('+', $subtype)[0];

        // Overly long subtypes (vendor trees and the like) are not usable as a short format id
        $length = strlen($subtype);
        if ($length === 0 || $length > 10) {
            return null;
        }

        return $subtype;
    }

    /**
     * Check if MIME type is known
     *
     * @param string $mime MIME type (case-insensitive, parameters ignored)
     * @return bool
     */
    public static function isKnownMime(string $mime): bool {
        return isset(self::MIME_TO_FORMAT[self::normalize($mime)]);
    }

    /**
     * Check if format is known
     *
     * @param string $format Format identifier
     * @return bool
     */
    public static function isKnownFormat(string $format): bool {
        return isset(self::reverseMap()[$format]);
    }

    /**
     * Get all known MIME types
     *
     * @return array<string, string> MIME type to format mapping
     */
    public static function all(): array {
        return self::MIME_TO_FORMAT;
    }

    /**
     * Reduce a MIME type string to its bare, lower-case "type/subtype" form
     *
     * @param string $mime Raw MIME type, possibly with parameters
     * @return string Normalized MIME type
     */
    private static function normalize(string $mime): string {
        $essence = explode(';', $mime, 2)[0];

        return strtolower(trim($essence));
    }

    /**
     * Build (once) and return the format -> MIME lookup table
     *
     * @return array<string, string>
     */
    private static function reverseMap(): array {
        if (self::$formatToMime === null) {
            // Seed with explicit preferences, then keep the first table entry for everything else
            $map = self::PREFERRED_MIME;
            foreach (self::MIME_TO_FORMAT as $mime => $format) {
                $map[$format] ??= $mime;
            }
            self::$formatToMime = $map;
        }

        return self::$formatToMime;
    }

}
|
||||
230
shared/lib/Blob/Signature.php
Normal file
230
shared/lib/Blob/Signature.php
Normal file
@@ -0,0 +1,230 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* SPDX-FileCopyrightText: Sebastian Krupinski <krupinski01@gmail.com>
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
namespace KTXF\Blob;
|
||||
|
||||
use finfo;
|
||||
|
||||
/**
 * Signature - Analyzes binary content to determine MIME type and format
 *
 * Detection works on the leading bytes of the content only, so callers can
 * pass a header read from a stream or a partial upload instead of the whole file.
 *
 * Uses PHP's fileinfo extension (finfo) when it is loaded and falls back to a
 * small built-in magic byte table plus a text heuristic otherwise.
 */
class Signature {

    /** Number of leading bytes read from a stream for detection */
    public const HEADER_SIZE = 256;

    /**
     * Fallback magic byte signatures for when finfo is unavailable
     *
     * 'offset' is in bytes from the start of the content, 'bytes' is the
     * expected sequence in hexadecimal. Entries are tested in order.
     */
    private const SIGNATURES = [
        ['offset' => 0, 'bytes' => 'FFD8FF', 'format' => 'jpeg'],
        ['offset' => 0, 'bytes' => '89504E470D0A1A0A', 'format' => 'png'],
        ['offset' => 0, 'bytes' => '47494638', 'format' => 'gif'],
        ['offset' => 0, 'bytes' => '25504446', 'format' => 'pdf'],
        ['offset' => 0, 'bytes' => '504B0304', 'format' => 'zip'],
        ['offset' => 0, 'bytes' => '1F8B08', 'format' => 'gzip'],
        ['offset' => 4, 'bytes' => '66747970', 'format' => 'mp4'],
        ['offset' => 0, 'bytes' => '494433', 'format' => 'mp3'],
        ['offset' => 0, 'bytes' => 'FFFB', 'format' => 'mp3'],
        ['offset' => 0, 'bytes' => '52494646', 'format' => 'riff'], // WAV/AVI/WEBP, refined via RIFF_FORMS
    ];

    /**
     * RIFF form type (the four bytes at offset 8) to format mapping
     *
     * Used to turn the generic 'riff' signature match into a concrete format.
     */
    private const RIFF_FORMS = [
        'WAVE' => 'wav',
        'AVI ' => 'avi',
        'WEBP' => 'webp',
    ];

    /** Cached finfo instance */
    private static ?finfo $finfo = null;

    /**
     * Detect both MIME type and format from content bytes in a single operation
     *
     * @param string $headerBytes First bytes of the file content (256 recommended)
     * @return array{mime: string, format: string} Array with 'mime' and 'format' keys
     */
    public static function detect(string $headerBytes): array {
        if ($headerBytes === '') {
            return ['mime' => MimeTypes::MIME_BINARY, 'format' => MimeTypes::FORMAT_BINARY];
        }

        // Try finfo first; detectMimeType() yields null when fileinfo is not loaded
        $mime = self::detectMimeType($headerBytes);

        $format = null;
        if ($mime !== null) {
            // MimeTypes maps application/octet-stream to 'binary', so an octet-stream
            // result already has a format here and skips the signature fallback below.
            $format = MimeTypes::toFormat($mime) ?? MimeTypes::parseFormat($mime);
        }

        // Fallback to magic bytes if format not determined
        $format ??= self::detectFromMagicBytes($headerBytes);

        // Ensure MIME type is set
        if ($mime === null || $mime === MimeTypes::MIME_BINARY) {
            $mime = MimeTypes::toMime($format) ?? MimeTypes::MIME_BINARY;
        }

        return ['mime' => $mime, 'format' => $format];
    }

    /**
     * Detect both MIME type and format from a stream in a single operation
     *
     * Reads up to HEADER_SIZE bytes from the current position. Seekable
     * streams are returned to that position afterwards; on non-seekable
     * streams the bytes read stay consumed.
     *
     * @param resource $stream File stream
     * @return array{mime: string, format: string} Array with 'mime' and 'format' keys
     */
    public static function detectFromStream($stream): array {
        $headerBytes = self::readHeader($stream);
        if ($headerBytes === null) {
            return ['mime' => MimeTypes::MIME_BINARY, 'format' => MimeTypes::FORMAT_BINARY];
        }

        return self::detect($headerBytes);
    }

    /**
     * Detect file format from content bytes
     *
     * @param string $headerBytes First bytes of the file content (256 recommended)
     * @return string Detected format (e.g., 'jpeg', 'png', 'pdf') or 'binary' if unknown
     */
    public static function detectFormat(string $headerBytes): string {
        return self::detect($headerBytes)['format'];
    }

    /**
     * Detect MIME type from content bytes using finfo
     *
     * @param string $headerBytes Content bytes
     * @return string|null MIME type, or null when fileinfo is not loaded or detection fails
     */
    public static function detectMimeType(string $headerBytes): ?string {
        if (!extension_loaded('fileinfo')) {
            return null;
        }

        self::$finfo ??= new finfo(FILEINFO_MIME_TYPE);

        $mime = self::$finfo->buffer($headerBytes);

        return $mime !== false ? $mime : null;
    }

    /**
     * Detect file format from a stream
     *
     * Reads the header bytes, detects format, and rewinds the stream when it is seekable.
     *
     * @param resource $stream File stream
     * @return string Detected format, or 'binary' when nothing could be read
     */
    public static function detectFormatFromStream($stream): string {
        return self::detectFromStream($stream)['format'];
    }

    /**
     * Detect MIME type from a stream
     *
     * @param resource $stream File stream
     * @return string|null MIME type or null
     */
    public static function detectMimeTypeFromStream($stream): ?string {
        $headerBytes = self::readHeader($stream);

        return $headerBytes === null ? null : self::detectMimeType($headerBytes);
    }

    /**
     * Read up to HEADER_SIZE bytes from a stream and restore its position
     *
     * The position is only restored when the stream reports itself as seekable
     * and ftell() succeeded; passing a failed ftell() result to fseek() would
     * raise a TypeError under strict_types.
     *
     * @param resource $stream File stream
     * @return string|null Header bytes, or null when nothing could be read
     */
    private static function readHeader($stream): ?string {
        $seekable = stream_get_meta_data($stream)['seekable'] ?? false;
        $position = $seekable ? ftell($stream) : false;

        $headerBytes = fread($stream, self::HEADER_SIZE);

        if ($position !== false) {
            fseek($stream, $position);
        }

        if ($headerBytes === false || $headerBytes === '') {
            return null;
        }

        return $headerBytes;
    }

    /**
     * Fallback detection using magic bytes
     *
     * Tests the SIGNATURES table in order, refines RIFF containers by their
     * form type, then falls back to the text heuristic.
     *
     * @param string $headerBytes Content bytes
     * @return string Detected format or 'binary'
     */
    private static function detectFromMagicBytes(string $headerBytes): string {
        foreach (self::SIGNATURES as $signature) {
            $expected = hex2bin($signature['bytes']);
            $actual = substr($headerBytes, $signature['offset'], strlen($expected));

            if ($actual !== $expected) {
                continue;
            }

            if ($signature['format'] === 'riff') {
                // Bytes 8-11 of a RIFF file name the contained form; keep 'riff' for unknown forms
                return self::RIFF_FORMS[substr($headerBytes, 8, 4)] ?? 'riff';
            }

            return $signature['format'];
        }

        // Check if likely text
        if (self::isLikelyText($headerBytes)) {
            return 'text';
        }

        return MimeTypes::FORMAT_BINARY;
    }

    /**
     * Check if content appears to be text
     *
     * Content starting with a UTF-8 BOM counts as text. Otherwise the first
     * 256 bytes are inspected and the content counts as text when more than
     * 90% of them are printable ASCII, tab/LF/CR, or in the 128-247 range.
     *
     * @param string $bytes Content bytes
     * @return bool
     */
    private static function isLikelyText(string $bytes): bool {
        // Check for UTF-8 BOM
        if (str_starts_with($bytes, "\xEF\xBB\xBF")) {
            return true;
        }

        $length = min(strlen($bytes), 256);
        if ($length === 0) {
            // Nothing to inspect; also avoids dividing by zero below
            return false;
        }

        $printableCount = 0;
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($bytes[$i]);
            $isAsciiText = ($byte >= 32 && $byte <= 126) || $byte === 9 || $byte === 10 || $byte === 13;
            $isHighByte = $byte >= 128 && $byte <= 247; // UTF-8 bytes

            if ($isAsciiText || $isHighByte) {
                $printableCount++;
            }
        }

        return ($printableCount / $length) > 0.9;
    }

}
|
||||
Reference in New Issue
Block a user