Initial Version

This commit is contained in:
root
2025-12-21 10:09:54 -05:00
commit 4ae6befc7b
422 changed files with 47225 additions and 0 deletions

View File

@@ -0,0 +1,219 @@
<?php
declare(strict_types=1);
/**
* SPDX-FileCopyrightText: Sebastian Krupinski <krupinski01@gmail.com>
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace KTXF\Blob;
/**
* MimeTypes - MIME type and format resolution utility
*
* Provides bidirectional mapping between MIME types and file format identifiers.
*/
class MimeTypes {
    /** Default MIME type for unknown/binary content */
    public const MIME_BINARY = 'application/octet-stream';

    /** Default format for unknown/binary content */
    public const FORMAT_BINARY = 'binary';

    /** Longest subtype (in bytes) that parseFormat() accepts as a derived format */
    private const MAX_DERIVED_FORMAT_LENGTH = 10;

    /**
     * MIME type to format mapping
     *
     * Keys are lowercase and parameter-free. Order matters: when several MIME
     * types share a format, the first one listed is what toMime() returns.
     */
    private const MIME_TO_FORMAT = [
        // Images
        'image/jpeg' => 'jpeg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/webp' => 'webp',
        'image/bmp' => 'bmp',
        'image/x-ms-bmp' => 'bmp',
        'image/tiff' => 'tiff',
        'image/x-icon' => 'ico',
        'image/vnd.microsoft.icon' => 'ico',
        'image/svg+xml' => 'svg',
        'image/heic' => 'heic',
        'image/heif' => 'heif',
        'image/avif' => 'avif',
        // Documents
        'application/pdf' => 'pdf',
        'application/rtf' => 'rtf',
        'text/rtf' => 'rtf',
        'application/msword' => 'doc',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
        'application/vnd.ms-excel' => 'xls',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx',
        'application/vnd.ms-powerpoint' => 'ppt',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx',
        'application/vnd.oasis.opendocument.text' => 'odt',
        'application/vnd.oasis.opendocument.spreadsheet' => 'ods',
        'application/vnd.oasis.opendocument.presentation' => 'odp',
        // Archives
        'application/zip' => 'zip',
        'application/x-zip-compressed' => 'zip',
        'application/gzip' => 'gzip',
        'application/x-gzip' => 'gzip',
        'application/x-bzip2' => 'bzip2',
        'application/x-xz' => 'xz',
        'application/x-rar-compressed' => 'rar',
        'application/vnd.rar' => 'rar',
        'application/x-7z-compressed' => '7z',
        'application/x-tar' => 'tar',
        // Audio
        'audio/mpeg' => 'mp3',
        'audio/mp3' => 'mp3',
        'audio/ogg' => 'ogg',
        'audio/flac' => 'flac',
        'audio/x-flac' => 'flac',
        'audio/wav' => 'wav',
        'audio/x-wav' => 'wav',
        'audio/aac' => 'aac',
        'audio/mp4' => 'm4a',
        'audio/x-m4a' => 'm4a',
        'audio/webm' => 'webm',
        // Video
        'video/mp4' => 'mp4',
        'video/webm' => 'webm',
        'video/x-msvideo' => 'avi',
        'video/mpeg' => 'mpeg',
        'video/quicktime' => 'mov',
        'video/x-matroska' => 'mkv',
        'video/x-flv' => 'flv',
        'video/3gpp' => '3gp',
        // Fonts
        'font/woff' => 'woff',
        'font/woff2' => 'woff2',
        'font/ttf' => 'ttf',
        'font/otf' => 'otf',
        'application/font-woff' => 'woff',
        'application/font-woff2' => 'woff2',
        'application/x-font-ttf' => 'ttf',
        'application/x-font-otf' => 'otf',
        // Text/Code
        'text/plain' => 'text',
        'text/html' => 'html',
        'text/css' => 'css',
        'text/csv' => 'csv',
        'text/xml' => 'xml',
        'application/xml' => 'xml',
        'application/json' => 'json',
        'application/javascript' => 'js',
        'text/javascript' => 'js',
        'application/x-httpd-php' => 'php',
        'text/x-php' => 'php',
        'text/markdown' => 'md',
        'text/x-python' => 'py',
        'application/x-python-code' => 'py',
        // Other
        'application/epub+zip' => 'epub',
        'application/x-sqlite3' => 'sqlite',
        'application/wasm' => 'wasm',
        'application/octet-stream' => 'binary',
    ];

    /** Cached reverse mapping (format -> mime), built on first use */
    private static ?array $formatToMime = null;

    /**
     * Get format from MIME type
     *
     * The lookup ignores letter case, surrounding whitespace and any
     * parameters (e.g. "Text/HTML; charset=utf-8" resolves like "text/html").
     *
     * @param string $mime MIME type
     * @return string|null Format or null if not found
     */
    public static function toFormat(string $mime): ?string {
        return self::MIME_TO_FORMAT[self::normalize($mime)] ?? null;
    }

    /**
     * Get MIME type from format
     *
     * When several MIME types map to the same format, the one listed first
     * in the mapping table is returned.
     *
     * @param string $format Format identifier
     * @return string|null MIME type or null if not found
     */
    public static function toMime(string $format): ?string {
        return self::formatIndex()[$format] ?? null;
    }

    /**
     * Extract format from MIME type string (with fallback parsing)
     *
     * Known MIME types resolve through the mapping table. Otherwise the format
     * is derived from the subtype: a structured-syntax suffix ("+xml") and a
     * leading "x-" are dropped, and the remainder is returned if it is between
     * 1 and 10 bytes long.
     *
     * @param string $mime MIME type, optionally with parameters
     * @return string|null Format or null
     */
    public static function parseFormat(string $mime): ?string {
        // Normalize BEFORE the table lookup so that "text/plain; charset=utf-8"
        // yields the mapped "text" rather than the derived "plain".
        $mime = self::normalize($mime);
        if (isset(self::MIME_TO_FORMAT[$mime])) {
            return self::MIME_TO_FORMAT[$mime];
        }

        $parts = explode('/', $mime, 2);
        if (count($parts) !== 2) {
            return null;
        }

        // preg_replace() returns null on PCRE failure; keep the input in that case
        $subtype = preg_replace('/^x-/', '', $parts[1]) ?? $parts[1];
        $subtype = explode('+', $subtype, 2)[0];

        $length = strlen($subtype);
        if ($length === 0 || $length > self::MAX_DERIVED_FORMAT_LENGTH) {
            return null;
        }
        return $subtype;
    }

    /**
     * Check if MIME type is known
     *
     * Uses the same case- and parameter-insensitive matching as toFormat().
     *
     * @param string $mime MIME type
     * @return bool
     */
    public static function isKnownMime(string $mime): bool {
        return isset(self::MIME_TO_FORMAT[self::normalize($mime)]);
    }

    /**
     * Check if format is known
     *
     * @param string $format Format identifier
     * @return bool
     */
    public static function isKnownFormat(string $format): bool {
        return isset(self::formatIndex()[$format]);
    }

    /**
     * Get all known MIME types
     *
     * @return array<string, string> MIME type to format mapping
     */
    public static function all(): array {
        return self::MIME_TO_FORMAT;
    }

    /**
     * Reduce a MIME string to its lowercase "type/subtype" part
     *
     * Drops everything from the first ';' onwards, trims whitespace and
     * lowercases the result so it can be used as a key into the mapping table.
     *
     * @param string $mime Raw MIME type, possibly with parameters
     * @return string Normalized MIME type
     */
    private static function normalize(string $mime): string {
        return strtolower(trim(explode(';', $mime, 2)[0]));
    }

    /**
     * Return the format -> MIME index, building and caching it on first call
     *
     * @return array<string, string>
     */
    private static function formatIndex(): array {
        if (self::$formatToMime === null) {
            $index = [];
            foreach (self::MIME_TO_FORMAT as $mime => $fmt) {
                // ??= keeps the first MIME type seen for each format
                $index[$fmt] ??= $mime;
            }
            self::$formatToMime = $index;
        }
        return self::$formatToMime;
    }
}

View File

@@ -0,0 +1,230 @@
<?php
declare(strict_types=1);
/**
* SPDX-FileCopyrightText: Sebastian Krupinski <krupinski01@gmail.com>
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace KTXF\Blob;
use finfo;
/**
* Signature - Analyzes binary content to determine MIME type and format
*
* This utility only requires the first bytes of a file to detect its format,
* making it compatible with streams, chunked uploads, and remote storage backends like S3.
*
* Uses PHP's built-in finfo extension (libmagic) for reliable detection with
* fallback to custom magic byte detection if finfo is unavailable.
*/
class Signature {
    /** Minimum bytes needed for reliable detection */
    public const HEADER_SIZE = 256;

    /**
     * Fallback magic byte signatures for when finfo is unavailable
     *
     * 'offset' is in bytes from the start of the content, 'bytes' is the
     * expected sequence as a hex string. Entries are tried in order.
     */
    private const SIGNATURES = [
        ['offset' => 0, 'bytes' => 'FFD8FF', 'format' => 'jpeg'],
        ['offset' => 0, 'bytes' => '89504E470D0A1A0A', 'format' => 'png'],
        ['offset' => 0, 'bytes' => '47494638', 'format' => 'gif'],
        ['offset' => 0, 'bytes' => '25504446', 'format' => 'pdf'],
        ['offset' => 0, 'bytes' => '504B0304', 'format' => 'zip'],
        ['offset' => 0, 'bytes' => '1F8B08', 'format' => 'gzip'],
        ['offset' => 4, 'bytes' => '66747970', 'format' => 'mp4'],
        ['offset' => 0, 'bytes' => '494433', 'format' => 'mp3'],
        ['offset' => 0, 'bytes' => 'FFFB', 'format' => 'mp3'],
        ['offset' => 0, 'bytes' => '52494646', 'format' => 'riff'], // WAV/AVI/WEBP
    ];

    /** RIFF form type (bytes 8-11 of the container) to format */
    private const RIFF_FORMS = [
        'WAVE' => 'wav',
        'AVI ' => 'avi',
        'WEBP' => 'webp',
    ];

    /** Cached finfo instance */
    private static ?finfo $finfo = null;

    /**
     * Detect both MIME type and format from content bytes in a single operation
     *
     * finfo is consulted first. If it is unavailable, fails, or only reports
     * the generic binary type, the built-in magic byte table (and a plain-text
     * heuristic) is used instead.
     *
     * @param string $headerBytes First bytes of the file content (256 recommended)
     * @return array{mime: string, format: string} Array with 'mime' and 'format' keys
     */
    public static function detect(string $headerBytes): array {
        if ($headerBytes === '') {
            return ['mime' => MimeTypes::MIME_BINARY, 'format' => MimeTypes::FORMAT_BINARY];
        }

        $mime = self::detectMimeType($headerBytes);
        // The generic binary type means "no match". It must not be resolved
        // through MimeTypes (which maps it to 'binary'), otherwise the magic
        // byte fallback below would never run while fileinfo is loaded.
        if ($mime === MimeTypes::MIME_BINARY) {
            $mime = null;
        }

        $format = null;
        if ($mime !== null) {
            $format = MimeTypes::toFormat($mime) ?? MimeTypes::parseFormat($mime);
        }
        if ($format === null) {
            $format = self::detectFromMagicBytes($headerBytes);
        }
        if ($mime === null) {
            $mime = MimeTypes::toMime($format) ?? MimeTypes::MIME_BINARY;
        }

        return ['mime' => $mime, 'format' => $format];
    }

    /**
     * Detect both MIME type and format from a stream in a single operation
     *
     * Reads up to HEADER_SIZE bytes and then seeks back to the starting position.
     *
     * @param resource $stream File stream
     * @return array{mime: string, format: string} Array with 'mime' and 'format' keys
     */
    public static function detectFromStream($stream): array {
        // detect() already returns the binary defaults for an empty header
        return self::detect(self::readHeader($stream));
    }

    /**
     * Detect file format from content bytes
     *
     * @param string $headerBytes First bytes of the file content (256 recommended)
     * @return string Detected format (e.g., 'jpeg', 'png', 'pdf') or 'binary' if unknown
     */
    public static function detectFormat(string $headerBytes): string {
        return self::detect($headerBytes)['format'];
    }

    /**
     * Detect MIME type from content bytes using finfo
     *
     * @param string $headerBytes Content bytes
     * @return string|null MIME type, or null if fileinfo is not loaded or detection failed
     */
    public static function detectMimeType(string $headerBytes): ?string {
        if (!extension_loaded('fileinfo')) {
            return null;
        }
        self::$finfo ??= new finfo(FILEINFO_MIME_TYPE);
        $mime = self::$finfo->buffer($headerBytes);
        return $mime === false ? null : $mime;
    }

    /**
     * Detect file format from a stream
     *
     * Reads the header bytes, detects format, and rewinds the stream.
     *
     * @param resource $stream File stream
     * @return string Detected format
     */
    public static function detectFormatFromStream($stream): string {
        return self::detectFormat(self::readHeader($stream));
    }

    /**
     * Detect MIME type from a stream
     *
     * @param resource $stream File stream
     * @return string|null MIME type or null
     */
    public static function detectMimeTypeFromStream($stream): ?string {
        $headerBytes = self::readHeader($stream);
        // Checked here because detectMimeType() has no empty-input guard of its own
        if ($headerBytes === '') {
            return null;
        }
        return self::detectMimeType($headerBytes);
    }

    /**
     * Read up to HEADER_SIZE bytes from a stream and restore its position
     *
     * @param resource $stream File stream
     * @return string Bytes read, or an empty string if nothing could be read
     */
    private static function readHeader($stream): string {
        $position = ftell($stream);
        $headerBytes = fread($stream, self::HEADER_SIZE);
        // ftell() returns false on failure; passing that to fseek() would be a
        // TypeError under strict_types, so only seek back to a real offset.
        if ($position !== false) {
            fseek($stream, $position);
        }
        return $headerBytes === false ? '' : $headerBytes;
    }

    /**
     * Fallback detection using magic bytes
     *
     * RIFF containers are resolved to 'wav', 'avi' or 'webp' from their form
     * type; an unrecognised form type is reported as 'riff'.
     *
     * @param string $headerBytes Content bytes
     * @return string Detected format or 'binary'
     */
    private static function detectFromMagicBytes(string $headerBytes): string {
        foreach (self::SIGNATURES as $sig) {
            $magic = hex2bin($sig['bytes']);
            // A header too short to contain the signature yields a shorter
            // slice, which simply fails the comparison.
            if (substr($headerBytes, $sig['offset'], strlen($magic)) !== $magic) {
                continue;
            }
            if ($sig['format'] === 'riff') {
                // Layout: "RIFF" <4-byte size> <4-byte form type>
                return self::RIFF_FORMS[(string) substr($headerBytes, 8, 4)] ?? 'riff';
            }
            return $sig['format'];
        }

        // Check if likely text
        if (self::isLikelyText($headerBytes)) {
            return 'text';
        }
        return MimeTypes::FORMAT_BINARY;
    }

    /**
     * Check if content appears to be text
     *
     * Content starting with a UTF-8 BOM is always treated as text. Otherwise
     * the first 256 bytes are sampled and more than 90% of them must be
     * printable ASCII, tab/LF/CR, or in the 128-247 range.
     *
     * @param string $bytes Content bytes
     * @return bool
     */
    private static function isLikelyText(string $bytes): bool {
        // Avoid a division by zero below
        if ($bytes === '') {
            return false;
        }
        // Check for UTF-8 BOM
        if (str_starts_with($bytes, "\xEF\xBB\xBF")) {
            return true;
        }

        $length = min(strlen($bytes), 256);
        $printableCount = 0;
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($bytes[$i]);
            $isAsciiText = ($byte >= 32 && $byte <= 126) || $byte === 9 || $byte === 10 || $byte === 13;
            // High-bit bytes are counted as possible UTF-8 sequence bytes
            $isHighByte = $byte >= 128 && $byte <= 247;
            if ($isAsciiText || $isHighByte) {
                $printableCount++;
            }
        }
        return ($printableCount / $length) > 0.9;
    }
}