From 67040a196f8777838e3eb9bbf4d88715d24a405d Mon Sep 17 00:00:00 2001 From: Ashvin <76151462+ashvinctrl@users.noreply.github.com> Date: Sat, 27 Jun 2026 22:01:46 +0530 Subject: [PATCH] fix(docker): install python-magic and libmagic for upload MIME sniffing Install libmagic1 and image-scoped python-magic in the Docker image so upload MIME detection can use content sniffing. Add regression coverage for the Dockerfile dependency pair and the libmagic-present sniffing path. --- Dockerfile | 14 ++++++ tests/test_upload_content_detection_magic.py | 46 ++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 tests/test_upload_content_detection_magic.py diff --git a/Dockerfile b/Dockerfile index 221d462d1..9b305569c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,6 +32,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libgl1 \ libglib2.0-0t64 \ libxcb1 \ + libmagic1 \ && rm -rf /var/lib/apt/lists/* # libgl1/libglib2.0-0t64/libxcb1 are runtime shared libs (libGL.so.1, @@ -40,6 +41,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # and dies with `libxcb.so.1: cannot open shared object file` despite a clean # pip install. Using full opencv-python (not -headless) because basicsr/gfpgan/ # facexlib/realesrgan all depend on the `opencv-python` distribution by name. +# +# libmagic1 is the shared lib (libmagic.so.1) that python-magic dlopens for +# content-based MIME sniffing in src/upload_handler.py. We install both here +# (libmagic1 + the python-magic wrapper, below) rather than in requirements.txt +# because python-magic resolves libmagic at import time: where the lib is +# absent the import can block or raise, so keeping it image-only avoids +# regressing pip/venv installs on hosts without libmagic. Debian always has the +# lib here, so the import is instant and detection actually works. # Docker CLI (client only — daemon stays on the host via the # /var/run/docker.sock mount). 
The Debian `docker.io` package ships @@ -67,6 +76,11 @@ COPY requirements.txt requirements-optional.txt ./ RUN pip install --no-cache-dir -r requirements.txt \ && if [ "$INSTALL_OPTIONAL" = "true" ]; then pip install --no-cache-dir -r requirements-optional.txt; fi +# python-magic powers content-based MIME sniffing in src/upload_handler.py. +# Image-only (not in requirements.txt) because it needs the libmagic1 system +# lib installed above; see the apt note near the top of this stage. +RUN pip install --no-cache-dir python-magic==0.4.27 + # Pre-install the patched basicsr/gfpgan/facexlib wheels built in the # realesrgan-wheels stage (--no-deps keeps the image lean — torch & friends are # pulled only when realesrgan is actually installed). With these dists already diff --git a/tests/test_upload_content_detection_magic.py b/tests/test_upload_content_detection_magic.py new file mode 100644 index 000000000..d5ae6a350 --- /dev/null +++ b/tests/test_upload_content_detection_magic.py @@ -0,0 +1,46 @@ +"""Regression for #4875: the official Docker image shipped without python-magic +(and without the libmagic system lib), so content-based MIME detection in +src/upload_handler.py was dead and uploads were typed by extension only. + +python-magic resolves libmagic at import time and can block/raise when the lib +is absent, so it's installed in the Docker image (which always has libmagic1) +rather than in the shared requirements.txt. These tests pin: + 1. the Dockerfile installs both libmagic1 (apt) and python-magic (pip); + 2. when libmagic is actually present, detect_content_type sniffs the MIME + from the bytes and overrides a misleading/missing extension. +""" +import io +import os + +import pytest + +from src.upload_handler import UploadHandler + +# 1x1 PNG (header is enough for libmagic to report image/png). 
+_PNG = (
+    b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
+    b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00"
+    b"\x01\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82"
+)
+# Repository root (this file sits in tests/), where the Dockerfile lives.
+_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def test_dockerfile_installs_libmagic_and_python_magic():
+    """Both packages must be named by instructions, not just by comments."""
+    with open(os.path.join(_REPO_ROOT, "Dockerfile"), encoding="utf-8") as f:
+        # The Dockerfile's own notes mention both names, so a whole-file
+        # substring check would still pass with the install lines removed.
+        code = "".join(ln for ln in f if not ln.lstrip().startswith("#"))
+    assert "libmagic1" in code  # apt: the C library python-magic dlopens
+    assert "python-magic" in code  # pip: the wrapper, installed in the image
+
+
+def test_content_detection_overrides_misleading_extension(tmp_path):
+    """PNG bytes named payload.bin must be typed image/png from content."""
+    handler = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path))
+    if handler.file_detector is None:
+        pytest.skip("libmagic/python-magic not installed in this environment")
+    # .bin defeats extension typing, so image/png proves content sniffing.
+    detected = handler.detect_content_type(io.BytesIO(_PNG), "payload.bin")
+    assert detected == "image/png"