fix(docker): install python-magic and libmagic for upload MIME sniffing

Install libmagic1 and image-scoped python-magic in the Docker image so upload MIME detection can use content sniffing. Add regression coverage for the Dockerfile dependency pair and the libmagic-present sniffing path.
This commit is contained in:
Ashvin
2026-06-27 22:01:46 +05:30
committed by GitHub
parent 497c391f84
commit 67040a196f
2 changed files with 60 additions and 0 deletions
+14
View File
@@ -32,6 +32,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgl1 \
libglib2.0-0t64 \
libxcb1 \
libmagic1 \
&& rm -rf /var/lib/apt/lists/*
# libgl1/libglib2.0-0t64/libxcb1 are runtime shared libs (libGL.so.1,
@@ -40,6 +41,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# and dies with `libxcb.so.1: cannot open shared object file` despite a clean
# pip install. Using full opencv-python (not -headless) because basicsr/gfpgan/
# facexlib/realesrgan all depend on the `opencv-python` distribution by name.
#
# libmagic1 is the shared lib (libmagic.so.1) that python-magic dlopens for
# content-based MIME sniffing in src/upload_handler.py. We install both here
# (libmagic1 + the python-magic wrapper, below) rather than in requirements.txt
# because python-magic resolves libmagic at import time: where the lib is
# absent the import can block or raise, so keeping it image-only avoids
# regressing pip/venv installs on hosts without libmagic. Debian always has the
# lib here, so the import is instant and detection actually works.
# Docker CLI (client only — daemon stays on the host via the
# /var/run/docker.sock mount). The Debian `docker.io` package ships
@@ -67,6 +76,11 @@ COPY requirements.txt requirements-optional.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
&& if [ "$INSTALL_OPTIONAL" = "true" ]; then pip install --no-cache-dir -r requirements-optional.txt; fi
# python-magic powers content-based MIME sniffing in src/upload_handler.py.
# Image-only (not in requirements.txt) because it needs the libmagic1 system
# lib installed above; see the apt note near the top of this stage.
RUN pip install --no-cache-dir python-magic==0.4.27
# Pre-install the patched basicsr/gfpgan/facexlib wheels built in the
# realesrgan-wheels stage (--no-deps keeps the image lean — torch & friends are
# pulled only when realesrgan is actually installed). With these dists already
@@ -0,0 +1,46 @@
"""Regression for #4875: the official Docker image shipped without python-magic
(and without the libmagic system lib), so content-based MIME detection in
src/upload_handler.py was dead and uploads were typed by extension only.
python-magic resolves libmagic at import time and can block/raise when the lib
is absent, so it's installed in the Docker image (which always has libmagic1)
rather than in the shared requirements.txt. These tests pin:
1. the Dockerfile installs both libmagic1 (apt) and python-magic (pip);
2. when libmagic is actually present, detect_content_type sniffs the MIME
from the bytes and overrides a misleading/missing extension.
"""
import io
import os
import pytest
from src.upload_handler import UploadHandler
# 1x1 PNG (header is enough for libmagic to report image/png).
# Layout: the 8-byte PNG signature, an IHDR chunk (width 1, height 1, bit
# depth 8, colour type 6), one IDAT chunk and the closing IEND chunk.
_PNG = (
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00"
b"\x01\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82"
)
# Two dirname() hops up from this file's absolute path, i.e. the parent of the
# directory that holds this test module; the Dockerfile is looked up there.
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def test_dockerfile_installs_libmagic_and_python_magic():
    """Pin that the Dockerfile really installs libmagic1 and python-magic.

    Comment lines are dropped before matching. The Dockerfile's explanatory
    comments name both packages, so a substring search over the raw file
    would keep passing even after the install instructions were removed.
    """
    with open(os.path.join(_REPO_ROOT, "Dockerfile"), encoding="utf-8") as f:
        # A Dockerfile comment is a whole line whose first non-blank
        # character is "#"; everything else belongs to an instruction.
        instructions = [
            line for line in f if not line.lstrip().startswith("#")
        ]

    # The C library python-magic dlopens, installed via apt...
    assert any("libmagic1" in line for line in instructions)
    # ...and the wrapper itself, installed via pip in the image.
    assert any("python-magic" in line for line in instructions)
def test_content_detection_overrides_misleading_extension(tmp_path):
    """Check that PNG bytes named ``payload.bin`` are reported as image/png.

    The test is skipped when the handler exposes no file detector, which is
    how this module recognises an environment without libmagic/python-magic.
    """
    scratch_dir = str(tmp_path)
    handler = UploadHandler(base_dir=scratch_dir, upload_dir=scratch_dir)

    if handler.file_detector is None:
        pytest.skip("libmagic/python-magic not installed in this environment")

    # The .bin suffix gives extension-based typing nothing to work with, so
    # an image/png answer can only come from inspecting the bytes themselves.
    png_stream = io.BytesIO(_PNG)
    assert handler.detect_content_type(png_stream, "payload.bin") == "image/png"