change pip to uv and update dependencies

This commit is contained in:
ILay
2026-04-27 18:18:38 +02:00
parent 0847026b32
commit bdfbe3d310
3 changed files with 78 additions and 14 deletions

View File

@@ -1,15 +1,67 @@
FROM python:3.12-slim-bookworm FROM python:3.12-slim-bookworm
# Install git for git-based dependencies # --- Install uv (pinned for reproducibility) ---
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/* COPY --from=ghcr.io/astral-sh/uv:0.10.8 /uv /uvx /bin/
WORKDIR /app WORKDIR /app
COPY pyproject.toml . # Create non-root user with explicit UID/GID 1000
COPY src/ src/ RUN addgroup --gid 1000 appgroup && \
adduser --uid 1000 --gid 1000 --disabled-password --gecos "" appuser
# Install the package and all dependencies # Install system dependencies:
RUN pip install --no-cache-dir . # - git: required to fetch util-services from GitLab (tool.uv.sources)
# - build-essential / gcc / g++ / python3-dev / cmake: native extensions
# (scrubadub-spacy → spaCy, pycanon, etc.)
# - curl: optional healthcheck / runtime tooling
# System packages: compiler toolchain and cmake for building native extensions,
# git for VCS-hosted dependencies, curl for health checks / runtime tooling.
# python3-dev is deliberately NOT installed: it would pull in Debian's
# Python 3.11 plus its headers, while extensions here are built against the
# Python 3.12 interpreter in /usr/local, whose headers ship with the base image.
# NOTE(review): exact version pins stop resolving once Debian publishes a newer
# point release of a package; bump them together with the base image.
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        build-essential=12.9 \
        cmake=3.25.1-1 \
        curl=7.88.1-10+deb12u14 \
        g++=4:12.2.0-3 \
        gcc=4:12.2.0-3 \
        git=1:2.39.5-0+deb12u3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Hand /app to the runtime user (appuser/appgroup, UID/GID 1000) so it can write there.
RUN chown -R 1000:1000 /app
# Bring the package metadata and the source tree into the working directory.
COPY pyproject.toml ./
COPY src/ src/
# uv settings, in effect for every later uv invocation (build and runtime):
#   UV_COMPILE_BYTECODE=1  compile .pyc files at install time for a faster cold start
#   UV_LINK_MODE=copy      copy files out of the uv cache instead of hard-linking them;
#                          the cache mount is on a different filesystem than the image,
#                          so hard links cannot be used there
#   UV_SYSTEM_PYTHON=1     install into the system Python instead of a virtual environment
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_SYSTEM_PYTHON=1
# Install the project together with all of its dependencies. Sources declared
# under [tool.uv.sources] in pyproject.toml are honoured (git for util-services,
# the pytorch-cpu index for torch).
# The BuildKit cache mount keeps uv's package cache between builds without
# writing it into an image layer.
RUN --mount=type=cache,target=/root/.cache/uv uv pip install .
# Put the copied source tree on Python's module search path.
ENV PYTHONPATH=/app/src
# Everything under /app, including files copied and installed as root, becomes
# owned by and writable for UID/GID 1000 (e.g. for spaCy model downloads).
RUN chown -R 1000:1000 /app \
    && chmod -R u+w /app
# Ensure /home/appuser exists and belongs to UID/GID 1000, then point HOME at it.
RUN mkdir -p /home/appuser \
    && chown -R 1000:1000 /home/appuser
ENV HOME=/home/appuser
# Drop root for the remaining build steps and at runtime. The numeric form is
# used (appuser is UID 1000, appgroup is GID 1000) because Kubernetes cannot
# verify runAsNonRoot for an image whose USER is a name rather than a UID.
USER 1000:1000
# Smoke test as the runtime user: the build aborts here if the dagster CLI
# cannot be found or does not start.
RUN ["dagster", "--version"]
EXPOSE 4000 EXPOSE 4000

View File

@@ -1 +1 @@
PROJECT_VERSION_NUMBER="0.0.1" PROJECT_VERSION_NUMBER="0.1.0"

View File

@@ -19,7 +19,7 @@ dependencies = [
"lxml>=6.0", "lxml>=6.0",
"xmltodict>=1.0", "xmltodict>=1.0",
"rdflib>=7.6", "rdflib>=7.6",
"openpyxl", "openpyxl>=3.1.0",
"xlrd>=2.0.1", "xlrd>=2.0.1",
"tabulate==0.8.10", "tabulate==0.8.10",
"pyspellchecker>=0.8.4", "pyspellchecker>=0.8.4",
@@ -35,14 +35,26 @@ dependencies = [
"pycanon==1.0.1.post2", "pycanon==1.0.1.post2",
"anjana>=1.0.0", "anjana>=1.0.0",
# Field-level pseudo-anonymisation # Field-level pseudo-anonymisation
"scrubadub", "scrubadub>=2.0.0",
"scrubadub_spacy", "scrubadub_spacy>=1.0.0",
"hvac", "hvac>=2.0.0",
"cryptography", "cryptography>=42.0.0",
# Util services (git dependency) # Util services — resolved via [tool.uv.sources] (git)
"util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.4.1", "util-services",
] ]
[tool.uv]
# Packages uv is told to leave out of the resolved dependency set.
exclude-dependencies = [
    "transformers",
    "spacy-transformers",
]
[tool.uv.sources]
# torch is resolved from the index named "pytorch-cpu" rather than the default index.
torch = { index = "pytorch-cpu" }
# util-services is fetched from its git repository.
# NOTE(review): "feature/SIMPL-24631" looks like a branch name, i.e. a moving
# ref, so two builds may install different code — confirm, and pin a tag or
# commit SHA once the branch is merged.
util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "feature/SIMPL-24631" }
# Additional package index serving the CPU-only PyTorch wheels.
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
# explicit: only packages mapped to this index under [tool.uv.sources] use it.
explicit = true
[project.optional-dependencies] [project.optional-dependencies]
dev = [ dev = [
"pytest>=8.0.0", "pytest>=8.0.0",