From bdfbe3d3102227e0859f655372dfadc2be976d31 Mon Sep 17 00:00:00 2001
From: ILay
Date: Mon, 27 Apr 2026 18:18:38 +0200
Subject: [PATCH] change pip to uv and update dependencies

---
Review notes (free-form text in this section is not part of the commit):
* Dockerfile: the Debian python3-dev package is no longer installed. It
  provides headers for the distro's Python 3.11, not for the Python 3.12
  interpreter of the base image, so native builds never used it.
* Dockerfile: the early recursive chown of the still-empty /app is gone; the
  chown that runs after COPY and install already covers the directory.
* pyproject.toml: the util-services source moved from tag v0.4.1 to what looks
  like a feature branch; flagged in place so it gets pinned before release.

 Dockerfile            | 64 +++++++++++++++++++++++++++++++++++++++----
 pipeline.variables.sh |  2 +-
 pyproject.toml        | 27 +++++++++++++-----
 3 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index fd4e780..0c997fb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,15 +1,67 @@
 FROM python:3.12-slim-bookworm
 
-# Install git for git-based dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
+# --- Install uv (pinned for reproducibility) ---
+COPY --from=ghcr.io/astral-sh/uv:0.10.8 /uv /uvx /bin/
 
 WORKDIR /app
 
-COPY pyproject.toml .
-COPY src/ src/
+# Create non-root user with explicit UID/GID 1000
+RUN addgroup --gid 1000 appgroup && \
+    adduser --uid 1000 --gid 1000 --disabled-password --gecos "" appuser
 
-# Install the package and all dependencies
-RUN pip install --no-cache-dir .
+# Install system dependencies:
+#   - git: required to fetch util-services from GitLab (tool.uv.sources)
+#   - build-essential / gcc / g++ / cmake: native extensions
+#     (scrubadub-spacy → spaCy, pycanon, etc.)
+#   - curl: optional healthcheck / runtime tooling
+# Debian's python3-dev is deliberately NOT installed: it ships headers for the
+# distro's Python 3.11, while this image's Python 3.12 (and its headers) lives
+# under /usr/local.
+RUN apt-get update && apt-get upgrade -y \
+    && apt-get install -y --no-install-recommends \
+        build-essential=12.9 \
+        cmake=3.25.1-1 \
+        gcc=4:12.2.0-3 \
+        g++=4:12.2.0-3 \
+        git=1:2.39.5-0+deb12u3 \
+        curl=7.88.1-10+deb12u14 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && rm -rf /tmp/* \
+    && rm -rf /var/tmp/*
+
+# Copy project metadata and source
+COPY pyproject.toml .
+COPY src/ ./src/
+
+# uv environment knobs:
+#   UV_COMPILE_BYTECODE → compile .pyc files at install time for faster cold start
+#   UV_LINK_MODE=copy   → copy files out of the uv cache instead of hard-linking (cache is a separate mount)
+#   UV_SYSTEM_PYTHON=1  → install into the system Python (no extra venv needed)
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+ENV UV_SYSTEM_PYTHON=1
+
+# Install the project and all dependencies, respecting [tool.uv.sources]
+# (git source for util-services and pytorch-cpu index for torch)
+# BuildKit cache mount keeps the uv package cache across builds
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install .
+
+ENV PYTHONPATH="/app/src"
+
+# Make /app writable for the non-root user (e.g. spaCy model downloads)
+# Single ownership pass, run after COPY and install so root-created files are covered too
+RUN chown -R 1000:1000 /app && chmod -R u+w /app
+
+# Provide a real home directory for appuser
+RUN mkdir -p /home/appuser && chown -R 1000:1000 /home/appuser
+ENV HOME=/home/appuser
+
+USER appuser
+
+# Sanity-check: fail the build early if the dagster CLI is missing
+RUN dagster --version
 
 EXPOSE 4000
 
diff --git a/pipeline.variables.sh b/pipeline.variables.sh
index 3292612..4a3f9c4 100644
--- a/pipeline.variables.sh
+++ b/pipeline.variables.sh
@@ -1 +1 @@
-PROJECT_VERSION_NUMBER="0.0.1"
\ No newline at end of file
+PROJECT_VERSION_NUMBER="0.1.0"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 4c6f2dc..7897316 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
     "lxml>=6.0",
     "xmltodict>=1.0",
     "rdflib>=7.6",
-    "openpyxl",
+    "openpyxl>=3.1.0",
     "xlrd>=2.0.1",
     "tabulate==0.8.10",
     "pyspellchecker>=0.8.4",
@@ -35,14 +35,27 @@ dependencies = [
     "pycanon==1.0.1.post2",
     "anjana>=1.0.0",
     # Field-level pseudo-anonymisation
-    "scrubadub",
-    "scrubadub_spacy",
-    "hvac",
-    "cryptography",
-    # Util services (git dependency)
-    "util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.4.1",
+    "scrubadub>=2.0.0",
+    "scrubadub_spacy>=1.0.0",
+    "hvac>=2.0.0",
+    "cryptography>=42.0.0",
+    # Util services — resolved via [tool.uv.sources] (git)
+    "util-services",
 ]
 
+[tool.uv]
+exclude-dependencies = ["transformers", "spacy-transformers"]
+
+[tool.uv.sources]
+torch = { index = "pytorch-cpu" }
+# NOTE(review): "feature/SIMPL-24631" looks like a branch name (previously tag v0.4.1); pin a tag or commit SHA before release so builds stay reproducible.
+util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "feature/SIMPL-24631" }
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
 [project.optional-dependencies]
 dev = [
     "pytest>=8.0.0",