minimal example code location
Some checks failed
Build and Push Docker Image / build-and-push (push) Has been cancelled
Some checks failed
Build and Push Docker Image / build-and-push (push) Has been cancelled
This commit is contained in:
45
.gitea/workflows/docker-publish.yml
Normal file
45
.gitea/workflows/docker-publish.yml
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
name: Build and Push Docker Image
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-push:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
REGISTRY: gitea.dataprovider01.sandbox-cat-dat.simpl-europe.eu
|
||||||
|
IMAGE_REPO: gitea.dataprovider01.sandbox-cat-dat.simpl-europe.eu/j.r/template-code-location
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Validate registry secrets
|
||||||
|
run: |
|
||||||
|
if [ -z "${{ secrets.REGISTRY_USERNAME }}" ] || [ -z "${{ secrets.REGISTRY_PASSWORD }}" ]; then
|
||||||
|
echo "Missing REGISTRY_USERNAME or REGISTRY_PASSWORD secret"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Login to registry
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.REGISTRY_PASSWORD }}" | docker login "${REGISTRY}" \
|
||||||
|
-u "${{ secrets.REGISTRY_USERNAME }}" --password-stdin
|
||||||
|
|
||||||
|
- name: Build image
|
||||||
|
run: |
|
||||||
|
COMMIT_SHA="${GITHUB_SHA:-$GITEA_SHA}"
|
||||||
|
SHORT_SHA="$(echo "${COMMIT_SHA}" | cut -c1-12)"
|
||||||
|
docker build \
|
||||||
|
-t "${IMAGE_REPO}:latest" \
|
||||||
|
-t "${IMAGE_REPO}:${SHORT_SHA}" \
|
||||||
|
.
|
||||||
|
|
||||||
|
- name: Push image tags
|
||||||
|
run: |
|
||||||
|
COMMIT_SHA="${GITHUB_SHA:-$GITEA_SHA}"
|
||||||
|
SHORT_SHA="$(echo "${COMMIT_SHA}" | cut -c1-12)"
|
||||||
|
docker push "${IMAGE_REPO}:latest"
|
||||||
|
docker push "${IMAGE_REPO}:${SHORT_SHA}"
|
||||||
37
Dockerfile
37
Dockerfile
@@ -11,16 +11,9 @@ RUN addgroup --gid 1000 appgroup && \
|
|||||||
|
|
||||||
# Install system dependencies:
|
# Install system dependencies:
|
||||||
# - git: required to fetch util-services from GitLab (tool.uv.sources)
|
# - git: required to fetch util-services from GitLab (tool.uv.sources)
|
||||||
# - build-essential / gcc / g++ / python3-dev / cmake: native extensions
|
|
||||||
# (scrubadub-spacy → spaCy, pycanon, etc.)
|
|
||||||
# - curl: optional healthcheck / runtime tooling
|
# - curl: optional healthcheck / runtime tooling
|
||||||
RUN apt-get update && apt-get upgrade -y \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends \
|
&& apt-get install -y --no-install-recommends \
|
||||||
build-essential=12.9 \
|
|
||||||
cmake=3.25.1-1 \
|
|
||||||
gcc=4:12.2.0-3 \
|
|
||||||
g++=4:12.2.0-3 \
|
|
||||||
python3-dev=3.11.2-1+b1 \
|
|
||||||
git=1:2.39.5-0+deb12u3 \
|
git=1:2.39.5-0+deb12u3 \
|
||||||
curl=7.88.1-10+deb12u14 \
|
curl=7.88.1-10+deb12u14 \
|
||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
@@ -28,41 +21,35 @@ RUN apt-get update && apt-get upgrade -y \
|
|||||||
&& rm -rf /tmp/* \
|
&& rm -rf /tmp/* \
|
||||||
&& rm -rf /var/tmp/*
|
&& rm -rf /var/tmp/*
|
||||||
|
|
||||||
# Pre-own /app so appuser can write to it
|
# Ensure appuser can create the project virtual environment in /app
|
||||||
RUN chown -R appuser:appgroup /app
|
RUN chown appuser:appgroup /app
|
||||||
|
|
||||||
# Copy project metadata and source
|
# Copy project metadata and source
|
||||||
COPY pyproject.toml .
|
COPY --chown=appuser:appgroup pyproject.toml .
|
||||||
COPY uv.lock .
|
COPY --chown=appuser:appgroup uv.lock .
|
||||||
COPY src/ ./src/
|
COPY --chown=appuser:appgroup src/ ./src/
|
||||||
|
|
||||||
# uv environment knobs:
|
# uv environment knobs:
|
||||||
# UV_COMPILE_BYTECODE → compile .pyc files at install time for faster cold start
|
# UV_COMPILE_BYTECODE=0 → skip .pyc precompilation at install time to reduce image size
|
||||||
# UV_LINK_MODE=copy → copy files instead of symlinks (required in Docker layers)
|
# UV_LINK_MODE=copy → copy files instead of symlinks (required in Docker layers)
|
||||||
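# UV_SYSTEM_PYTHON=1 → let uv use the system Python interpreter (the project itself is still installed into /app/.venv by `uv sync`)
|
# UV_SYSTEM_PYTHON=1 → let uv use the system Python interpreter (the project itself is still installed into /app/.venv by `uv sync`)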
|
||||||
ENV UV_COMPILE_BYTECODE=1
|
ENV UV_COMPILE_BYTECODE=0
|
||||||
ENV UV_LINK_MODE=copy
|
ENV UV_LINK_MODE=copy
|
||||||
ENV UV_SYSTEM_PYTHON=1
|
ENV UV_SYSTEM_PYTHON=1
|
||||||
|
|
||||||
# Install the project and all dependencies, respecting [tool.uv.sources]
|
# Install the project and all dependencies, respecting [tool.uv.sources]
|
||||||
# (git source for util-services and pytorch-cpu index for torch)
|
# (git source for util-services; torch is excluded and not installed in this image)
|
||||||
# BuildKit cache mount keeps the uv package cache across builds
|
# BuildKit cache mount keeps the uv package cache across builds
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
USER appuser
|
||||||
uv sync --frozen --no-dev
|
|
||||||
|
RUN --mount=type=cache,target=/home/appuser/.cache/uv,uid=1000,gid=1000 \
|
||||||
|
uv sync --frozen --no-dev --no-install-package torch
|
||||||
|
|
||||||
# Put the project's venv on PATH (matches WORKDIR)
|
# Put the project's venv on PATH (matches WORKDIR)
|
||||||
ENV PATH="/app/.venv/bin:${PATH}"
|
ENV PATH="/app/.venv/bin:${PATH}"
|
||||||
ENV PYTHONPATH="/app/src"
|
ENV PYTHONPATH="/app/src"
|
||||||
|
|
||||||
# Make /app writable for the non-root user (e.g. spaCy model downloads)
|
|
||||||
RUN chown -R 1000:1000 /app && chmod -R u+w /app
|
|
||||||
|
|
||||||
# Provide a real home directory for appuser
|
|
||||||
RUN mkdir -p /home/appuser && chown -R 1000:1000 /home/appuser
|
|
||||||
ENV HOME=/home/appuser
|
ENV HOME=/home/appuser
|
||||||
|
|
||||||
USER appuser
|
|
||||||
|
|
||||||
# Sanity-check: fail the build early if the dagster CLI is missing
|
# Sanity-check: fail the build early if the dagster CLI is missing
|
||||||
RUN dagster --version
|
RUN dagster --version
|
||||||
|
|
||||||
|
|||||||
@@ -10,28 +10,16 @@ requires-python = ">=3.12"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"dagster>=1.8.13",
|
"dagster>=1.8.13",
|
||||||
"util-services",
|
"util-services",
|
||||||
"data-processing",
|
|
||||||
"dataframe-level-anonymisation",
|
|
||||||
"field-level-pseudo-anonymisation",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.uv]
|
[tool.uv]
|
||||||
exclude-dependencies = ["transformers", "spacy-transformers"]
|
exclude-dependencies = ["transformers", "spacy-transformers", "torch"]
|
||||||
override-dependencies = [
|
override-dependencies = [
|
||||||
"util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.7.0",
|
"util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.7.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.uv.sources]
|
[tool.uv.sources]
|
||||||
torch = { index = "pytorch-cpu" }
|
|
||||||
util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.7.0" }
|
util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.7.0" }
|
||||||
data-processing = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/data-processing.git", rev = "v0.4.0" }
|
|
||||||
dataframe-level-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/dataframe-level-anonymisation.git", rev = "v0.6.0" }
|
|
||||||
field-level-pseudo-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/field-level-pseudo-anonymisation.git", rev = "v0.7.0" }
|
|
||||||
|
|
||||||
[[tool.uv.index]]
|
|
||||||
name = "pytorch-cpu"
|
|
||||||
url = "https://download.pytorch.org/whl/cpu"
|
|
||||||
explicit = true
|
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
dev = [
|
dev = [
|
||||||
|
|||||||
@@ -1,70 +1,14 @@
|
|||||||
from dagster import Definitions
|
from dagster import Definitions
|
||||||
from util_services.resources import s3_resource
|
|
||||||
from util_services.sensors import (
|
|
||||||
notify_success,
|
|
||||||
notify_failure,
|
|
||||||
notify_canceled
|
|
||||||
)
|
|
||||||
from util_services.custom_json_logger import simpl_json_logger
|
from util_services.custom_json_logger import simpl_json_logger
|
||||||
|
from util_services.resources import s3_resource
|
||||||
# Data processing jobs
|
|
||||||
from data_processing.jobs import (
|
|
||||||
remove_duplicates_job_s3,
|
|
||||||
fill_missing_values_job_s3,
|
|
||||||
standardize_categorical_values_job_s3,
|
|
||||||
correct_typos_job_s3,
|
|
||||||
normalize_numeric_min_max_job_s3,
|
|
||||||
normalize_datetime_job_s3,
|
|
||||||
normalize_coordinates_job_s3,
|
|
||||||
add_global_aggregations_job_s3,
|
|
||||||
filter_dataset_job_s3,
|
|
||||||
quality_job_s3
|
|
||||||
)
|
|
||||||
|
|
||||||
# Dataframe-level anonymisation jobs
|
|
||||||
from dataframe_level_anonymisation.jobs import (
|
|
||||||
k_anonymity_job_s3,
|
|
||||||
l_diversity_job_s3,
|
|
||||||
t_closeness_job_s3,
|
|
||||||
read_write_semistructured_job_s3,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Field-level pseudo-anonymisation jobs
|
|
||||||
from field_level_pseudo_anonymisation.jobs import (
|
|
||||||
anonymise_pseudonymise_structured_job_s3,
|
|
||||||
depseudonymise_structured_job_s3,
|
|
||||||
anonymise_pseudonymise_unstructured_job_s3,
|
|
||||||
depseudonymise_unstructured_job_s3,
|
|
||||||
)
|
|
||||||
|
|
||||||
from template_code_location.jobs.jobs import data_processing_job
|
from template_code_location.jobs.jobs import data_processing_job
|
||||||
|
|
||||||
defs = Definitions(
|
defs = Definitions(
|
||||||
jobs=[
|
jobs=[
|
||||||
data_processing_job,
|
data_processing_job
|
||||||
# Data processing
|
|
||||||
remove_duplicates_job_s3,
|
|
||||||
fill_missing_values_job_s3,
|
|
||||||
standardize_categorical_values_job_s3,
|
|
||||||
correct_typos_job_s3,
|
|
||||||
normalize_numeric_min_max_job_s3,
|
|
||||||
normalize_datetime_job_s3,
|
|
||||||
normalize_coordinates_job_s3,
|
|
||||||
add_global_aggregations_job_s3,
|
|
||||||
filter_dataset_job_s3,
|
|
||||||
quality_job_s3,
|
|
||||||
# Dataframe-level anonymisation
|
|
||||||
k_anonymity_job_s3,
|
|
||||||
l_diversity_job_s3,
|
|
||||||
t_closeness_job_s3,
|
|
||||||
read_write_semistructured_job_s3,
|
|
||||||
# Field-level pseudo-anonymisation
|
|
||||||
anonymise_pseudonymise_structured_job_s3,
|
|
||||||
depseudonymise_structured_job_s3,
|
|
||||||
anonymise_pseudonymise_unstructured_job_s3,
|
|
||||||
depseudonymise_unstructured_job_s3,
|
|
||||||
],
|
],
|
||||||
sensors=[notify_success, notify_failure, notify_canceled],
|
sensors=[],
|
||||||
resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},
|
resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},
|
||||||
loggers={"simpl": simpl_json_logger},
|
loggers={"simpl": simpl_json_logger},
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user