added deletion check, and added workflows #2
113
.gitea/workflows/check-deleted-workflows.yml
Normal file
113
.gitea/workflows/check-deleted-workflows.yml
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
# Blocks a pull request that deletes a Dagster job which is still registered
# as an active workflow definition in the asset orchestrator.
#
# Flow: clone the head branch, install the project, diff the job lists of the
# base and head refs (list_jobs.sh), fetch the active workflow names
# (check_active_workflows.sh) and fail only when both lists overlap.
name: Check Deleted Workflows

on:
  pull_request:
    branches:
      - main
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
  workflow_dispatch:

jobs:
  check-deleted-workflows:
    runs-on: orchestration-platform
    container:
      image: python:3.12-slim
    defaults:
      run:
        shell: bash
    steps:
      - name: Install git
        run: |
          apt-get update -qq
          apt-get install -y --no-install-recommends git

      - name: Checkout repository
        # Secrets are handed over as environment variables instead of being
        # spliced into the script text, so a value containing quotes, `$` or
        # backticks cannot break or alter the shell code below.
        env:
          CLONE_USER: ${{ secrets.REGISTRY_USERNAME }}
          CLONE_PASS: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          REPO_DIR="repo"
          REPO_CLONE_URL="https://gitea.dataprovider01.sandbox-cat-dat.simpl-europe.eu/dataprovider01/template-code-location.git"

          if [ -z "${CLONE_USER:-}" ] || [ -z "${CLONE_PASS:-}" ]; then
            echo "Missing REGISTRY_USERNAME or REGISTRY_PASSWORD secret"
            exit 1
          fi

          rm -rf "${REPO_DIR}"
          AUTH_HEADER="$(printf '%s:%s' "${CLONE_USER}" "${CLONE_PASS}" | base64 | tr -d '\n')"
          HEAD_BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF_NAME:-develop}}"
          # `-c` stores the header in the clone's config, so the later
          # `git fetch` calls in list_jobs.sh authenticate the same way.
          git clone \
            --branch "${HEAD_BRANCH}" \
            -c "http.extraHeader=Authorization: Basic ${AUTH_HEADER}" \
            "${REPO_CLONE_URL}" \
            "${REPO_DIR}"

      - name: Install runtime tools
        run: |
          apt-get update -qq
          apt-get install -y --no-install-recommends git jq curl gcc librdkafka-dev libpython3-dev
          curl -LsSf https://astral.sh/uv/install.sh | sh
          ln -sf "${HOME}/.local/bin/uv" /usr/local/bin/uv

      - name: Install project dependencies
        run: |
          cd repo
          uv sync --frozen --no-dev --no-install-package torch

      - name: Compute deleted workflows/jobs against main
        run: |
          cd repo
          # FAIL_ON_DELETION=false: a deletion alone is not an error here; the
          # last step decides based on the overlap with active workflows.
          PATH="$PWD/.venv/bin:$PATH" \
          BASE_REF="${GITHUB_BASE_REF:-main}" \
          HEAD_REF="${GITHUB_HEAD_REF:-HEAD}" \
          REPOSITORY_FILE="src/template_code_location/repository.py" \
          DIFF_OUTPUT="deleted_workflows.txt" \
          FAIL_ON_DELETION="false" \
          bash .gitea/workflows/list_jobs.sh

      - name: Compute active workflows list
        # Credentials reach the script through the environment only.
        env:
          ONLY_ACTIVE: "true"
          REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          cd repo
          PATH="$PWD/.venv/bin:$PATH" \
          bash .gitea/workflows/check_active_workflows.sh > active_workflows.txt
          echo "--- Active workflows ---"
          # `cat` succeeds on an empty file, so test for content explicitly.
          if [ -s active_workflows.txt ]; then
            cat active_workflows.txt
          else
            echo "(none)"
          fi

      - name: Fail only on overlap with active workflows
        run: |
          cd repo

          echo "--- Deleted workflows ---"
          # list_jobs.sh removes the file when nothing was deleted.
          cat deleted_workflows.txt 2>/dev/null || echo "(none)"

          if [ ! -s deleted_workflows.txt ]; then
            echo "No deleted workflows/jobs found."
            exit 0
          fi

          if [ ! -s active_workflows.txt ]; then
            echo "Active workflows list is empty; no overlap to block on."
            exit 0
          fi

          # comm needs both inputs sorted; blank lines are dropped first.
          awk 'length($0) > 0' deleted_workflows.txt | sort -u > deleted_normalized.txt
          awk 'length($0) > 0' active_workflows.txt | sort -u > active_normalized.txt
          comm -12 deleted_normalized.txt active_normalized.txt > overlapping_workflows.txt

          if [ -s overlapping_workflows.txt ]; then
            echo "------------------------------------------------"
            echo "DELETED ACTIVE WORKFLOWS DETECTED"
            echo "The following deleted workflows/jobs are currently active:"
            cat overlapping_workflows.txt
            echo "------------------------------------------------"
            exit 1
          fi

          echo "No overlap between deleted workflows/jobs and active workflows."
|
||||||
66
.gitea/workflows/check_active_workflows.sh
Normal file
66
.gitea/workflows/check_active_workflows.sh
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Prints the unique, non-empty `jobName` values found in the workflow
# definitions returned by the asset orchestrator, one per line, on stdout.
#
# Steps: obtain an access token with the password grant, call the workflow
# definitions endpoint, then extract every `jobName` from the JSON reply.
#
# Environment (all optional except the credentials):
#   BASE_URL, AUTH_BASE, REALM, CLIENT_ID, WORKFLOW_URL  endpoint settings
#   USERNAME / PASSWORD       credentials; fall back to
#   REGISTRY_USERNAME / REGISTRY_PASSWORD when unset or empty
#   ONLY_ACTIVE               value sent as the `onlyActive` query parameter
#
# Exit status: 0 on success, 1 on any missing tool, missing credential,
# token failure or non-2xx workflow response. Diagnostics go to stderr.
set -euo pipefail

# Default configuration (override via env vars).
BASE_URL="${BASE_URL:-https://participant.be.dataprovider01.sandbox-cat-dat.simpl-europe.eu}"
AUTH_BASE="${AUTH_BASE:-${BASE_URL}/auth}"
REALM="${REALM:-participant}"
USERNAME="${USERNAME:-${REGISTRY_USERNAME:-}}"
PASSWORD="${PASSWORD:-${REGISTRY_PASSWORD:-}}"
CLIENT_ID="${CLIENT_ID:-frontend-cli}"
WORKFLOW_URL="${WORKFLOW_URL:-${BASE_URL}/asset-orchestrator/v1/workflowDefinitions}"
ONLY_ACTIVE="${ONLY_ACTIVE:-true}"

TOKEN_URL="${AUTH_BASE}/realms/${REALM}/protocol/openid-connect/token"

# Print a single diagnostic line to stderr.
error() {
  printf "%s\n" "$1" >&2
}

command -v jq >/dev/null 2>&1 || {
  error "jq is required"
  exit 1
}

command -v curl >/dev/null 2>&1 || {
  error "curl is required"
  exit 1
}

# Validate the credentials before any network call, so a missing secret is
# reported as such instead of as a failed token request.
if [ -z "${USERNAME}" ] || [ -z "${PASSWORD}" ]; then
  error "USERNAME/PASSWORD (or REGISTRY_USERNAME/REGISTRY_PASSWORD) must be set"
  exit 1
fi

TOKEN_RESPONSE=$(curl -sS -X POST "${TOKEN_URL}" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  --data-urlencode "grant_type=password" \
  --data-urlencode "client_id=${CLIENT_ID}" \
  --data-urlencode "username=${USERNAME}" \
  --data-urlencode "password=${PASSWORD}")

# A reply that is not JSON (for example an HTML error page) must end in the
# explicit message below rather than in a raw jq parse error.
ACCESS_TOKEN=$(printf '%s' "$TOKEN_RESPONSE" | jq -r '.access_token // empty' 2>/dev/null) ||
  ACCESS_TOKEN=""

if [ -z "$ACCESS_TOKEN" ]; then
  error "Failed to obtain access token"
  exit 1
fi

TMP_BODY=$(mktemp)
trap 'rm -f "$TMP_BODY"' EXIT

# --get turns the url-encoded data into a query string and issues a GET.
# The body goes to the temp file; only the status code is captured.
HTTP_STATUS=$(curl -sS -o "$TMP_BODY" -w "%{http_code}" \
  --get "${WORKFLOW_URL}" \
  --data-urlencode "onlyActive=${ONLY_ACTIVE}" \
  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
  -H "Accept: application/json")

if [ "$HTTP_STATUS" -lt 200 ] || [ "$HTTP_STATUS" -ge 300 ]; then
  error "Workflow API call failed with HTTP ${HTTP_STATUS}"
  exit 1
fi

# Remove invalid ASCII control chars except TAB/LF/CR, then extract unique job names.
# The body is streamed straight from the temp file; names are trimmed and
# de-duplicated in first-seen order.
tr -d '\000-\010\013\014\016-\037' < "$TMP_BODY" \
  | jq -r '.. | objects | .jobName? // empty' \
  | sed 's/^[[:space:]]*//; s/[[:space:]]*$//' \
  | awk 'length($0) > 0 && !seen[$0]++'
|
||||||
|
|
||||||
79
.gitea/workflows/list_jobs.sh
Normal file
79
.gitea/workflows/list_jobs.sh
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Setup for the deleted-jobs check: strict mode, configuration, scratch
# directories and the exit-time cleanup handler.

set -euo pipefail

# --- Configuration (override with env vars in CI) ---
DIFF_OUTPUT="${DIFF_OUTPUT:-deleted_workflows.txt}"
BASE_REF="${BASE_REF:-${GITHUB_BASE_REF:-main}}"
HEAD_REF="${HEAD_REF:-${GITHUB_HEAD_REF:-HEAD}}"
REPOSITORY_FILE="${REPOSITORY_FILE:-src/template_code_location/repository.py}"
FAIL_ON_DELETION="${FAIL_ON_DELETION:-true}"

# Scratch area: one worktree per ref plus the two job lists.
TMP_DIR="$(mktemp -d)"
BASE_DIR="${TMP_DIR}/base"
HEAD_DIR="${TMP_DIR}/head"
BASE_JOBS_FILE="${TMP_DIR}/base_jobs.txt"
HEAD_JOBS_FILE="${TMP_DIR}/head_jobs.txt"

# Assign first and export afterwards: `export VAR="$(cmd)"` returns the
# status of `export`, which would hide a failing mktemp from `set -e`.
DAGSTER_HOME="$(mktemp -d)"
export DAGSTER_HOME

# Remove both worktrees (best effort, they may not exist yet) and delete the
# scratch directories. Installed as the EXIT trap below.
cleanup() {
  git worktree remove -f "${BASE_DIR}" >/dev/null 2>&1 || true
  git worktree remove -f "${HEAD_DIR}" >/dev/null 2>&1 || true
  rm -rf "${TMP_DIR}" "${DAGSTER_HOME}"
}
trap cleanup EXIT
|
||||||
|
|
||||||
|
#######################################
# Print the sorted, de-duplicated Dagster job names defined in a checkout.
# Runs `dagster job list` inside the given directory with its `src` folder
# prepended to PYTHONPATH and keeps the second field of every `Job: ` line.
# Globals:   REPOSITORY_FILE (read), PYTHONPATH (read)
# Arguments: $1 - path of the checkout to inspect
# Outputs:   one job name per line on stdout; on failure, a diagnostic and
#            dagster's own stderr on stderr
# Returns:   0 on success (also when no job is defined), otherwise the exit
#            status of the failing `cd`/`dagster` command
#######################################
get_jobs_for_ref() {
  local workdir="$1"
  local listing
  local err_file
  local status=0

  err_file="$(mktemp)" || return 1

  # dagster's stderr is noisy even on success, so it is parked in a file and
  # only replayed when the command actually fails.
  listing="$(
    cd "${workdir}" &&
      PYTHONPATH="${workdir}/src${PYTHONPATH:+:${PYTHONPATH}}" \
        dagster job list -f "${REPOSITORY_FILE}" 2>"${err_file}"
  )" || status=$?

  if [ "${status}" -ne 0 ]; then
    printf 'dagster job list failed in %s (exit %s):\n' "${workdir}" "${status}" >&2
    cat "${err_file}" >&2
    rm -f "${err_file}"
    return "${status}"
  fi
  rm -f "${err_file}"

  # awk exits 0 when nothing matches, so an empty job list is not an error.
  printf '%s\n' "${listing}" | awk '/^Job: / { print $2 }' | sort -u
}
|
||||||
|
|
||||||
|
# --- Main flow: fetch refs, check out both sides, diff their job lists ---
echo "Fetching refs from origin..."
git fetch origin --quiet

# Create a local ref for either side when only the remote one exists.
if ! git rev-parse --verify "${BASE_REF}" >/dev/null 2>&1; then
  git fetch origin --quiet "${BASE_REF}:${BASE_REF}"
fi

if [ "${HEAD_REF}" != "HEAD" ] && ! git rev-parse --verify "${HEAD_REF}" >/dev/null 2>&1; then
  git fetch origin --quiet "${HEAD_REF}:${HEAD_REF}"
fi

echo "Preparing worktrees for ${BASE_REF} and ${HEAD_REF}..."
# Check out both sides as detached commits. Adding a worktree by branch name
# is refused when that branch is already checked out (e.g. a manual run on
# the base branch itself), so neither side may be added by name.
BASE_SHA="$(git rev-parse --verify "${BASE_REF}^{commit}")"
HEAD_SHA="$(git rev-parse --verify "${HEAD_REF}^{commit}")"
git worktree add --quiet --detach "${BASE_DIR}" "${BASE_SHA}"
git worktree add --quiet --detach "${HEAD_DIR}" "${HEAD_SHA}"

# Best effort: a ref whose job list cannot be produced is treated as having
# no jobs, hence the `|| true` on both collections.
echo "Collecting workflows/jobs from ${BASE_REF}..."
get_jobs_for_ref "${BASE_DIR}" > "${BASE_JOBS_FILE}" || true

echo "Collecting workflows/jobs from ${HEAD_REF}..."
get_jobs_for_ref "${HEAD_DIR}" > "${HEAD_JOBS_FILE}" || true

# comm -23: items present in base but missing from head
comm -23 "${BASE_JOBS_FILE}" "${HEAD_JOBS_FILE}" > "${DIFF_OUTPUT}"

if [ -s "${DIFF_OUTPUT}" ]; then
  echo "------------------------------------------------"
  echo "DELETED WORKFLOWS DETECTED"
  echo "The following workflows/jobs exist in ${BASE_REF} but are missing in ${HEAD_REF}:"
  cat "${DIFF_OUTPUT}"
  echo "------------------------------------------------"

  if [ "${FAIL_ON_DELETION}" = "true" ]; then
    exit 1
  fi
else
  echo "No workflows/jobs were deleted in ${HEAD_REF} compared to ${BASE_REF}."
  # Leave no empty report behind; consumers test for the file's presence/size.
  rm -f "${DIFF_OUTPUT}"
fi
|
||||||
@@ -11,16 +11,21 @@ dependencies = [
|
|||||||
"dagster>=1.8.13",
|
"dagster>=1.8.13",
|
||||||
"dagster-postgres>=0.24.13",
|
"dagster-postgres>=0.24.13",
|
||||||
"util-services",
|
"util-services",
|
||||||
|
"data-processing",
|
||||||
|
"dataframe-level-anonymisation",
|
||||||
|
"field-level-pseudo-anonymisation",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.uv]
|
[tool.uv]
|
||||||
exclude-dependencies = ["transformers", "spacy-transformers", "torch"]
|
|
||||||
override-dependencies = [
|
override-dependencies = [
|
||||||
"util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.7.0",
|
"util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.7.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.uv.sources]
|
[tool.uv.sources]
|
||||||
util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.7.0" }
|
util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.7.0" }
|
||||||
|
data-processing = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/data-processing.git", rev = "v0.4.0" }
|
||||||
|
dataframe-level-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/dataframe-level-anonymisation.git", rev = "v0.6.0" }
|
||||||
|
field-level-pseudo-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/field-level-pseudo-anonymisation.git", rev = "v0.7.0" }
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
dev = [
|
dev = [
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
from dagster import Definitions
|
from dagster import Definitions
|
||||||
|
from dataframe_level_anonymisation.jobs import (k_anonymity_job_s3,
|
||||||
|
l_diversity_job_s3,
|
||||||
|
t_closeness_job_s3)
|
||||||
from util_services.custom_json_logger import simpl_json_logger
|
from util_services.custom_json_logger import simpl_json_logger
|
||||||
from util_services.resources import s3_resource
|
from util_services.resources import s3_resource
|
||||||
|
|
||||||
@@ -6,7 +9,10 @@ from template_code_location.jobs.jobs import data_processing_job
|
|||||||
|
|
||||||
defs = Definitions(
|
defs = Definitions(
|
||||||
jobs=[
|
jobs=[
|
||||||
|
data_processing_job,
|
||||||
|
k_anonymity_job_s3,
|
||||||
|
l_diversity_job_s3,
|
||||||
|
t_closeness_job_s3,
|
||||||
],
|
],
|
||||||
sensors=[],
|
sensors=[],
|
||||||
resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},
|
resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},
|
||||||
|
|||||||
Reference in New Issue
Block a user