From b8b3c8c41d6cdae4a2e451088c4cc164e9071925 Mon Sep 17 00:00:00 2001
From: Richard Mrasek
Date: Fri, 12 Jun 2026 10:06:55 +0200
Subject: [PATCH] added deletion check, and added workflows

---
 .gitea/workflows/check-deleted-workflows.yml | 99 +++++++++++++++++++
 .gitea/workflows/check_active_workflows.sh   | 80 +++++++++++++++
 .gitea/workflows/list_jobs.sh                | 96 ++++++++++++++++++
 3 files changed, 275 insertions(+)
 create mode 100644 .gitea/workflows/check-deleted-workflows.yml
 create mode 100644 .gitea/workflows/check_active_workflows.sh
 create mode 100644 .gitea/workflows/list_jobs.sh

diff --git a/.gitea/workflows/check-deleted-workflows.yml b/.gitea/workflows/check-deleted-workflows.yml
new file mode 100644
index 0000000..82fbc41
--- /dev/null
+++ b/.gitea/workflows/check-deleted-workflows.yml
@@ -0,0 +1,99 @@
+name: Check Deleted Workflows
+
+on:
+  pull_request:
+    branches:
+      - main
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - ready_for_review
+  workflow_dispatch:
+
+jobs:
+  check-deleted-workflows:
+    runs-on: orchestration-platform
+    defaults:
+      run:
+        shell: sh
+    steps:
+      - name: Checkout repository
+        run: |
+          REPO_DIR="repo"
+          REPO_CLONE_URL="https://gitea.dataprovider01.sandbox-cat-dat.simpl-europe.eu/dataprovider01/template-code-location.git"
+          CLONE_USER="${{ secrets.REGISTRY_USERNAME }}"
+          CLONE_PASS="${{ secrets.REGISTRY_PASSWORD }}"
+
+          if [ -z "${CLONE_USER}" ] || [ -z "${CLONE_PASS}" ]; then
+            echo "Missing REGISTRY_USERNAME or REGISTRY_PASSWORD secret"
+            exit 1
+          fi
+
+          rm -rf "${REPO_DIR}"
+          AUTH_HEADER="$(printf '%s:%s' "${CLONE_USER}" "${CLONE_PASS}" | base64 | tr -d '\n')"
+          # --depth must be a positive integer (git rejects 0). list_jobs.sh
+          # fetches the refs it compares, so a shallow clone is enough here.
+          git clone --depth 1 \
+            -c "http.extraHeader=Authorization: Basic ${AUTH_HEADER}" \
+            "${REPO_CLONE_URL}" \
+            "${REPO_DIR}"
+
+      - name: Install runtime tools
+        run: |
+          apk add --no-cache bash git python3 py3-pip jq curl
+          pip install --no-cache-dir uv
+
+      - name: Install project dependencies
+        run: |
+          cd repo
+          uv sync --frozen --no-dev --no-install-package torch
+
+      - name: Compute deleted workflows/jobs against main
+        run: |
+          cd repo
+          PATH="$PWD/.venv/bin:$PATH" \
+          BASE_REF="${GITHUB_BASE_REF:-main}" \
+          HEAD_REF="${GITHUB_HEAD_REF:-HEAD}" \
+          REPOSITORY_FILE="src/template_code_location/repository.py" \
+          DIFF_OUTPUT="deleted_workflows.txt" \
+          FAIL_ON_DELETION="false" \
+          bash .gitea/workflows/list_jobs.sh
+
+      - name: Compute active workflows list
+        run: |
+          cd repo
+          PATH="$PWD/.venv/bin:$PATH" \
+          ONLY_ACTIVE="true" \
+          REGISTRY_USERNAME="${{ secrets.REGISTRY_USERNAME }}" \
+          REGISTRY_PASSWORD="${{ secrets.REGISTRY_PASSWORD }}" \
+          bash .gitea/workflows/check_active_workflows.sh > active_workflows.txt
+
+      - name: Fail only on overlap with active workflows
+        run: |
+          cd repo
+
+          if [ ! -s deleted_workflows.txt ]; then
+            echo "No deleted workflows/jobs found."
+            exit 0
+          fi
+
+          if [ ! -s active_workflows.txt ]; then
+            echo "Active workflows list is empty; no overlap to block on."
+            exit 0
+          fi
+
+          awk 'length($0) > 0' deleted_workflows.txt | sort -u > deleted_normalized.txt
+          awk 'length($0) > 0' active_workflows.txt | sort -u > active_normalized.txt
+          comm -12 deleted_normalized.txt active_normalized.txt > overlapping_workflows.txt
+
+          if [ -s overlapping_workflows.txt ]; then
+            echo "------------------------------------------------"
+            echo "DELETED ACTIVE WORKFLOWS DETECTED"
+            echo "The following deleted workflows/jobs are currently active:"
+            cat overlapping_workflows.txt
+            echo "------------------------------------------------"
+            exit 1
+          fi
+
+          echo "No overlap between deleted workflows/jobs and active workflows."
diff --git a/.gitea/workflows/check_active_workflows.sh b/.gitea/workflows/check_active_workflows.sh
new file mode 100644
index 0000000..79ace5e
--- /dev/null
+++ b/.gitea/workflows/check_active_workflows.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+#
+# Print the unique jobName values found anywhere in the JSON response of
+# WORKFLOW_URL (asset-orchestrator workflow definitions), one per line.
+#
+# Environment:
+#   USERNAME / PASSWORD  credentials for the token request (required);
+#                        default to REGISTRY_USERNAME / REGISTRY_PASSWORD
+#   BASE_URL, AUTH_BASE, REALM, CLIENT_ID, WORKFLOW_URL  endpoint overrides
+#   ONLY_ACTIVE          value sent as the onlyActive query parameter
+#
+# Diagnostics go to stderr so stdout can be redirected into a file.
+set -euo pipefail
+
+# Default configuration (override via env vars).
+BASE_URL="${BASE_URL:-https://participant.be.dataprovider01.sandbox-cat-dat.simpl-europe.eu}"
+AUTH_BASE="${AUTH_BASE:-${BASE_URL}/auth}"
+REALM="${REALM:-participant}"
+USERNAME="${USERNAME:-${REGISTRY_USERNAME:-}}"
+PASSWORD="${PASSWORD:-${REGISTRY_PASSWORD:-}}"
+CLIENT_ID="${CLIENT_ID:-frontend-cli}"
+WORKFLOW_URL="${WORKFLOW_URL:-${BASE_URL}/asset-orchestrator/v1/workflowDefinitions}"
+ONLY_ACTIVE="${ONLY_ACTIVE:-true}"
+
+TOKEN_URL="${AUTH_BASE}/realms/${REALM}/protocol/openid-connect/token"
+
+# Print a diagnostic message ($1) on stderr.
+error() {
+  printf "%s\n" "$1" >&2
+}
+
+command -v jq >/dev/null 2>&1 || {
+  error "jq is required"
+  exit 1
+}
+
+# Validate credentials before contacting the auth server, so a missing secret
+# is reported as such instead of as a failed token request.
+if [ -z "${USERNAME:-}" ] || [ -z "${PASSWORD:-}" ]; then
+  error "USERNAME/PASSWORD (or REGISTRY_USERNAME/REGISTRY_PASSWORD) must be set"
+  exit 1
+fi
+
+TOKEN_RESPONSE=$(curl -sS -X POST "${TOKEN_URL}" \
+  -H "Content-Type: application/x-www-form-urlencoded" \
+  --data-urlencode "grant_type=password" \
+  --data-urlencode "client_id=${CLIENT_ID}" \
+  --data-urlencode "username=${USERNAME}" \
+  --data-urlencode "password=${PASSWORD}")
+
+ACCESS_TOKEN=$(printf '%s' "$TOKEN_RESPONSE" | jq -r '.access_token // empty')
+
+if [ -z "$ACCESS_TOKEN" ]; then
+  error "Failed to obtain access token"
+  exit 1
+fi
+
+TMP_BODY=$(mktemp)
+trap 'rm -f "$TMP_BODY"' EXIT
+
+# The response body goes to a temp file; curl's stdout carries only the status.
+HTTP_STATUS=$(curl -sS -o "$TMP_BODY" -w "%{http_code}" \
+  -X GET --get "${WORKFLOW_URL}" \
+  --data-urlencode "onlyActive=${ONLY_ACTIVE}" \
+  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
+  -H "Accept: application/json")
+
+BODY=$(cat "$TMP_BODY")
+
+if [ "$HTTP_STATUS" -lt 200 ] || [ "$HTTP_STATUS" -ge 300 ]; then
+  error "Workflow API call failed with HTTP ${HTTP_STATUS}"
+  exit 1
+fi
+
+# Remove invalid ASCII control chars except TAB/LF/CR, then extract unique job names.
+printf '%s' "$BODY" \
+  | tr -d '\000-\010\013\014\016-\037' \
+  | jq -r '.. | objects | .jobName? // empty' \
+  | sed 's/^[[:space:]]*//; s/[[:space:]]*$//' \
+  | awk 'length($0) > 0 && !seen[$0]++'
diff --git a/.gitea/workflows/list_jobs.sh b/.gitea/workflows/list_jobs.sh
new file mode 100644
index 0000000..a77b7bc
--- /dev/null
+++ b/.gitea/workflows/list_jobs.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+#
+# List the Dagster jobs that exist on BASE_REF but are missing on HEAD_REF and
+# write them, one per line, to DIFF_OUTPUT.
+#
+# Environment (all optional):
+#   DIFF_OUTPUT       output file (default: deleted_workflows.txt)
+#   BASE_REF          ref to compare against (default: GITHUB_BASE_REF or main)
+#   HEAD_REF          ref under test (default: GITHUB_HEAD_REF or HEAD)
+#   REPOSITORY_FILE   file passed to `dagster job list -f`
+#   FAIL_ON_DELETION  "true" (default) exits 1 when deletions are found
+
+set -euo pipefail
+
+# --- Configuration (override with env vars in CI) ---
+DIFF_OUTPUT="${DIFF_OUTPUT:-deleted_workflows.txt}"
+BASE_REF="${BASE_REF:-${GITHUB_BASE_REF:-main}}"
+HEAD_REF="${HEAD_REF:-${GITHUB_HEAD_REF:-HEAD}}"
+REPOSITORY_FILE="${REPOSITORY_FILE:-src/template_code_location/repository.py}"
+FAIL_ON_DELETION="${FAIL_ON_DELETION:-true}"
+
+TMP_DIR="$(mktemp -d)"
+BASE_DIR="${TMP_DIR}/base"
+HEAD_DIR="${TMP_DIR}/head"
+BASE_JOBS_FILE="${TMP_DIR}/base_jobs.txt"
+HEAD_JOBS_FILE="${TMP_DIR}/head_jobs.txt"
+
+# Assign and export separately so a mktemp failure is not masked by `export`.
+DAGSTER_HOME="$(mktemp -d)"
+export DAGSTER_HOME
+
+# Remove the temporary worktrees and directories on every exit path.
+cleanup() {
+  git worktree remove -f "${BASE_DIR}" >/dev/null 2>&1 || true
+  git worktree remove -f "${HEAD_DIR}" >/dev/null 2>&1 || true
+  rm -rf "${TMP_DIR}" "${DAGSTER_HOME}"
+}
+trap cleanup EXIT
+
+# Print the sorted, unique job names that `dagster job list` reports for the
+# checkout in $1. Runs in a subshell so the caller's working directory is kept.
+get_jobs_for_ref() {
+  local workdir="$1"
+
+  (
+    cd "${workdir}"
+    PYTHONPATH="${workdir}/src${PYTHONPATH:+:${PYTHONPATH}}" \
+      dagster job list -f "${REPOSITORY_FILE}" 2>/dev/null | \
+      grep '^Job: ' | \
+      awk '{print $2}' | \
+      sort -u
+  )
+}
+
+echo "Fetching refs from origin..."
+git fetch origin --quiet
+
+if ! git rev-parse --verify "${BASE_REF}" >/dev/null 2>&1; then
+  git fetch origin --quiet "${BASE_REF}:${BASE_REF}"
+fi
+
+if [ "${HEAD_REF}" != "HEAD" ] && ! git rev-parse --verify "${HEAD_REF}" >/dev/null 2>&1; then
+  git fetch origin --quiet "${HEAD_REF}:${HEAD_REF}"
+fi
+
+# --detach: git refuses to check out a branch that is already checked out in
+# the main worktree (e.g. BASE_REF on a fresh clone of the default branch).
+echo "Preparing worktrees for ${BASE_REF} and ${HEAD_REF}..."
+git worktree add --quiet --detach "${BASE_DIR}" "${BASE_REF}"
+git worktree add --quiet --detach "${HEAD_DIR}" "${HEAD_REF}"
+
+# `|| true` is best effort: under pipefail the pipeline fails when grep finds
+# no "Job: " line. It also hides a failing dagster run (stderr is discarded).
+echo "Collecting workflows/jobs from ${BASE_REF}..."
+get_jobs_for_ref "${BASE_DIR}" > "${BASE_JOBS_FILE}" || true
+
+echo "Collecting workflows/jobs from ${HEAD_REF}..."
+get_jobs_for_ref "${HEAD_DIR}" > "${HEAD_JOBS_FILE}" || true
+
+# comm -23: items present in base but missing from head
+comm -23 "${BASE_JOBS_FILE}" "${HEAD_JOBS_FILE}" > "${DIFF_OUTPUT}"
+
+if [ -s "${DIFF_OUTPUT}" ]; then
+  echo "------------------------------------------------"
+  echo "DELETED WORKFLOWS DETECTED"
+  echo "The following workflows/jobs exist in ${BASE_REF} but are missing in ${HEAD_REF}:"
+  cat "${DIFF_OUTPUT}"
+  echo "------------------------------------------------"
+
+  if [ "${FAIL_ON_DELETION}" = "true" ]; then
+    exit 1
+  fi
+else
+  echo "No workflows/jobs were deleted in ${HEAD_REF} compared to ${BASE_REF}."
+  rm -f "${DIFF_OUTPUT}"
+fi