From 4e0b216410fc0d0879b583047fc7ba1f0390c612 Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Fri, 24 Apr 2026 18:38:12 +0200
Subject: [PATCH 01/15] feat(SIMPL-24642): consolidate all code locations into
 template-code-location

- Rename src/template-code-location to src/template_code_location
- Copy data-processing jobs/ops/config_models
- Copy dataframe-level-anonymisation jobs/ops/utils/config_models
- Copy field-level-pseudo-anonymisation jobs/ops/techniques/config_models
- Update all imports to template_code_location namespace
- Merge all jobs into unified repository.py with sensors/resources/loggers
- Update pyproject.toml with all dependencies
- Update Dockerfile for consolidated image
---
 Dockerfile                                    |  11 +-
 pyproject.toml                                |  28 +-
 src/template-code-location/repository.py      |   6 -
 .../__init__.py                               |   0
 .../data_processing}/__init__.py              |   0
 .../data_processing/config_models/__init__.py |  18 +
 .../aggregation_configuration.py              |  25 +
 .../columns_select_configuration.py           |  17 +
 ...coordinates_normalization_configuration.py |  22 +
 .../config_models/fill_missing_config.py      |   9 +
 .../config_models/filter_configuration.py     |  52 +++
 .../spell_check_configuration.py              |   8 +
 .../data_processing/jobs.py                   | 119 +++++
 .../data_processing/ops.py                    | 256 +++++++++++
 .../__init__.py                               |   0
 .../config_models/__init__.py                 |  13 +
 .../config_models/base_config.py              |  33 ++
 .../config_models/hierarchies.py              |  18 +
 .../k_anonymity_configuration.py              |  11 +
 .../l_diversity_configuration.py              |   8 +
 .../t_closeness_configuration.py              |   8 +
 .../dataframe_level_anonymisation/jobs.py     |  86 ++++
 .../dataframe_level_anonymisation/ops.py      | 187 ++++++++
 .../dataframe_level_anonymisation/utils.py    |  19 +
 .../__init__.py                               |   0
 .../config_models/__init__.py                 |  28 ++
 .../config_models/languages.py                |  72 +++
 .../config_models/pii_entities.py             |  24 +
 .../config_models/structured_config.py        | 110 +++++
 .../config_models/unstructured_config.py      | 115 +++++
 .../field_level_pseudo_anonymisation/jobs.py  | 126 ++++++
 .../field_level_pseudo_anonymisation/ops.py   |  77 ++++
 .../techniques/__init__.py                    |   3 +
 ...onymisation_pseudonymisation_techniques.py |  42 ++
 .../depseudonymisation_techniques.py          |   9 +
 .../unstructured_ops.py                       | 428 ++++++++++++++++++
 .../field_level_pseudo_anonymisation/utils.py |  32 ++
 src/template_code_location/jobs/__init__.py   |   0
 .../jobs/jobs.py                              |   0
 src/template_code_location/ops/__init__.py    |   0
 .../ops/ops.py                                |   0
 src/template_code_location/repository.py      |  65 +++
 42 files changed, 2071 insertions(+), 14 deletions(-)
 delete mode 100644 src/template-code-location/repository.py
 rename src/{template-code-location => template_code_location}/__init__.py (100%)
 rename src/{template-code-location/jobs => template_code_location/data_processing}/__init__.py (100%)
 create mode 100644 src/template_code_location/data_processing/config_models/__init__.py
 create mode 100644 src/template_code_location/data_processing/config_models/aggregation_configuration.py
 create mode 100644 src/template_code_location/data_processing/config_models/columns_select_configuration.py
 create mode 100644 src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py
 create mode 100644 src/template_code_location/data_processing/config_models/fill_missing_config.py
 create mode 100644 src/template_code_location/data_processing/config_models/filter_configuration.py
 create mode 100644 src/template_code_location/data_processing/config_models/spell_check_configuration.py
 create mode 100644 src/template_code_location/data_processing/jobs.py
 create mode 100644 src/template_code_location/data_processing/ops.py
 rename src/{template-code-location/ops => template_code_location/dataframe_level_anonymisation}/__init__.py (100%)
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/jobs.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/ops.py
 create mode 100644 src/template_code_location/dataframe_level_anonymisation/utils.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/__init__.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/jobs.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/ops.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py
 create mode 100644 src/template_code_location/field_level_pseudo_anonymisation/utils.py
 create mode 100644 src/template_code_location/jobs/__init__.py
 rename src/{template-code-location => template_code_location}/jobs/jobs.py (100%)
 create mode 100644 src/template_code_location/ops/__init__.py
 rename src/{template-code-location => template_code_location}/ops/ops.py (100%)
 create mode 100644 src/template_code_location/repository.py

diff --git a/Dockerfile b/Dockerfile
index b61745b..fd4e780 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,13 +1,16 @@
 FROM python:3.12-slim-bookworm
 
+# Install git for git-based dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
+
 WORKDIR /app
 
 COPY pyproject.toml .
-RUN pip install --no-cache-dir dagster dagster-webserver
-
 COPY src/ src/
+
+# Install the package and all dependencies
 RUN pip install --no-cache-dir .
 
-EXPOSE 3000
+EXPOSE 4000
 
-CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "3000", "-m", "template-code-location.repository"]
+CMD ["dagster", "code-server", "start", "-h", "0.0.0.0", "-p", "4000", "-f", "src/template_code_location/repository.py"]
diff --git a/pyproject.toml b/pyproject.toml
index ca2cdc0..3b2741f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,23 +4,43 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "template-code-location"
-version = "0.0.1"
-description = "Template code location for data processings services"
+version = "0.1.0"
+description = "Consolidated code location for all data services workflows"
 requires-python = ">=3.12"
 dependencies = [
+    # Dagster core
     "dagster>=1.8.13",
     "dagster-webserver>=1.8.13",
     "dagster-postgres>=0.24.13",
-    "pandas>=3.0",
+    # Data processing
+    "pandas>=2.1.4",
     "pyarrow>=23.0",
+    "numpy>=2.4",
     "lxml>=6.0",
     "xmltodict>=1.0",
     "rdflib>=7.6",
-    "numpy>=2.4",
+    "openpyxl",
+    "xlrd>=2.0.1",
+    "tabulate==0.8.10",
+    "pyspellchecker>=0.8.4",
+    "PyGeodesy>=24.6.11",
+    # Validation
     "great_expectations>=1.16",
     "pandera>=0.31",
+    "pydantic>=2.6.0,<3.0.0",
+    # Scraping
     "scrapy>=2.15",
     "BeautifulSoup4>=4.14",
+    # Anonymisation libraries
+    "pycanon==1.0.1.post2",
+    "anjana>=1.0.0",
+    # Field-level pseudo-anonymisation
+    "scrubadub",
+    "scrubadub_spacy",
+    "hvac",
+    "cryptography",
+    # Util services (git dependency)
+    "util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.4.1",
 ]
 
 [project.optional-dependencies]
diff --git a/src/template-code-location/repository.py b/src/template-code-location/repository.py
deleted file mode 100644
index 10c73e6..0000000
--- a/src/template-code-location/repository.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from dagster import Definitions
-from .jobs.jobs import data_processing_job
-
-defs = Definitions(
-    jobs=[data_processing_job],
-)
diff --git a/src/template-code-location/__init__.py b/src/template_code_location/__init__.py
similarity index 100%
rename from src/template-code-location/__init__.py
rename to src/template_code_location/__init__.py
diff --git a/src/template-code-location/jobs/__init__.py b/src/template_code_location/data_processing/__init__.py
similarity index 100%
rename from src/template-code-location/jobs/__init__.py
rename to src/template_code_location/data_processing/__init__.py
diff --git a/src/template_code_location/data_processing/config_models/__init__.py b/src/template_code_location/data_processing/config_models/__init__.py
new file mode 100644
index 0000000..5833cab
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/__init__.py
@@ -0,0 +1,18 @@
+"""Configuration models for data processing."""
+
+from .columns_select_configuration import ColumnsSelectConfiguration
+from .fill_missing_config import FillMissingConfiguration
+from .spell_check_configuration import SpellCheckConfiguration
+from .coordinates_normalization_configuration import CoordinatesNormalizationConfiguration
+from .aggregation_configuration import AggregationConfiguration
+from .filter_configuration import DatasetFilterConfiguration, FilterCondition
+
+__all__ = [
+    "ColumnsSelectConfiguration",
+    "FillMissingConfiguration",
+    "SpellCheckConfiguration",
+    "CoordinatesNormalizationConfiguration",
+    "AggregationConfiguration",
+    "FilterCondition",
+    "DatasetFilterConfiguration"
+]
diff --git a/src/template_code_location/data_processing/config_models/aggregation_configuration.py b/src/template_code_location/data_processing/config_models/aggregation_configuration.py
new file mode 100644
index 0000000..553740f
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/aggregation_configuration.py
@@ -0,0 +1,25 @@
+from typing import List
+
+from pydantic import Field, field_validator
+
+from .columns_select_configuration import ColumnsSelectConfiguration
+
+
+class AggregationConfiguration(ColumnsSelectConfiguration):
+    """Column selection plus the name of the aggregation operation to apply."""
+    operation: str = Field(
+        default="sum",
+        description="Aggregation operations: sum, mean, min, max, count"
+    )
+
+    @field_validator("operation")
+    @classmethod
+    def validate_operations(cls, value):
+        """Reject unsupported operations; the error lists the allowed ones in a fixed order."""
+        allowed = ("sum", "mean", "min", "max", "count")
+        if value not in allowed:
+            raise ValueError(
+                f"Invalid aggregation operation '{value}'. "
+                f"Allowed values: {', '.join(allowed)}"
+            )
+        return value
diff --git a/src/template_code_location/data_processing/config_models/columns_select_configuration.py b/src/template_code_location/data_processing/config_models/columns_select_configuration.py
new file mode 100644
index 0000000..658450d
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/columns_select_configuration.py
@@ -0,0 +1,17 @@
+from typing import List
+from pydantic import Field,field_validator
+from dagster import Config
+
+
+class ColumnsSelectConfiguration(Config):
+    """Op configuration carrying the list of column names to act on."""
+
+    columns: List[str] = Field(
+        default=["Name"], description="List of columns to process."
+    )
+
+    @field_validator("columns")
+    @classmethod
+    def ensure_unique_columns(cls, v: List[str]) -> List[str]:
+        """Drop repeated column names, keeping first-occurrence order."""
+        return [*dict.fromkeys(v)]
diff --git a/src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py b/src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py
new file mode 100644
index 0000000..64342e4
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py
@@ -0,0 +1,22 @@
+from typing import Optional
+
+from pydantic import Field, model_validator
+from dagster import Config
+
+
+class CoordinatesNormalizationConfiguration(Config):
+    """Names of the latitude/longitude columns; nulls fall back to the defaults."""
+    latColumn: Optional[str] = Field(default="lat", description="Latitude column name")
+    lonColumn: Optional[str] = Field(default="lon", description="Longitude column name")
+
+    @model_validator(mode="before")
+    @classmethod
+    def replace_nulls_with_defaults(cls, values):
+        """Substitute "lat"/"lon" for column names that are missing or null."""
+        if not isinstance(values, dict):
+            # Non-dict raw input is left for pydantic's own validation to report.
+            return values
+        # Build a new dict so the caller's input mapping is never mutated.
+        defaults = {"latColumn": "lat", "lonColumn": "lon"}
+        overrides = {k: d for k, d in defaults.items() if values.get(k) is None}
+        return {**values, **overrides}
diff --git a/src/template_code_location/data_processing/config_models/fill_missing_config.py b/src/template_code_location/data_processing/config_models/fill_missing_config.py
new file mode 100644
index 0000000..4c9e5b2
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/fill_missing_config.py
@@ -0,0 +1,9 @@
+from typing import Dict
+from dagster import Config
+from pydantic import Field
+
+
+class FillMissingConfiguration(Config):
+    """Op configuration holding the fill values used for missing data."""
+
+    fill_map: Dict[str, str] = Field(default={"Age": "UNKNOWN_AGE"}, description="Missing values filling map.")
diff --git a/src/template_code_location/data_processing/config_models/filter_configuration.py b/src/template_code_location/data_processing/config_models/filter_configuration.py
new file mode 100644
index 0000000..86bde37
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/filter_configuration.py
@@ -0,0 +1,52 @@
+from enum import Enum
+import operator
+from typing import List, Literal, Callable
+from pydantic import Field, model_validator
+from dagster import Config
+import pandas as pd
+
+class FilterOperator(str, Enum):
+    """Comparison operators accepted by filter conditions.
+
+    Each member's value is the operator symbol; ``function`` gives the
+    matching callable from the standard ``operator`` module.
+    """
+
+    EQ = "=="
+    NE = "!="
+    LT = "<"
+    LE = "<="
+    GT = ">"
+    GE = ">="
+
+    @property
+    def function(self) -> Callable:
+        """Return the two-argument comparison callable for this member."""
+        # Member names mirror the ``operator`` module: EQ -> eq, LT -> lt, ...
+        return getattr(operator, self.name.lower())
+
+class FilterCondition(Config):
+    """A single column comparison; ``apply`` turns it into a boolean row mask."""
+    column: str = Field(..., description="Name of the column to filter")
+    type: Literal["string", "numeric"] = Field(..., description="Column type (string or numeric)")
+    value: str = Field(..., description="Value to compare against")
+    op: FilterOperator = Field(default=FilterOperator.EQ, description="Operator to apply (string supports only EQ and NE)")
+
+    @model_validator(mode="after")
+    def check_operator_compatibility(self) -> "FilterCondition":
+        """Reject ordering operators on strings and unparseable numeric values."""
+        if self.type == "string" and self.op not in (FilterOperator.EQ, FilterOperator.NE):
+            raise ValueError(f"Invalid operator '{self.op.name}' for type 'string'. Only EQ (==) and NE (!=) are allowed.")
+        if self.type == "numeric":
+            float(self.value)  # ValueError surfaces at config time, not inside apply() mid-run
+        return self
+
+    def apply(self, df: pd.DataFrame) -> pd.Series:
+        return self.op.function(df[self.column], float(self.value) if self.type == "numeric" else self.value)
+
+class DatasetFilterConfiguration(Config):
+    """Op configuration listing the filter conditions for a dataset."""
+    conditions: List[FilterCondition] = Field(
+        default=[],
+        description="List of filter conditions to apply on the dataset. String columns support only 'EQ' and 'NE', numeric columns also support 'LT', 'LE', 'GT' and 'GE'.",
+    )
diff --git a/src/template_code_location/data_processing/config_models/spell_check_configuration.py b/src/template_code_location/data_processing/config_models/spell_check_configuration.py
new file mode 100644
index 0000000..7a12f87
--- /dev/null
+++ b/src/template_code_location/data_processing/config_models/spell_check_configuration.py
@@ -0,0 +1,8 @@
+from typing import Literal
+from pydantic import Field
+
+from .columns_select_configuration import ColumnsSelectConfiguration
+
+
+class SpellCheckConfiguration(ColumnsSelectConfiguration):  # inherits `columns`; adds the SpellChecker language
+    language: Literal["en", "es", "it", "fr", "pt", "de", "nl"] = Field(default="en", description="Language to use in the SpellChecker module.")
diff --git a/src/template_code_location/data_processing/jobs.py b/src/template_code_location/data_processing/jobs.py
new file mode 100644
index 0000000..54fb939
--- /dev/null
+++ b/src/template_code_location/data_processing/jobs.py
@@ -0,0 +1,119 @@
+from dagster import job
+from util_services.util_ops import (
+    preview_dataframe,
+    read_csv_from_s3,
+    write_csv_to_s3,
+)
+from .ops import (
+    remove_duplicates,
+    fill_missing_values,
+    standardize_categorical_values,
+    correct_typos,
+    normalize_numeric_min_max,
+    normalize_datetime,
+    normalize_coordinates,
+    add_global_aggregations,
+    filter_dataset
+)
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def remove_duplicates_job_s3():
+    """Wire read_csv_from_s3 -> remove_duplicates -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = remove_duplicates(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def fill_missing_values_job_s3():
+    """Wire read_csv_from_s3 -> fill_missing_values -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = fill_missing_values(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def standardize_categorical_values_job_s3():
+    """Wire read_csv_from_s3 -> standardize_categorical_values -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = standardize_categorical_values(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def correct_typos_job_s3():
+    """Wire read_csv_from_s3 -> correct_typos -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = correct_typos(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def normalize_numeric_min_max_job_s3():
+    """Wire read_csv_from_s3 -> normalize_numeric_min_max -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = normalize_numeric_min_max(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def normalize_datetime_job_s3():
+    """Wire read_csv_from_s3 -> normalize_datetime -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = normalize_datetime(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def normalize_coordinates_job_s3():
+    """Wire read_csv_from_s3 -> normalize_coordinates -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = normalize_coordinates(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def add_global_aggregations_job_s3():
+    """Wire read_csv_from_s3 -> add_global_aggregations -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = add_global_aggregations(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
+
+@job(
+    tags={"business_operation": "PROCESSING", "resource_type": "RD_DATA"},
+)
+def filter_dataset_job_s3():
+    """Wire read_csv_from_s3 -> filter_dataset -> write_csv_to_s3, previewing input and output."""
+    source_df = read_csv_from_s3()
+    processed_df = filter_dataset(source_df)
+    preview_dataframe(source_df)
+    write_csv_to_s3(processed_df)
+    preview_dataframe(processed_df)
diff --git a/src/template_code_location/data_processing/ops.py b/src/template_code_location/data_processing/ops.py
new file mode 100644
index 0000000..e380cb8
--- /dev/null
+++ b/src/template_code_location/data_processing/ops.py
@@ -0,0 +1,256 @@
+import pandas as pd
+from dagster import Out, op
+from spellchecker import SpellChecker
+
+from template_code_location.data_processing.config_models import (
+    AggregationConfiguration,
+    ColumnsSelectConfiguration,
+    CoordinatesNormalizationConfiguration,
+    FillMissingConfiguration,
+    SpellCheckConfiguration,
+    DatasetFilterConfiguration
+)
+
+
+def _parse_dms_to_decimal(value):
+    """Convert one coordinate cell to decimal degrees.
+
+    The text is first given to PyGeodesy's ``parseDMS``; if that raises, a
+    plain ``float()`` conversion is tried. Supported formats include (but are
+    not limited to):
+        - 40°26'46"N / 40°26′46″N
+        - 40 26 46 N
+        - 40:26:46N
+        - 40d26m46sN
+        - -40.446  (already decimal – returned as-is)
+
+    Returns the value as ``float``, or ``None`` for missing, blank or unparseable input.
+    """
+    from pygeodesy.dms import parseDMS
+
+    if pd.isna(value):
+        return None
+    text = str(value).strip()
+    if not text:
+        return None
+
+    for parser in (parseDMS, float):
+        try:
+            return float(parser(text))
+        except (ValueError, TypeError):
+            continue
+    return None
+
+
+@op(out={"data": Out()})
+def remove_duplicates(context, df: pd.DataFrame):
+    """Drop fully duplicated rows and log how many were removed.
+
+    Args:
+        context: Dagster op context; only ``context.log`` is used.
+        df: Frame to deduplicate; it is not modified in place.
+
+    Returns:
+        A new DataFrame without the duplicate rows.
+    """
+    deduplicated = df.drop_duplicates()
+    context.log.info(f"Removed {len(df) - len(deduplicated)} duplicate rows")
+    return deduplicated
+
+@op(out={"data": Out()})
+def fill_missing_values(context, config: FillMissingConfiguration, df: pd.DataFrame):
+    """Replace missing cells using the per-column values in ``config.fill_map``.
+
+    Returns a new DataFrame; columns absent from the map are left untouched.
+    """
+    context.log.info(f"Filling missing values: {config.fill_map}")
+    return df.fillna(value=config.fill_map)
+
+@op(out={"data": Out()})
+def standardize_categorical_values(context, config: ColumnsSelectConfiguration, df: pd.DataFrame):
+    """Trim whitespace and lowercase the text of the configured columns.
+
+    Missing values become empty strings and every value is cast to ``str``.
+    Configured columns absent from ``df`` are skipped with a warning.
+
+    Returns:
+        A new DataFrame; the input frame is left unmodified.
+    """
+    logger = context.log
+    # Work on a copy: the same upstream output also feeds the preview op in
+    # the jobs, so mutating it in place could leak into that consumer.
+    df = df.copy()
+
+    for col in config.columns:
+        if col not in df.columns:
+            logger.warning(f"Column '{col}' not found in DataFrame, skipping.")
+            continue
+        original = df[col]
+        standardized = original.fillna("").astype(str).str.strip().str.lower()
+        changed_count = (original != standardized).sum()
+        df[col] = standardized
+        logger.info(f"Standardized '{col}' column – {changed_count} values modified")
+
+    return df
+
+@op(out={"data": Out()})
+def correct_typos(context, config: SpellCheckConfiguration, df: pd.DataFrame):
+    """Spell-correct the configured text columns with ``SpellChecker``.
+
+    Values are cast to ``str`` and passed whole to ``correction()``; when it
+    returns nothing the original text is kept. Configured columns absent from
+    ``df`` are skipped with a warning. Returns a new DataFrame.
+    """
+    logger = context.log
+    df = df.copy()  # do not mutate the upstream op's output in place
+    spell = SpellChecker(language=config.language)  # loop-invariant, build once
+    for column in config.columns:
+        if column not in df.columns:
+            logger.warning(f"Column '{column}' not found in DataFrame, skipping.")
+            continue
+        original = df[column].astype(str)
+        # correction() may return None (no candidate); fall back to the input.
+        corrected = original.apply(lambda x: (spell.correction(x) or x) if x else x)
+        logger.info(f"Corrected typos in '{column}' – {(original != corrected).sum()} values modified")
+        df[column] = corrected
+    return df
+
+@op(out={"data": Out()})
+def normalize_datetime(context, config: ColumnsSelectConfiguration, df: pd.DataFrame):
+    """Add a ``<col>_iso`` UTC ISO-8601 string column per configured column.
+
+    Columns that are missing, have no parseable value, or whose parsed values
+    all fall on 1970-01-01 are skipped with a warning.
+
+    Returns:
+        A new DataFrame; the input frame is left unmodified.
+    """
+    logger = context.log
+    df = df.copy()  # do not mutate the upstream op's output in place
+    for col in config.columns:
+        if col not in df.columns:
+            logger.warning(f"Column '{col}' not found, skipping normalization.")
+            continue
+        # Day-first, per-element format inference; unparseable cells become NaT.
+        normalized = pd.to_datetime(df[col], utc=True, format="mixed", dayfirst=True, errors="coerce")
+        if not normalized.notna().any():
+            logger.warning(f"Column '{col}' has no normalizable datetime values, skipping.")
+            continue
+        iso_col = f"{col}_iso"
+        # Cells that failed to parse are emitted as empty strings.
+        formatted = normalized.dt.strftime("%Y-%m-%dT%H:%M:%SZ").fillna("")
+        non_empty = formatted[formatted != ""]
+        if len(non_empty) > 0 and non_empty.str.startswith("1970-01-01").all():
+            logger.warning(f"Column '{col}' all normalized values are '1970-01-01', likely bad input — skipping.")
+            continue
+        df[iso_col] = formatted
+        logger.info(f"Normalized datetime column '{col}' into '{iso_col}'")
+
+    return df
+
+@op(out={"data": Out()})
+def normalize_numeric_min_max(context, config: ColumnsSelectConfiguration, df: pd.DataFrame):
+    """Add a ``<col>_norm`` column with the min-max scaling of each configured column.
+
+    Missing and constant columns are skipped with a warning. Returns a new
+    DataFrame; the input frame is left unmodified.
+    """
+    logger = context.log
+    df = df.copy()  # do not mutate the upstream op's output in place
+    for col in config.columns:
+        if col not in df.columns:
+            logger.warning(f"Column '{col}' not found, skipping normalization.")
+            continue
+        min_val, max_val = df[col].min(), df[col].max()
+        if min_val == max_val:
+            logger.warning(f"Column '{col}' has constant values, skipping normalization.")
+            continue
+        df[col + "_norm"] = (df[col] - min_val) / (max_val - min_val)
+        logger.info(f"Normalized numeric column '{col}'")
+    return df
+
+@op(out={"data": Out()})
+def normalize_coordinates(context, config: CoordinatesNormalizationConfiguration, df: pd.DataFrame):
+    """Convert the lat/lon columns to decimal degrees and keep only in-range rows.
+
+    Numeric columns are coerced with ``pd.to_numeric``, others parsed per cell
+    with ``_parse_dms_to_decimal``; values are rounded to 4 decimals. Raises
+    ``KeyError`` when a configured column is absent from ``df``.
+    """
+    logger = context.log
+    lat, lon = config.latColumn, config.lonColumn
+    missing = [col for col in (lat, lon) if col not in df.columns]
+    if missing:
+        raise KeyError(f"Coordinate column(s) not found in DataFrame: {missing}")
+    df = df.copy()  # do not mutate the upstream op's output in place
+    for col in (lat, lon):
+        if pd.api.types.is_numeric_dtype(df[col]):
+            logger.info(f"Column '{col}' is numeric — coercing directly")
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+        else:
+            logger.info(f"Column '{col}' is non-numeric — parsing as DMS with PyGeodesy")
+            df[col] = df[col].apply(_parse_dms_to_decimal)
+    invalid_lat, invalid_lon = df[lat].isnull().sum(), df[lon].isnull().sum()
+    logger.info(f"Found {invalid_lat} invalid latitudes and {invalid_lon} invalid longitudes")
+    df[lat], df[lon] = df[lat].round(4), df[lon].round(4)
+    before_filter_rows = len(df)
+    df = df[(df[lat].between(-90, 90)) & (df[lon].between(-180, 180))]
+    after_filter_rows = len(df)
+    logger.info(f"Filtered coordinates out of range: removed {before_filter_rows - after_filter_rows} rows")
+    logger.info(f"Coordinate normalization completed: resulting dataframe has {after_filter_rows} rows")
+    return df
+
+@op(out={"data": Out()})
+def add_global_aggregations(context, config: AggregationConfiguration, df: pd.DataFrame):
+    """Group by the configured columns and aggregate every numeric column.
+
+    Configured columns absent from ``df`` are skipped with a warning; if none
+    remain a ``ValueError`` is raised. Output columns are the group-by columns
+    followed by the remaining numeric columns, in their original order.
+    """
+    logger = context.log
+    group_by_cols = [col for col in config.columns if col in df.columns]
+    for col in config.columns:
+        if col not in df.columns:
+            logger.warning(f"Column '{col}' not found, skipping aggregation.")
+    if not group_by_cols:
+        raise ValueError("None of the configured group-by columns exist in the DataFrame.")
+    if config.operation not in {"sum", "mean", "min", "max", "count"}:
+        logger.warning(f"Unsupported aggregation '{config.operation}'")
+    numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
+    cols_to_keep = list(dict.fromkeys(group_by_cols + numeric_cols))  # stable order, unlike set()
+    return df[cols_to_keep].groupby(group_by_cols).agg(config.operation).reset_index()
+
+@op(out={"data": Out()})
+def filter_dataset(context, config: DatasetFilterConfiguration, df: pd.DataFrame):
+    """Keep only the rows that satisfy every configured filter condition.
+
+    Conditions on a missing or all-NaN column are skipped with a warning; a
+    condition that raises while being evaluated is logged as an error and
+    skipped as well. Returns a new, filtered DataFrame.
+    """
+    logger = context.log
+    total_rows_before = len(df)
+    logger.info(f"Starting dataset filtering: initial dataframe has {total_rows_before} rows")
+    # Explicit bool dtype: for an empty frame a list-built mask is not boolean,
+    # and df[mask] would then be read as an (empty) column selection.
+    combined_mask = pd.Series(True, index=df.index, dtype=bool)
+    for condition in config.conditions:
+        if condition.column not in df.columns:
+            logger.warning(f"Column '{condition.column}' not found, skipping filtering.")
+            continue
+        if df[condition.column].isna().all():
+            logger.warning(f"Column '{condition.column}' is empty (all NaN), skipping filtering.")
+            continue
+        try:
+            combined_mask &= condition.apply(df)
+            logger.info(f"Applied filter: {condition.column} {condition.op.value} '{condition.value}'")
+        except Exception as e:  # best effort: a failing condition must not abort the op
+            logger.error(f"Error applying filter on column '{condition.column}': {e}")
+    filtered_df = df[combined_mask]
+    total_rows_after = len(filtered_df)
+    logger.info(
+        f"Filtering completed: {total_rows_after} rows remain "
+        f"(removed {total_rows_before - total_rows_after} rows in total)"
+    )
+    return filtered_df
diff --git a/src/template-code-location/ops/__init__.py b/src/template_code_location/dataframe_level_anonymisation/__init__.py
similarity index 100%
rename from src/template-code-location/ops/__init__.py
rename to src/template_code_location/dataframe_level_anonymisation/__init__.py
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py b/src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py
new file mode 100644
index 0000000..0f490b5
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py
@@ -0,0 +1,13 @@
+"""Configuration models for dataframe-level anonymization."""
+
+from .k_anonymity_configuration import KAnonymityConfiguration
+from .l_diversity_configuration import LDiversityConfiguration
+from .t_closeness_configuration import TClosenessConfiguration
+from .base_config import BaseConfiguration
+
+__all__ = [
+    "BaseConfiguration",
+    "KAnonymityConfiguration",
+    "LDiversityConfiguration",
+    "TClosenessConfiguration",
+]
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py b/src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py
new file mode 100644
index 0000000..4abf451
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py
@@ -0,0 +1,33 @@
+from typing import Dict, List
+from dagster import Config
+from pydantic import Field, field_validator, model_validator
+
+
+class BaseConfiguration(Config):  # Settings shared by the k-anonymity, l-diversity and t-closeness configurations.
+    ident: List[str] = Field(default=["Name"], description="List of identifier column names.")
+    quasi_identifiers: List[str] = Field(default=["Age"], description="List of quasi-identifier column names.")
+    supp_level: float = Field(default=50.0, ge=0.0, le=100.0, description="Max suppression allowed (0–100).")
+    generalisation_hierarchies: Dict[str, str] = Field(  # maps a column name to the name (a string) of a hierarchy
+        default={"Age": "simpl_age"}, description="Hierarchies used to generalize quasi-identifiers."
+    )
+
+    @field_validator("quasi_identifiers")
+    def validate_quasi_identifiers(cls, value):  # rejects an empty quasi-identifier list
+        if not value:
+            raise ValueError("At least one quasi-identifier must be provided.")
+        return value
+
+    @field_validator("ident")
+    def validate_ident(cls, value):  # rejects an empty identifier list
+        if not value:
+            raise ValueError("At least one identifier must be provided.")
+        return value
+
+    @model_validator(mode="after")
+    def check_no_overlap(self):  # runs on the validated model: a column may appear in only one of the two lists
+        ident = set(self.ident)
+        quasi = set(self.quasi_identifiers)
+        overlap = ident & quasi
+        if overlap:
+            raise ValueError(f"Fields cannot be both identifiers and quasi-identifiers: {overlap}")
+        return self
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py b/src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py
new file mode 100644
index 0000000..65105a0
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py
@@ -0,0 +1,18 @@
+from anjana.anonymity.utils import utils
+
+simpl_age = {  # age hierarchy keyed by generalisation level; level 0 holds the raw values 0..99
+    0: [age for age in range(0, 100)],
+    1: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 5),  # last argument is presumably the interval width - TODO confirm against anjana
+    2: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 10),
+    3: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 20),
+    4: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 100),
+}
+simpl_age2 = {  # shorter age hierarchy: raw values plus a single generalisation level
+    0: [age for age in range(0, 100)],
+    1: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 5),
+}
+simpl_gender = {0: ["M", "F", "O"], 1: ["*", "*", "*"]}  # level 1 replaces every listed value with "*"
+
+
+def get_all_hierarchies():  # Return {variable name: dict} for every public module-level dict; skips "_" names such as __builtins__ (itself a dict in imported modules).
+    return {name: obj for name, obj in globals().items() if isinstance(obj, dict) and not name.startswith("_")}
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py b/src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py
new file mode 100644
index 0000000..0ddd88f
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py
@@ -0,0 +1,11 @@
+from typing import List
+from pydantic import Field
+
+from .base_config import BaseConfiguration
+
+
+class KAnonymityConfiguration(BaseConfiguration):  # BaseConfiguration plus the k-anonymity parameters.
+    k: int = Field(default=3, ge=2, description="Desired level of k-anonymity (must be >= 2).")
+    sensitive_attributes: List[str] = Field(  # a list of column names (several sensitive attributes may be given)
+        default=["Disease"], description="List of sensitive attribute column names."
+    )
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py b/src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py
new file mode 100644
index 0000000..c764f1d
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py
@@ -0,0 +1,8 @@
+from pydantic import Field
+from .base_config import BaseConfiguration
+
+
+class LDiversityConfiguration(BaseConfiguration):  # BaseConfiguration plus the l-diversity parameters.
+    k: int = Field(default=2, ge=2, description="Desired level of k-anonymity (must be >= 2).")
+    l: int = Field(default=3, ge=1, description="L-diversity level (must be >= 1)")
+    sensitive_attribute: str = Field(default="Disease", description="Sensitive attribute name.")  # a single column name
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py b/src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py
new file mode 100644
index 0000000..4461539
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py
@@ -0,0 +1,8 @@
+from pydantic import Field
+from .base_config import BaseConfiguration
+
+
+class TClosenessConfiguration(BaseConfiguration):  # BaseConfiguration plus the t-closeness parameters.
+    k: int = Field(default=2, ge=2, description="Desired level of k-anonymity (must be >= 2).")
+    t: float = Field(default=0.5, ge=0.0, le=1.0, description="Maximum t-distance threshold.")  # constrained to [0.0, 1.0]
+    sensitive_attribute: str = Field(default="Disease", description="Sensitive attribute name.")  # a single column name
diff --git a/src/template_code_location/dataframe_level_anonymisation/jobs.py b/src/template_code_location/dataframe_level_anonymisation/jobs.py
new file mode 100644
index 0000000..35c76f7
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/jobs.py
@@ -0,0 +1,86 @@
+from dagster import job
+from util_services.util_ops import (
+    preview_dataframe,
+    read_structured_to_df,
+    write_df_to_local,
+    read_structured_from_s3,
+    write_df_to_s3,
+    write_semistructured_to_s3,
+    read_semistructured_from_s3
+)
+
+from .ops import apply_k_anonymity, apply_l_diversity, apply_t_closeness
+
+
+@job(tags={  # Job graph: read_structured_to_df -> apply_k_anonymity -> write_df_to_local, previewing input and output.
+    "business_operation": "ANONYMISATION"
+})
+def k_anonymity_job():
+    org_df = read_structured_to_df()
+    anon_df, _ = apply_k_anonymity(org_df)  # the op's second output (metrics) is not consumed here
+    preview_dataframe(org_df)
+    write_df_to_local(anon_df)
+    preview_dataframe(anon_df)
+
+
+@job(tags={  # Job graph: read_structured_to_df -> apply_l_diversity -> write_df_to_local, previewing input and output.
+    "business_operation": "ANONYMISATION"
+})
+def l_diversity_job():
+    org_df = read_structured_to_df()
+    anon_df, _ = apply_l_diversity(org_df)  # the op's second output (metrics) is not consumed here
+    preview_dataframe(org_df)
+    write_df_to_local(anon_df)
+    preview_dataframe(anon_df)
+
+
+@job(tags={  # Job graph: read_structured_to_df -> apply_t_closeness -> write_df_to_local, previewing input and output.
+    "business_operation": "ANONYMISATION"
+})
+def t_closeness_job():
+    org_df = read_structured_to_df()
+    anon_df, _ = apply_t_closeness(org_df)  # the op's second output (metrics) is not consumed here
+    preview_dataframe(org_df)
+    write_df_to_local(anon_df)
+    preview_dataframe(anon_df)
+
+
+@job(tags={  # Job graph: read_structured_from_s3 -> apply_k_anonymity -> write_df_to_s3, previewing input and output.
+    "business_operation": "ANONYMISATION",
+    "resource_type": "RD_DATA"
+})
+def k_anonymity_job_s3():
+    org_df = read_structured_from_s3()
+    anon_df, _ = apply_k_anonymity(org_df)  # the op's second output (metrics) is not consumed here
+    preview_dataframe(org_df)
+    write_df_to_s3(anon_df)
+    preview_dataframe(anon_df)
+
+
+@job(tags={  # Job graph: read_structured_from_s3 -> apply_l_diversity -> write_df_to_s3, previewing input and output.
+    "business_operation": "ANONYMISATION",
+    "resource_type": "RD_DATA"
+})
+def l_diversity_job_s3():
+    org_df = read_structured_from_s3()
+    anon_df, _ = apply_l_diversity(org_df)  # the op's second output (metrics) is not consumed here
+    preview_dataframe(org_df)
+    write_df_to_s3(anon_df)
+    preview_dataframe(anon_df)
+
+
+@job(tags={  # Job graph: read_structured_from_s3 -> apply_t_closeness -> write_df_to_s3, previewing input and output.
+    "business_operation": "ANONYMISATION",
+    "resource_type": "RD_DATA"
+})
+def t_closeness_job_s3():
+    org_df = read_structured_from_s3()
+    anon_df, _ = apply_t_closeness(org_df)  # the op's second output (metrics) is not consumed here
+    preview_dataframe(org_df)
+    write_df_to_s3(anon_df)
+    preview_dataframe(anon_df)
+
+@job()  # Untagged job: passes the result of read_semistructured_from_s3 straight to write_semistructured_to_s3.
+def read_write_semistructured_job_s3():
+    semistruct_data = read_semistructured_from_s3()
+    write_semistructured_to_s3(semistruct_data)  # no anonymisation op is applied in between
diff --git a/src/template_code_location/dataframe_level_anonymisation/ops.py b/src/template_code_location/dataframe_level_anonymisation/ops.py
new file mode 100644
index 0000000..93682bf
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/ops.py
@@ -0,0 +1,187 @@
+import json
+from textwrap import dedent
+
+import pandas as pd
+from anjana.anonymity import k_anonymity, l_diversity, t_closeness
+from dagster import (
+    DagsterInvalidInvocationError,
+    MarkdownMetadataValue,
+    Out,
+    Output,
+    get_dagster_logger,
+    op,
+)
+from pycanon import anonymity
+
+from template_code_location.dataframe_level_anonymisation.config_models import (
+    KAnonymityConfiguration,
+    LDiversityConfiguration,
+    TClosenessConfiguration,
+)
+from template_code_location.dataframe_level_anonymisation.config_models.hierarchies import get_all_hierarchies
+
+
+def _calc_dataframe_metrics(df_anon, df_org, quasi_identifiers, sensitive_atttributes):
+    # Build a markdown report and a metrics dict describing df_anon (anonymised) relative to df_org (original).
+    # Anonymization metrics, computed by pycanon on the anonymised frame
+    k_anon = anonymity.k_anonymity(df_anon, quasi_identifiers)
+    l_div = anonymity.l_diversity(df_anon, quasi_identifiers, sensitive_atttributes, True)  # meaning of the trailing True is defined by pycanon - TODO confirm
+    t_clos = anonymity.t_closeness(df_anon, quasi_identifiers, sensitive_atttributes, True)
+
+    # Data Utilization metrics
+    supression_rate = 1 - len(df_anon) / len(df_org)  # fraction of original rows absent from df_anon; df_org must be non-empty
+    grouped = df_anon.groupby(quasi_identifiers)
+    mean_equivalence_class_size = len(df_anon) / len(grouped) if len(grouped) else 0  # rows per group; 0 when there are no groups
+
+    # flake8: noqa
+    anon_report = dedent(  # NOTE(review): a "# flake8: noqa" line makes flake8 skip the whole file and "# flake8: enable" is not a directive - consider per-line noqa
+        f"""
+        ### Anonymization & Data Utilization Metrics
+
+        | Metric | Value | Description |
+        |--------|-------|-------------|
+        | **k-anonymity** | `k = {k_anon}` | Minimum number of records sharing the same quasi-identifier values. |
+        | **l-diversity** | `l = {l_div}` | Diversity of sensitive attributes within each equivalence class. |
+        | **t-closeness** | `t = {round(t_clos, 2)}` | Distance between sensitive attribute distribution in a group and the overall dataset. |
+        | **Suppression rate** | `{round(supression_rate, 2)}` | Fraction of records or attributes suppressed to meet privacy requirements. |
+        | **Mean equivalence class size** | `{round(mean_equivalence_class_size, 2)}` | Average size of equivalence classes for quasi-identifiers, indicates data grouping. |
+    """
+    )
+    # flake8: enable
+    metrics = {  # unrounded values; the callers serialise this dict with json.dumps
+        "k_anon": k_anon,
+        "l_div": l_div,
+        "t_clos": t_clos,
+        "supp_rate": supression_rate,
+        "mean_equivalence_class": mean_equivalence_class_size,
+    }
+    return anon_report, metrics  # (markdown string, dict)
+
+
+def _validate_and_get_hierarchies(config, df: pd.DataFrame):  # Validate config against df; return {quasi-identifier: hierarchy dict}.
+    hierarchies = get_all_hierarchies()
+
+    # Dataset smaller than k
+    if len(df) < config.k:
+        raise DagsterInvalidInvocationError(
+            f"Cannot apply k-anonymity: dataset has {len(df)} records, but k={config.k}"
+        )
+
+    # Missing or incomplete generalisation hierarchies
+    for qi in config.quasi_identifiers:
+        if qi not in config.generalisation_hierarchies or not config.generalisation_hierarchies[qi]:  # no entry, or an empty hierarchy name
+            raise DagsterInvalidInvocationError(
+                f"Generalisation hierarchy for quasi-identifier '{qi}' is missing or incomplete"
+            )
+        if config.generalisation_hierarchies[qi] not in hierarchies:  # the configured name must exist in the hierarchies module
+            raise DagsterInvalidInvocationError(
+                f"Generalisation hierarchy '{config.generalisation_hierarchies[qi]}' is missing in the code basis"
+            )
+
+    hier = {  # resolve each configured hierarchy name to the actual hierarchy dict
+        qi: hierarchies[config.generalisation_hierarchies[qi]] for qi in config.quasi_identifiers
+    }
+    return hier
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def apply_k_anonymity(context, config: KAnonymityConfiguration, df: pd.DataFrame):
+    # Op: k-anonymise df with anjana; yields the anonymised frame as "data" and the metrics dict as "metrics".
+    hier = _validate_and_get_hierarchies(config, df)
+
+    data_anon = k_anonymity(
+        df, config.ident, config.quasi_identifiers, config.k, config.supp_level, hier
+    )
+    if "index" in data_anon.columns and "index" not in df.columns:  # drop an "index" column that the input did not have
+        data_anon.drop(columns="index", inplace=True)
+    anon_report, metrics = _calc_dataframe_metrics(
+        data_anon, df, config.quasi_identifiers, config.sensitive_attributes
+    )
+    yield Output(
+        value=data_anon,
+        metadata={  # the report and the JSON-encoded metrics are attached to the "data" output
+            "metric_report": MarkdownMetadataValue(anon_report),
+            "metric_json": json.dumps(metrics),
+        },
+        output_name="data",
+    )
+    yield Output(value=metrics, output_name="metrics")
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def apply_l_diversity(context, config: LDiversityConfiguration, df: pd.DataFrame):
+    # Op: apply anjana l-diversity to df; yields the anonymised frame as "data" and the metrics dict as "metrics".
+    hier = _validate_and_get_hierarchies(config, df)
+
+    data_anon = l_diversity(
+        df,
+        config.ident,
+        config.quasi_identifiers,
+        config.sensitive_attribute,
+        config.k,
+        config.l,
+        config.supp_level,
+        hier,
+    )
+    if data_anon.empty:  # an empty result is reported as a failure instead of being written out
+        raise DagsterInvalidInvocationError(
+            "Could not tranform the data to l-diversity, empty dataset returned!"
+        )
+    anon_report, metrics = _calc_dataframe_metrics(
+        data_anon, df, config.quasi_identifiers, [config.sensitive_attribute]  # metrics helper takes a list of attributes
+    )
+    yield Output(
+        value=data_anon,
+        metadata={  # the report and the JSON-encoded metrics are attached to the "data" output
+            "metric_report": MarkdownMetadataValue(anon_report),
+            "metric_json": json.dumps(metrics),
+        },
+        output_name="data",
+    )
+    yield Output(value=metrics, output_name="metrics")
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def apply_t_closeness(context, config: TClosenessConfiguration, df: pd.DataFrame):
+    # Op: apply anjana t-closeness to df; yields the anonymised frame as "data" and the metrics dict as "metrics".
+    hier = _validate_and_get_hierarchies(config, df)
+
+    try:
+        data_anon = t_closeness(
+            df,
+            config.ident,
+            config.quasi_identifiers,
+            config.sensitive_attribute,
+            config.k,
+            config.t,
+            config.supp_level,
+            hier,
+        )
+    except ValueError as e:  # translated to a Dagster error; "from e" keeps the original as the explicit cause
+        if "Cannot be quasi-identifiers" in str(e):
+            raise DagsterInvalidInvocationError(
+                f"T-closeness failed: k-anonymity parameter = {config.k} is too small "
+                f"for existing hierarchies of {config.quasi_identifiers} in inner k-anonymity call."
+            ) from e
+        else:
+            # Re-raise other ValueError types with context
+            raise DagsterInvalidInvocationError(f"T-closeness failed with error: {str(e)}") from e
+
+    if data_anon.empty:  # an empty result is reported as a failure instead of being written out
+        raise DagsterInvalidInvocationError(
+            f"Could not transform the data to t-closeness, empty dataset returned! "
+            f"This may indicate that the t-closeness constraint (t={config.t}) is too strict for the given data."
+        )
+
+    anon_report, metrics = _calc_dataframe_metrics(
+        data_anon, df, config.quasi_identifiers, [config.sensitive_attribute]  # metrics helper takes a list of attributes
+    )
+    yield Output(
+        value=data_anon,
+        metadata={  # the report and the JSON-encoded metrics are attached to the "data" output
+            "metric_report": MarkdownMetadataValue(anon_report),
+            "metric_json": json.dumps(metrics),
+        },
+        output_name="data",
+    )
+    yield Output(value=metrics, output_name="metrics")
diff --git a/src/template_code_location/dataframe_level_anonymisation/utils.py b/src/template_code_location/dataframe_level_anonymisation/utils.py
new file mode 100644
index 0000000..c233c4e
--- /dev/null
+++ b/src/template_code_location/dataframe_level_anonymisation/utils.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+
+def parse_value_list(values):  # Convert decimal-digit strings to int; every other item is returned unchanged.
+    return [int(v) if isinstance(v, str) and v.isdecimal() else v for v in values]  # isdecimal, not isdigit: int() rejects digits like "²"
+
+
+# Normalise hierarchies for Anjana: level keys become ints and level 0 becomes a numpy array of parsed values.
+def normalize_hierarchy_levels(hierarchy_dict):
+    normalized = {}
+    for column, levels in hierarchy_dict.items():  # hierarchy_dict: {column: {level: list of values}}
+        normalized[column] = {}
+        for level_str, mapping_list in levels.items():
+            level = int(level_str)  # level keys may arrive as strings
+            if level == 0:
+                normalized[column][level] = np.array(parse_value_list(mapping_list))  # digit strings converted to int first
+            else:
+                normalized[column][level] = mapping_list  # higher levels are stored unchanged
+    return normalized
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/__init__.py b/src/template_code_location/field_level_pseudo_anonymisation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py
new file mode 100644
index 0000000..60944be
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py
@@ -0,0 +1,28 @@
+from .structured_config import (  # noqa: F401
+    HashConfig,
+    EncryptConfig,
+    RedactConfig,
+    ReplaceConfig,
+    PseudoTechniqueConfig,
+    AnonymisePseudonymizeStructuredConfig,
+    DecryptConfig,
+    DepseudoTechniqueConfig,
+    DepseudonymizeStructuredConfig,
+)
+
+from .unstructured_config import (  # noqa: F401, F811
+    HashConfig,
+    EncryptConfig,
+    RedactConfig,
+    ReplaceConfig,
+    RetainConfig,
+    PseudoTechniqueConfig,
+    AnonymisePseudonymizeUnstructuredConfig,
+    DecryptConfig,
+    DepseudoTechniqueConfig,
+    DepseudonymizeUnstructuredConfig,
+)
+
+from .languages import SupportedLanguages, LanguageEnum  # noqa: F401
+
+from .pii_entities import PIIEntityEnum, PII_MAPPING  # noqa: F401
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py
new file mode 100644
index 0000000..e3ba89e
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py
@@ -0,0 +1,72 @@
+from enum import Enum
+from typing import ClassVar
+
+
+class SupportedLanguages:  # Lookup tables keyed by two-letter language code, with classmethod accessors.
+    LANGUAGES: ClassVar[dict[str, str]] = {  # language code -> locale identifier
+        "hr": "hr_HR",  # Croatian
+        "da": "da_DK",  # Danish
+        "nl": "nl_NL",  # Dutch
+        "en": "en_US",  # English
+        "fi": "fi_FI",  # Finnish
+        "fr": "fr_FR",  # French
+        "de": "de_DE",  # German
+        "el": "el_GR",  # Greek
+        "it": "it_IT",  # Italian
+        "lt": "lt_LT",  # Lithuanian
+        "pl": "pl_PL",  # Polish
+        "pt": "pt_PT",  # Portuguese
+        "ro": "ro_RO",  # Romanian
+        "sl": "sl_SI",  # Slovenian
+        "es": "es_ES",  # Spanish
+        "sv": "sv_SE",  # Swedish
+    }
+    LANGUAGE_MODELS = {  # language code -> model name (presumably spaCy packages - TODO confirm)
+        "en": "en_core_web_sm",
+        "it": "it_core_news_sm",
+        "de": "de_core_news_sm",
+        "fr": "fr_core_news_sm",
+        "es": "es_core_news_sm",
+        "nl": "nl_core_news_sm",
+        "da": "da_core_news_sm",
+        "sv": "sv_core_news_sm",
+        "fi": "fi_core_news_sm",
+        "pl": "pl_core_news_sm",
+        "el": "el_core_news_sm",
+        "hr": "hr_core_news_sm",
+        "lt": "lt_core_news_sm",
+        "pt": "pt_core_news_sm",
+        "ro": "ro_core_news_sm",
+        "sl": "sl_core_news_sm",
+    }
+
+    @classmethod
+    def codes(cls) -> list[str]:  # all supported language codes, in LANGUAGES order
+        return list(cls.LANGUAGES.keys())
+
+    @classmethod
+    def get_locale(cls, code: str) -> str:  # raises KeyError for an unknown code
+        return cls.LANGUAGES[code]
+
+    @classmethod
+    def get_language_model(cls, code: str) -> str:  # raises KeyError for an unknown code
+        return cls.LANGUAGE_MODELS[code]
+
+
+class LanguageEnum(str, Enum):  # String enum with one member per language code; member name equals its value.
+    hr = "hr"
+    da = "da"
+    nl = "nl"
+    en = "en"
+    fi = "fi"
+    fr = "fr"
+    de = "de"
+    el = "el"
+    it = "it"
+    lt = "lt"
+    pl = "pl"
+    pt = "pt"
+    ro = "ro"
+    sl = "sl"
+    es = "es"
+    sv = "sv"  # same 16 codes as the keys of SupportedLanguages.LANGUAGES
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py
new file mode 100644
index 0000000..e730b6d
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py
@@ -0,0 +1,24 @@
+from enum import Enum
+
+
+class PIIEntityEnum(str, Enum):  # PII entity kinds; each value is a human-readable label.
+    PERSON = "Person"
+    EMAIL = "Email"
+    CREDIT_CARD = "Credit card"
+    DATE_OF_BIRTH = "Date of birth"
+    URL = "URLs"
+    PHONE_NUMBERS = "Phone numbers"
+    CREDENTIALS = "Credentials"
+    X_SOCIAL = "X (formally known as Twitter) username"  # NOTE(review): "formally" probably means "formerly"; left as-is because the value is runtime data
+
+
+PII_MAPPING: dict[PIIEntityEnum, str] = {  # entity -> detector class name (presumably scrubadub Filth types - TODO confirm)
+    PIIEntityEnum.PERSON: "NameFilth",
+    PIIEntityEnum.EMAIL: "EmailFilth",
+    PIIEntityEnum.CREDIT_CARD: "CreditCardFilth",
+    PIIEntityEnum.DATE_OF_BIRTH: "DateOfBirthFilth",
+    PIIEntityEnum.URL: "UrlFilth",
+    PIIEntityEnum.PHONE_NUMBERS: "PhoneFilth",
+    PIIEntityEnum.CREDENTIALS: "CredentialFilth",
+    PIIEntityEnum.X_SOCIAL: "TwitterFilth",
+}
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py
new file mode 100644
index 0000000..af8abf6
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py
@@ -0,0 +1,110 @@
+from typing import List, Literal, Optional, Union
+
+from dagster import Config
+from pydantic import Field as PydanticField, model_validator, field_validator
+  
+
+class HashConfig(Config):  # Settings for the "hash" technique on structured data.
+    type: Literal["hash"] = "hash"  # tag used as the discriminator in PseudoTechniqueConfig
+    columns: List[str] = PydanticField(default=["example_column"], description="Columns to hash")
+    algorithm: str = PydanticField(default="sha256", description="Hashing algorithm")
+
+class EncryptConfig(Config):  # Settings for the "encrypt" technique on structured data.
+    type: Literal["encrypt"] = "encrypt"  # tag used as the discriminator in PseudoTechniqueConfig
+    columns: List[str] = PydanticField(default=["example_column"], description="Columns to encrypt")
+    key_name: str = PydanticField(default="my_key", description="Key identifier used for encryption")
+
+class RedactConfig(Config):  # Settings for the "redact" technique on structured data.
+    type: Literal["redact"] = "redact"  # tag used as the discriminator in PseudoTechniqueConfig
+    columns: List[str] = PydanticField(default=["example_column"], description="Columns to redact")
+
+class ReplaceConfig(Config):  # Settings for the "replace" technique on structured data.
+    type: Literal["replace"] = "replace"  # tag used as the discriminator in PseudoTechniqueConfig
+    columns: List[str] = PydanticField(default=["example_column"], description="Columns to replace")
+    new_value: str = PydanticField(default="REPLACED", description="Replacement value")
+
+class PseudoTechniqueConfig(Config):  # Wraps exactly one technique config, selected by its "type" field.
+    technique: Union[HashConfig, EncryptConfig, RedactConfig, ReplaceConfig] = PydanticField(
+        default={"hash": HashConfig().model_dump(exclude={"type"})},  # default uses the variant-key form {"hash": {...}} without "type"
+        discriminator="type"
+    )
+
+
+class AnonymisePseudonymizeStructuredConfig(Config):  # List of techniques to apply; a column may be used by one technique only.
+    used_function: List[PseudoTechniqueConfig] = PydanticField(
+        default=[{"technique": {"hash": HashConfig().model_dump(exclude={"type"})}}],
+        description=("List of functions to be used on column"),
+    )
+
+    @model_validator(mode="after")
+    def ensure_unique_columns(self):  # raises ValueError when a column is listed under more than one technique entry
+        column_to_techniques = self._collect_column_to_techniques()
+        duplicates = {
+            col: techs for col, techs in column_to_techniques.items() if len(techs) > 1
+        }
+
+        if duplicates:
+            formatted = "; ".join(  # e.g. "col_a -> hash, encrypt; col_b -> redact, replace"
+                f"{col} -> {', '.join(techs)}" for col, techs in duplicates.items()
+            )
+            raise ValueError(f"Duplicate column(s) across techniques not allowed:\n{formatted}")
+
+        return self
+
+    def _collect_column_to_techniques(self):
+        """Extract column-to-techniques mapping from used_function list."""
+        column_to_techniques = {}
+        for f in self.used_function:
+            technique_type, cols = self._extract_technique_and_columns(f)
+            for col in cols:
+                column_to_techniques.setdefault(col, []).append(technique_type)  # one entry per occurrence of the column
+        return column_to_techniques
+
+    def _extract_technique_and_columns(self, item):
+        """Extract technique type and columns list from a PseudoTechniqueConfig item (dict or model instance)."""
+        if isinstance(item, dict):
+            tech = item.get("technique") or {}
+            if isinstance(tech, dict):
+                if "type" in tech:  # flat form: {"type": "hash", "columns": [...]}
+                    return tech.get("type"), tech.get("columns") or []
+                elif len(tech) == 1:
+                    # variant-key mapping: {'hash': {...}}
+                    technique_type, inner = next(iter(tech.items()))
+                    return technique_type, inner.get("columns") or []
+            return None, []  # unrecognised dict shape contributes no columns
+        else:
+            # item is a PseudoTechniqueConfig instance
+            technique_type = item.technique.type
+            cols = getattr(item.technique, "columns", [])
+            return technique_type, cols
+    
+class DecryptConfig(Config):  # Settings for the "decrypt" technique on structured data.
+    type: Literal["decrypt"] = "decrypt"
+    columns: List[str] = PydanticField(default=["example_column"], description="Columns to decrypt")
+    key_name: str = PydanticField(default="my_key", description="Key identifier used for decryption")
+    
+class DepseudoTechniqueConfig(Config):  # Wrapper around a DecryptConfig, the only technique type this field accepts.
+    technique: DecryptConfig = PydanticField(default={"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})})  # default is a plain dict built from DecryptConfig defaults
+
+
+class DepseudonymizeStructuredConfig(Config):  # List of decrypt techniques to apply to structured data.
+    used_function: List[DepseudoTechniqueConfig] = PydanticField(
+        default=[{"technique": {"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})}}],
+        description=("Decryption functions to be used on column"),
+    )
+
+    @field_validator("used_function", mode="before")
+    def _normalize_depseudo_used_function(cls, v):  # converts dict items to DepseudoTechniqueConfig before field validation
+        normalized = []
+        for item in v:
+            if isinstance(item, dict):
+                normalized.append(DepseudoTechniqueConfig.model_validate(item))
+            else:
+                normalized.append(item)  # non-dict items are passed through unchanged
+        return normalized
+
+    @model_validator(mode="after")
+    def ensure_unique_columns(self):
+        # For depseudonymize, we don't have per-column uniqueness constraints,
+        # but keep a no-op validator to preserve API parity.
+        return self
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py
new file mode 100644
index 0000000..abea0b0
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py
@@ -0,0 +1,115 @@
+from typing import List, Literal, Optional, Union
+
+from dagster import Config
+from pydantic import Field as PydanticField, model_validator, field_validator
+from .languages import LanguageEnum
+from .pii_entities import PIIEntityEnum
+
+
+class HashConfig(Config):  # Settings for the "hash" technique on unstructured text, selected by PII entity.
+    type: Literal["hash"] = "hash"  # tag used as the discriminator in PseudoTechniqueConfig
+    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to hash")  # default is the member name "EMAIL", not its value - presumably how the config addresses enum members; confirm
+    algorithm: str = PydanticField(default="sha256", description="Hashing algorithm")
+
+class EncryptConfig(Config):  # Settings for the "encrypt" technique on unstructured text, selected by PII entity.
+    type: Literal["encrypt"] = "encrypt"  # tag used as the discriminator in PseudoTechniqueConfig
+    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to encrypt")  # default is the member name "EMAIL", not its value
+    key_name: str = PydanticField(default="my_key", description="Key identifier used for encryption")
+
+
+class RedactConfig(Config):  # Settings for the "redact" technique on unstructured text, selected by PII entity.
+    type: Literal["redact"] = "redact"  # tag used as the discriminator in PseudoTechniqueConfig
+    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to redact")  # default is the member name "EMAIL", not its value
+
+class ReplaceConfig(Config):  # Settings for the "replace" technique on unstructured text, selected by PII entity.
+    type: Literal["replace"] = "replace"  # tag used as the discriminator in PseudoTechniqueConfig
+    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to replace")  # default is the member name "EMAIL", not its value
+    new_value: str = PydanticField(default="REPLACED", description="Replacement value")
+
+class RetainConfig(Config):  # Settings for the "retain" technique; exists only in the unstructured variant.
+    type: Literal["retain"] = "retain"  # tag used as the discriminator in PseudoTechniqueConfig
+    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to retain")  # default is the member name "EMAIL", not its value
+
+class PseudoTechniqueConfig(Config):  # Wraps exactly one technique config, selected by its "type" field.
+    technique: Union[HashConfig, EncryptConfig, RedactConfig, ReplaceConfig, RetainConfig] = PydanticField(
+        default={"hash": HashConfig().model_dump(exclude={"type"})},  # default uses the variant-key form {"hash": {...}} without "type"
+        discriminator="type"
+    )
+
+class AnonymisePseudonymizeUnstructuredConfig(Config):  # Text language plus techniques to apply; a PII entity may be used by one technique only.
+    language: LanguageEnum = PydanticField(
+        default=LanguageEnum.en,
+        description="Language code (must be one of: hr, da, nl, en, fi, fr, de, el, it, lt, pl, pt, ro, sl, es, sv)"
+
+    )
+    used_function: List[PseudoTechniqueConfig] = PydanticField(
+        default=[{"technique": {"hash": HashConfig().model_dump(exclude={"type"})}}],
+        description=("List of functions to be used on PIIs"),
+    )
+
+    @field_validator("used_function", mode="before")
+    def _normalize_used_function(cls, v):  # converts dict items to PseudoTechniqueConfig before field validation
+        normalized = []
+        for item in v:
+            if isinstance(item, dict):
+                normalized.append(PseudoTechniqueConfig.model_validate(item))
+            else:
+                normalized.append(item)  # non-dict items are passed through unchanged
+        return normalized
+
+    @model_validator(mode="after")
+    def ensure_unique_pii(self):  # raises ValueError when a PII entity is listed under more than one technique entry
+        pii_to_techniques = self._collect_pii_to_techniques()
+        duplicates = {
+            pii: techs for pii, techs in pii_to_techniques.items() if len(techs) > 1
+        }
+
+        if duplicates:
+            formatted = "; ".join(  # one "<pii> -> <technique>, <technique>" entry per duplicated PII
+                f"{pii} -> {', '.join(techs)}" for pii, techs in duplicates.items()
+            )
+            raise ValueError(f"Duplicate PII(s) across techniques not allowed:\n{formatted}")
+
+        return self
+
+    def _collect_pii_to_techniques(self):
+        """Extract PII-to-techniques mapping from used_function list."""
+        pii_to_techniques = {}
+        for f in self.used_function:
+            technique_type, piis = self._extract_technique_and_pii(f)
+            for pii in piis:
+                pii_to_techniques.setdefault(pii, []).append(technique_type)  # one entry per occurrence of the PII
+        return pii_to_techniques
+
+    def _extract_technique_and_pii(self, item):
+        """Extract technique type and PII list from a PseudoTechniqueConfig item (dict or model instance)."""
+        if isinstance(item, dict):
+            tech = item.get("technique") or {}
+            if isinstance(tech, dict):
+                if "type" in tech:  # flat form: {"type": "hash", "pii": [...]}; "columns" is accepted as a fallback key
+                    return tech.get("type"), tech.get("pii") or tech.get("columns") or []
+                elif len(tech) == 1:
+                    # variant-key mapping: {'hash': {...}}
+                    technique_type, inner = next(iter(tech.items()))
+                    return technique_type, inner.get("pii") or inner.get("columns") or []
+            return None, []  # unrecognised dict shape contributes no PII entities
+        else:
+            # item is a PseudoTechniqueConfig instance
+            technique_type = item.technique.type
+            piis = getattr(item.technique, "pii", []) or getattr(item.technique, "columns", [])
+            return technique_type, piis
+    
+class DecryptConfig(Config):  # Settings for the "decrypt" technique on unstructured text; has no columns/pii field.
+    type: Literal["decrypt"] = "decrypt"
+    key_name: str = PydanticField(default="my_key", description="Key identifier used for decryption")
+    
+class DepseudoTechniqueConfig(Config):  # Wrapper around a DecryptConfig, the only technique type this field accepts.
+    technique: DecryptConfig = PydanticField(
+        default={"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})},  # plain dict built from DecryptConfig defaults
+    )
+
+class DepseudonymizeUnstructuredConfig(Config):  # List of decrypt techniques to apply to unstructured text.
+    used_function: List[DepseudoTechniqueConfig] = PydanticField(
+        default=[{"technique": {"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})}}],  # one decrypt entry with DecryptConfig defaults
+        description=("Decryption function"),
+    )
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
new file mode 100644
index 0000000..56baf11
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
@@ -0,0 +1,126 @@
+from dagster import job
+from util_services.util_ops import (
+    preview_dataframe,
+    read_structured_to_df,
+    write_df_to_local,
+    write_string_to_txt,
+    read_txt_to_string,
+    preview_txt,
+    read_structured_from_s3,
+    write_df_to_s3,
+    read_txt_from_s3,
+    write_text_to_s3,
+)
+from .ops import (
+    anonymize_pseudonymize_structured,
+    depseudonymize_structured,
+)
+from .unstructured_ops import (
+    anonymize_pseudonymize_unstructured,
+    depseudonymize_unstructured,
+)
+
+@job(tags={"business_operation": "ANONYMISATION_PSEUDONYMISATION"})
+def anonymize_pseudonymize_structured_job():
+    # Op chain: read_structured_to_df -> anonymize_pseudonymize_structured -> write_df_to_local,
+    # previewing the frame before and after. The op's metrics output is left unused.
+    source_df = read_structured_to_df()
+    preview_dataframe(source_df)
+    masked_df, _metrics = anonymize_pseudonymize_structured(source_df)
+    preview_dataframe(masked_df)
+    write_df_to_local(masked_df)
+
+
+@job(
+    tags={"business_operation": "ANONYMISATION_PSEUDONYMISATION", "resource_type": "RD_DATA"}
+)
+def anonymize_pseudonymize_structured_job_s3():
+    # Same chain as anonymize_pseudonymize_structured_job, with the S3 read/write ops at the ends.
+    source_df = read_structured_from_s3()
+    preview_dataframe(source_df)
+    masked_df, _metrics = anonymize_pseudonymize_structured(source_df)
+    preview_dataframe(masked_df)
+    write_df_to_s3(masked_df)
+
+
+@job(tags={"business_operation": "DEPSEUDONYMISATION"})
+def depseudonymize_structured_job():
+    # Op chain: read_structured_to_df -> depseudonymize_structured -> write_df_to_local,
+    # previewing the frame before and after. The op's metrics output is left unused.
+    pseudonymised_df = read_structured_to_df()
+    preview_dataframe(pseudonymised_df)
+    restored_df, _metrics = depseudonymize_structured(pseudonymised_df)
+    preview_dataframe(restored_df)
+    write_df_to_local(restored_df)
+
+
+@job(
+    tags={"business_operation": "DEPSEUDONYMISATION", "resource_type": "RD_DATA"}
+)
+def depseudonymize_structured_job_s3():
+    # Same chain as depseudonymize_structured_job, with the S3 read/write ops at the ends.
+    pseudonymised_df = read_structured_from_s3()
+    preview_dataframe(pseudonymised_df)
+    restored_df, _metrics = depseudonymize_structured(pseudonymised_df)
+    preview_dataframe(restored_df)
+    write_df_to_s3(restored_df)
+
+
+@job(tags={"business_operation": "ANONYMISATION_PSEUDONYMISATION"})
+def anonymize_pseudonymize_depseudonymize_structured_job():
+    # Round trip: pseudonymise the frame, then feed the result straight into
+    # depseudonymisation. Every stage is previewed; no write op is invoked.
+    source_df = read_structured_to_df()
+    preview_dataframe(source_df)
+    pseudonymised_df, _pseudo_metrics = anonymize_pseudonymize_structured(source_df)
+    preview_dataframe(pseudonymised_df)
+    restored_df, _depseudo_metrics = depseudonymize_structured(pseudonymised_df)
+    preview_dataframe(restored_df)
+
+
+@job(tags={"business_operation": "ANONYMISATION_PSEUDONYMISATION"})
+def anonymize_pseudonymize_unstructured_job():
+    # Op chain: read_txt_to_string -> anonymize_pseudonymize_unstructured -> write_string_to_txt.
+    # The input, the anonymised text and the metrics report are each previewed.
+    raw_text = read_txt_to_string()
+    preview_txt(raw_text)
+    masked_text, report = anonymize_pseudonymize_unstructured(raw_text)
+    preview_txt(masked_text)
+    preview_txt(report)
+    write_string_to_txt(masked_text)
+
+
+@job(
+    tags={"business_operation": "ANONYMISATION_PSEUDONYMISATION", "resource_type": "RD_DATA"}
+)
+def anonymize_pseudonymize_unstructured_job_s3():
+    # Same chain as anonymize_pseudonymize_unstructured_job, with the S3 read/write ops at the ends.
+    raw_text = read_txt_from_s3()
+    preview_txt(raw_text)
+    masked_text, report = anonymize_pseudonymize_unstructured(raw_text)
+    preview_txt(masked_text)
+    preview_txt(report)
+    write_text_to_s3(masked_text)
+
+
+@job(tags={"business_operation": "DEPSEUDONYMISATION"})
+def depseudonymize_unstructured_job():
+    # Op chain: read_txt_to_string -> depseudonymize_unstructured -> write_string_to_txt,
+    # previewing the text before and after. The op's metrics output is left unused.
+    pseudonymised_text = read_txt_to_string()
+    preview_txt(pseudonymised_text)
+    restored_text, _metrics = depseudonymize_unstructured(pseudonymised_text)
+    preview_txt(restored_text)
+    write_string_to_txt(restored_text)
+
+
+@job(
+    tags={"business_operation": "DEPSEUDONYMISATION", "resource_type": "RD_DATA"}
+)
+def depseudonymize_unstructured_job_s3():
+    # Same chain as depseudonymize_unstructured_job, with the S3 read/write ops at the ends.
+    pseudonymised_text = read_txt_from_s3()
+    preview_txt(pseudonymised_text)
+    restored_text, _metrics = depseudonymize_unstructured(pseudonymised_text)
+    preview_txt(restored_text)
+    write_text_to_s3(restored_text)
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/ops.py b/src/template_code_location/field_level_pseudo_anonymisation/ops.py
new file mode 100644
index 0000000..a485ff9
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/ops.py
@@ -0,0 +1,77 @@
+import pandas as pd
+import numpy as np
+from dagster import Out, Output, op
+from cryptography.fernet import InvalidToken
+from template_code_location.field_level_pseudo_anonymisation.config_models import (
+    AnonymisePseudonymizeStructuredConfig,
+    DepseudonymizeStructuredConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.techniques import (
+    anonymisation_pseudonymisation_techniques as anon_pseudo_funcs,
+)
+import template_code_location.field_level_pseudo_anonymisation.techniques.depseudonymisation_techniques as depseudo_funcs
+from .utils import create_get_encryption_key
+
+
+def _apply_column_wise_function(config, df, funcs):
+    """Apply each technique in ``config.used_function`` to its columns of ``df`` (in place).
+
+    Techniques are looked up on ``funcs`` by their ``type`` name. Raises ``ValueError``
+    for a column missing from ``df`` or an undecryptable value (cause is chained)."""
+    for used_function in config.used_function:
+        technique = used_function.technique
+        func_name, columns = technique.type, technique.columns
+        func = getattr(funcs, func_name)
+        params = technique.model_dump()
+        del params["type"], params["columns"]
+        key_name = None  # only meaningful for encrypt/decrypt; used in the error below
+        if func_name in ("encrypt", "decrypt"):
+            key_name = technique.key_name
+            del params["key_name"]
+            params["key"] = create_get_encryption_key(func_name, key_name)
+        missing = [col for col in columns if col not in df.columns]
+        if missing:
+            raise ValueError(
+                f"The following columns required by technique '{func_name}' "
+                f"are not present in the DataFrame: {', '.join(missing)}"
+            )
+        if len(df) == 0:
+            continue  # empty frame: nothing to transform
+        # Loop-invariant: one vectorised wrapper serves every column of this technique.
+        vectorized_func = np.vectorize(lambda x: func(x, **params))
+        for column in columns:
+            try:
+                df[column] = vectorized_func(df[column].to_numpy())
+            except InvalidToken as exc:
+                raise ValueError(
+                    f"Invalid Fernet token while decrypting column '{column}' "
+                    f"using key '{key_name}'. The data may not be encrypted "
+                    f"or the key may be incorrect. "
+                ) from exc
+    return df
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def anonymize_pseudonymize_structured(
+    context, config: AnonymisePseudonymizeStructuredConfig, df: pd.DataFrame
+):
+    # Applies the configured anonymisation/pseudonymisation techniques column by column.
+    # Output "data" carries the transformed frame (with empty metadata);
+    # output "metrics" is always an empty dict.
+    transformed = _apply_column_wise_function(config, df, anon_pseudo_funcs)
+    yield Output(
+        value=transformed, metadata={}, output_name="data"
+    )
+    yield Output(value={}, output_name="metrics")
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def depseudonymize_structured(context, config: DepseudonymizeStructuredConfig, df: pd.DataFrame):
+    # Applies the configured depseudonymisation techniques column by column.
+    # Output "data" carries the restored frame (with empty metadata);
+    # output "metrics" is always an empty dict.
+    restored = _apply_column_wise_function(config, df, depseudo_funcs)
+    yield Output(
+        value=restored, metadata={}, output_name="data"
+    )
+    yield Output(value={}, output_name="metrics")
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py b/src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py
new file mode 100644
index 0000000..128c371
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py
@@ -0,0 +1,3 @@
+from .anonymisation_pseudonymisation_techniques import hash, redact, replace, encrypt  # noqa: F401
+
+from .depseudonymisation_techniques import decrypt  # noqa: F401
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py b/src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py
new file mode 100644
index 0000000..ce15613
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py
@@ -0,0 +1,42 @@
+import hashlib
+from cryptography.fernet import Fernet
+
+
+def hash(value: str, algorithm: str = "sha256") -> str:
+    """Return the hex digest of ``value`` under ``algorithm`` (default: SHA-256).
+
+    ``value`` is stringified and UTF-8 encoded before hashing; ``algorithm`` is
+    passed to ``hashlib.new``."""
+    hasher = hashlib.new(algorithm)
+    hasher.update(str(value).encode("utf-8"))
+    return hasher.hexdigest()
+
+
+def redact(value: str) -> str:
+    """
+    Redact the value: the input is ignored and an empty string is returned.
+    """
+    return ""
+
+
+def replace(value: str, new_value) -> str:
+    """
+    Replace the value: the input is ignored and ``new_value`` is returned unchanged.
+    """
+    return new_value
+
+
+def encrypt(value: str, key: bytes) -> str:
+    """Encrypt ``str(value)`` with the Fernet ``key``.
+
+    Returns the resulting token decoded to text.
+    """
+    text, cipher = str(value), Fernet(key)
+    return cipher.encrypt(text.encode()).decode()
+
+
+def retain(value: str) -> str:
+    """
+    Retain the value: the input is returned as-is, without any transformation.
+    """
+    return value
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py b/src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py
new file mode 100644
index 0000000..4e0937c
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py
@@ -0,0 +1,9 @@
+from cryptography.fernet import Fernet
+
+
+def decrypt(value: str, key: bytes) -> str:
+    """Decrypt the Fernet token ``value`` with ``key``.
+
+    Returns the recovered plaintext decoded to text."""
+    cipher = Fernet(key)
+    return cipher.decrypt(value.encode()).decode()
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py b/src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py
new file mode 100644
index 0000000..f8f0ffe
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py
@@ -0,0 +1,428 @@
+import importlib
+import importlib.abc
+import importlib.machinery
+import re
+import sys
+import types
+
+
+# ---------------------------------------------------------------------------
+# Stub out the `transformers` and `spacy_transformers` packages before any
+# other import triggers spaCy's entry-point scan or scrubadub_spacy's runtime
+# import of spacy_transformers.pipeline_component.
+# ---------------------------------------------------------------------------
+_STUB_PACKAGES = ("transformers", "spacy_transformers")
+
+
+class _StubModule(types.ModuleType):
+    """Module whose missing attributes resolve to a freshly created, empty dummy class."""
+
+    def __getattr__(self, name: str):  # only reached when normal attribute lookup fails
+        return type(name, (), {})  # a new class named after the attribute, on every access
+
+
+class _StubFinder(importlib.abc.MetaPathFinder):
+    """Meta-path finder that claims every import of, or beneath, a package in ``_STUB_PACKAGES``."""
+
+    def find_spec(self, fullname, path=None, target=None):  # noqa: ANN001
+        # Returning None leaves the import to the remaining finders on sys.meta_path.
+        if any(fullname == pkg or fullname.startswith(pkg + ".") for pkg in _STUB_PACKAGES):
+            return importlib.machinery.ModuleSpec(fullname, _StubLoader())
+        return None
+
+
+class _StubLoader(importlib.abc.Loader):  # builds empty stub packages; nothing to execute
+    def create_module(self, spec):  # noqa: ANN001
+        stub = _StubModule(spec.name)
+        stub.__spec__ = spec
+        stub.__path__ = []  # an (empty) __path__ marks the module as a package
+        return stub
+
+    def exec_module(self, module):  # noqa: ANN001
+        return None
+
+
+# Install the finder once, before scrubadub / spacy are imported.
+if not any(isinstance(f, _StubFinder) for f in sys.meta_path):
+    sys.meta_path.insert(0, _StubFinder())
+# ---------------------------------------------------------------------------
+
+
+import scrubadub  # noqa: E402
+import scrubadub_spacy  # noqa: E402
+from cryptography.fernet import InvalidToken  # noqa: E402
+from dagster import Out, Output, get_dagster_logger, op  # noqa: E402
+from scrubadub.detectors import RegexDetector  # noqa: E402
+from scrubadub.filth import CredentialFilth, NameFilth  # noqa: E402
+
+from template_code_location.field_level_pseudo_anonymisation.techniques import (
+    anonymisation_pseudonymisation_techniques as anon_pseudo_funcs,
+)
+from template_code_location.field_level_pseudo_anonymisation.techniques import (
+    depseudonymisation_techniques as depseudo_funcs,
+)
+
+from .config_models import (
+    PII_MAPPING,
+    AnonymisePseudonymizeUnstructuredConfig,
+    DepseudonymizeUnstructuredConfig,
+    PIIEntityEnum,
+    PseudoTechniqueConfig,
+    SupportedLanguages,
+)
+from .utils import create_get_encryption_key
+
+
+def _initialize_scrubber(language: str) -> scrubadub.Scrubber:  # Scrubber for ``language`` plus spaCy and custom credential detectors
+    class SIMPLCredentialDetector(RegexDetector):
+        """
+        Remove username/password combinations from dirty ``text``.
+        """
+
+        filth_cls = CredentialFilth
+        name = "credential"
+        autoload = True
+
+        regex = re.compile(
+            r"""
+            (?:username|login|u:)\s*(?::\s*)?
+            (?P<username>[\w.\-@+]+)
+            [\s\S]{0,500}?
+            (?:password|pw|p:)\s*(?::\s*)?
+            (?P<password>[^\s]+)
+        """,
+            re.MULTILINE | re.VERBOSE | re.IGNORECASE,
+        )
+
+    locale = SupportedLanguages.get_locale(language)
+    scrubber = scrubadub.Scrubber(locale=locale)
+
+    model_name = SupportedLanguages.get_language_model(language)
+    spacy_detector = scrubadub_spacy.detectors.SpacyEntityDetector(model=model_name)
+    spacy_detector.named_entities = {
+        "PERSON",
+        "PER",
+        "ORG",
+        "persName",
+        "PRS",
+    }  # Set after construction because the scrubadub_spacy constructor upper-cases every entry
+    spacy_detector.filth_cls_map["persName"] = NameFilth  # Required because the Polish (PL) model uses persName
+    spacy_detector.filth_cls_map["PRS"] = NameFilth  # Required for the Swedish model, which uses PRS
+    scrubber.add_detector(spacy_detector)
+    if language in ["en", "de"]:
+        scrubber.add_detector(
+            scrubadub.detectors.DateOfBirthDetector
+        )  # add the optional date-of-birth detector (only for "en" and "de")
+    scrubber.remove_detector(
+        scrubadub.detectors.CredentialDetector
+    )  # drop the stock credential detector; the custom SIMPL one added next replaces it
+    scrubber.add_detector(SIMPLCredentialDetector())
+    return scrubber
+
+
+def _map_filth_to_pii_enum(filth) -> PIIEntityEnum | None:
+    """Return the first ``PII_MAPPING`` key whose value equals the class name of ``filth``.
+
+    Falls back to ``None`` when no entry matches."""
+    wanted = filth.__class__.__name__
+    return next((pii for pii, filth_name in PII_MAPPING.items() if filth_name == wanted), None)
+
+
+def _get_metrics(metrics_dict: dict, language: str) -> str:
+    """Render ``metrics_dict`` as a Markdown report: summary, per-type counts, techniques."""
+    header = f"""
+## PII Anonymization Report
+
+### Summary
+- **Total PII Detected**: {metrics_dict['total_pii_detected']}
+- **Original Length**: {metrics_dict['text_length_original']} chars
+- **Anonymized Length**: {metrics_dict['text_length_anonymised']} chars
+- **Language**: {language}
+
+### PII by Type
+| Entity Type | Count |
+|-------------|-------|
+"""
+    count_rows = [
+        f"| {pii_type} | {count} |\n" for pii_type, count in metrics_dict["pii_by_type"].items()
+    ]
+    technique_rows = [
+        f"- **{pii}**: {technique}\n"
+        for pii, technique in metrics_dict["techniques_applied"].items()
+    ]
+    return "".join([header, *count_rows, "\n### Techniques Applied\n", *technique_rows])
+
+
+def _build_metrics_dict(
+    pii_counts: dict[str, int],
+    text: str,
+    anon_text: str,
+    technique_map: dict[PIIEntityEnum, PseudoTechniqueConfig],
+) -> dict:
+    """Collect the figures rendered by ``_get_metrics``.
+
+    ``pii_counts`` is stored as-is (not copied) under ``pii_by_type``; the technique
+    of each ``technique_map`` entry is reported under the PII's ``name``."""
+    return {
+        "total_pii_detected": sum(pii_counts.values()),
+        "pii_by_type": pii_counts,
+        "text_length_original": len(text),
+        "text_length_anonymised": len(anon_text),
+        "techniques_applied": {p.name: c.technique.type for p, c in technique_map.items()},
+    }
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def anonymize_pseudonymize_unstructured(
+    context, config: AnonymisePseudonymizeUnstructuredConfig, text: str
+):
+    logger = get_dagster_logger()
+
+    if text is None or not text.strip():
+        raise ValueError("Input text cannot be None or empty")
+
+    logger.debug(
+        f"Starting unstructured PII anonymization | lang={config.language.value} "
+        f"| input_chars={len(text)}"
+    )
+
+    # --- Filth detection: any scrubber failure is re-raised as RuntimeError ---
+    try:
+        scrubber = _initialize_scrubber(config.language.value)
+        filths = list(scrubber.iter_filth(text))
+        logger.info(f"Detected {len(filths)} potential PII entities before filtering.")
+    except Exception as e:
+        logger.error(f"Scrubber initialization/detection failed | lang={config.language.value}")
+        raise RuntimeError(f"PII detection failed for language '{config.language.value}'") from e
+
+    # --- Build technique routing map (PII entry -> configured used_function item) ---
+    technique_map = _build_technique_map(config)
+    logger.debug(
+        "Technique map constructed: "
+        + ", ".join(f"{pii.name}->{cfg.technique.type}" for pii, cfg in technique_map.items())
+    )
+
+    replacements = []  # (start, end, replacement) tuples, applied in one pass at the end
+    key_cache = {}  # key name -> encryption key, filled by _prepare_params
+    pii_counts = {}  # PII name -> number of spans queued for replacement
+
+    # --- Process filths: queue one replacement per span that maps to a known PII type ---
+    for idx, filth in enumerate(filths, start=1):
+        pii_enum = _map_filth_to_pii_enum(filth)
+
+        if pii_enum is None:
+            logger.debug(f"[{idx}] Skipping unknown filth class={filth.__class__.__name__}")
+            continue
+
+        start_idx, end_idx = _extract_span(filth, logger, idx)
+        if start_idx is None:
+            continue
+
+        original_value = text[start_idx:end_idx]
+        technique_cfg = technique_map.get(pii_enum)
+
+        # No technique configured: the helper queues a placeholder and counts the span
+        if technique_cfg is None:
+            _handle_missing_technique(
+                pii_enum,
+                start_idx,
+                end_idx,
+                text,
+                pii_counts,
+                replacements,
+                logger,
+                idx,
+            )
+            continue
+
+        # Apply configured technique to the matched text
+        t = technique_cfg.technique
+        params = _prepare_params(t, key_cache, idx, logger)
+        replacement = _apply_technique(original_value, t.type, params, pii_enum, idx, logger)
+
+        replacements.append((start_idx, end_idx, replacement))
+        pii_counts[pii_enum.name] = pii_counts.get(pii_enum.name, 0) + 1
+
+    # --- Apply replacements to produce the anonymised text ---
+    anon_text = _apply_replacements(text, replacements, logger)
+
+    logger.info(f"Anonymisation completed, total PII counts: {pii_counts}")
+
+    metrics_report = _get_metrics(
+        _build_metrics_dict(pii_counts, text, anon_text, technique_map),
+        config.language.value,
+    )
+
+    yield Output(value=anon_text, output_name="data")
+    yield Output(value=metrics_report, output_name="metrics")
+
+
+@op(out={"data": Out(), "metrics": Out()})
+def depseudonymize_unstructured(context, config: DepseudonymizeUnstructuredConfig, input_text: str):
+    # The helper is a generator yielding exactly two items (restored text, metrics dict),
+    # so tuple-unpacking it runs it to completion.
+    # Output "data" carries the restored text; output "metrics" the helper's metrics dict.
+    restored_text, run_metrics = _apply_depseudonimisation_function(
+        config, input_text, depseudo_funcs
+    )
+    yield Output(value=restored_text, metadata={}, output_name="data")
+    yield Output(value=run_metrics, output_name="metrics")
+
+
+def _apply_depseudonimisation_function(config, input_text: str, funcs_module):
+    """
+    Searches and depseudonymizes text segments formatted as:
+        {technique:pseudonymized_value}
+
+    This is a generator: it yields the restored text first and then a metrics dict
+    ``{"total_depseudo_count": n}``; callers tuple-unpack the two items.
+
+    Raises ``ValueError`` for an invalid Fernet token and ``RuntimeError`` for any
+    other technique failure; the original exception is chained as the cause.
+    """
+    total_depseudo_count = 0
+    depseudonimized_text = input_text  # Initialize with input text
+    for used_function in config.used_function:
+        technique = used_function.technique
+        func_name = technique.type
+        func = getattr(funcs_module, func_name)
+        pseudo_anon_func = ""
+        key_name = None  # only set for "decrypt"; referenced by the error message below
+        # Everything except the routing field is forwarded to the technique
+        params = technique.model_dump()
+        del params["type"]
+        if func_name == "decrypt":
+            key_name = technique.key_name
+            del params["key_name"]
+            pseudo_anon_func = "encrypt"
+            params["key"] = create_get_encryption_key(func_name, key_name)
+        # Regex pattern for this technique, e.g. {encrypt:...}
+        pattern = rf"\{{{pseudo_anon_func}:([^}}]+)\}}"
+
+        def replace_match(match):
+            nonlocal total_depseudo_count
+            total_depseudo_count += 1
+            try:
+                return func(match.group(1), **params)
+            except InvalidToken as exc:
+                raise ValueError(
+                    f"Invalid Fernet token while decrypting value using key '{key_name}'. "
+                    f"The data may not be encrypted or the key may be incorrect."
+                ) from exc
+            except Exception as e:
+                raise RuntimeError(f"Error during depseudonymisation with '{func_name}': {e}") from e
+
+        depseudonimized_text = re.sub(pattern, replace_match, depseudonimized_text)
+
+    yield depseudonimized_text
+    yield {"total_depseudo_count": total_depseudo_count}
+
+
+def _build_technique_map(config):
+    """Map each PII entry of every configured technique to its ``used_function`` item.
+
+    When the same PII is listed more than once, the last item wins."""
+    functions = config.used_function
+    return {pii: func_cfg for func_cfg in functions for pii in func_cfg.technique.pii}
+
+
+def _extract_span(filth, logger, idx):  # -> (start, end), or (None, None) when a bound is missing
+    begin = getattr(filth, "beg", getattr(filth, "start", None))  # ``beg`` wins over ``start``
+    finish = getattr(filth, "end", None)
+    if begin is not None and finish is not None:
+        return begin, finish
+    logger.debug(f"[{idx}] Filth missing span attributes; skipping.")
+    return None, None
+
+
+def _handle_missing_technique(
+    pii_enum, start_idx, end_idx, text, pii_counts, replacements, logger, idx
+):
+    """Queue a ``{{PII_NAME}}`` placeholder for a span that has no configured technique;
+    updates ``replacements``/``pii_counts`` in place and never logs the matched PII."""
+    logger.debug(
+        f"[{idx}] PII={pii_enum.name} span=({start_idx},{end_idx}) "
+        f"- No technique configured, using placeholder"
+    )
+    replacements.append((start_idx, end_idx, f"{{{{{pii_enum.name}}}}}"))
+    pii_counts[pii_enum.name] = pii_counts.get(pii_enum.name, 0) + 1
+
+
+def _prepare_params(t, key_cache, idx, logger):
+    """Build the keyword arguments for technique ``t`` from its dumped fields.
+
+    ``type``/``pii`` are dropped; for ``encrypt`` ``key_name`` is swapped for the key itself
+    (fetched once per name via ``key_cache``). Key failures surface as ``RuntimeError``."""
+    params = t.model_dump()
+    del params["type"], params["pii"]
+    if t.type != "encrypt":
+        return params
+    try:
+        if t.key_name not in key_cache:
+            logger.debug(f"[{idx}] Retrieving/generating Vault key name={t.key_name} for encryption")
+            key_cache[t.key_name] = create_get_encryption_key("encrypt", t.key_name)
+        params["key"] = key_cache[t.key_name]
+        del params["key_name"]
+        logger.debug(f"[{idx}] Encryption key prepared")
+    except Exception as e:
+        raise RuntimeError(
+            f"Encryption key retrieval failed for key '{t.key_name}': {type(e).__name__}"
+        ) from e
+    return params
+
+
+def _apply_technique(original_value, t_type, params, pii_enum, idx, logger):
+    """Run technique ``t_type`` on ``original_value`` and return the replacement text.
+
+    An unknown technique name yields a ``{UNIMPL_<type>_<PII>}`` placeholder; a failure
+    inside a known technique (including ``AttributeError``) raises ``RuntimeError``."""
+    func = getattr(anon_pseudo_funcs, t_type, None)  # resolved outside the try on purpose
+    if func is None:
+        logger.warning(f"[{idx}] Technique '{t_type}' not recognized; inserting placeholder.")
+        return f"{{UNIMPL_{t_type}_{pii_enum.name}}}"
+    try:
+        replacement = func(original_value, **params)
+    except Exception as e:
+        raise RuntimeError(
+            f"Technique '{t_type}' failed for PII type '{pii_enum.name}': {type(e).__name__}"
+        ) from e
+    if t_type == "encrypt":
+        replacement = f"{{encrypt:{replacement}}}"  # tag matched again on depseudonymisation
+    logger.debug(f"[{idx}] {t_type.capitalize()} complete")
+    return replacement
+
+
+def _apply_replacements(text, replacements, logger):
+    """Return ``text`` with each ``(start, end, replacement)`` span substituted.
+
+    Spans are applied in ascending ``start`` order (``replacements`` is sorted in place).
+    On overlap the earlier span wins: a later span is trimmed to begin where the previous
+    one ended, and a span lying entirely inside already-replaced text is dropped.
+    """
+    if not replacements:
+        logger.info("No PII detected; returning original text.")
+        return text
+
+    logger.debug(f"Applying {len(replacements)} replacements to text body.")
+    replacements.sort(key=lambda r: r[0])
+
+    result_parts = []
+    last = 0  # index just past the text consumed so far
+    prev = None  # (start, end) of the span applied most recently
+    for start, end, repl in replacements:
+        if start < last:
+            logger.warning(
+                f"Overlapping PII detected at positions "
+                f"({prev[0]},{prev[1]}) "
+                f"and ({start},{end}). "
+                f"Using first match."
+            )
+            if end <= last:
+                continue  # nested in the previous span: emitting it would rewind ``last``
+            start = last
+        result_parts.append(text[last:start])
+        result_parts.append(repl)
+        last, prev = end, (start, end)
+
+    result_parts.append(text[last:])
+    return "".join(result_parts)
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/utils.py b/src/template_code_location/field_level_pseudo_anonymisation/utils.py
new file mode 100644
index 0000000..25ebd75
--- /dev/null
+++ b/src/template_code_location/field_level_pseudo_anonymisation/utils.py
@@ -0,0 +1,32 @@
+import os
+import hvac
+from hvac.exceptions import InvalidPath
+from cryptography.fernet import Fernet
+
+
+def create_get_encryption_key(func_name: str, key_name: str) -> bytes:
+    """Return the Fernet key stored under ``key_name``, creating it when encrypting.
+
+    Connection settings, key folder and mount point are read from the environment.
+    A missing secret is generated and stored when ``func_name == "encrypt"``; otherwise,
+    and for any other read failure, ``ValueError`` is raised with the cause chained.
+    """
+    client = hvac.Client(url=os.getenv("OPENBAO_URL"), token=os.getenv("OPENBAO_TOKEN"))
+    secret_folder = os.getenv("ENCRYPTION_KEYS_PATH")
+    secret_path = f"{secret_folder}/{key_name}" if secret_folder else key_name
+    mount_point = os.getenv("ENCRYPTION_KEYS_MOUNT_POINT")
+    try:
+        secret_response = client.secrets.kv.v2.read_secret_version(
+            path=secret_path, mount_point=mount_point
+        )
+        key_value = secret_response["data"]["data"]["value"]
+    except InvalidPath as exc:
+        if func_name != "encrypt":
+            raise ValueError(f"Fernet key '{key_name}' not found in Vault for decrypt.") from exc
+        key_value = Fernet.generate_key().decode()
+        client.secrets.kv.v2.create_or_update_secret(
+            path=secret_path, mount_point=mount_point, secret={"value": key_value}
+        )
+    except Exception as e:
+        raise ValueError(f"Error while reading Fernet key '{key_name}': {e}") from e
+    return key_value.encode()
diff --git a/src/template_code_location/jobs/__init__.py b/src/template_code_location/jobs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/template-code-location/jobs/jobs.py b/src/template_code_location/jobs/jobs.py
similarity index 100%
rename from src/template-code-location/jobs/jobs.py
rename to src/template_code_location/jobs/jobs.py
diff --git a/src/template_code_location/ops/__init__.py b/src/template_code_location/ops/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/template-code-location/ops/ops.py b/src/template_code_location/ops/ops.py
similarity index 100%
rename from src/template-code-location/ops/ops.py
rename to src/template_code_location/ops/ops.py
diff --git a/src/template_code_location/repository.py b/src/template_code_location/repository.py
new file mode 100644
index 0000000..cf97606
--- /dev/null
+++ b/src/template_code_location/repository.py
@@ -0,0 +1,65 @@
+from dagster import Definitions
+from util_services.resources import s3_resource
+from util_services.sensors import (
+    notify_success,
+    notify_failure,
+    notify_canceled
+)
+from util_services.custom_json_logger import simpl_json_logger
+
+# Data processing jobs
+from template_code_location.data_processing.jobs import (
+    remove_duplicates_job_s3,
+    fill_missing_values_job_s3,
+    standardize_categorical_values_job_s3,
+    correct_typos_job_s3,
+    normalize_numeric_min_max_job_s3,
+    normalize_datetime_job_s3,
+    normalize_coordinates_job_s3,
+    add_global_aggregations_job_s3,
+    filter_dataset_job_s3,
+)
+
+# Dataframe-level anonymisation jobs
+from template_code_location.dataframe_level_anonymisation.jobs import (
+    k_anonymity_job_s3,
+    l_diversity_job_s3,
+    t_closeness_job_s3,
+    read_write_semistructured_job_s3,
+)
+
+# Field-level pseudo-anonymisation jobs
+from template_code_location.field_level_pseudo_anonymisation.jobs import (
+    anonymize_pseudonymize_structured_job_s3,
+    depseudonymize_structured_job_s3,
+    anonymize_pseudonymize_unstructured_job_s3,
+    depseudonymize_unstructured_job_s3,
+)
+
+defs = Definitions(
+    jobs=[
+        # Data processing (imported from data_processing.jobs)
+        remove_duplicates_job_s3,
+        fill_missing_values_job_s3,
+        standardize_categorical_values_job_s3,
+        correct_typos_job_s3,
+        normalize_numeric_min_max_job_s3,
+        normalize_datetime_job_s3,
+        normalize_coordinates_job_s3,
+        add_global_aggregations_job_s3,
+        filter_dataset_job_s3,
+        # Dataframe-level anonymisation (imported from dataframe_level_anonymisation.jobs)
+        k_anonymity_job_s3,
+        l_diversity_job_s3,
+        t_closeness_job_s3,
+        read_write_semistructured_job_s3,
+        # Field-level pseudo-anonymisation (imported from field_level_pseudo_anonymisation.jobs)
+        anonymize_pseudonymize_structured_job_s3,
+        depseudonymize_structured_job_s3,
+        anonymize_pseudonymize_unstructured_job_s3,
+        depseudonymize_unstructured_job_s3,
+    ],
+    sensors=[notify_success, notify_failure, notify_canceled],  # sensors from util_services.sensors
+    resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},  # "s3" key -> configured s3_resource
+    loggers={"simpl": simpl_json_logger},  # registered under the logger key "simpl"
+)

From d14b2dfac46fd193705231eb38f618e5933b7add Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Fri, 24 Apr 2026 18:42:07 +0200
Subject: [PATCH 02/15] feat(SIMPL-24642): migrate tests from 3 source repos
 with updated imports

---
 tests/__init__.py                             |    1 +
 tests/data_processing/__init__.py             |    1 +
 tests/data_processing/conftest.py             |   53 +
 tests/data_processing/conftest_utils.py       |    7 +
 tests/data_processing/test_config_models.py   |  202 +++
 tests/data_processing/test_integration.py     |  185 +++
 tests/data_processing/test_jobs.py            |   56 +
 tests/data_processing/test_ops.py             |  700 +++++++++++
 .../dataframe_level_anonymisation/__init__.py |    1 +
 .../config_models/__init__.py                 |    1 +
 .../config_models/test_base_config.py         |   54 +
 .../config_models/test_hierarchies.py         |   48 +
 .../config_models/test_k_anonymity_config.py  |   41 +
 .../config_models/test_l_diversity_config.py  |   44 +
 .../config_models/test_t_closeness_config.py  |   56 +
 .../test_jobs.py                              |   44 +
 .../dataframe_level_anonymisation/test_ops.py |  230 ++++
 .../test_utils.py                             |   70 ++
 .../__init__.py                               |    1 +
 .../conftest.py                               |  444 +++++++
 .../test_config_models_coverage.py            |  633 ++++++++++
 .../test_decrypt_structured.py                | 1090 ++++++++++++++++
 .../test_decrypt_unstructured.py              |  288 +++++
 .../test_encrypt_structured.py                | 1119 +++++++++++++++++
 .../test_encrypt_unstructured.py              |  853 +++++++++++++
 .../test_jobs.py                              |   58 +
 26 files changed, 6280 insertions(+)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/data_processing/__init__.py
 create mode 100644 tests/data_processing/conftest.py
 create mode 100644 tests/data_processing/conftest_utils.py
 create mode 100644 tests/data_processing/test_config_models.py
 create mode 100644 tests/data_processing/test_integration.py
 create mode 100644 tests/data_processing/test_jobs.py
 create mode 100644 tests/data_processing/test_ops.py
 create mode 100644 tests/dataframe_level_anonymisation/__init__.py
 create mode 100644 tests/dataframe_level_anonymisation/config_models/__init__.py
 create mode 100644 tests/dataframe_level_anonymisation/config_models/test_base_config.py
 create mode 100644 tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
 create mode 100644 tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
 create mode 100644 tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
 create mode 100644 tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
 create mode 100644 tests/dataframe_level_anonymisation/test_jobs.py
 create mode 100644 tests/dataframe_level_anonymisation/test_ops.py
 create mode 100644 tests/dataframe_level_anonymisation/test_utils.py
 create mode 100644 tests/field_level_pseudo_anonymisation/__init__.py
 create mode 100644 tests/field_level_pseudo_anonymisation/conftest.py
 create mode 100644 tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
 create mode 100644 tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
 create mode 100644 tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
 create mode 100644 tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
 create mode 100644 tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
 create mode 100644 tests/field_level_pseudo_anonymisation/test_jobs.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/data_processing/__init__.py b/tests/data_processing/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/data_processing/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/data_processing/conftest.py b/tests/data_processing/conftest.py
new file mode 100644
index 0000000..9eda2af
--- /dev/null
+++ b/tests/data_processing/conftest.py
@@ -0,0 +1,53 @@
+"""Pytest configuration and shared fixtures."""
+
+import pytest
+import pandas as pd
+from unittest.mock import MagicMock, patch
+import sys
+from dagster import build_op_context
+
+# Register a MagicMock as the `spellchecker` module so imports of it never need the real package (unconditional: the mock is used even when the package is installed).
+sys.modules['spellchecker'] = MagicMock()
+
+
+@pytest.fixture
+def mock_context():
+    """Provide the Dagster op context that the op tests pass as their first argument."""
+    # Built with no arguments; nothing here customises config or resources.
+    return build_op_context()
+
+
+@pytest.fixture
+def sample_dataframe():
+    """Return a five-row frame with mixed casing, one missing Age and three identical 'John Doe' rows."""
+    return pd.DataFrame(data={
+        'Name': ['John Doe', 'jane smith', 'John Doe', 'bob johnson', 'John Doe'],
+        'Age': [25, 30, 25, None, 25],
+        'City': ['New York', 'los angeles', 'New York', 'chicago', 'New York'],
+        'Status': ['Active', 'INACTIVE', 'Active', 'penDing', 'Active'],
+    })
+
+
+@pytest.fixture
+def sample_dataframe_with_typos():
+    """Return a three-row frame whose text columns contain misspelled words."""
+    return pd.DataFrame(data={
+        'Name': ['jon doe', 'jane smith', 'bob jonson'],
+        'Description': ['developer', 'analst', 'enginer'],
+    })
+
+
+@pytest.fixture
+def empty_dataframe():
+    """Return a DataFrame that has neither rows nor columns."""
+    return pd.DataFrame(data=None)
+
+
+@pytest.fixture
+def dataframe_with_missing_values():
+    """Return a frame with a partly-missing numeric column, a partly-missing text column and an all-None column."""
+    return pd.DataFrame(data={
+        'Column1': [1, None, 3, None, 5],
+        'Column2': ['a', 'b', None, 'd', None],
+        'Column3': [None, None, None, None, None],
+    })
diff --git a/tests/data_processing/conftest_utils.py b/tests/data_processing/conftest_utils.py
new file mode 100644
index 0000000..19d2f59
--- /dev/null
+++ b/tests/data_processing/conftest_utils.py
@@ -0,0 +1,7 @@
+"""Configuration utilities for testing."""
+
+import os
+import sys
+
+# Add the repository's src/ directory to sys.path; this file sits in tests/data_processing/, so src/ is two levels up.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')))
diff --git a/tests/data_processing/test_config_models.py b/tests/data_processing/test_config_models.py
new file mode 100644
index 0000000..989054f
--- /dev/null
+++ b/tests/data_processing/test_config_models.py
@@ -0,0 +1,202 @@
+"""Unit tests for configuration models."""
+
+import pytest
+from pydantic import ValidationError
+from template_code_location.data_processing.config_models import (
+    FillMissingConfiguration,
+    ColumnsSelectConfiguration,
+    SpellCheckConfiguration,
+    AggregationConfiguration
+)
+
+
+class TestColumnsSelectConfiguration:
+    """Checks how ColumnsSelectConfiguration stores its ``columns`` list."""
+
+    def test_default_columns(self):
+        """Without arguments, ``columns`` is ['Name']."""
+        expected = ['Name']
+        assert ColumnsSelectConfiguration().columns == expected
+
+    def test_custom_columns(self):
+        """Three distinct names come back unchanged."""
+        cfg = ColumnsSelectConfiguration(columns=['Col1', 'Col2', 'Col3'])
+        assert cfg.columns == ['Col1', 'Col2', 'Col3']
+
+    def test_empty_columns_list(self):
+        """An empty list is accepted and stays empty."""
+        cfg = ColumnsSelectConfiguration(columns=[])
+        assert cfg.columns == []
+
+    def test_single_column(self):
+        """A one-element list is kept as is."""
+        cfg = ColumnsSelectConfiguration(columns=['SingleCol'])
+        assert cfg.columns == ['SingleCol']
+
+    def test_columns_with_special_characters(self):
+        """Names containing '-', '_' and '.' are stored verbatim."""
+        cfg = ColumnsSelectConfiguration(columns=['Col-1', 'Col_2', 'Col.3'])
+        assert cfg.columns == ['Col-1', 'Col_2', 'Col.3']
+
+    def test_duplicate_columns_are_removed(self):
+        """Duplicates are dropped while first-occurrence order is kept (dict.fromkeys, per the original note)."""
+        cfg = ColumnsSelectConfiguration(columns=['A', 'B', 'A', 'C', 'B'])
+
+        assert cfg.columns == ['A', 'B', 'C']
+
+    def test_duplicate_default_behavior(self):
+        """A list made only of repeats of one name collapses to a single entry."""
+        cfg = ColumnsSelectConfiguration(columns=['Name', 'Name', 'Name'])
+        assert cfg.columns == ['Name']
+
+
+class TestFillMissingConfiguration:
+    """Checks that FillMissingConfiguration keeps the ``fill_map`` it is given."""
+
+    def test_default_fill_map(self):
+        """Without arguments, ``fill_map`` maps 'Age' to 'UNKNOWN_AGE'."""
+        cfg = FillMissingConfiguration()
+
+        assert cfg.fill_map == {'Age': 'UNKNOWN_AGE'}
+
+    def test_custom_fill_map(self):
+        """A three-entry mapping is stored unchanged."""
+        mapping = {'Age': '0', 'Name': 'UNKNOWN', 'City': 'N/A'}
+        cfg = FillMissingConfiguration(fill_map=mapping)
+
+        assert cfg.fill_map == mapping
+
+    def test_empty_fill_map(self):
+        """An empty mapping is accepted and stays empty."""
+        cfg = FillMissingConfiguration(fill_map={})
+
+        assert cfg.fill_map == {}
+
+    def test_fill_map_with_numeric_values(self):
+        """Replacement values that look like numbers are still passed as strings."""
+        mapping = {'Age': '0', 'Score': '-1', 'Count': '999'}
+        cfg = FillMissingConfiguration(fill_map=mapping)
+
+        assert cfg.fill_map == mapping
+
+    def test_fill_map_with_string_values(self):
+        """Plain text replacement values are stored unchanged."""
+        mapping = {'Name': 'Unknown', 'Email': 'no-email'}
+        cfg = FillMissingConfiguration(fill_map=mapping)
+
+        assert cfg.fill_map == mapping
+
+    def test_fill_map_mixed_types(self):
+        """Integer-like, text and float-like replacements (all given as strings) coexist."""
+        mapping = {'IntCol': '0', 'StrCol': 'Unknown', 'FloatCol': '0.0'}
+        cfg = FillMissingConfiguration(fill_map=mapping)
+
+        assert cfg.fill_map == mapping
+
+
+class TestSpellCheckConfiguration:
+    """Checks the ``columns`` and ``language`` fields of SpellCheckConfiguration."""
+
+    def test_default_spell_check_config(self):
+        """Defaults are columns ['Name'] and language 'en'."""
+        cfg = SpellCheckConfiguration()
+
+        assert cfg.columns == ['Name']
+        assert cfg.language == 'en'
+
+    def test_custom_spell_check_config(self):
+        """Explicit columns and language are both stored."""
+        cfg = SpellCheckConfiguration(
+            columns=['Description', 'Notes'],
+            language='es',
+        )
+
+        assert cfg.columns == ['Description', 'Notes']
+        assert cfg.language == 'es'
+
+    def test_spell_check_all_languages(self):
+        """Each of the seven language codes below is accepted."""
+        codes = ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl']
+
+        for code in codes:
+            cfg = SpellCheckConfiguration(language=code)
+            assert cfg.language == code
+
+    def test_spell_check_invalid_language(self):
+        """An unknown language code is rejected with a ValidationError."""
+        with pytest.raises(ValidationError):
+            SpellCheckConfiguration(language='invalid')
+
+    def test_spell_check_multiple_columns(self):
+        """Four distinct column names are stored unchanged."""
+        names = ['Col1', 'Col2', 'Col3', 'Col4']
+        cfg = SpellCheckConfiguration(columns=names)
+
+        assert cfg.columns == names
+
+    def test_spell_check_empty_columns(self):
+        """An empty column list is accepted; language keeps its default."""
+        cfg = SpellCheckConfiguration(columns=[])
+
+        assert cfg.columns == []
+        assert cfg.language == 'en'
+
+    def test_spell_check_inheritance(self):
+        """A SpellCheckConfiguration is also a ColumnsSelectConfiguration and has both fields."""
+        cfg = SpellCheckConfiguration()
+
+        assert isinstance(cfg, ColumnsSelectConfiguration)
+        assert hasattr(cfg, 'columns')
+        assert hasattr(cfg, 'language')
+
+    @pytest.mark.parametrize("language", ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl'])
+    def test_spell_check_languages_parametrized(self, language):
+        """Same language check as above, reported as one test case per code."""
+        cfg = SpellCheckConfiguration(language=language)
+        assert cfg.language == language
+
+class TestAggregationConfiguration:
+    """Checks the ``columns`` and ``operation`` fields of AggregationConfiguration."""
+
+    def test_aggregation_default_config(self):
+        """Defaults are columns ['Name'] and operation 'sum'."""
+        cfg = AggregationConfiguration()
+
+        assert cfg.columns == ['Name']
+        assert cfg.operation == 'sum'
+
+    @pytest.mark.parametrize("op", ["sum", "mean", "min", "max", "count"])
+    def test_aggregation_valid_operations(self, op):
+        """Each of the five listed operations is accepted."""
+        cfg = AggregationConfiguration(operation=op)
+        assert cfg.operation == op
+
+    def test_aggregation_invalid_operation(self):
+        """An unknown operation raises ValidationError naming the rejected value."""
+        with pytest.raises(ValidationError) as caught:
+            AggregationConfiguration(operation="invalid_op")
+
+        assert "Invalid aggregation operation 'invalid_op'" in str(caught.value)
+
+    def test_aggregation_custom_columns(self):
+        """Explicit columns and operation are both stored."""
+        cfg = AggregationConfiguration(columns=['Price', 'Quantity'], operation='mean')
+
+        assert cfg.columns == ['Price', 'Quantity']
+        assert cfg.operation == 'mean'
+
+    def test_aggregation_inheritance(self):
+        """An AggregationConfiguration is also a ColumnsSelectConfiguration and has both fields."""
+        cfg = AggregationConfiguration()
+
+        assert isinstance(cfg, ColumnsSelectConfiguration)
+        assert hasattr(cfg, 'columns')
+        assert hasattr(cfg, 'operation')
+
+    def test_aggregation_model_dump(self):
+        """model_dump() exposes both fields under their own names."""
+        cfg = AggregationConfiguration(columns=['Value'], operation='max')
+        dumped = cfg.model_dump()
+
+        assert dumped['columns'] == ['Value']
+        assert dumped['operation'] == 'max'
diff --git a/tests/data_processing/test_integration.py b/tests/data_processing/test_integration.py
new file mode 100644
index 0000000..c9d01eb
--- /dev/null
+++ b/tests/data_processing/test_integration.py
@@ -0,0 +1,185 @@
+"""Integration tests for data processing jobs."""
+
+import pytest
+import pandas as pd
+from unittest.mock import patch, MagicMock
+from template_code_location.data_processing.ops import (
+    remove_duplicates,
+    fill_missing_values,
+    standardize_categorical_values,
+    correct_typos
+)
+from template_code_location.data_processing.config_models import (
+    FillMissingConfiguration,
+    ColumnsSelectConfiguration,
+    SpellCheckConfiguration
+)
+
+
+class TestPipelineIntegration:
+    """Chains several data-processing ops by calling them one after another."""
+
+    def test_pipeline_remove_duplicates_then_standardize(self, mock_context):
+        """De-duplicate four rows down to three, then standardize Name and City."""
+        raw = pd.DataFrame({
+            'Name': ['  JOHN DOE  ', 'jane smith', '  JOHN DOE  ', 'bob johnson'],
+            'City': ['NEW YORK', 'los angeles', 'NEW YORK', 'chicago'],
+        })
+
+        # First stage: the repeated JOHN DOE row is dropped
+        deduped = remove_duplicates(mock_context, raw)
+        assert len(deduped) == 3
+
+        # Second stage: standardize both text columns
+        cfg = ColumnsSelectConfiguration(columns=['Name', 'City'])
+        standardized = standardize_categorical_values(mock_context, cfg, deduped)
+
+        assert standardized['Name'].iloc[0] == 'john doe'
+        assert standardized['City'].iloc[0] == 'new york'
+
+    def test_pipeline_fill_missing_then_standardize(self, mock_context):
+        """Fill the missing Value, then standardize Category."""
+        raw = pd.DataFrame({
+            'Category': ['  ACTIVE  ', None, '  PENDING  '],
+            'Value': ['1', '2', None],
+        })
+
+        # First stage: only 'Value' has a replacement configured
+        fill_cfg = FillMissingConfiguration(fill_map={'Value': '0'})
+        filled = fill_missing_values(mock_context, fill_cfg, raw)
+
+        # Second stage: standardize the category labels
+        std_cfg = ColumnsSelectConfiguration(columns=['Category'])
+        standardized = standardize_categorical_values(mock_context, std_cfg, filled)
+
+        assert standardized['Category'].iloc[0] == 'active'
+        assert filled['Value'].iloc[2] == '0'
+
+    def test_pipeline_all_operations(self, mock_context):
+        """Run de-duplication, filling and standardization back to back."""
+        raw = pd.DataFrame({
+            'Name': ['  john doe  ', 'JANE SMITH', '  john doe  ', None],
+            'Value': ['1', None, '1', '2'],
+        })
+
+        # First stage: one of the two identical john doe rows goes away
+        deduped = remove_duplicates(mock_context, raw)
+        assert len(deduped) == 3
+
+        # Second stage: no missing entries may remain in 'Value'
+        fill_cfg = FillMissingConfiguration(fill_map={'Value': '0'})
+        filled = fill_missing_values(mock_context, fill_cfg, deduped)
+        assert not filled['Value'].isna().any()
+
+        # Third stage: standardize the names
+        std_cfg = ColumnsSelectConfiguration(columns=['Name'])
+        standardized = standardize_categorical_values(mock_context, std_cfg, filled)
+
+        assert standardized['Name'].iloc[0] == 'john doe'
+
+    def test_pipeline_with_large_dataset(self, mock_context):
+        """De-duplicate 1000 unique rows plus 100 repeated ones (no timing is measured)."""
+        # 1000 rows, made unique by the ID column
+        size = 1000
+        base = pd.DataFrame({
+            'ID': list(range(size)),
+            'Name': [f'User_{i % 50}' for i in range(size)],
+            'Status': ['ACTIVE', 'INACTIVE', 'PENDING'] * (size // 3) + ['ACTIVE'] * (size % 3),
+            'Score': [i % 100 for i in range(size)],
+        })
+
+        # Append a copy of the first 100 rows as exact duplicates
+        with_dupes = pd.concat([base, base.head(100)], ignore_index=True)
+
+        # Only the original 1000 rows and 4 columns should survive
+        cleaned = remove_duplicates(mock_context, with_dupes)
+
+        n_rows, n_cols = cleaned.shape
+        assert (n_rows, n_cols) == (1000, 4)
+
+
+class TestErrorHandling:
+    """Edge-case inputs for the ops: special floats, custom index, unicode, multi-column fills."""
+
+    def test_operation_with_corrupted_data(self, mock_context):
+        """remove_duplicates returns at least one row for a column holding NaN and +/-inf."""
+        frame = pd.DataFrame({
+            'Col': [float('nan'), float('inf'), -float('inf'), 0, 1, 2],
+        })
+
+        # Only non-emptiness is checked, not the exact surviving rows
+        out = remove_duplicates(mock_context, frame)
+        assert len(out) > 0
+
+    def test_operation_preserves_index(self, mock_context):
+        """With a string index, one duplicated value is removed (only the row count is checked)."""
+        frame = pd.DataFrame(
+            {'Col': [1, 2, 1, 3]},
+            index=['a', 'b', 'c', 'd'],
+        )
+
+        out = remove_duplicates(mock_context, frame)
+        # The op may reset the index, so the labels themselves are not asserted
+        assert len(out) == 3
+
+    def test_standardize_with_unicode_characters(self, mock_context):
+        """Accented names pass through standardization without losing rows."""
+        frame = pd.DataFrame({
+            'Name': ['José', 'François', 'Müller'],
+        })
+
+        cfg = ColumnsSelectConfiguration(columns=['Name'])
+        out = standardize_categorical_values(mock_context, cfg, frame)
+
+        # Only the row count is checked, not the resulting text
+        assert len(out) == 3
+
+    def test_fill_with_same_key_multiple_times(self, mock_context):
+        """Each of three columns is filled with its own replacement value."""
+        frame = pd.DataFrame({
+            'A': ['1', None, '3'],
+            'B': [None, None, 'c'],
+            'C': [None, '2', None],
+        })
+
+        cfg = FillMissingConfiguration(fill_map={
+            'A': '-1',
+            'B': 'EMPTY',
+            'C': '0',
+        })
+
+        out = fill_missing_values(mock_context, cfg, frame)
+
+        assert out.loc[1, 'A'] == '-1'
+        assert out.loc[0, 'B'] == 'EMPTY'
+        assert out.loc[0, 'C'] == '0'
+
+
+class TestDataTypePreservation:
+    """Checks that ops leave column dtypes untouched where the tests expect it."""
+
+    def test_remove_duplicates_preserves_dtypes(self, mock_context):
+        """The int32 and float64 columns keep their dtype after de-duplication."""
+        frame = pd.DataFrame({
+            'int32': pd.array([1, 2, 1], dtype='int32'),
+            'float64': pd.array([1.5, 2.5, 1.5], dtype='float64'),
+            'str': ['a', 'b', 'a'],
+        })
+
+        out = remove_duplicates(mock_context, frame)
+
+        for name in ('int32', 'float64'):
+            assert out[name].dtype == frame[name].dtype
+
+    def test_fill_missing_preserves_column_types_where_possible(self, mock_context):
+        """Filling 'A' (pandas 'string' dtype) works and leaves the dtype of 'B' alone."""
+        frame = pd.DataFrame({
+            'A': pd.array(['1', None, '3'], dtype='string'),
+            'B': ['x', 'y', 'z'],
+        })
+
+        cfg = FillMissingConfiguration(fill_map={'A': '0'})
+        out = fill_missing_values(mock_context, cfg, frame)
+
+        assert out['A'].loc[1] == '0'
+        assert out['B'].dtype == frame['B'].dtype
diff --git a/tests/data_processing/test_jobs.py b/tests/data_processing/test_jobs.py
new file mode 100644
index 0000000..5373f7c
--- /dev/null
+++ b/tests/data_processing/test_jobs.py
@@ -0,0 +1,56 @@
+from template_code_location.data_processing.jobs import (
+    remove_duplicates_job_s3,
+    fill_missing_values_job_s3,
+    standardize_categorical_values_job_s3,
+    correct_typos_job_s3,
+    normalize_numeric_min_max_job_s3,
+    normalize_datetime_job_s3,
+    normalize_coordinates_job_s3,
+    add_global_aggregations_job_s3
+)
+
+
+def test_remove_duplicates_job_s3_is_callable():
+    """remove_duplicates_job_s3 must be callable and expose execute_in_process."""
+    job = remove_duplicates_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+
+def test_fill_missing_values_job_s3_is_callable():
+    """fill_missing_values_job_s3 must be callable and expose execute_in_process."""
+    job = fill_missing_values_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+
+def test_standardize_categorical_values_job_s3_is_callable():
+    """standardize_categorical_values_job_s3 must be callable and expose execute_in_process."""
+    job = standardize_categorical_values_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+
+def test_correct_typos_job_s3_is_callable():
+    """correct_typos_job_s3 must be callable and expose execute_in_process."""
+    job = correct_typos_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+
+def test_normalize_numeric_min_max_job_s3_is_callable():
+    """normalize_numeric_min_max_job_s3 must be callable and expose execute_in_process."""
+    job = normalize_numeric_min_max_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+
+def test_normalize_datetime_job_s3_is_callable():
+    """normalize_datetime_job_s3 must be callable and expose execute_in_process."""
+    job = normalize_datetime_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+def test_normalize_coordinates_job_s3_is_callable():
+    """normalize_coordinates_job_s3 must be callable and expose execute_in_process."""
+    job = normalize_coordinates_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
+
+def test_add_global_aggregations_job_s3_is_callable():
+    """add_global_aggregations_job_s3 must be callable and expose execute_in_process."""
+    job = add_global_aggregations_job_s3
+    assert callable(job) and hasattr(job, 'execute_in_process')
diff --git a/tests/data_processing/test_ops.py b/tests/data_processing/test_ops.py
new file mode 100644
index 0000000..def913b
--- /dev/null
+++ b/tests/data_processing/test_ops.py
@@ -0,0 +1,700 @@
+"""Unit tests for data processing operations."""
+
+import pytest
+import pandas as pd
+from template_code_location.data_processing.ops import (
+    remove_duplicates,
+    fill_missing_values,
+    standardize_categorical_values,
+    correct_typos,
+    normalize_datetime,
+    normalize_numeric_min_max,
+    normalize_coordinates,
+    add_global_aggregations
+)
+from template_code_location.data_processing.config_models import (
+    FillMissingConfiguration,
+    ColumnsSelectConfiguration,
+    SpellCheckConfiguration,
+    AggregationConfiguration,
+    CoordinatesNormalizationConfiguration
+)
+
+
+class TestRemoveDuplicates:
+    """Unit tests calling the remove_duplicates op directly."""
+
+    def test_remove_duplicates_basic(self, mock_context, sample_dataframe):
+        """The shared fixture shrinks from five rows to three."""
+        out = remove_duplicates(mock_context, sample_dataframe)
+
+        # The fixture holds three identical John Doe rows plus two distinct ones
+        assert len(out) == 3
+        assert len(out) < len(sample_dataframe)
+
+    def test_remove_duplicates_no_duplicates(self, mock_context):
+        """A frame with no repeated rows comes back equal to the input."""
+        frame = pd.DataFrame({
+            'A': [1, 2, 3],
+            'B': ['x', 'y', 'z'],
+        })
+        out = remove_duplicates(mock_context, frame)
+
+        assert len(out) == 3
+        pd.testing.assert_frame_equal(out, frame)
+
+    def test_remove_duplicates_all_duplicates(self, mock_context):
+        """Three identical rows collapse into one."""
+        frame = pd.DataFrame({
+            'A': [1, 1, 1],
+            'B': ['x', 'x', 'x'],
+        })
+        out = remove_duplicates(mock_context, frame)
+
+        assert len(out) == 1
+
+    def test_remove_duplicates_empty_dataframe(self, mock_context, empty_dataframe):
+        """An empty frame stays empty in both dimensions."""
+        out = remove_duplicates(mock_context, empty_dataframe)
+
+        n_rows, n_cols = out.shape
+        assert (n_rows, n_cols) == (0, 0)
+
+    def test_remove_duplicates_preserves_data_types(self, mock_context):
+        """Integer, text and float columns keep their dtypes."""
+        frame = pd.DataFrame({
+            'int_col': [1, 2, 1],
+            'str_col': ['a', 'b', 'a'],
+            'float_col': [1.5, 2.5, 1.5],
+        })
+        out = remove_duplicates(mock_context, frame)
+
+        for name in ('int_col', 'str_col', 'float_col'):
+            # dtype after the op must match dtype before it
+            assert out[name].dtype == frame[name].dtype
+
+
+class TestFillMissingValues:
+    """Unit tests calling the fill_missing_values op directly."""
+
+    def test_fill_missing_values_basic(self, mock_context, dataframe_with_missing_values):
+        """Both configured columns end up without missing entries."""
+        cfg = FillMissingConfiguration(fill_map={'Column1': '0', 'Column2': 'N/A'})
+        out = fill_missing_values(mock_context, cfg, dataframe_with_missing_values)
+
+        # Column3 has no replacement configured and is not checked
+        assert not out['Column1'].isna().any()
+        assert not out['Column2'].isna().any()
+
+    def test_fill_missing_values_with_different_values(self, mock_context):
+        """Each column receives its own replacement value."""
+        frame = pd.DataFrame({
+            'A': [1, None, 3],
+            'B': [None, 'b', 'c'],
+        })
+        cfg = FillMissingConfiguration(fill_map={'A': '-1', 'B': 'UNKNOWN'})
+        out = fill_missing_values(mock_context, cfg, frame)
+
+        assert out.loc[1, 'A'] == '-1'
+        assert out.loc[0, 'B'] == 'UNKNOWN'
+
+    def test_fill_missing_values_partial_columns(self, mock_context):
+        """Columns absent from fill_map keep their missing values."""
+        frame = pd.DataFrame({
+            'A': [1, None, 3],
+            'B': [None, 'b', 'c'],
+        })
+        cfg = FillMissingConfiguration(fill_map={'A': '999'})
+        out = fill_missing_values(mock_context, cfg, frame)
+
+        assert out.loc[1, 'A'] == '999'
+        assert pd.isna(out.loc[0, 'B'])  # 'B' was not configured, so it is still missing
+
+    def test_fill_missing_values_no_missing(self, mock_context):
+        """A frame without missing values comes back equal to the input."""
+        frame = pd.DataFrame({
+            'A': ['1', '2', '3'],
+            'B': ['a', 'b', 'c'],
+        })
+        cfg = FillMissingConfiguration(fill_map={'A': '0'})
+        out = fill_missing_values(mock_context, cfg, frame)
+
+        pd.testing.assert_frame_equal(out, frame)
+
+    def test_fill_missing_values_empty_dataframe(self, mock_context, empty_dataframe):
+        """An empty frame with an empty fill_map yields zero rows."""
+        cfg = FillMissingConfiguration(fill_map={})
+        out = fill_missing_values(mock_context, cfg, empty_dataframe)
+
+        assert len(out) == 0
+
+
+class TestStandardizeCategoricalValues:
+    """Unit tests calling the standardize_categorical_values op directly."""
+
+    def test_standardize_categorical_basic(self, mock_context, sample_dataframe):
+        """Name, City and Status of the shared fixture are lower-cased."""
+        cfg = ColumnsSelectConfiguration(columns=['Name', 'City', 'Status'])
+        out = standardize_categorical_values(mock_context, cfg, sample_dataframe)
+
+        # Spot-check one cell per configured column
+        assert out['Name'].iloc[0] == 'john doe'
+        assert out['City'].iloc[1] == 'los angeles'
+        assert out['Status'].iloc[1] == 'inactive'
+
+    def test_standardize_categorical_single_column(self, mock_context):
+        """Padding is stripped and text lower-cased in a single column."""
+        frame = pd.DataFrame({
+            'City': ['  NEW YORK  ', 'LOS ANGELES', '  chicago  '],
+        })
+        cfg = ColumnsSelectConfiguration(columns=['City'])
+        out = standardize_categorical_values(mock_context, cfg, frame)
+
+        expected = ['new york', 'los angeles', 'chicago']
+        actual = out['City'].tolist()
+        assert actual == expected
+
+    def test_standardize_categorical_missing_column(self, mock_context, sample_dataframe):
+        """A configured column that does not exist does not stop the others."""
+        cfg = ColumnsSelectConfiguration(columns=['NonExistent', 'Name'])
+        out = standardize_categorical_values(mock_context, cfg, sample_dataframe)
+
+        # 'Name' is still standardized even though 'NonExistent' is absent
+        assert out['Name'].iloc[0] == 'john doe'
+
+    def test_standardize_categorical_with_missing_values(self, mock_context):
+        """A missing entry becomes the empty string."""
+        frame = pd.DataFrame({
+            'Category': ['  ACTIVE  ', None, '  pending  '],
+        })
+        cfg = ColumnsSelectConfiguration(columns=['Category'])
+        out = standardize_categorical_values(mock_context, cfg, frame)
+
+        expected = ['active', '', 'pending']
+        actual = out['Category'].tolist()
+        assert actual == expected
+
+    def test_standardize_categorical_empty_dataframe(self, mock_context, empty_dataframe):
+        """An empty frame yields zero rows even when columns are configured."""
+        cfg = ColumnsSelectConfiguration(columns=['A', 'B'])
+        out = standardize_categorical_values(mock_context, cfg, empty_dataframe)
+
+        assert len(out) == 0
+
+    def test_standardize_categorical_numeric_columns(self, mock_context):
+        """Integer cells come back as their string form."""
+        frame = pd.DataFrame({
+            'NumCol': [1, 2, 3],
+        })
+        cfg = ColumnsSelectConfiguration(columns=['NumCol'])
+        out = standardize_categorical_values(mock_context, cfg, frame)
+
+        first = out['NumCol'].iloc[0]
+        assert first == '1' and isinstance(first, str)
+
+
+class TestCorrectTypos:
+    """Unit tests calling the correct_typos op directly."""
+
+    def test_correct_typos_basic(self, mock_context):
+        """Three short names go in and three rows come out."""
+        frame = pd.DataFrame({
+            'Name': ['jon', 'jayne', 'bob'],
+        })
+        cfg = SpellCheckConfiguration(columns=['Name'], language='en')
+        out = correct_typos(mock_context, cfg, frame)
+
+        # Only the row count is checked, not the corrected text
+        assert len(out) == 3
+
+    def test_correct_typos_missing_column(self, mock_context):
+        """A configured column that does not exist leaves the frame untouched."""
+        frame = pd.DataFrame({
+            'Name': ['jon', 'jayne'],
+        })
+        cfg = SpellCheckConfiguration(columns=['NonExistent'], language='en')
+        out = correct_typos(mock_context, cfg, frame)
+
+        # No exception, and the output equals the input
+        pd.testing.assert_frame_equal(out, frame)
+
+    def test_correct_typos_with_missing_values(self, mock_context):
+        """An empty string in the column is left as an empty string."""
+        frame = pd.DataFrame({
+            'Text': ['helo', '', 'wrld'],
+        })
+        cfg = SpellCheckConfiguration(columns=['Text'], language='en')
+        out = correct_typos(mock_context, cfg, frame)
+
+        # Row 1 held '' on input
+        assert out.loc[1, 'Text'] == ''
+
+    def test_correct_typos_empty_dataframe(self, mock_context, empty_dataframe):
+        """An empty frame yields zero rows."""
+        cfg = SpellCheckConfiguration(columns=['A'], language='en')
+        out = correct_typos(mock_context, cfg, empty_dataframe)
+
+        assert len(out) == 0
+
+    def test_correct_typos_different_languages(self, mock_context):
+        """The op runs for 'en', 'es' and 'it' on the same frame."""
+        frame = pd.DataFrame({
+            'Text': ['ciao', 'mondo'],
+        })
+
+        for code in ['en', 'es', 'it']:
+            cfg = SpellCheckConfiguration(columns=['Text'], language=code)
+            out = correct_typos(mock_context, cfg, frame)
+
+            # No exception, and the row count is unchanged
+            assert len(out) == 2
+
+    def test_correct_typos_numeric_values(self, mock_context):
+        """An integer column is accepted and the row count is unchanged."""
+        frame = pd.DataFrame({
+            'Values': [123, 456, 789],
+        })
+        cfg = SpellCheckConfiguration(columns=['Values'], language='en')
+        out = correct_typos(mock_context, cfg, frame)
+
+        # Only the row count is checked, not how the numbers were handled
+        assert len(out) == 3
+
+class TestNormalizeDatetime:
+    """Tests for the normalize_datetime operation."""
+
+    def test_normalize_datetime_basic(self, mock_context):
+        """Test basic datetime normalization to ISO format."""
+        df = pd.DataFrame({
+            'date_col': ['2023-01-01 10:00:00', '2023-12-31T23:59:59']
+        })
+        
+        config = ColumnsSelectConfiguration(columns=['date_col'])
+        
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'date_col_iso' in result.columns
+        assert result['date_col_iso'].iloc[0] == '2023-01-01T10:00:00Z'
+        assert result['date_col_iso'].iloc[1] == '2023-12-31T23:59:59Z'
+
+    def test_normalize_datetime_missing_column(self, mock_context, sample_dataframe):
+        """Test behavior when a configured column is missing in the DataFrame."""
+        config = ColumnsSelectConfiguration(columns=['non_existent_column'])
+        
+        result = normalize_datetime(mock_context, config, sample_dataframe.copy())
+
+        pd.testing.assert_frame_equal(result, sample_dataframe)
+
+    def test_normalize_datetime_unparseable_values(self, mock_context):
+        """Test column with values that cannot be parsed as dates."""
+        df = pd.DataFrame({
+            'invalid_col': ['not-a-date', 'completely-random-text']
+        })
+        config = ColumnsSelectConfiguration(columns=['invalid_col'])
+        
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'invalid_col_iso' not in result.columns
+
+    def test_normalize_datetime_mixed_and_nulls(self, mock_context):
+        """Test column with mixed valid dates, invalid dates, and NaNs."""
+        df = pd.DataFrame({
+            'mixed_col': ['2023-05-01', None, 'invalid-date']
+        })
+        config = ColumnsSelectConfiguration(columns=['mixed_col'])
+        
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'mixed_col_iso' in result.columns
+        assert result['mixed_col_iso'].iloc[0] == '2023-05-01T00:00:00Z'
+        
+        assert result['mixed_col_iso'].iloc[1] == ""
+        assert result['mixed_col_iso'].iloc[2] == ""
+
+    def test_normalize_datetime_empty_dataframe(self, mock_context, empty_dataframe):
+        """Test with an empty DataFrame."""
+        config = ColumnsSelectConfiguration(columns=['some_col'])
+        
+        result = normalize_datetime(mock_context, config, empty_dataframe)
+        
+        assert result.empty
+
+    def test_normalize_datetime_epoch_only(self, mock_context, capsys):
+        """If parsing a column yields only the Unix epoch date, it should be skipped."""
+        df = pd.DataFrame({
+            'weird_col': ['0', 0, '0000', '']
+        })
+
+        config = ColumnsSelectConfiguration(columns=['weird_col'])
+
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'weird_col_iso' not in result.columns
+
+        captured = capsys.readouterr()
+        assert "all normalized values are '1970-01-01'" in captured.err
+
+    def test_normalize_datetime_all_1970_skipped(self, mock_context, capsys):
+        """If all formatted values are '1970-01-01', the column should be skipped with a warning."""
+        df = pd.DataFrame({
+            'ts_col': ['1970-01-01 05:30:00', '1970-01-01 12:00:00']
+        })
+
+        config = ColumnsSelectConfiguration(columns=['ts_col'])
+
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'ts_col_iso' not in result.columns
+
+        captured = capsys.readouterr()
+        assert "all normalized values are '1970-01-01'" in captured.err
+
+    def test_normalize_datetime_integer_age_column_skipped(self, mock_context, capsys):
+        """If an integer column like 'age' is passed, all values become 1970-01-01 and should be skipped."""
+        df = pd.DataFrame({
+            'age': [66, 45, 40, 43, 20, 26, 69, 21, 46]
+        })
+
+        config = ColumnsSelectConfiguration(columns=['age'])
+
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'age_iso' not in result.columns
+
+        captured = capsys.readouterr()
+        assert "all normalized values are '1970-01-01'" in captured.err
+
+class TestNormalizeNumericMinMax:
+    """Tests for the normalize_numeric_min_max operation."""
+
+    def test_normalize_numeric_basic(self, mock_context):
+        """Test standard min-max normalization between 0 and 1."""
+        df = pd.DataFrame({
+            'score': [10, 20, 30, 40, 50]
+        })
+        config = ColumnsSelectConfiguration(columns=['score'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'score_norm' in result.columns
+        assert result['score_norm'].min() == 0.0
+        assert result['score_norm'].max() == 1.0
+        
+        assert result['score_norm'].iloc[2] == 0.5
+
+    def test_normalize_numeric_missing_column(self, mock_context):
+        """Test skipping of non-existent columns."""
+        df = pd.DataFrame({'existing': [1, 2, 3]})
+        config = ColumnsSelectConfiguration(columns=['missing_col'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'missing_col_norm' not in result.columns
+
+    def test_normalize_numeric_constant_values(self, mock_context):
+        """Test skipping when min == max to avoid division by zero."""
+        df = pd.DataFrame({
+            'constant': [10, 10, 10]
+        })
+        config = ColumnsSelectConfiguration(columns=['constant'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'constant_norm' not in result.columns
+
+    def test_normalize_numeric_with_nans(self, mock_context):
+        """Test normalization with NaN values (pandas min/max ignore NaNs by default)."""
+        df = pd.DataFrame({
+            'with_nans': [10, None, 50]
+        })
+        config = ColumnsSelectConfiguration(columns=['with_nans'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'with_nans_norm' in result.columns
+        assert result['with_nans_norm'].iloc[0] == 0.0
+        assert result['with_nans_norm'].iloc[2] == 1.0
+        assert pd.isna(result['with_nans_norm'].iloc[1])
+
+    def test_normalize_numeric_multiple_columns(self, mock_context):
+        """Test processing multiple columns in one call."""
+        df = pd.DataFrame({
+            'A': [1, 2],
+            'B': [10, 20]
+        })
+        config = ColumnsSelectConfiguration(columns=['A', 'B'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'A_norm' in result.columns
+        assert 'B_norm' in result.columns
+
+class TestNormalizeCoordinates:
+    """Tests for the normalize_coordinates operation."""
+
+    def test_normalize_coordinates_basic(self, mock_context):
+        """Test rounding and basic coordinate normalization."""
+        df = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert result['lat'].iloc[0] == 45.1235
+        assert result['lon'].iloc[0] == 9.1235
+        
+        assert len(result) == 2
+
+    def test_normalize_coordinates_filtering(self, mock_context):
+        """Test that rows with an out-of-range latitude or longitude are dropped."""
+        df = pd.DataFrame({
+            'lat': [45.0, 100.0, -91.0, 0.0],  # 100.0 and -91.0 are out of range
+            'lon': [9.0, 0.0, 0.0, 200.0]      # 200.0 is out of range
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        assert result['lat'].iloc[0] == 45.0
+
+    def test_normalize_coordinates_invalid_types(self, mock_context):
+        """Test conversion of strings to numeric and handling of NaNs."""
+        df = pd.DataFrame({
+            'lat': ["45.5", "invalid", None],
+            'lon': ["9.5", "10.0", "11.0"]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        assert isinstance(result['lat'].iloc[0], float)
+
+    def test_normalize_coordinates_empty_df(self, mock_context, empty_dataframe):
+        """Test that a frame with 'lat'/'lon' columns but no rows yields an empty result."""
+        # NOTE(review): the empty_dataframe fixture is requested but never used; the frame is built below.
+        df = pd.DataFrame(columns=['lat', 'lon'])
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df)
+        
+        assert len(result) == 0
+        assert result.empty
+
+    def test_normalize_coordinates_default_config(self, mock_context):
+        """Test that normalize_coordinates uses default 'lat'/'lon' columns when no config is provided."""
+        df = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration()
+
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert result['lat'].iloc[0] == 45.1235
+        assert result['lon'].iloc[0] == 9.1235
+        assert len(result) == 2
+
+    def test_normalize_coordinates_null_config_values(self, mock_context):
+        """Test that null lat/lon column names fall back to defaults ('lat'/'lon')."""
+        df = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn=None, lonColumn=None)
+
+        assert config.latColumn == "lat"
+        assert config.lonColumn == "lon"
+
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert result['lat'].iloc[0] == 45.1235
+        assert result['lon'].iloc[0] == 9.1235
+        assert len(result) == 2
+
+    def test_normalize_coordinates_dms_degree_symbol(self, mock_context):
+        """Test DMS parsing with degree/minute/second symbols like 40°26'46\"N."""
+        df = pd.DataFrame({
+            'lat': ["40°26'46\"N", "51°30'26\"N"],
+            'lon': ["79°58'56\"W", "0°7'39\"W"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 2
+        # 40°26'46"N ≈ 40.4461
+        assert abs(result['lat'].iloc[0] - 40.4461) < 0.001
+        # 79°58'56"W ≈ -79.9822
+        assert abs(result['lon'].iloc[0] - (-79.9822)) < 0.001
+
+    def test_normalize_coordinates_dms_spaced_format(self, mock_context):
+        """Test DMS parsing with space-separated format like '40 26 46 N'."""
+        df = pd.DataFrame({
+            'lat': ["40 26 46 N"],
+            'lon': ["79 58 56 W"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        assert abs(result['lat'].iloc[0] - 40.4461) < 0.001
+        assert abs(result['lon'].iloc[0] - (-79.9822)) < 0.001
+
+    def test_normalize_coordinates_dms_already_decimal(self, mock_context):
+        """Test that string columns with decimal values are auto-parsed correctly."""
+        df = pd.DataFrame({
+            'lat': ["45.5", "46.0"],
+            'lon': ["9.5", "10.0"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 2
+        assert result['lat'].iloc[0] == 45.5
+        assert result['lon'].iloc[0] == 9.5
+
+    def test_normalize_coordinates_dms_mixed_valid_invalid(self, mock_context):
+        """Test auto-detection with a mix of valid DMS, valid decimal, and unparseable values."""
+        df = pd.DataFrame({
+            'lat': ["40°26'46\"N", "not_a_coord", "51.5"],
+            'lon': ["79°58'56\"W", "10.0", "0.1"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        # Row with "not_a_coord" for lat should be dropped (NaN lat)
+        assert len(result) == 2
+
+    def test_normalize_coordinates_dms_out_of_range(self, mock_context):
+        """Test that DMS-parsed coordinates outside valid range are filtered out."""
+        df = pd.DataFrame({
+            'lat': ["91°0'0\"N", "45°0'0\"N"],
+            'lon': ["0°0'0\"E", "9°0'0\"E"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        # First row has lat=91° which is out of [-90, 90]
+        assert len(result) == 1
+        assert abs(result['lat'].iloc[0] - 45.0) < 0.001
+
+    def test_normalize_coordinates_dms_south_and_east(self, mock_context):
+        """Test DMS parsing with south latitude and east longitude."""
+        df = pd.DataFrame({
+            'lat': ["33°51'54\"S"],
+            'lon': ["151°12'36\"E"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        # 33°51'54"S ≈ -33.865
+        assert result['lat'].iloc[0] < 0
+        assert abs(result['lat'].iloc[0] - (-33.865)) < 0.001
+        # 151°12'36"E ≈ 151.21
+        assert result['lon'].iloc[0] > 0
+        assert abs(result['lon'].iloc[0] - 151.21) < 0.01
+
+    def test_normalize_coordinates_autodetect_numeric_vs_dms(self, mock_context):
+        """Test that numeric columns are coerced directly while string columns are parsed as DMS."""
+        # Numeric columns — should go through pd.to_numeric path
+        df_numeric = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        result_numeric = normalize_coordinates(mock_context, config, df_numeric.copy())
+
+        assert result_numeric['lat'].iloc[0] == 45.1235
+        assert len(result_numeric) == 2
+
+        # String DMS columns — should go through _parse_dms_to_decimal path
+        df_dms = pd.DataFrame({
+            'lat': ["40°26'46\"N"],
+            'lon': ["79°58'56\"W"]
+        })
+        result_dms = normalize_coordinates(mock_context, config, df_dms.copy())
+
+        assert len(result_dms) == 1
+        assert abs(result_dms['lat'].iloc[0] - 40.4461) < 0.001
+
+class TestAddGlobalAggregations:
+    """Tests for the add_global_aggregations operation."""
+
+    def test_add_global_aggregations_success(self, mock_context):
+        """Test a successful group by and aggregation."""
+        df = pd.DataFrame({
+            'category': ['A', 'A', 'B'],
+            'value': [10, 20, 100],
+            'ignored_str': ['x', 'y', 'z']
+        })
+        
+        config = AggregationConfiguration(
+            columns=['category'], 
+            operation='sum'
+        )
+        
+        result = add_global_aggregations(mock_context, config, df.copy())
+
+        assert len(result) == 2 
+        assert result.loc[result['category'] == 'A', 'value'].values[0] == 30
+        assert result.loc[result['category'] == 'B', 'value'].values[0] == 100
+        assert 'ignored_str' not in result.columns
+        mock_context.log.info.assert_called()
+
+    def test_add_global_aggregations_missing_column(self, mock_context):
+        """Test skipping a column that does not exist in the dataframe."""
+        df = pd.DataFrame({'value': [1, 2, 3]})
+        config = AggregationConfiguration(
+            columns=['missing_col'], 
+            operation='count'
+        )
+
+        result = add_global_aggregations(mock_context, config, df.copy())
+
+        mock_context.log.warning.assert_any_call("Column 'missing_col' not found, skipping aggregation.")
+        assert len(result) == 1
+
+    def test_add_global_aggregations_unsupported_op(self, mock_context):
+        """Test that an unsupported operation raises and logs the 'Unsupported aggregation' warning."""
+        df = pd.DataFrame({'category': ['A'], 'value': [1]})
+        
+        config = AggregationConfiguration(
+            columns=['category'], 
+            operation='unsupported' 
+        )
+        # NOTE(review): Exception matches any error; consider narrowing to the type the op raises - TODO confirm.
+        with pytest.raises(Exception):
+            add_global_aggregations(mock_context, config, df.copy())
+            
+        mock_context.log.warning.assert_any_call("Unsupported aggregation 'unsupported'")
+
+    def test_add_global_aggregations_only_numeric_kept(self, mock_context):
+        """Verify that non-numeric and non-grouping columns are dropped."""
+        df = pd.DataFrame({
+            'group': ['A', 'A'],
+            'num': [1, 2],
+            'text': ['hello', 'world']
+        })
+        config = AggregationConfiguration(columns=['group'], operation='mean')
+
+        result = add_global_aggregations(mock_context, config, df.copy())
+
+        assert 'text' not in result.columns
+        assert 'num' in result.columns
+        assert 'group' in result.columns
diff --git a/tests/dataframe_level_anonymisation/__init__.py b/tests/dataframe_level_anonymisation/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/dataframe_level_anonymisation/config_models/__init__.py b/tests/dataframe_level_anonymisation/config_models/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/config_models/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/dataframe_level_anonymisation/config_models/test_base_config.py b/tests/dataframe_level_anonymisation/config_models/test_base_config.py
new file mode 100644
index 0000000..92e599b
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/config_models/test_base_config.py
@@ -0,0 +1,54 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.base_config import BaseConfiguration
+
+
+def test_valid_configuration_with_overrides():
+    cfg = BaseConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=10.0,
+        generalisation_hierarchies={"age": "age_hierarchy"},
+    )
+    assert cfg.ident == ["id"]
+    assert cfg.quasi_identifiers == ["age"]
+    assert cfg.supp_level == 10.0
+    assert cfg.generalisation_hierarchies == {"age": "age_hierarchy"}
+
+
+def test_default_values_are_loaded():
+    cfg = BaseConfiguration()
+    assert cfg.ident == ["Name"]
+    assert cfg.quasi_identifiers == ["Age"]
+    assert cfg.supp_level == 50.0
+    assert cfg.generalisation_hierarchies == {"Age": "simpl_age"}
+
+
+def test_missing_ident_raises_error():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            ident=[]
+        )
+
+
+def test_missing_quasi_ident_raises_error():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            quasi_identifiers=[]
+        )
+
+
+def test_overlap_between_ident_and_quasi_identifiers():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            ident=["age"],
+            quasi_identifiers=["age"]
+        )
+
+
+def test_supp_level_bounds():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            supp_level=150.0  # out of range
+        )
diff --git a/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py b/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
new file mode 100644
index 0000000..c6994a9
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
@@ -0,0 +1,48 @@
+from template_code_location.dataframe_level_anonymisation.config_models.hierarchies import (
+    simpl_age,
+    simpl_age2,
+    simpl_gender,
+    get_all_hierarchies,
+)
+
+
+def test_simpl_age_structure():
+    assert isinstance(simpl_age, dict)
+    assert 0 in simpl_age
+    assert isinstance(simpl_age[0], list)
+    # verify first level contains 100 ages
+    assert len(simpl_age[0]) == 100
+    assert simpl_age[0][0] == 0
+    assert simpl_age[0][-1] == 99
+
+
+def test_simpl_age2_structure():
+    assert isinstance(simpl_age2, dict)
+    assert 0 in simpl_age2
+    assert 1 in simpl_age2
+    assert isinstance(simpl_age2[0], list)
+    assert isinstance(simpl_age2[1], list)
+
+
+def test_simpl_gender_structure():
+    assert isinstance(simpl_gender, dict)
+    assert 0 in simpl_gender
+    assert 1 in simpl_gender
+    assert simpl_gender[0] == ["M", "F", "O"]
+    assert simpl_gender[1] == ["*", "*", "*"]
+
+
+def test_get_all_hierarchies():
+    hier = get_all_hierarchies()
+
+    # the function should return dicts only
+    assert isinstance(hier, dict)
+
+    # ensure expected dicts are included
+    assert "simpl_age" in hier
+    assert "simpl_age2" in hier
+    assert "simpl_gender" in hier
+
+    # ensure the values returned are references to the actual dicts
+    assert hier["simpl_age"] is simpl_age
+    assert hier["simpl_gender"] is simpl_gender
diff --git a/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py b/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
new file mode 100644
index 0000000..ef6e2c8
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
@@ -0,0 +1,41 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.k_anonymity_configuration import (
+    KAnonymityConfiguration,
+)
+
+
+def test_valid_k_anonymity_config_with_overrides():
+    cfg = KAnonymityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=5.0,
+        generalisation_hierarchies={"age": "age_hier"},
+        k=3,
+        sensitive_attributes=["disease"],
+    )
+    assert cfg.k == 3
+    assert cfg.sensitive_attributes == ["disease"]
+    assert cfg.generalisation_hierarchies == {"age": "age_hier"}
+
+
+def test_default_values_are_loaded():
+    cfg = KAnonymityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        generalisation_hierarchies={"age": "age_hier"}
+    )
+    assert cfg.k == 3
+    assert cfg.sensitive_attributes == ["Disease"]
+
+
+def test_invalid_k_value_raises_error():
+    with pytest.raises(ValidationError):
+        KAnonymityConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=1,  # invalid, must be >= 2
+            sensitive_attributes=["disease"],
+        )
diff --git a/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py b/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
new file mode 100644
index 0000000..c94db3e
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
@@ -0,0 +1,44 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.l_diversity_configuration import (
+    LDiversityConfiguration,
+)
+
+
+def test_valid_l_diversity_config_with_overrides():
+    cfg = LDiversityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=5.0,
+        generalisation_hierarchies={"age": "age_hier"},
+        k=3,
+        l=2,
+        sensitive_attribute="disease",
+    )
+    assert cfg.k == 3
+    assert cfg.l == 2
+    assert cfg.sensitive_attribute == "disease"
+
+
+def test_default_values_are_loaded():
+    cfg = LDiversityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        generalisation_hierarchies={"age": "age_hier"}
+    )
+    assert cfg.k == 2
+    assert cfg.l == 3
+    assert cfg.sensitive_attribute == "Disease"
+
+
+def test_invalid_l_value_raises_error():
+    with pytest.raises(ValidationError):
+        LDiversityConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=3,
+            l=0,  # invalid, must be >= 1
+            sensitive_attribute="disease",
+        )
diff --git a/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py b/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
new file mode 100644
index 0000000..615bd27
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
@@ -0,0 +1,56 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.t_closeness_configuration import (
+    TClosenessConfiguration,
+)
+
+
+def test_valid_t_closeness_config_with_overrides():
+    cfg = TClosenessConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=5.0,
+        generalisation_hierarchies={"age": "age_hier"},
+        k=3,
+        t=0.4,
+        sensitive_attribute="disease",
+    )
+    assert cfg.k == 3
+    assert cfg.t == 0.4
+    assert cfg.sensitive_attribute == "disease"
+
+
+def test_default_values_are_loaded():
+    cfg = TClosenessConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        generalisation_hierarchies={"age": "age_hier"}
+    )
+    assert cfg.k == 2
+    assert cfg.t == 0.5
+    assert cfg.sensitive_attribute == "Disease"
+
+
+def test_invalid_t_value_low():
+    with pytest.raises(ValidationError):
+        TClosenessConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=3,
+            t=-0.1,  # invalid
+            sensitive_attribute="disease",
+        )
+
+
+def test_invalid_t_value_high():
+    with pytest.raises(ValidationError):
+        TClosenessConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=3,
+            t=2.0,  # invalid > 1
+            sensitive_attribute="disease",
+        )
diff --git a/tests/dataframe_level_anonymisation/test_jobs.py b/tests/dataframe_level_anonymisation/test_jobs.py
new file mode 100644
index 0000000..f890e2d
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/test_jobs.py
@@ -0,0 +1,44 @@
+from template_code_location.dataframe_level_anonymisation.jobs import (
+    k_anonymity_job,
+    l_diversity_job,
+    t_closeness_job,
+    k_anonymity_job_s3,
+    l_diversity_job_s3,
+    t_closeness_job_s3
+)
+
+
+def test_k_anonymity_job_is_callable():
+    """Test k_anonymity_job is a valid Dagster job"""
+    assert callable(k_anonymity_job)
+    assert hasattr(k_anonymity_job, 'execute_in_process')
+
+
+def test_l_diversity_job_is_callable():
+    """Test l_diversity_job is a valid Dagster job"""
+    assert callable(l_diversity_job)
+    assert hasattr(l_diversity_job, 'execute_in_process')
+
+
+def test_t_closeness_job_is_callable():
+    """Test t_closeness_job is a valid Dagster job"""
+    assert callable(t_closeness_job)
+    assert hasattr(t_closeness_job, 'execute_in_process')
+
+
+def test_k_anonymity_job_s3_is_callable():
+    """Test k_anonymity_job_s3 is a valid Dagster job"""
+    assert callable(k_anonymity_job_s3)
+    assert hasattr(k_anonymity_job_s3, 'execute_in_process')
+
+
+def test_l_diversity_job_s3_is_callable():
+    """Test l_diversity_job_s3 is a valid Dagster job"""
+    assert callable(l_diversity_job_s3)
+    assert hasattr(l_diversity_job_s3, 'execute_in_process')
+
+
+def test_t_closeness_job_s3_is_callable():
+    """Test t_closeness_job_s3 is a valid Dagster job"""
+    assert callable(t_closeness_job_s3)
+    assert hasattr(t_closeness_job_s3, 'execute_in_process')
diff --git a/tests/dataframe_level_anonymisation/test_ops.py b/tests/dataframe_level_anonymisation/test_ops.py
new file mode 100644
index 0000000..90c01aa
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/test_ops.py
@@ -0,0 +1,230 @@
+import pytest
+import pandas as pd
+from unittest.mock import patch
+from dagster import DagsterInvalidInvocationError, build_op_context
+
+from template_code_location.dataframe_level_anonymisation.ops import (
+    apply_k_anonymity,
+    apply_l_diversity,
+    apply_t_closeness,
+)
+from template_code_location.dataframe_level_anonymisation.config_models import (
+    KAnonymityConfiguration,
+    LDiversityConfiguration,
+    TClosenessConfiguration,
+)
+
+
+# ---------------------------
+# Fixtures
+# ---------------------------
+@pytest.fixture
+def fake_df():
+    return pd.DataFrame({"id": [1, 2], "age": [30, 40]})
+
+
+@pytest.fixture
+def k_config():
+    return KAnonymityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        sensitive_attributes=["age"],
+        k=2,
+        supp_level=0.0,
+        generalisation_hierarchies={"age": "simpl_age"},
+    )
+
+
+@pytest.fixture
+def l_config():
+    return LDiversityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        sensitive_attribute="age",
+        k=2,
+        l=1,
+        supp_level=0.0,
+        generalisation_hierarchies={"age": "simpl_age"},
+    )
+
+
+@pytest.fixture
+def t_config():
+    return TClosenessConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        sensitive_attribute="age",
+        k=2,
+        t=0.5,
+        supp_level=0.0,
+        generalisation_hierarchies={"age": "simpl_age"},
+    )
+
+
+@pytest.fixture
+def op_context():
+    return build_op_context()
+
+
+# ---------------------------
+# Helper for patching external functions
+# ---------------------------
+@pytest.fixture(autouse=True)
+def patch_external_ops():
+    """Stub the hierarchy lookup and the anonymisation calls for every test in this module.
+
+    patch() replaces a name on the module where it is looked up, so each target
+    must name the ops module that this test file actually imports from.
+    """
+    # The pre-consolidation "dataframe_level_anonymisation.ops" path is not the imported module.
+    ops_module = "template_code_location.dataframe_level_anonymisation.ops"
+
+    def stub_frame():
+        # A fresh frame per patch, so no two stubs share one object.
+        return pd.DataFrame({"id": [1, 2], "age": [30, 40]})
+
+    with (
+        patch(f"{ops_module}.get_all_hierarchies", return_value={"simpl_age": {0: [30, 40]}}),
+        patch(f"{ops_module}.k_anonymity", return_value=stub_frame()),
+        patch(f"{ops_module}.l_diversity", return_value=stub_frame()),
+        patch(f"{ops_module}.t_closeness", return_value=stub_frame()),
+    ):
+        yield
+
+
+# ---------------------------
+# Tests for apply_k_anonymity
+# ---------------------------
+def test_apply_k_anonymity_outputs(op_context, k_config, fake_df):
+    results = list(apply_k_anonymity(op_context, k_config, fake_df))
+    assert len(results) == 2
+
+    data_output = results[0].value
+    metrics_output = results[1].value
+
+    # Check types
+    assert isinstance(data_output, pd.DataFrame)
+    assert isinstance(metrics_output, dict)
+    assert "k_anon" in metrics_output
+    assert "l_div" in metrics_output
+    assert "t_clos" in metrics_output
+
+
+# ---------------------------
+# Tests for apply_l_diversity
+# ---------------------------
+def test_apply_l_diversity_outputs(op_context, l_config, fake_df):
+    results = list(apply_l_diversity(op_context, l_config, fake_df))
+    assert len(results) == 2
+
+    data_output = results[0].value
+    metrics_output = results[1].value
+
+    assert isinstance(data_output, pd.DataFrame)
+    assert isinstance(metrics_output, dict)
+    assert "k_anon" in metrics_output
+    assert "l_div" in metrics_output
+    assert "t_clos" in metrics_output
+
+
+def test_apply_l_diversity_empty_raises(op_context, l_config):
+    # Patch l_diversity in the module the op is imported from, not the pre-consolidation path.
+    target = "template_code_location.dataframe_level_anonymisation.ops.l_diversity"
+    with patch(target, return_value=pd.DataFrame()), pytest.raises(DagsterInvalidInvocationError):
+        list(apply_l_diversity(op_context, l_config, pd.DataFrame({"id": [1], "age": [30]})))
+
+
+# ---------------------------
+# Tests for apply_t_closeness
+# ---------------------------
+def test_apply_t_closeness_outputs(op_context, t_config, fake_df):
+    """The op must produce exactly two outputs: a DataFrame, then a metrics dict."""
+    emitted = list(apply_t_closeness(op_context, t_config, fake_df))
+    assert len(emitted) == 2
+
+    frame, metrics = (out.value for out in emitted)
+
+    assert isinstance(frame, pd.DataFrame)
+    assert isinstance(metrics, dict)
+    # All three privacy metrics must be reported.
+    for metric_name in ("k_anon", "l_div", "t_clos"):
+        assert metric_name in metrics
+
+
+def test_apply_t_closeness_empty_raises(op_context, t_config):
+    target = "template_code_location.dataframe_level_anonymisation.ops.t_closeness"  # relocated module
+    with patch(target, return_value=pd.DataFrame()), pytest.raises(DagsterInvalidInvocationError):
+        list(apply_t_closeness(op_context, t_config, pd.DataFrame({"id": [1], "age": [30]})))
+
+
+# ---------------------------
+# Additional tests for _validate_and_get_hierarchies
+# ---------------------------
+def test_validate_hierarchies_dataset_too_small(k_config):
+    """A single-row frame is expected to be rejected with DagsterInvalidInvocationError."""
+    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
+
+    with pytest.raises(DagsterInvalidInvocationError):
+        _validate_and_get_hierarchies(k_config, pd.DataFrame({"id": [1], "age": [30]}))
+
+
+def test_validate_hierarchies_missing_hierarchy(k_config, fake_df):
+    """A copy of the config with its generalisation hierarchies emptied must be rejected."""
+    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
+
+    without_hierarchies = k_config.model_copy(update={"generalisation_hierarchies": {}})
+    with pytest.raises(DagsterInvalidInvocationError):
+        _validate_and_get_hierarchies(without_hierarchies, fake_df)
+
+
+def test_validate_hierarchies_hierarchy_not_in_code(k_config, fake_df):
+    """With get_all_hierarchies stubbed to return {}, validation must raise."""
+    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
+    # Patch the very module the helper was imported from, otherwise the stub is never consulted.
+    with patch(f"{_validate_and_get_hierarchies.__module__}.get_all_hierarchies", return_value={}), pytest.raises(DagsterInvalidInvocationError):
+        _validate_and_get_hierarchies(k_config, fake_df)
+
+
+# ---------------------------
+# Additional tests for _calc_dataframe_metrics
+# ---------------------------
+def test_calc_dataframe_metrics_basic():
+    """Values returned by the stubbed anonymity functions must show up in the metrics."""
+    from template_code_location.dataframe_level_anonymisation.ops import _calc_dataframe_metrics
+
+    df_org = pd.DataFrame({"age": [30, 40], "id": [1, 2]})
+    df_anon = df_org.copy()
+    # Patch through the module the helper is imported from; the old top-level path is not that module.
+    anonymity = "template_code_location.dataframe_level_anonymisation.ops.anonymity"
+    with (
+        patch(f"{anonymity}.k_anonymity", return_value=2),
+        patch(f"{anonymity}.l_diversity", return_value=1),
+        patch(f"{anonymity}.t_closeness", return_value=0.1),
+    ):
+        report, metrics = _calc_dataframe_metrics(df_anon, df_org, ["age"], ["age"])
+        assert "k-anonymity" in report
+        assert metrics["k_anon"] == 2
+        assert metrics["l_div"] == 1
+        assert metrics["t_clos"] == 0.1
+
+
+# ---------------------------
+# Tests for apply_t_closeness exception branches
+# ---------------------------
+def test_apply_t_closeness_value_error_quasi_identifiers(op_context, t_config, fake_df):
+    """Covers the branch where ValueError contains 'Cannot be quasi-identifiers'."""
+    # Patch the ops module under its template_code_location path so the stub takes effect.
+    with patch(
+        "template_code_location.dataframe_level_anonymisation.ops.t_closeness",
+        side_effect=ValueError("Cannot be quasi-identifiers invalid"),
+    ), pytest.raises(DagsterInvalidInvocationError):
+        list(apply_t_closeness(op_context, t_config, fake_df))
+
+
+def test_apply_t_closeness_value_error_other_message(op_context, t_config, fake_df):
+    """Covers the branch where ValueError is raised but message does NOT contain that substring."""
+    # Patch the ops module under its template_code_location path so the stub takes effect.
+    target = "template_code_location.dataframe_level_anonymisation.ops.t_closeness"
+    with patch(target, side_effect=ValueError("Some other error")):
+        with pytest.raises(DagsterInvalidInvocationError):
+            list(apply_t_closeness(op_context, t_config, fake_df))
diff --git a/tests/dataframe_level_anonymisation/test_utils.py b/tests/dataframe_level_anonymisation/test_utils.py
new file mode 100644
index 0000000..3fa1841
--- /dev/null
+++ b/tests/dataframe_level_anonymisation/test_utils.py
@@ -0,0 +1,70 @@
+import numpy as np
+
+from template_code_location.dataframe_level_anonymisation.utils import (
+    parse_value_list,
+    normalize_hierarchy_levels,
+)
+
+
+# ------------------------------------
+# Tests for parse_value_list
+# ------------------------------------
+def test_parse_value_list_all_strings_digits():
+    """Digit-only strings are all expected back as ints."""
+    assert parse_value_list(["1", "2", "3"]) == [1, 2, 3]
+
+
+def test_parse_value_list_mixed_values():
+    """Digit strings become ints; existing ints and non-digit strings are kept."""
+    assert parse_value_list(["1", 2, "abc", "5"]) == [1, 2, "abc", 5]
+
+
+def test_parse_value_list_no_digits():
+    """A list without any digit strings is expected back unchanged."""
+    assert parse_value_list(["a", "b", "c"]) == ["a", "b", "c"]
+
+
+# ------------------------------------
+# Tests for normalize_hierarchy_levels
+# ------------------------------------
+def test_normalize_hierarchy_levels_level_0_converted_to_numpy_array():
+    """Levels are looked up by int key; level 0 must be a numpy array of parsed values."""
+    normalized = normalize_hierarchy_levels({"age": {"0": ["1", "2", "3"], "1": ["0-10", "11-20"]}})
+
+    assert "age" in normalized
+    age_levels = normalized["age"]
+    assert 0 in age_levels
+    assert isinstance(age_levels[0], np.ndarray)
+    assert age_levels[0].tolist() == [1, 2, 3]  # converted via parse_value_list
+    assert age_levels[1] == ["0-10", "11-20"]  # untouched
+
+
+def test_normalize_hierarchy_levels_multiple_columns():
+    """Every column of the hierarchy gets its own normalized level mapping."""
+    normalized = normalize_hierarchy_levels(
+        {"age": {"0": ["10", "20"]}, "gender": {"0": ["M", "F"], "1": ["*"]}}
+    )
+    # First column: digit strings at level 0 come back as ints.
+    age_levels = normalized["age"]
+    assert isinstance(age_levels[0], np.ndarray)
+    assert age_levels[0].tolist() == [10, 20]
+    # Second column: level 0 stays textual, level 1 stays a plain list.
+    gender_levels = normalized["gender"]
+    assert isinstance(gender_levels[0], np.ndarray)
+    assert gender_levels[0].tolist() == ["M", "F"] and gender_levels[1] == ["*"]
+
+
+def test_normalize_hierarchy_levels_mixed_digit_non_digit_at_level_0():
+    """A level 0 mixing digit and non-digit strings must come back as an array of strings."""
+    level_zero = normalize_hierarchy_levels({"test": {"0": ["1", "x", "3"]}})["test"][0]
+
+    assert isinstance(level_zero, np.ndarray)
+    # NOTE(review): presumably numpy coerces the mixed int/str list back to strings - confirm.
+    assert level_zero.tolist() == ["1", "x", "3"]
+
+
+def test_normalize_hierarchy_levels_empty_mapping():
+    """A column that defines no levels must come back as an empty mapping."""
+    column_without_levels = {"col": {}}
+
+    assert normalize_hierarchy_levels(column_without_levels) == {"col": {}}
diff --git a/tests/field_level_pseudo_anonymisation/__init__.py b/tests/field_level_pseudo_anonymisation/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/field_level_pseudo_anonymisation/conftest.py b/tests/field_level_pseudo_anonymisation/conftest.py
new file mode 100644
index 0000000..ee54069
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/conftest.py
@@ -0,0 +1,444 @@
+"""
+Shared pytest fixtures and helpers for field-level pseudonymisation tests.
+
+This module provides:
+- Mock Vault client for testing without real Vault connections
+- Sample data fixtures
+- Configuration fixtures for encryption/decryption operations
+- Helper functions for running ops and managing test Vault storage
+"""
+
+import pandas as pd
+import pytest
+from dagster import build_op_context
+from cryptography.fernet import Fernet
+from hvac.exceptions import InvalidPath, Forbidden
+from unittest.mock import patch, MagicMock
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+    AnonymisePseudonymizeStructuredConfig,
+    DepseudonymizeStructuredConfig,
+    EncryptConfig,
+    DecryptConfig,
+    PseudoTechniqueConfig,
+    DepseudoTechniqueConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.ops import (
+    anonymize_pseudonymize_structured,
+    depseudonymize_structured,
+)
+
+
+# -------------------------------- Mock Vault Storage ----------------------------------------
+
+# In-memory Vault simulation for tests: "<mount_point>/<path>" -> stored secret value
+_test_vault_storage = {}
+_test_vault_access_control = {}  # "<mount_point>/<path>" -> False when reads must be denied
+
+
+@pytest.fixture(autouse=True)
+def mock_vault_client():
+    """
+    Auto-use fixture that replaces ``hvac.Client`` with a MagicMock backed by
+    in-memory dicts, so no test opens a real Vault connection. Reads honour the
+    access-control map (AC3); both dicts are emptied before every test.
+    """
+    # Empty in place (no rebinding) so references imported by name elsewhere stay live.
+    _test_vault_storage.clear()
+    _test_vault_access_control.clear()
+
+    def mock_read_secret(path, mount_point):
+        """Mock reading secret from Vault with access control"""
+        full_path = f"{mount_point}/{path}"
+
+        # Check access control first
+        if full_path in _test_vault_access_control:
+            if not _test_vault_access_control[full_path]:
+                raise Forbidden(f"Access denied to secret: {full_path}")
+
+        if full_path not in _test_vault_storage:
+            raise InvalidPath(f"Secret not found: {full_path}")
+        return {"data": {"data": {"value": _test_vault_storage[full_path]}}}
+
+    def mock_create_or_update_secret(path, mount_point, secret):
+        """Mock creating/updating secret in Vault"""
+        full_path = f"{mount_point}/{path}"
+        _test_vault_storage[full_path] = secret["value"]
+
+    def mock_delete_metadata(path, mount_point):
+        """Mock deleting secret from Vault"""
+        full_path = f"{mount_point}/{path}"
+        if full_path in _test_vault_storage:
+            del _test_vault_storage[full_path]
+        if full_path in _test_vault_access_control:
+            del _test_vault_access_control[full_path]
+
+    with patch("hvac.Client") as mock_client_class:
+        mock_instance = MagicMock()
+        mock_instance.secrets.kv.v2.read_secret_version.side_effect = mock_read_secret
+        mock_instance.secrets.kv.v2.create_or_update_secret.side_effect = (
+            mock_create_or_update_secret
+        )
+        mock_instance.secrets.kv.v2.delete_metadata_and_all_versions.side_effect = (
+            mock_delete_metadata
+        )
+        mock_client_class.return_value = mock_instance
+        yield mock_instance
+
+
+# -------------------------------- Sample Data Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def sample_df():
+    """Return a five-row structured dataset with PII columns (name, email, ssn)."""
+    full_names = [
+        "Alice Smith",
+        "Bob Jones",
+        "Charlie Brown",
+        "David Wilson",
+        "Eva Garcia",
+    ]
+    email_addresses = [
+        "alice@example.com",
+        "bob@example.com",
+        "charlie@example.com",
+        "david@example.com",
+        "eva@example.com",
+    ]
+    social_security_numbers = [
+        "123-45-6789",
+        "234-56-7890",
+        "345-67-8901",
+        "456-78-9012",
+        "567-89-0123",
+    ]
+    return pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4, 5],
+            "name": full_names,
+            "email": email_addresses,
+            "ssn": social_security_numbers,
+            "age": [25, 30, 35, 40, 45],
+            "salary": [50000.0, 60000.0, 70000.0, 80000.0, 90000.0],
+            "department": ["HR", "IT", "Finance", "IT", "HR"],
+        }
+    )
+
+
+# -------------------------------- Configuration Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def encrypt_config_single_field():
+    """
+    Build the pseudonymisation config that encrypts only the ``email`` column.
+
+    Produces the pseudonymised data that the restoration tests start from.
+    """
+    email_encryption = EncryptConfig(
+        type="encrypt",
+        columns=["email"],
+        key_name="test_restoration_key_single",
+    )
+
+    # A single technique wrapped in the top-level structured config.
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=email_encryption)]
+    )
+
+
+@pytest.fixture
+def decrypt_config_single_field():
+    """
+    Build the de-pseudonymisation config that decrypts only the ``email`` column.
+
+    Used to restore the original values.
+    """
+    email_decryption = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name="test_restoration_key_single",
+    )
+
+    # A single technique wrapped in the top-level structured config.
+    return DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=email_decryption)]
+    )
+
+
+@pytest.fixture
+def encrypt_config_multiple_fields():
+    """
+    Build the pseudonymisation config that encrypts ``name``, ``email`` and ``ssn``.
+
+    Exercises restoration of several sensitive fields under one key.
+    """
+    multi_field_encryption = EncryptConfig(
+        type="encrypt",
+        columns=["name", "email", "ssn"],
+        key_name="test_restoration_key_multi",
+    )
+
+    # A single technique covering all three columns.
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=multi_field_encryption)]
+    )
+
+
+@pytest.fixture
+def decrypt_config_multiple_fields():
+    """
+    Build the de-pseudonymisation config that decrypts ``name``, ``email`` and ``ssn``.
+
+    All three columns share the key ``test_restoration_key_multi``.
+    """
+    multi_field_decryption = DecryptConfig(
+        type="decrypt",
+        columns=["name", "email", "ssn"],
+        key_name="test_restoration_key_multi",
+    )
+    # A single technique covering all three columns.
+    return DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=multi_field_decryption)]
+    )
+
+
+@pytest.fixture
+def encrypt_config_partial_fields():
+    """
+    Build the pseudonymisation config that encrypts only ``email`` and ``ssn``.
+
+    Exercises partial restoration scenarios.
+    """
+    partial_encryption = EncryptConfig(
+        type="encrypt",
+        columns=["email", "ssn"],
+        key_name="test_restoration_key_partial",
+    )
+
+    # A single technique covering just the two listed columns.
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=partial_encryption)]
+    )
+
+
+@pytest.fixture
+def decrypt_config_partial_fields():
+    """
+    Build the de-pseudonymisation config that decrypts only ``email`` and ``ssn``.
+
+    Both columns share the key ``test_restoration_key_partial``.
+    """
+    partial_decryption = DecryptConfig(
+        type="decrypt",
+        columns=["email", "ssn"],
+        key_name="test_restoration_key_partial",
+    )
+    # A single technique covering just the two listed columns.
+    return DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=partial_decryption)]
+    )
+
+
+@pytest.fixture
+def authorized_multi_key_scenario():
+    """
+    Provide two Vault keys for authorization tests: one readable, one denied.
+
+    Yields a dict mapping ``"authorized"`` / ``"unauthorized"`` to the key names.
+    """
+    key_names = {"authorized": "authorized_key", "unauthorized": "unauthorized_key"}
+
+    # Start from a clean slate for both keys.
+    for name in ("authorized_key", "unauthorized_key"):
+        clear_vault_key(name)
+
+    # Store a fresh Fernet key under each name, then block reads of the second one.
+    set_vault_key("authorized_key", Fernet.generate_key().decode())
+    set_vault_key("unauthorized_key", Fernet.generate_key().decode())
+    deny_vault_access("unauthorized_key")
+
+    yield key_names
+
+    # Cleanup
+    for name in ("authorized_key", "unauthorized_key"):
+        clear_vault_key(name)
+
+
+@pytest.fixture
+def large_dataset():
+    """
+    Build a 10,000-row frame of synthetic PII-style columns for performance tests.
+    """
+    row_ids = range(1, 10001)
+    departments = ["HR", "IT", "Finance", "Sales"]
+    columns = {
+        "id": row_ids,
+        "email": [f"user{n}@example.com" for n in row_ids],
+        "name": [f"User {n}" for n in row_ids],
+        "ssn": [f"{n:03d}-{n:02d}-{n:04d}" for n in row_ids],
+        "age": [20 + (n % 50) for n in row_ids],
+        "salary": [30000.0 + (n * 10) for n in row_ids],
+        "department": [departments[n % 4] for n in row_ids],
+    }
+    return pd.DataFrame(columns)
+
+
+@pytest.fixture(scope="session")
+def vault_test_keys():
+    """
+    Generate ten Fernet keys once per session, named ``test_key_0`` .. ``test_key_9``.
+    """
+    generated = {}
+    for index in range(10):
+        generated[f"test_key_{index}"] = Fernet.generate_key().decode()
+    return generated
+
+
+@pytest.fixture
+def cleanup_test_keys(request):
+    """
+    Remove every mock-Vault entry whose path contains ``test_`` once the test ends.
+    ``request`` is accepted but not used by the body.
+    Use with: @pytest.mark.usefixtures("cleanup_test_keys")
+    """
+    yield
+
+    # Snapshot the matching paths first, because the dict shrinks while we delete.
+    for stale_path in [path for path in _test_vault_storage.keys() if "test_" in path]:
+        _test_vault_storage.pop(stale_path, None)
+
+
+# -------------------------------- Helper Functions ----------------------------------------
+
+
+def config_to_dagster_dict(config):
+    """
+    Dump a Pydantic config into the dictionary shape Dagster op config expects.
+
+    ``AnonymisePseudonymizeStructuredConfig`` techniques form a discriminated union.
+    Pydantic v2 dumps each one flat, with the discriminator inline:
+        {'technique': {'type': 'encrypt', 'columns': [...], 'key_name': '...'}}
+    whereas Dagster expects the payload nested under the discriminator value:
+        {'technique': {'encrypt': {'columns': [...], 'key_name': '...'}}}
+
+    Any other config (e.g. ``DepseudonymizeStructuredConfig``, whose technique is a
+    plain ``DecryptConfig``) is returned exactly as ``model_dump()`` produced it.
+
+    Args:
+        config: Pydantic config instance
+            (AnonymisePseudonymizeStructuredConfig or
+            DepseudonymizeStructuredConfig)
+
+    Returns:
+        dict: Dagster-compatible configuration dictionary
+    """
+    from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+        AnonymisePseudonymizeStructuredConfig,
+    )
+
+    dumped = config.model_dump()
+
+    # Only the pseudonymise config carries a discriminated union that needs re-nesting.
+    if not isinstance(config, AnonymisePseudonymizeStructuredConfig):
+        return dumped
+
+    for entry in dumped.get("used_function", []):
+        if "technique" not in entry:
+            continue
+        flat = entry["technique"]
+        # Skip anything that is not a flat dict carrying the 'type' discriminator.
+        if not (isinstance(flat, dict) and "type" in flat):
+            continue
+
+        # Pull the discriminator out and nest the remaining fields beneath it.
+        payload = {field: value for field, value in flat.items() if field != "type"}
+        entry["technique"] = {flat["type"]: payload}
+
+    return dumped
+
+
+def run_encrypt_op(config, df):
+    """
+    Invoke ``anonymize_pseudonymize_structured`` directly with a freshly built op context.
+
+    Args:
+        config: AnonymisePseudonymizeStructuredConfig instance
+        df: Input pandas DataFrame
+
+    Returns:
+        tuple: ``(result_df, metrics)`` - the unwrapped ``.value`` of both op outputs
+    """
+    op_context = build_op_context(op_config=config_to_dagster_dict(config))
+    data_out, metrics_out = anonymize_pseudonymize_structured(op_context, df=df)
+    return data_out.value, metrics_out.value
+
+
+def run_decrypt_op(config, df):
+    """
+    Invoke ``depseudonymize_structured`` directly with a freshly built op context.
+
+    Args:
+        config: DepseudonymizeStructuredConfig instance
+        df: Input pandas DataFrame
+
+    Returns:
+        tuple: ``(result_df, metrics)`` - the unwrapped ``.value`` of both op outputs
+    """
+    op_context = build_op_context(op_config=config_to_dagster_dict(config))
+    data_out, metrics_out = depseudonymize_structured(op_context, df=df)
+    return data_out.value, metrics_out.value
+
+
+def clear_vault_key(key_name: str):
+    """
+    Drop a key from the simulated Vault so tests do not leak state into each other.
+    Both the stored value and any access-control entry are removed; a key that
+    was never set is silently ignored.
+
+    Args:
+        key_name: Name of the key to delete from Vault
+    """
+    vault_path = f"secret/PseudonymKeys/{key_name}"
+    for registry in (_test_vault_storage, _test_vault_access_control):
+        registry.pop(vault_path, None)
+
+
+def set_vault_key(key_name: str, key_value: str):
+    """
+    Store ``key_value`` in the simulated Vault under ``secret/PseudonymKeys/<key_name>``.
+
+    Args:
+        key_name: Name of the key
+        key_value: Value of the key (Fernet key as string)
+    """
+    new_entry = {f"secret/PseudonymKeys/{key_name}": key_value}
+    _test_vault_storage.update(new_entry)
+
+
+def deny_vault_access(key_name: str):
+    """
+    Mark ``secret/PseudonymKeys/<key_name>`` as denied in the access-control map,
+    for authorization testing (AC3).
+
+    Args:
+        key_name: Name of the key to deny access to
+    """
+    _test_vault_access_control.update({f"secret/PseudonymKeys/{key_name}": False})
+
+
+def get_vault_key(key_name: str) -> bytes:
+    """
+    Look up a key in the simulated Vault storage and return it encoded as bytes.
+
+    Args:
+        key_name: Name of the key to retrieve
+
+    Returns:
+        bytes: The encryption key
+    """
+    vault_path = f"secret/PseudonymKeys/{key_name}"
+    if vault_path in _test_vault_storage:
+        return _test_vault_storage[vault_path].encode()
+    raise InvalidPath(f"Key not found: {key_name}")
diff --git a/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py b/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
new file mode 100644
index 0000000..010b9a6
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
@@ -0,0 +1,633 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+    AnonymisePseudonymizeStructuredConfig,
+    DepseudonymizeStructuredConfig,
+    PseudoTechniqueConfig,
+    DepseudoTechniqueConfig,
+    HashConfig,
+    EncryptConfig,
+    RedactConfig,
+    ReplaceConfig,
+    DecryptConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
+    AnonymisePseudonymizeUnstructuredConfig,
+    DepseudonymizeUnstructuredConfig,
+    PseudoTechniqueConfig as UnstructuredPseudoTechniqueConfig,
+    DepseudoTechniqueConfig as UnstructuredDepseudoTechniqueConfig,
+    HashConfig as UnstructuredHashConfig,
+    EncryptConfig as UnstructuredEncryptConfig,
+    RedactConfig as UnstructuredRedactConfig,
+    ReplaceConfig as UnstructuredReplaceConfig,
+    RetainConfig,
+    DecryptConfig as UnstructuredDecryptConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.config_models.languages import LanguageEnum
+from template_code_location.field_level_pseudo_anonymisation.config_models.pii_entities import PIIEntityEnum
+
+
+# ==================== Structured Config Tests ====================
+
+class TestStructuredConfigValidators:
+    """Tests for structured_config.py validators and their private helper methods."""
+    
+    def test_ensure_unique_columns_valid_single_technique(self):
+        """Test that single technique with single column passes validation."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        columns=["email"],
+                        key_name="key1"
+                    )
+                )
+            ]
+        )
+        assert config is not None
+        assert len(config.used_function) == 1
+    
+    def test_ensure_unique_columns_valid_multiple_techniques_different_columns(self):
+        """Test that multiple techniques with different columns passes validation."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        columns=["email"],
+                        key_name="key1"
+                    )
+                ),
+                PseudoTechniqueConfig(
+                    technique=HashConfig(
+                        columns=["ssn"],
+                        algorithm="sha256"
+                    )
+                )
+            ]
+        )
+        assert config is not None
+        assert len(config.used_function) == 2
+    
+    def test_ensure_unique_columns_duplicate_columns_same_technique(self):
+        """Test that the same column listed under two different techniques raises an error."""
+        with pytest.raises(ValueError) as exc_info:
+            AnonymisePseudonymizeStructuredConfig(
+                used_function=[
+                    PseudoTechniqueConfig(
+                        technique=EncryptConfig(
+                            columns=["email"],
+                            key_name="key1"
+                        )
+                    ),
+                    PseudoTechniqueConfig(
+                        technique=HashConfig(
+                            columns=["email"],
+                            algorithm="sha256"
+                        )
+                    )
+                ]
+            )
+        assert "Duplicate column" in str(exc_info.value)
+        assert "email" in str(exc_info.value)
+    
+    def test_ensure_unique_columns_multiple_duplicates(self):
+        """Test that the error message names every duplicated column."""
+        with pytest.raises(ValueError) as exc_info:
+            AnonymisePseudonymizeStructuredConfig(
+                used_function=[
+                    PseudoTechniqueConfig(
+                        technique=EncryptConfig(
+                            columns=["email", "phone"],
+                            key_name="key1"
+                        )
+                    ),
+                    PseudoTechniqueConfig(
+                        technique=HashConfig(
+                            columns=["email", "phone"],
+                            algorithm="sha256"
+                        )
+                    )
+                ]
+            )
+        error_msg = str(exc_info.value)
+        assert "Duplicate column" in error_msg
+        assert "email" in error_msg
+        assert "phone" in error_msg
+    
+    def test_collect_column_to_techniques_single_technique(self):
+        """Test _collect_column_to_techniques with single technique."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        columns=["email", "phone"],
+                        key_name="key1"
+                    )
+                )
+            ]
+        )
+        mapping = config._collect_column_to_techniques()
+        assert mapping == {
+            "email": ["encrypt"],
+            "phone": ["encrypt"]
+        }
+    
+    def test_extract_technique_and_columns_dict_with_type_field(self):
+        """Test _extract_technique_and_columns with dict containing 'type' field."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "columns": ["email", "ssn"],
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert columns == ["email", "ssn"]
+    
+    def test_extract_technique_and_columns_dict_with_variant_mapping(self):
+        """Test _extract_technique_and_columns with variant-key mapping {'encrypt': {...}}."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {
+                "technique": {
+                    "encrypt": {
+                        "columns": ["ssn"],
+                        "key_name": "test_key"
+                    }
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert columns == ["ssn"]
+    
+    def test_extract_technique_and_columns_model_instance(self):
+        """Test _extract_technique_and_columns with PseudoTechniqueConfig model instance."""
+        pseudo_config = PseudoTechniqueConfig(
+            technique=RedactConfig(columns=["address"])
+        )
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(pseudo_config)
+        assert technique_type == "redact"
+        assert columns == ["address"]
+    
+    def test_extract_technique_and_columns_empty_dict(self):
+        """Test _extract_technique_and_columns with empty dict."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {"technique": {}}
+        )
+        assert technique_type is None
+        assert columns == []
+    
+    def test_extract_technique_and_columns_none_technique(self):
+        """Test _extract_technique_and_columns with None technique."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {"technique": None}
+        )
+        assert technique_type is None
+        assert columns == []
+    
+    def test_extract_technique_and_columns_missing_columns_key(self):
+        """Test _extract_technique_and_columns when 'columns' key is missing."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert columns == []
+    
+    def test_extract_technique_and_columns_model_without_columns_attr(self):
+        """Test _extract_technique_and_columns with a ReplaceConfig model (NOTE(review): it does define columns, despite the test name)."""
+        pseudo_config = PseudoTechniqueConfig(
+            technique=ReplaceConfig(columns=["old_value"], new_value="NEW")
+        )
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(pseudo_config)
+        assert technique_type == "replace"
+        assert columns == ["old_value"]
+
+
+class TestStructuredDepseudonymizeConfig:
+    """Tests for DepseudonymizeStructuredConfig."""
+
+    def test_depseudonymize_config_normalize_used_function_with_dict(self):
+        """A raw dict entry in used_function must be coerced into DepseudoTechniqueConfig."""
+        raw_entry = {
+            "technique": {
+                "type": "decrypt",
+                "columns": ["email"],
+                "key_name": "key1",
+            }
+        }
+
+        config = DepseudonymizeStructuredConfig(used_function=[raw_entry])
+
+        assert len(config.used_function) == 1
+        coerced = config.used_function[0]
+        assert isinstance(coerced, DepseudoTechniqueConfig)
+        assert coerced.technique.type == "decrypt"
+
+    def test_depseudonymize_config_normalize_used_function_with_model(self):
+        """An already-built DepseudoTechniqueConfig must be kept as the very same object."""
+        decrypt_email = DecryptConfig(
+            columns=["email"],
+            key_name="key1",
+        )
+        depseudo_tech = DepseudoTechniqueConfig(technique=decrypt_email)
+        config = DepseudonymizeStructuredConfig(used_function=[depseudo_tech])
+
+        normalized = config.used_function
+        assert len(normalized) == 1
+        # Identity, not just equality: the instance handed in must be the one stored.
+        assert normalized[0] is depseudo_tech
+
+    def test_depseudonymize_config_ensure_unique_columns_no_op(self):
+        """Two decrypt techniques may target the same column without a validation error."""
+        # For depseudonymize, there's no per-column uniqueness constraint
+        first_decrypt = DepseudoTechniqueConfig(
+            technique=DecryptConfig(
+                columns=["email"],
+                key_name="key1",
+            )
+        )
+        second_decrypt = DepseudoTechniqueConfig(
+            technique=DecryptConfig(
+                columns=["email"],
+                key_name="key2",
+            )
+        )
+        config = DepseudonymizeStructuredConfig(
+            used_function=[first_decrypt, second_decrypt]
+        )
+
+        # Should not raise - no-op validator
+        assert config is not None
+
+
+# ==================== Unstructured Config Tests ====================
+
+class TestUnstructuredConfigValidators:
+    """Tests for the validators and private helpers of AnonymisePseudonymizeUnstructuredConfig."""
+    
+    def test_normalize_used_function_with_dict(self):
+        """Test _normalize_used_function with a variant-key dict ({"technique": {"encrypt": {...}}})."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                {
+                    "technique": {
+                        "encrypt": {
+                            "pii": [PIIEntityEnum.EMAIL.value],
+                            "key_name": "key1"
+                        }
+                    }
+                }
+            ]
+        )
+        assert len(config.used_function) == 1
+    
+    def test_normalize_used_function_with_model(self):
+        """Test _normalize_used_function with an UnstructuredPseudoTechniqueConfig instance."""
+        pseudo_tech = UnstructuredPseudoTechniqueConfig(
+            technique=UnstructuredEncryptConfig(
+                pii=[PIIEntityEnum.EMAIL.value],
+                key_name="key1"
+            )
+        )
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[pseudo_tech]
+        )
+        assert len(config.used_function) == 1
+    
+    def test_ensure_unique_pii_valid_different_pii_types(self):
+        """Test that two techniques targeting different PII types (EMAIL, PERSON) pass validation."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredEncryptConfig(
+                        pii=[PIIEntityEnum.EMAIL.value],
+                        key_name="key1"
+                    )
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredHashConfig(
+                        pii=[PIIEntityEnum.PERSON.value],
+                        algorithm="sha256"
+                    )
+                )
+            ]
+        )
+        assert config is not None
+        assert len(config.used_function) == 2
+    
+    def test_ensure_unique_pii_duplicate_pii_types(self):
+        """Test that two techniques targeting the same PII type (EMAIL) raise ValueError."""
+        with pytest.raises(ValueError) as exc_info:
+            AnonymisePseudonymizeUnstructuredConfig(
+                language=LanguageEnum.en,
+                used_function=[
+                    UnstructuredPseudoTechniqueConfig(
+                        technique=UnstructuredEncryptConfig(
+                            pii=[PIIEntityEnum.EMAIL.value],
+                            key_name="key1"
+                        )
+                    ),
+                    UnstructuredPseudoTechniqueConfig(
+                        technique=UnstructuredHashConfig(
+                            pii=[PIIEntityEnum.EMAIL.value],
+                            algorithm="sha256"
+                        )
+                    )
+                ]
+            )
+        assert "Duplicate PII" in str(exc_info.value)
+        # Error message shows PIIEntityEnum.EMAIL (the enum repr) rather than the value
+        assert "EMAIL" in str(exc_info.value)
+    
+    def test_collect_pii_to_techniques_single_technique(self):
+        """Test _collect_pii_to_techniques maps each PII of one technique to that technique's type."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredEncryptConfig(
+                        pii=[PIIEntityEnum.EMAIL.value, PIIEntityEnum.PERSON.value],
+                        key_name="key1"
+                    )
+                )
+            ]
+        )
+        mapping = config._collect_pii_to_techniques()
+        assert mapping == {
+            PIIEntityEnum.EMAIL.value: ["encrypt"],
+            PIIEntityEnum.PERSON.value: ["encrypt"]
+        }
+    
+    def test_extract_technique_and_pii_dict_with_type_field(self):
+        """Test _extract_technique_and_pii with dict containing 'type' field."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "pii": [PIIEntityEnum.EMAIL.value],
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert piis == [PIIEntityEnum.EMAIL.value]
+    
+    def test_extract_technique_and_pii_dict_with_variant_mapping(self):
+        """Test _extract_technique_and_pii with variant-key mapping ({"hash": {...}})."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "hash": {
+                        "pii": [PIIEntityEnum.PERSON.value],
+                        "algorithm": "sha256"
+                    }
+                }
+            }
+        )
+        assert technique_type == "hash"
+        assert piis == [PIIEntityEnum.PERSON.value]
+    
+    def test_extract_technique_and_pii_dict_fallback_to_columns(self):
+        """Test _extract_technique_and_pii fallback to 'columns' key when 'pii' is missing."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "type": "redact",
+                    "columns": ["fallback_col"]
+                }
+            }
+        )
+        assert technique_type == "redact"
+        assert piis == ["fallback_col"]
+    
+    def test_extract_technique_and_pii_model_instance(self):
+        """Test _extract_technique_and_pii with an UnstructuredRedactConfig model instance."""
+        pseudo_tech = UnstructuredPseudoTechniqueConfig(
+            technique=UnstructuredRedactConfig(
+                pii=[PIIEntityEnum.EMAIL.value]
+            )
+        )
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(pseudo_tech)
+        assert technique_type == "redact"
+        assert piis == [PIIEntityEnum.EMAIL.value]
+    
+    def test_extract_technique_and_pii_model_with_getattr_fallback(self):
+        """Test _extract_technique_and_pii with a RetainConfig model instance."""
+        # NOTE(review): RetainConfig is given `pii`, so a fallback to `columns` is not exercised here
+        pseudo_tech = UnstructuredPseudoTechniqueConfig(
+            technique=RetainConfig(pii=[PIIEntityEnum.PERSON.value])
+        )
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(pseudo_tech)
+        assert technique_type == "retain"
+        assert piis == [PIIEntityEnum.PERSON.value]
+    
+    def test_extract_technique_and_pii_empty_dict(self):
+        """Test _extract_technique_and_pii with an empty technique dict yields (None, [])."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {"technique": {}}
+        )
+        assert technique_type is None
+        assert piis == []
+    
+    def test_extract_technique_and_pii_missing_pii_key(self):
+        """Test _extract_technique_and_pii returns an empty PII list when 'pii' key is missing."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert piis == []
+
+
+class TestUnstructuredDepseudonymizeConfig:
+    """Tests for constructing DepseudonymizeUnstructuredConfig with and without arguments."""
+    
+    def test_depseudonymize_unstructured_config_default(self):
+        """A config built without arguments exposes at least one used_function entry."""
+        config = DepseudonymizeUnstructuredConfig()
+        assert config is not None
+        assert len(config.used_function) >= 1
+    
+    def test_depseudonymize_unstructured_config_with_custom_function(self):
+        """An explicitly supplied decrypt technique is kept, including its key_name."""
+        config = DepseudonymizeUnstructuredConfig(
+            used_function=[
+                UnstructuredDepseudoTechniqueConfig(
+                    technique=UnstructuredDecryptConfig(
+                        key_name="custom_key"
+                    )
+                )
+            ]
+        )
+        assert len(config.used_function) == 1
+        assert config.used_function[0].technique.key_name == "custom_key"
+
+
+class TestLanguageSupport:
+    """Verify the language field of AnonymisePseudonymizeUnstructuredConfig."""
+
+    def test_all_supported_languages(self):
+        """Every listed LanguageEnum member is accepted and stored unchanged."""
+        language_codes = (
+            LanguageEnum.hr, LanguageEnum.da, LanguageEnum.nl, LanguageEnum.en,
+            LanguageEnum.fi, LanguageEnum.fr, LanguageEnum.de, LanguageEnum.el,
+            LanguageEnum.it, LanguageEnum.lt, LanguageEnum.pl, LanguageEnum.pt,
+            LanguageEnum.ro, LanguageEnum.sl, LanguageEnum.es, LanguageEnum.sv,
+        )
+
+        for code in language_codes:
+            built = AnonymisePseudonymizeUnstructuredConfig(language=code)
+            assert built.language == code
+
+    def test_default_language_is_english(self):
+        """Omitting the language argument yields LanguageEnum.en."""
+        built = AnonymisePseudonymizeUnstructuredConfig()
+        assert built.language == LanguageEnum.en
+
+
+class TestTechniqueConfigDefaults:
+    """Check the field values each technique config has when built without arguments."""
+
+    def test_hash_config_default_algorithm(self):
+        """HashConfig() reports type "hash" and the sha256 algorithm."""
+        hash_defaults = HashConfig()
+        assert hash_defaults.type == "hash"
+        assert hash_defaults.algorithm == "sha256"
+
+    def test_encrypt_config_defaults(self):
+        """EncryptConfig() reports type "encrypt" and key name "my_key"."""
+        encrypt_defaults = EncryptConfig()
+        assert encrypt_defaults.key_name == "my_key"
+        assert encrypt_defaults.type == "encrypt"
+
+    def test_redact_config_defaults(self):
+        """RedactConfig() reports type "redact"."""
+        redact_defaults = RedactConfig()
+        assert redact_defaults.type == "redact"
+
+    def test_replace_config_defaults(self):
+        """ReplaceConfig() reports type "replace" and new_value "REPLACED"."""
+        replace_defaults = ReplaceConfig()
+        assert replace_defaults.new_value == "REPLACED"
+        assert replace_defaults.type == "replace"
+
+    def test_decrypt_config_defaults(self):
+        """DecryptConfig() reports type "decrypt" and key name "my_key"."""
+        decrypt_defaults = DecryptConfig()
+        assert decrypt_defaults.key_name == "my_key"
+        assert decrypt_defaults.type == "decrypt"
+
+    def test_unstructured_retain_config_defaults(self):
+        """RetainConfig() reports type "retain"."""
+        retain_defaults = RetainConfig()
+        assert retain_defaults.type == "retain"
+
+
+class TestPseudoTechniqueConfigDefaults:
+    """Check which technique the pseudo-technique wrappers carry when none is supplied."""
+
+    def test_pseudo_technique_default_to_hash(self):
+        """A bare PseudoTechniqueConfig resolves to the hash technique."""
+        default_technique = PseudoTechniqueConfig().technique
+        # The default may surface as a model instance or as a raw discriminator dict
+        if not isinstance(default_technique, dict):
+            assert default_technique.type == "hash"
+        else:
+            # Dict form: accept either a "hash" variant key or an explicit type field
+            assert "hash" in default_technique or default_technique.get("type") == "hash"
+
+    def test_unstructured_pseudo_technique_default_to_hash(self):
+        """A bare UnstructuredPseudoTechniqueConfig resolves to the hash technique."""
+        default_technique = UnstructuredPseudoTechniqueConfig().technique
+        # The default may surface as a model instance or as a raw discriminator dict
+        if not isinstance(default_technique, dict):
+            assert default_technique.type == "hash"
+        else:
+            # Dict form: accept either a "hash" variant key or an explicit type field
+            assert "hash" in default_technique or default_technique.get("type") == "hash"
+
+
+class TestConfigModelIntegration:
+    """Integration tests combining several technique configs in one top-level config."""
+    
+    def test_structured_config_with_all_technique_types(self):
+        """Structured config accepts hash, encrypt, redact and replace on four distinct columns."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=HashConfig(columns=["col1"])
+                ),
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(columns=["col2"], key_name="k1")
+                ),
+                PseudoTechniqueConfig(
+                    technique=RedactConfig(columns=["col3"])
+                ),
+                PseudoTechniqueConfig(
+                    technique=ReplaceConfig(columns=["col4"], new_value="X")
+                )
+            ]
+        )
+        assert len(config.used_function) == 4
+        techniques = {f.technique.type for f in config.used_function}
+        assert techniques == {"hash", "encrypt", "redact", "replace"}
+    
+    def test_unstructured_config_with_all_technique_types(self):
+        """Unstructured config accepts hash, encrypt, redact, replace and retain on five distinct PII types."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredHashConfig(pii=[PIIEntityEnum.EMAIL.value])
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredEncryptConfig(
+                        pii=[PIIEntityEnum.PERSON.value],
+                        key_name="k1"
+                    )
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredRedactConfig(pii=[PIIEntityEnum.PHONE_NUMBERS.value])
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredReplaceConfig(
+                        pii=[PIIEntityEnum.CREDIT_CARD.value],
+                        new_value="X"
+                    )
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=RetainConfig(pii=[PIIEntityEnum.DATE_OF_BIRTH.value])
+                )
+            ]
+        )
+        assert len(config.used_function) == 5
+        techniques = {f.technique.type for f in config.used_function}
+        assert techniques == {"hash", "encrypt", "redact", "replace", "retain"}
diff --git a/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py b/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
new file mode 100644
index 0000000..9ed013a
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
@@ -0,0 +1,1090 @@
+"""
+Test suite for data restoration (depseudonymization) operations.
+
+This test suite validates the data restoration feature against the following Acceptance Criteria:
+
+## Test Coverage Summary
+
+### Acceptance Criteria Coverage:
+- AC1 (Data Restoration with Valid Key): 7 tests
+- AC2 (Restoration Denial - Missing Key): 3 tests
+- AC3 (Restoration Denial - Unauthorized Access): 2 tests
+- AC4 (Restoration Denial - Invalid Key): 3 tests
+- Additional Coverage: 3 tests
+
+### Test Pattern:
+- Each test uses build_op_context with .model_dump() for configuration
+- Tests validate dual outputs (data, metrics)
+- Tests verify complete restoration of original values
+- Tests validate security controls and error handling
+
+"""
+
+import pandas as pd
+import pytest
+from cryptography.fernet import Fernet
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+    AnonymisePseudonymizeStructuredConfig,
+    DepseudonymizeStructuredConfig,
+    EncryptConfig,
+    DecryptConfig,
+    PseudoTechniqueConfig,
+    DepseudoTechniqueConfig,
+)
+
+# Import helper functions (fixtures are auto-discovered by pytest)
+from .conftest import (
+    run_encrypt_op,
+    run_decrypt_op,
+    clear_vault_key,
+    set_vault_key,
+    deny_vault_access,
+    get_vault_key,
+)
+
+
+# -------------------------------- Test Markers Configuration --------------------------------
+
+# NOTE(review): self-assignments below do not register markers; use the pytest "markers" ini option
+pytest.mark.slow = pytest.mark.slow
+pytest.mark.security = pytest.mark.security
+pytest.mark.edge_case = pytest.mark.edge_case
+pytest.mark.integration = pytest.mark.integration
+
+
+# ---------------------- AC1: Data Restoration with Valid Key --------------------------------
+
+
+def test_ac1_restore_single_encrypted_field_with_valid_key(
+    sample_df, encrypt_config_single_field, decrypt_config_single_field
+):
+    """
+    AC1: Data Restoration using Secret Management Tool-Stored Decryption Key
+
+    Scenario: Restore encrypted field with a valid key
+    Given: A pseudonymised dataset with encrypted email field
+    And: A valid decryption key stored in secret management tool
+    And: The participant provided the field that needs to be restored (email)
+    And: The participant is authorized
+    When: The participant requests data restoration
+    And: Provides the correct key name
+    Then: The system retrieves the key from secret management tool
+    And: Decrypts the dataset accurately
+    And: All original values are restored
+    And: A success message is presented to the user (via successful return)
+    And: The result is presented to the user
+    """
+    # Remove any pre-existing key so the test starts from a known Vault state
+    clear_vault_key("test_restoration_key_single")
+
+    # Step 1: Encrypt the data (pseudonymisation phase)
+    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Verify encryption occurred
+    assert not encrypted_df["email"].equals(sample_df["email"]), "Email field should be encrypted"
+
+    # Verify the encrypt step left a key under this name in Vault
+    key = get_vault_key("test_restoration_key_single")
+    assert key is not None, "Encryption key should exist in Vault"
+
+    # Step 2: Restore the data (depseudonymisation phase)
+    restored_df, metrics = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
+
+    # Verify restoration succeeded
+    assert restored_df is not None, "Restored DataFrame should not be None"
+    assert metrics is not None, "Metrics should not be None"
+
+    # Verify all original values are restored exactly
+    assert restored_df["email"].equals(
+        sample_df["email"]
+    ), "Email field should be restored to original values"
+
+    # Compare row by row so that a failure message names the offending row
+    for idx, (original, restored) in enumerate(zip(sample_df["email"], restored_df["email"])):
+        assert (
+            original == restored
+        ), f"Row {idx}: Original '{original}' should match restored '{restored}'"
+
+    # Verify row count preserved
+    assert len(restored_df) == len(sample_df), "Row count should be preserved during restoration"
+
+    # Verify columns that were never encrypted (name, age, department) are untouched
+    assert restored_df["name"].equals(
+        sample_df["name"]
+    ), "Non-encrypted fields should remain unchanged"
+    assert restored_df["age"].equals(
+        sample_df["age"]
+    ), "Non-encrypted fields should remain unchanged"
+    assert restored_df["department"].equals(
+        sample_df["department"]
+    ), "Non-encrypted fields should remain unchanged"
+
+
+def test_ac1_restore_multiple_encrypted_fields_with_valid_key(
+    sample_df, encrypt_config_multiple_fields, decrypt_config_multiple_fields
+):
+    """
+    AC1: Data Restoration of multiple encrypted fields with a valid key
+
+    Scenario: Restore multiple encrypted fields (name, email, ssn) with a valid key
+    Given: A pseudonymised dataset with multiple encrypted fields
+    And: A valid decryption key stored in secret management tool
+    And: The participant provided the fields that need to be restored
+    When: The participant requests data restoration
+    Then: All specified fields are decrypted accurately
+    And: All original values are restored
+    """
+    clear_vault_key("test_restoration_key_multi")
+
+    # Encrypt with the multi-field fixture config (name, email, ssn are asserted below)
+    encrypted_df, _ = run_encrypt_op(encrypt_config_multiple_fields, sample_df.copy())
+
+    # Verify all specified fields were encrypted
+    assert not encrypted_df["name"].equals(sample_df["name"]), "Name should be encrypted"
+    assert not encrypted_df["email"].equals(sample_df["email"]), "Email should be encrypted"
+    assert not encrypted_df["ssn"].equals(sample_df["ssn"]), "SSN should be encrypted"
+
+    # Restore all encrypted fields; the metrics output is not inspected in this test
+    restored_df, _ = run_decrypt_op(decrypt_config_multiple_fields, encrypted_df.copy())
+
+    # Verify all fields restored to original values
+    assert restored_df["name"].equals(
+        sample_df["name"]
+    ), "Name field should be restored to original values"
+    assert restored_df["email"].equals(
+        sample_df["email"]
+    ), "Email field should be restored to original values"
+    assert restored_df["ssn"].equals(
+        sample_df["ssn"]
+    ), "SSN field should be restored to original values"
+
+    # Verify columns that were never encrypted (age, salary) are untouched
+    assert restored_df["age"].equals(
+        sample_df["age"]
+    ), "Non-encrypted fields should remain unchanged"
+    assert restored_df["salary"].equals(
+        sample_df["salary"]
+    ), "Non-encrypted fields should remain unchanged"
+
+
+def test_ac1_restore_partial_fields_leaves_others_encrypted(
+    sample_df, encrypt_config_multiple_fields
+):
+    """
+    AC1: Partial restoration - participant specifies only some fields to restore
+
+    Scenario: Restore only selected fields while leaving others encrypted
+    Given: A pseudonymised dataset with multiple encrypted fields (name, email, ssn)
+    And: The participant specifies only some fields to restore (e.g., only email)
+    When: The participant requests partial restoration
+    Then: Only the specified fields are decrypted
+    And: Other encrypted fields remain encrypted
+    """
+    clear_vault_key("test_restoration_key_multi")
+
+    # Encrypt with the multi-field fixture config
+    encrypted_df, _ = run_encrypt_op(encrypt_config_multiple_fields, sample_df.copy())
+
+    # Decrypt config lists only `email`; key name presumably matches the encrypt fixture - confirm
+    partial_decrypt_config = DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt",
+                    columns=["email"],  # Only restore email
+                    key_name="test_restoration_key_multi",
+                )
+            )
+        ]
+    )
+
+    # Restore only email field
+    restored_df, _ = run_decrypt_op(partial_decrypt_config, encrypted_df.copy())
+
+    # Verify email is restored
+    assert restored_df["email"].equals(
+        sample_df["email"]
+    ), "Email field should be restored to original values"
+
+    # Verify name and ssn still differ from the originals, i.e. were not decrypted
+    assert not restored_df["name"].equals(sample_df["name"]), "Name field should remain encrypted"
+    assert not restored_df["ssn"].equals(sample_df["ssn"]), "SSN field should remain encrypted"
+
+
+def test_ac1_restore_preserves_data_types(sample_df):
+    """
+    AC1: Data restoration returns the original values in string form for all fields
+
+    Scenario: Restore encrypted numeric and string fields
+    Given: A dataset with mixed data types (strings, integers, floats)
+    When: Fields are encrypted and then restored
+    Then: Restored values equal the originals' string representation (dtypes are not kept)
+    """
+    # Create config to encrypt mixed types
+    encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    columns=["name", "age", "salary"],
+                    key_name="test_restoration_types",
+                )
+            )
+        ]
+    )
+
+    decrypt_config = DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt",
+                    columns=["name", "age", "salary"],
+                    key_name="test_restoration_types",
+                )
+            )
+        ]
+    )
+
+    clear_vault_key("test_restoration_types")
+
+    # Encrypt and restore
+    encrypted_df, _ = run_encrypt_op(encrypt_config, sample_df.copy())
+    restored_df, _ = run_decrypt_op(decrypt_config, encrypted_df.copy())
+
+    # Verify values are restored (as strings due to encryption/decryption)
+    # Note: Fernet encryption/decryption converts everything to strings
+    # Expected behavior: numeric columns come back as str, so compare against astype(str)
+    assert (
+        restored_df["name"].tolist() == sample_df["name"].tolist()
+    ), "String values should be restored"
+    assert (
+        restored_df["age"].tolist() == sample_df["age"].astype(str).tolist()
+    ), "Integer values should be restored as strings"
+    assert (
+        restored_df["salary"].tolist() == sample_df["salary"].astype(str).tolist()
+    ), "Float values should be restored as strings"
+
+
+def test_ac1_restore_empty_dataframe(encrypt_config_single_field, decrypt_config_single_field):
+    """
+    AC1: Edge case - restore an empty dataset
+
+    Scenario: Attempt to restore an empty pseudonymised dataset
+    Given: An empty DataFrame with correct schema
+    When: Restoration is attempted
+    Then: Operation completes successfully without errors
+    And: Returns an empty DataFrame
+    """
+    clear_vault_key("test_restoration_key_single")
+
+    # Zero-row DataFrame carrying the seven columns used throughout this suite
+    empty_df = pd.DataFrame(columns=["id", "name", "email", "ssn", "age", "salary", "department"])
+
+    # Encrypt (should handle empty DataFrame)
+    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, empty_df.copy())
+
+    # Restore (should also handle empty DataFrame); metrics are not inspected here
+    restored_df, _ = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
+
+    # Verify empty DataFrame returned with the same column order
+    assert len(restored_df) == 0, "Restored DataFrame should be empty"
+    assert list(restored_df.columns) == list(empty_df.columns), "Column schema should be preserved"
+
+
+def test_ac1_restore_with_special_characters(
+    encrypt_config_single_field, decrypt_config_single_field
+):
+    """
+    AC1: Data restoration with special characters and edge case values
+
+    Scenario: Restore data containing special characters, unicode, etc.
+    Given: A dataset with special characters in string fields
+    When: Data is encrypted and then restored
+    Then: All special characters are preserved accurately
+    """
+    clear_vault_key("test_restoration_key_single")
+
+    # Names/emails mix accented Latin, CJK and an apostrophe; only `email` is asserted below
+    special_df = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "name": ["José García", "François Müller", "李明", "O'Brien"],
+            "email": [
+                "josé@example.com",
+                "françois@example.com",
+                "li@example.cn",
+                "o'brien@example.ie",
+            ],
+            "ssn": ["123-45-6789", "234-56-7890", "345-67-8901", "456-78-9012"],
+            "age": [25, 30, 35, 40],
+            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
+            "department": ["HR", "IT", "Finance", "IT"],
+        }
+    )
+
+    # Encrypt and restore
+    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, special_df.copy())
+    restored_df, _ = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
+
+    # Verify the whole email column round-trips unchanged
+    assert restored_df["email"].equals(
+        special_df["email"]
+    ), "Special characters should be preserved during restoration"
+
+    for idx, (original, restored) in enumerate(zip(special_df["email"], restored_df["email"])):
+        assert (
+            original == restored
+        ), f"Row {idx}: Special characters in '{original}' should be preserved"
+
+
+# ------------------- AC2: Restoration Denial when Key is Missing ----------------------------
+
+
+def test_ac2_restore_fails_when_key_missing(sample_df, encrypt_config_single_field):
+    """
+    AC2: Restoration Denial when Decryption Key is missing
+
+    Scenario: Attempt to restore encrypted fields when decryption key is missing
+    Given: A pseudonymised dataset
+    And: The decryption key is missing from Vault
+    And: The participant provides the correct key name
+    When: The participant attempts to restore the data
+    Then: The system fails the restoration request
+    And: Logs the failed key retrieval for auditing (via exception)
+    And: An error message is presented to the user
+    """
+    clear_vault_key("test_restoration_key_single")
+
+    # Encrypt data first
+    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Delete the key from Vault to simulate missing key
+    clear_vault_key("test_restoration_key_single")
+
+    # Decrypt config names the key that was just removed
+    decrypt_config = DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt",
+                    columns=["email"],
+                    key_name="test_restoration_key_single",
+                )
+            )
+        ]
+    )
+
+    # Attempt restoration - should fail with clear error
+    with pytest.raises(ValueError) as exc_info:
+        run_decrypt_op(decrypt_config, encrypted_df.copy())
+
+    # NOTE(review): the `or` below lets any message that merely contains "decrypt" pass
+    error_message = str(exc_info.value)
+    assert (
+        "not found" in error_message.lower() or "decrypt" in error_message.lower()
+    ), "Error message should indicate key not found for decrypt operation"
+    assert (
+        "test_restoration_key_single" in error_message
+    ), "Error message should include the key name for auditing"
+
+
+def test_ac2_restore_fails_with_nonexistent_key_name(sample_df, encrypt_config_single_field):
+    """
+    AC2: Restoration fails when using a key name that never existed
+
+    Scenario: Attempt to restore with a key name that was never created
+    Given: A pseudonymised dataset
+    And: A key name that does not exist in Vault
+    When: The participant attempts to restore the data
+    Then: The system fails the restoration request with appropriate error
+    """
+    clear_vault_key("test_restoration_key_single")
+
+    # Encrypt data with the single-field fixture config
+    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Decrypt config points at a key name that this test never creates
+    decrypt_config_wrong_key = DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt", columns=["email"], key_name="nonexistent_key_name"
+                )
+            )
+        ]
+    )
+
+    # Attempt restoration - should fail with ValueError
+    with pytest.raises(ValueError) as exc_info:
+        run_decrypt_op(decrypt_config_wrong_key, encrypted_df.copy())
+
+    error_message = str(exc_info.value)
+    assert "not found" in error_message.lower(), "Error message should indicate key not found"
+
+
+def test_ac2_restore_fails_when_key_corrupted(sample_df, encrypt_config_single_field):
+    """
+    AC2: Restoration Denial when Decryption Key is corrupted
+
+    Scenario: Attempt to restore when key is corrupted in Vault
+    Given: A pseudonymised dataset
+    And: The decryption key is corrupted (invalid format)
+    When: The participant attempts to restore the data
+    Then: The system fails the restoration request
+    And: The raised exception is a ValueError or comes from a fernet module
+    """
+    clear_vault_key("test_restoration_key_single")
+
+    # Encrypt data first
+    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Corrupt the key by replacing it with invalid data
+    set_vault_key("test_restoration_key_single", "corrupted_invalid_key_data")
+
+    # Create decrypt config
+    decrypt_config = DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt", columns=["email"], key_name="test_restoration_key_single"
+                )
+            )
+        ]
+    )
+
+    # Attempt restoration - should fail due to corrupted key
+    with pytest.raises(Exception) as exc_info:
+        run_decrypt_op(decrypt_config, encrypted_df.copy())
+
+    # Check the exception by type, not by its repr: the previous "Fernet" substring test
+    # never matched cryptography.fernet.* (lower-case path) or ValueError subclasses.
+    raised = exc_info.value
+    assert isinstance(raised, ValueError) or "fernet" in type(raised).__module__.lower(), (
+        "Should raise Fernet or ValueError for corrupted key"
+    )
+
+
+# ------------- AC3: Restoration Denial when Access is Unauthorized --------------------------
+
+
+def test_ac3_restore_fails_when_access_unauthorized(sample_df, encrypt_config_single_field):
+    """
+    AC3: Restoration is denied when access to the decryption key is refused.
+
+    Arrange:
+        - encrypt the sample data via ``encrypt_config_single_field``;
+        - call ``deny_vault_access`` for "test_restoration_key_single".
+    Act:
+        - run the decrypt op on "email" with that same key name.
+    Assert:
+        - a ValueError is raised;
+        - its message contains "access denied" or "error while reading"
+          (case-insensitive) and includes the key name.
+    """
+    key_name = "test_restoration_key_single"
+    clear_vault_key(key_name)
+
+    # Produce the pseudonymised input.
+    cipher_df, _metrics = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Block access to the key before the restoration attempt.
+    deny_vault_access(key_name)
+
+    # Decrypt request for the column, using the now-restricted key.
+    restricted_technique = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name=key_name,
+    )
+    restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=restricted_technique)]
+    )
+
+    # The denial is expected to surface as a ValueError.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(restore_config, cipher_df.copy())
+
+    # Inspect the message: denial wording (any case) plus the exact key name.
+    failure_text = str(raised.value)
+    lowered = failure_text.lower()
+    assert (
+        "access denied" in lowered or "error while reading" in lowered
+    ), "Error message should indicate access denial or error reading key"
+
+    assert (
+        key_name in failure_text
+    ), "Error message should include the key name for auditing"
+
+
+def test_ac3_restore_multiple_keys_with_mixed_authorization(sample_df):
+    """
+    AC3: Restoration is denied as soon as one of several keys is not accessible.
+
+    Arrange:
+        - encrypt "email" with "authorized_key" and, in a second pass, "ssn"
+          with "unauthorized_key";
+        - call ``deny_vault_access`` for "unauthorized_key".
+    Act:
+        - run a single decrypt op that lists both columns with their keys.
+    Assert:
+        - a ValueError is raised whose message contains "access denied" or
+          "error while reading" (case-insensitive) and names "unauthorized_key".
+    """
+    allowed_key = "authorized_key"
+    denied_key = "unauthorized_key"
+
+    # First encryption pass: email under the key that stays readable.
+    email_technique = EncryptConfig(
+        type="encrypt",
+        columns=["email"],
+        key_name=allowed_key,
+    )
+    email_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=email_technique)]
+    )
+
+    for stale_key in (allowed_key, denied_key):
+        clear_vault_key(stale_key)
+
+    cipher_df, _metrics = run_encrypt_op(email_encrypt_config, sample_df.copy())
+
+    # Second encryption pass: ssn under the key that gets locked afterwards.
+    ssn_technique = EncryptConfig(
+        type="encrypt",
+        columns=["ssn"],
+        key_name=denied_key,
+    )
+    ssn_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=ssn_technique)]
+    )
+    cipher_df, _metrics = run_encrypt_op(ssn_encrypt_config, cipher_df.copy())
+
+    # Lock the second key.
+    deny_vault_access(denied_key)
+
+    # One restoration request covering both columns, readable key first.
+    restore_steps = [
+        DepseudoTechniqueConfig(
+            technique=DecryptConfig(type="decrypt", columns=["email"], key_name=allowed_key)
+        ),
+        DepseudoTechniqueConfig(
+            technique=DecryptConfig(type="decrypt", columns=["ssn"], key_name=denied_key)
+        ),
+    ]
+    restore_config = DepseudonymizeStructuredConfig(used_function=restore_steps)
+
+    # The locked key must abort the run with a ValueError.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(restore_config, cipher_df.copy())
+
+    # Inspect the message: denial wording (any case) plus the exact key name.
+    failure_text = str(raised.value)
+    lowered = failure_text.lower()
+    assert (
+        "access denied" in lowered or "error while reading" in lowered
+    ), "Error message should indicate access denial"
+    assert denied_key in failure_text, "Error message should mention the unauthorized key"
+
+
+# ------------------- AC4: Restoration Denial when Key is Invalid ----------------------------
+
+
+def test_ac4_restore_fails_with_wrong_key(sample_df):
+    """
+    AC4: Restoration is denied when a valid but non-matching key is supplied.
+
+    Arrange:
+        - encrypt "email" under the key name "encryption_key_a";
+        - store a freshly generated Fernet key as "encryption_key_b".
+    Act:
+        - run the decrypt op on "email" with key name "encryption_key_b".
+    Assert:
+        - a ValueError is raised;
+        - its message contains "invalid" or "token" (case-insensitive) and
+          includes "encryption_key_b".
+    """
+    encrypting_key = "encryption_key_a"
+    mismatched_key = "encryption_key_b"
+
+    # Encryption request bound to key A.
+    key_a_technique = EncryptConfig(
+        type="encrypt",
+        columns=["email"],
+        key_name=encrypting_key,
+    )
+    key_a_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=key_a_technique)]
+    )
+
+    for stale_key in (encrypting_key, mismatched_key):
+        clear_vault_key(stale_key)
+
+    cipher_df, _metrics = run_encrypt_op(key_a_encrypt_config, sample_df.copy())
+
+    # Key B is a well-formed Fernet key, generated independently of the encryption above.
+    set_vault_key(mismatched_key, Fernet.generate_key().decode())
+
+    # Restoration request that names key B for data encrypted under key A.
+    key_b_technique = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name=mismatched_key,
+    )
+    key_b_restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=key_b_technique)]
+    )
+
+    # The mismatch is expected to surface as a ValueError.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(key_b_restore_config, cipher_df.copy())
+
+    # Inspect the message: failure wording (any case) plus the exact key name.
+    failure_text = str(raised.value)
+    lowered = failure_text.lower()
+    assert (
+        "invalid" in lowered or "token" in lowered
+    ), "Error message should indicate invalid token or decryption failure"
+    assert (
+        mismatched_key in failure_text
+    ), "Error message should include the key name for auditing"
+
+
+def test_ac4_restore_fails_with_key_from_different_field(sample_df):
+    """
+    AC4: Restoration is denied when a column is decrypted with another column's key.
+
+    Arrange:
+        - encrypt "email" under "email_key" and "ssn" under "ssn_key".
+    Act:
+        - run the decrypt op on "email" while naming "ssn_key".
+    Assert:
+        - a ValueError is raised whose message contains "invalid" or "token"
+          (case-insensitive).
+    """
+    # One (column, key name) pair per encryption pass.
+    email_technique = EncryptConfig(
+        type="encrypt",
+        columns=["email"],
+        key_name="email_key",
+    )
+    email_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=email_technique)]
+    )
+
+    ssn_technique = EncryptConfig(
+        type="encrypt",
+        columns=["ssn"],
+        key_name="ssn_key",
+    )
+    ssn_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=ssn_technique)]
+    )
+
+    for stale_key in ("email_key", "ssn_key"):
+        clear_vault_key(stale_key)
+
+    cipher_df, _metrics = run_encrypt_op(email_encrypt_config, sample_df.copy())
+    cipher_df, _metrics = run_encrypt_op(ssn_encrypt_config, cipher_df.copy())
+
+    # Deliberate mismatch: the email column paired with the ssn key.
+    crossed_technique = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name="ssn_key",
+    )
+    crossed_restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=crossed_technique)]
+    )
+
+    # The mismatch is expected to surface as a ValueError.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(crossed_restore_config, cipher_df.copy())
+
+    lowered = str(raised.value).lower()
+    assert (
+        "invalid" in lowered or "token" in lowered
+    ), "Error message should indicate invalid token"
+
+
+def test_ac4_restore_fails_with_tampered_encrypted_data(sample_df, encrypt_config_single_field):
+    """
+    AC4: Restoration is denied when an encrypted value was modified afterwards.
+
+    Arrange:
+        - encrypt the sample data via ``encrypt_config_single_field``;
+        - overwrite the "email" value in row 0 with an arbitrary string.
+    Act:
+        - run the decrypt op on "email" with "test_restoration_key_single".
+    Assert:
+        - a ValueError is raised whose message contains "invalid" or "token"
+          (case-insensitive).
+    """
+    key_name = "test_restoration_key_single"
+    clear_vault_key(key_name)
+
+    # Produce the pseudonymised input.
+    cipher_df, _metrics = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Replace one cell so the column no longer holds only the op's own output.
+    cipher_df.loc[0, "email"] = "tampered_invalid_encrypted_data"
+
+    # Restoration request for the tampered column.
+    restore_technique = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name=key_name,
+    )
+    restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=restore_technique)]
+    )
+
+    # The modified cell is expected to make the op raise.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(restore_config, cipher_df.copy())
+
+    # Match the wording case-insensitively.
+    lowered = str(raised.value).lower()
+    assert (
+        "invalid" in lowered or "token" in lowered
+    ), "Error message should indicate invalid token due to tampering"
+
+
+# ---------------- Additional Edge Cases and Integration Tests -------------------------------
+
+
+def test_integration_full_cycle_encrypt_decrypt_multiple_operations(sample_df):
+    """
+    Integration: two encryption passes followed by two restoration passes.
+
+    Flow:
+        1. encrypt "email" under "key_1";
+        2. encrypt "name" and "ssn" under "key_2";
+        3. restore "email" and check it against the original column;
+        4. restore "name" and "ssn" and check all three columns.
+
+    Every stage works on a copy of the previous stage's frame.
+    """
+    # Stage 1: email under key_1.
+    email_encrypt = EncryptConfig(type="encrypt", columns=["email"], key_name="key_1")
+    stage_1_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=email_encrypt)]
+    )
+
+    clear_vault_key("key_1")
+    after_stage_1, _metrics = run_encrypt_op(stage_1_config, sample_df.copy())
+
+    # Stage 2: name and ssn under key_2, on top of stage 1's output.
+    identity_encrypt = EncryptConfig(
+        type="encrypt",
+        columns=["name", "ssn"],
+        key_name="key_2",
+    )
+    stage_2_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=identity_encrypt)]
+    )
+
+    clear_vault_key("key_2")
+    after_stage_2, _metrics = run_encrypt_op(stage_2_config, after_stage_1.copy())
+
+    # Stage 3: bring back email only.
+    email_decrypt = DecryptConfig(type="decrypt", columns=["email"], key_name="key_1")
+    stage_3_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=email_decrypt)]
+    )
+
+    after_stage_3, _metrics = run_decrypt_op(stage_3_config, after_stage_2.copy())
+    assert after_stage_3["email"].equals(sample_df["email"]), "Email should be restored"
+
+    # Stage 4: bring back name and ssn.
+    identity_decrypt = DecryptConfig(
+        type="decrypt",
+        columns=["name", "ssn"],
+        key_name="key_2",
+    )
+    stage_4_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=identity_decrypt)]
+    )
+    after_stage_4, _metrics = run_decrypt_op(stage_4_config, after_stage_3.copy())
+
+    # All three columns must now equal the originals.
+    assert after_stage_4["email"].equals(sample_df["email"]), "Email should remain restored"
+    assert after_stage_4["name"].equals(sample_df["name"]), "Name should be restored"
+    assert after_stage_4["ssn"].equals(sample_df["ssn"]), "SSN should be restored"
+
+
+def test_restore_with_null_values(encrypt_config_single_field, decrypt_config_single_field):
+    """
+    Edge case: round trip of a frame whose text columns contain ``None``.
+
+    The frame has a missing value in "name", "email" and "ssn" (one per column).
+    After encrypt + decrypt with the single-field fixtures, the three "email"
+    cells that were populated must hold their original strings again. The row
+    whose email is ``None`` is not asserted on.
+    """
+    clear_vault_key("test_restoration_key_single")
+
+    # Column-wise definition; None marks the deliberately missing cells.
+    columns_with_gaps = {
+        "id": [1, 2, 3, 4],
+        "name": ["Alice", "Bob", None, "David"],
+        "email": ["alice@example.com", None, "charlie@example.com", "david@example.com"],
+        "ssn": ["123-45-6789", "234-56-7890", "345-67-8901", None],
+        "age": [25, 30, 35, 40],
+        "salary": [50000.0, 60000.0, 70000.0, 80000.0],
+        "department": ["HR", "IT", "Finance", "IT"],
+    }
+    frame_with_gaps = pd.DataFrame(columns_with_gaps)
+
+    # NOTE (from the original author): missing values are expected to be
+    # stringified ("nan"/"None") on encryption, because the cipher needs
+    # string input.
+    cipher_df, _metrics = run_encrypt_op(encrypt_config_single_field, frame_with_gaps.copy())
+    roundtrip_df, _metrics = run_decrypt_op(decrypt_config_single_field, cipher_df.copy())
+
+    # Only rows 0, 2 and 3 carry a real email address.
+    expected_emails = {
+        0: "alice@example.com",
+        2: "charlie@example.com",
+        3: "david@example.com",
+    }
+
+    for row, email in expected_emails.items():
+        assert roundtrip_df.loc[row, "email"] == email
+
+
+def test_restore_large_dataset_performance():
+    """
+    Round trip of a 10,000-row frame through encrypt and decrypt.
+
+    Only "email" is encrypted (key name "test_large_dataset"). After the
+    decrypt op the frame must still have 10,000 rows, the whole "email"
+    column must equal the input column, and three spot-checked rows (labels
+    0, 5000 and 9999) must carry the expected addresses. No timing is measured.
+    """
+    row_ids = range(1, 10001)
+    departments = ["HR", "IT", "Finance", "Sales"]
+    key_name = "test_large_dataset"
+
+    # Synthetic rows derived from the running id.
+    big_frame = pd.DataFrame(
+        {
+            "id": row_ids,
+            "email": [f"user{i}@example.com" for i in row_ids],
+            "name": [f"User {i}" for i in row_ids],
+            "ssn": [f"{i:03d}-{i:02d}-{i:04d}" for i in row_ids],
+            "age": [20 + (i % 50) for i in row_ids],
+            "salary": [30000 + (i * 10) for i in row_ids],
+            "department": [departments[i % 4] for i in row_ids],
+        }
+    )
+
+    email_encrypt = EncryptConfig(
+        type="encrypt",
+        columns=["email"],
+        key_name=key_name,
+    )
+    encrypt_request = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=email_encrypt)]
+    )
+
+    email_decrypt = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name=key_name,
+    )
+    decrypt_request = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=email_decrypt)]
+    )
+
+    clear_vault_key(key_name)
+
+    cipher_df, _metrics = run_encrypt_op(encrypt_request, big_frame.copy())
+    roundtrip_df, _metrics = run_decrypt_op(decrypt_request, cipher_df.copy())
+
+    # Row count plus a whole-column comparison.
+    assert len(roundtrip_df) == 10000, "Should restore all 10,000 rows"
+    assert roundtrip_df["email"].equals(big_frame["email"]), "All emails should be restored"
+
+    # Spot checks at labels 0, 5000 and 9999.
+    assert roundtrip_df.loc[0, "email"] == "user1@example.com"
+    assert roundtrip_df.loc[5000, "email"] == "user5001@example.com"
+    assert roundtrip_df.loc[9999, "email"] == "user10000@example.com"
+
+
+@pytest.mark.edge_case
+@pytest.mark.security
+def test_restore_after_key_rotation(sample_df, encrypt_config_single_field):
+    """
+    AC4: Restoration is denied once the stored key has been replaced.
+
+    Arrange:
+        - encrypt the sample data via ``encrypt_config_single_field``;
+        - overwrite "test_restoration_key_single" with a new Fernet key.
+    Act:
+        - run the decrypt op on "email" with that key name.
+    Assert:
+        - a ValueError is raised whose message contains "invalid" or
+          "decrypt" (case-insensitive).
+    """
+    key_name = "test_restoration_key_single"
+    clear_vault_key(key_name)
+
+    cipher_df, _metrics = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Simulate rotation: same key name, newly generated key material.
+    set_vault_key(key_name, Fernet.generate_key().decode())
+
+    rotated_technique = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name=key_name,
+    )
+    restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=rotated_technique)]
+    )
+
+    # Old ciphertext versus new key: the op has to raise.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(restore_config, cipher_df.copy())
+
+    lowered = str(raised.value).lower()
+    assert (
+        "invalid" in lowered or "decrypt" in lowered
+    ), "Should indicate invalid token due to key rotation"
+
+
+@pytest.mark.edge_case
+def test_restore_partially_encrypted_column(sample_df, encrypt_config_single_field):
+    """
+    Edge case: the column to restore mixes ciphertext with plaintext cells.
+
+    Rows 0 and 2 of the encrypted "email" column are overwritten with plaintext; the
+    decrypt op must then raise a ValueError mentioning "invalid" or "decrypt" (any case).
+    """
+    key_name = "test_restoration_key_single"
+    clear_vault_key(key_name)
+
+    cipher_df, _metrics = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Put plaintext back into two cells to mimic a corrupted column.
+    plaintext_cells = {0: "plaintext@example.com", 2: "another_plaintext@example.com"}
+    for row, value in plaintext_cells.items():
+        cipher_df.loc[row, "email"] = value
+
+    restore_technique = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name=key_name,
+    )
+    restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=restore_technique)]
+    )
+
+    # The plaintext cells are expected to make the op raise.
+    with pytest.raises(ValueError) as raised:
+        run_decrypt_op(restore_config, cipher_df.copy())
+
+    lowered = str(raised.value).lower()
+    assert (
+        "invalid" in lowered or "decrypt" in lowered
+    ), "Should indicate invalid token for plaintext values"
+
+
+@pytest.mark.edge_case
+def test_restore_with_missing_column_in_encrypted_data(
+    sample_df, encrypt_config_single_field, decrypt_config_single_field
+):
+    """
+    AC2: Restoration fails when the column to decrypt is absent from the input frame.
+
+    The decrypt op receives a frame without "email" and must raise ValueError or KeyError.
+    """
+    clear_vault_key("test_restoration_key_single")
+    # Run the encrypt op only to create the key; its outputs are not needed here
+    run_encrypt_op(encrypt_config_single_field, sample_df.copy())
+
+    # Plain (never encrypted) frame lacking the 'email' column the decrypt config expects
+    incomplete_df = pd.DataFrame(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "age": [25, 30, 35],
+            "salary": [50000.0, 60000.0, 70000.0],
+            "department": ["HR", "IT", "Finance"],
+        }
+    )
+
+    with pytest.raises((ValueError, KeyError)) as exc_info:
+        run_decrypt_op(decrypt_config_single_field, incomplete_df)
+
+    error_msg = str(exc_info.value)
+    assert (
+        "email" in error_msg or "not present" in error_msg or "not found" in error_msg
+    ), f"Error should indicate missing column, got: {error_msg}"
+
+
+@pytest.mark.integration
+def test_restore_with_multiple_encryption_keys(sample_df):
+    """
+    Integration: two columns, each encrypted and restored with its own key.
+
+    "name" uses "key_a" and "email" uses "key_b". The columns are encrypted
+    one after the other, restored in the same order, and both must then equal
+    the corresponding columns of ``sample_df``.
+    """
+    for stale_key in ("key_a", "key_b"):
+        clear_vault_key(stale_key)
+
+    # Encryption requests, one per column/key pair.
+    name_encrypt = EncryptConfig(
+        type="encrypt",
+        columns=["name"],
+        key_name="key_a",
+    )
+    name_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=name_encrypt)]
+    )
+
+    email_encrypt = EncryptConfig(
+        type="encrypt",
+        columns=["email"],
+        key_name="key_b",
+    )
+    email_encrypt_config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[PseudoTechniqueConfig(technique=email_encrypt)]
+    )
+
+    # Chain both encryption passes, starting from a copy of the fixture frame.
+    working_df = sample_df.copy()
+    working_df, _metrics = run_encrypt_op(name_encrypt_config, working_df)
+    working_df, _metrics = run_encrypt_op(email_encrypt_config, working_df)
+
+    # Matching restoration requests.
+    name_decrypt = DecryptConfig(
+        type="decrypt",
+        columns=["name"],
+        key_name="key_a",
+    )
+    name_restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=name_decrypt)]
+    )
+    email_decrypt = DecryptConfig(
+        type="decrypt",
+        columns=["email"],
+        key_name="key_b",
+    )
+    email_restore_config = DepseudonymizeStructuredConfig(
+        used_function=[DepseudoTechniqueConfig(technique=email_decrypt)]
+    )
+
+    # Chain both restoration passes on a copy of the encrypted frame.
+    plain_df = working_df.copy()
+    plain_df, _metrics = run_decrypt_op(name_restore_config, plain_df)
+    plain_df, _metrics = run_decrypt_op(email_restore_config, plain_df)
+
+    # Column-level comparison against the fixture frame.
+    assert plain_df["name"].equals(sample_df["name"]), "Name field should be restored with key_a"
+    assert plain_df["email"].equals(
+        sample_df["email"]
+    ), "Email field should be restored with key_b"
diff --git a/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py b/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
new file mode 100644
index 0000000..1ce8585
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
@@ -0,0 +1,288 @@
+"""
+Test suite for data restoration (depseudonymisation) of unstructured text.
+
+## Test Coverage Summary
+
+### Acceptance Criteria Coverage:
+- AC1 (Data Restoration with Valid Key): 2 tests
+- AC2 (Restoration Denial - Missing Key): 1 test
+- AC3 (Restoration Denial - Unauthorized Access): 1 test
+- AC4 (Restoration Denial - Invalid Key): 1 test
+- Additional Coverage: 2 tests (edge cases)
+
+### Test Pattern:
+- Each test uses build_op_context with .model_dump() for configuration
+- Tests validate dual outputs (data, metrics)
+- Tests verify complete restoration of original text
+- Tests validate security controls and error handling
+- Tests use descriptive names mapping to AC scenarios
+
+"""
+
+import pytest
+from unittest.mock import patch
+from cryptography.fernet import Fernet
+from dagster import build_op_context
+
+from src.field_level_pseudo_anonymisation.unstructured_ops import (
+    depseudonymize_unstructured,
+)
+from src.field_level_pseudo_anonymisation.config_models.unstructured_config import (
+    DepseudonymizeUnstructuredConfig,
+    DecryptConfig,
+    DepseudoTechniqueConfig,
+)
+
+
+@pytest.fixture
+def fernet_key() -> bytes:
+    """Generate a valid Fernet key for encryption in tests."""
+    return Fernet.generate_key()
+
+
+@pytest.fixture
+def encrypted_text_data(fernet_key: bytes) -> dict:
+    """
+    Create encrypted data for testing decryption.
+
+    Returns a dict with:
+    - original_text: The unencrypted text
+    - encrypted_text: Text with PII values encrypted in {encrypt:...} format
+    """
+    original_text = "My name is John Doe and my email is john.doe@example.com."
+    fernet = Fernet(fernet_key)
+    encrypted_name = fernet.encrypt(b"John Doe").decode()
+    encrypted_email = fernet.encrypt(b"john.doe@example.com").decode()
+    encrypted_text = (
+        f"My name is {{encrypt:{encrypted_name}}} and my email is {{encrypt:{encrypted_email}}}."
+    )
+    return {
+        "original_text": original_text,
+        "encrypted_text": encrypted_text,
+    }
+
+
+# ---------------------- AC1: Data Restoration with Valid Key --------------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac1_restore_encrypted_pii_entities_with_valid_key(
+    mock_create_get_key, fernet_key: bytes, encrypted_text_data: dict
+):
+    """AC1: Restore encrypted PII entities with a valid key from secret management tool."""
+    # Arrange - Mock the Vault key retrieval to return the valid key
+    mock_create_get_key.return_value = fernet_key
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="test_key"))
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act - Request data restoration
+    result_gen = depseudonymize_unstructured(
+        context, input_text=encrypted_text_data["encrypted_text"]
+    )
+    data_output = next(result_gen)
+    metrics_output = next(result_gen)
+
+    # Assert - Verify successful restoration
+    # 1. All original values are restored exactly
+    assert (
+        data_output.value == encrypted_text_data["original_text"]
+    ), "Original text should be fully restored"
+
+    # 2. Correct output structure
+    assert data_output.output_name == "data", "Output should be named 'data'"
+
+    # 3. Metrics show correct number of restored entities
+    assert (
+        metrics_output.value["total_depseudo_count"] == 2
+    ), "Should restore 2 encrypted entities (name and email)"
+
+    # 4. System retrieved key from secret management tool
+    mock_create_get_key.assert_called_once_with("decrypt", "test_key")
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac1_restore_multiple_pii_types_with_valid_key(mock_create_get_key, fernet_key: bytes):
+    """AC1: Restore multiple encrypted PII entity types (name, email, phone) with a valid key."""
+    # Arrange - Create text with multiple PII types encrypted
+    original_text = "Contact John Doe at john.doe@example.com or call 555-1234."
+    fernet = Fernet(fernet_key)
+    encrypted_name = fernet.encrypt(b"John Doe").decode()
+    encrypted_email = fernet.encrypt(b"john.doe@example.com").decode()
+    encrypted_phone = fernet.encrypt(b"555-1234").decode()
+    encrypted_text = (
+        f"Contact {{encrypt:{encrypted_name}}} at "
+        f"{{encrypt:{encrypted_email}}} or call {{encrypt:{encrypted_phone}}}."
+    )
+
+    mock_create_get_key.return_value = fernet_key
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(type="decrypt", key_name="multi_pii_key")
+            )
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act
+    result_gen = depseudonymize_unstructured(context, input_text=encrypted_text)
+    data_output = next(result_gen)
+    metrics_output = next(result_gen)
+
+    # Assert
+    assert data_output.value == original_text, "All PII types should be restored"
+    assert (
+        metrics_output.value["total_depseudo_count"] == 3
+    ), "Should restore 3 encrypted entities (name, email, phone)"
+    mock_create_get_key.assert_called_once_with("decrypt", "multi_pii_key")
+
+
+# ------------------- AC2: Restoration Denial when Key is Missing ----------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac2_restoration_denial_when_key_missing(mock_create_get_key, encrypted_text_data: dict):
+    """AC2: Deny restoration when decryption key is missing from secret management tool."""
+    # Arrange - Mock Vault to indicate key is missing
+    mock_create_get_key.side_effect = ValueError(
+        "Fernet key 'non_existent_key' not found in Vault for decrypt."
+    )
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(type="decrypt", key_name="non_existent_key")
+            )
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act & Assert - Verify system fails the restoration request
+    with pytest.raises(
+        ValueError,
+        match="Fernet key 'non_existent_key' not found in Vault for decrypt.",
+    ) as exc_info:
+        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
+
+    # Verify error message is clear and actionable
+    assert "not found in Vault" in str(
+        exc_info.value
+    ), "Error message should indicate key is missing from Vault"
+
+    # Verify the key retrieval was attempted exactly once (logging itself is not asserted)
+    mock_create_get_key.assert_called_once_with("decrypt", "non_existent_key")
+
+
+# ------------- AC3: Restoration Denial when Access is Unauthorized --------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac3_restoration_denial_when_unauthorized_access(
+    mock_create_get_key, encrypted_text_data: dict
+):
+    """AC3: Deny restoration when participant is not authorized to access the decryption key."""
+    # Arrange - Mock Vault to deny access
+    mock_create_get_key.side_effect = ValueError("Access denied to secret: unauthorized_key")
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(type="decrypt", key_name="unauthorized_key")
+            )
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act & Assert - Verify system denies access
+    with pytest.raises(ValueError, match="Access denied to secret: unauthorized_key") as exc_info:
+        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
+
+    # Verify error message clearly indicates access denial
+    assert "Access denied" in str(
+        exc_info.value
+    ), "Error message should clearly indicate access was denied"
+
+    # Verify the key retrieval was attempted exactly once (logging itself is not asserted)
+    mock_create_get_key.assert_called_once_with("decrypt", "unauthorized_key")
+
+
+# ------------------- AC4: Restoration Denial when Key is Invalid ----------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac4_restoration_denial_when_key_invalid(mock_create_get_key, encrypted_text_data: dict):
+    """AC4: Deny restoration when decryption key does not correspond to the encrypted fields."""
+    # Arrange - Mock Vault to return a different (wrong) key
+    invalid_key = Fernet.generate_key()  # A different, incorrect key
+    mock_create_get_key.return_value = invalid_key
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="wrong_key"))
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act & Assert - Verify system fails the restoration
+    with pytest.raises(ValueError, match="Invalid Fernet token") as exc_info:
+        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
+
+    # Verify error message indicates decryption failure
+    assert "Invalid Fernet token" in str(
+        exc_info.value
+    ), "Error message should indicate the key is invalid for this data"
+
+    # Verify key was retrieved (system attempted decryption)
+    mock_create_get_key.assert_called_once_with("decrypt", "wrong_key")
+
+
+# -------------------------------- Additional Edge Cases ----------------------------------------
+
+
+def test_depseudonymize_unstructured_no_decrypt_config():
+    """Edge case: Text is returned unchanged when no decryption techniques are configured."""
+    # Arrange
+    original_text = "This text has no {encrypt:values} to decrypt."
+    config = DepseudonymizeUnstructuredConfig(used_function=[])  # No techniques
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act
+    result_gen = depseudonymize_unstructured(context, input_text=original_text)
+    result_output = next(result_gen)
+    metrics_output = next(result_gen)
+
+    # Assert
+    assert (
+        result_output.value == original_text
+    ), "Text should remain unchanged when no decryption is configured"
+    assert (
+        metrics_output.value["total_depseudo_count"] == 0
+    ), "Should report zero decryptions performed"
+
+
+def test_depseudonymize_unstructured_empty_text():
+    """Edge case: Empty input text is returned unchanged with zero decryptions performed."""
+    # Arrange
+    empty_text = ""
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="test_key"))
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act
+    with patch(
+        "src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key"
+    ) as mock_key:
+        mock_key.return_value = Fernet.generate_key()
+        result_gen = depseudonymize_unstructured(context, input_text=empty_text)
+        result_output = next(result_gen)
+        metrics_output = next(result_gen)
+
+    # Assert
+    assert result_output.value == "", "Empty text should remain empty"
+    assert (
+        metrics_output.value["total_depseudo_count"] == 0
+    ), "Should report zero decryptions for empty text"
diff --git a/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py b/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
new file mode 100644
index 0000000..b89fad3
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
@@ -0,0 +1,1119 @@
+"""
+Test suite for field-level pseudonymisation operations (encrypt technique).
+
+This test suite covers the encryption pseudonymisation technique for structured dataframes,
+validating the following Acceptance Criteria:
+
+## Test Coverage Summary
+
+### Acceptance Criteria Coverage:
+- AC1 (Supported Technique Applied Correctly): 7 tests
+- AC2 (Invalid Execution Handling): 7 tests
+- AC3 (DataFrame Compliance): 6 tests
+- AC4 (Audit Logging - Success): 2 tests
+- AC5 (Audit Logging - Failure): 3 tests
+- Additional Coverage: 7 tests
+
+### Test Pattern:
+- Each test uses build_op_context with config_to_dagster_dict for configuration
+- Tests validate dual outputs (data, metrics)
+- Vault access is mocked for isolation
+
+"""
+
+import pandas as pd
+import pytest
+from dagster import build_op_context
+from cryptography.fernet import Fernet
+from hvac.exceptions import InvalidPath
+from unittest.mock import patch, MagicMock
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+    AnonymisePseudonymizeStructuredConfig,
+    EncryptConfig,
+    HashConfig,
+    PseudoTechniqueConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.ops import anonymize_pseudonymize_structured
+
+# Import helper functions (fixtures are auto-discovered by pytest)
+from .conftest import (
+    run_encrypt_op,
+    clear_vault_key,
+    get_vault_key,
+    config_to_dagster_dict,
+)
+
+
+# -------------------------------- Test Markers Configuration --------------------------------
+
+# NOTE(review): self-assignment does not register markers with pytest; use the "markers" ini option
+pytest.mark.slow = pytest.mark.slow
+pytest.mark.security = pytest.mark.security
+pytest.mark.edge_case = pytest.mark.edge_case
+
+
+# -------------------------------- Test-Specific Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def encrypt_single_column_config():
+    """
+    Configuration for encrypting a single column (email).
+    Tests basic encryption functionality.
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt", columns=["email"], key_name="test_email_key"
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def encrypt_multiple_columns_config():
+    """
+    Configuration for encrypting multiple columns (name, email).
+    Tests encryption across multiple fields.
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt", columns=["name", "email"], key_name="test_multi_key"
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def encrypt_mixed_types_config():
+    """
+    Configuration for encrypting columns with different data types.
+    Tests that encryption handles type conversion (int, float -> string).
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    columns=["id", "age", "salary"],
+                    key_name="test_numeric_key",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def encrypt_with_unchanged_columns_config():
+    """
+    Configuration that encrypts some columns while leaving others unchanged.
+    Tests AC3 requirement for unchanged column preservation.
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt", columns=["email"], key_name="test_partial_key"
+                )
+            )
+        ]
+    )
+
+
+# ------------------- AC1: Supported Technique Applied Correctly -----------------------------
+
+
+def test_encrypt_single_column_applied_correctly(sample_df, encrypt_single_column_config):
+    """
+    AC1: Tests that encryption is applied correctly to a single column.
+
+    Scenario: The system applies encryption to the 'email' field
+    Given: A structured dataset with an email column
+    And: A valid encryption configuration for the email field
+    When: The participant triggers the execution
+    Then: The email field must be transformed with Fernet encryption
+    And: The encrypted values must be different from the original values
+    And: The encrypted values must be valid Fernet tokens (decodable)
+    """
+    # Clear any existing test key
+    clear_vault_key("test_email_key")
+
+    result_df, metrics = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+
+    # Verify output structure
+    assert result_df is not None, "Result DataFrame should not be None"
+    assert metrics is not None, "Metrics should not be None"
+
+    # Verify email column is encrypted (values changed)
+    assert not result_df["email"].equals(
+        sample_df["email"]
+    ), "Email column should be encrypted (values should change)"
+
+    # Verify all encrypted values are different from originals
+    for orig, enc in zip(sample_df["email"], result_df["email"]):
+        assert orig != enc, f"Original value '{orig}' should be encrypted"
+
+    # Verify encrypted values are valid Fernet tokens (can be decrypted)
+    key = get_vault_key("test_email_key")
+    f = Fernet(key)
+    for enc_value in result_df["email"]:
+        decrypted = f.decrypt(enc_value.encode()).decode()
+        assert (
+            decrypted in sample_df["email"].values
+        ), f"Decrypted value '{decrypted}' should match an original email"
+
+    # Verify row count is preserved
+    assert len(result_df) == len(sample_df), "Row count should be preserved"
+
+
+def test_encrypt_multiple_columns_applied_correctly(sample_df, encrypt_multiple_columns_config):
+    """
+    AC1: Tests that encryption is applied correctly to multiple columns.
+
+    Scenario: The system applies encryption to multiple fields (name, email)
+    Given: A structured dataset with name and email columns
+    And: A valid encryption configuration for both fields
+    When: The participant triggers the execution
+    Then: Both fields must be transformed with Fernet encryption
+    And: Each field uses the same encryption key (as specified)
+    """
+    clear_vault_key("test_multi_key")
+
+    result_df, metrics = run_encrypt_op(encrypt_multiple_columns_config, sample_df.copy())
+
+    # Verify both columns are encrypted
+    assert not result_df["name"].equals(sample_df["name"]), "Name column should be encrypted"
+    assert not result_df["email"].equals(sample_df["email"]), "Email column should be encrypted"
+
+    # Verify all values are encrypted
+    key = get_vault_key("test_multi_key")
+    f = Fernet(key)
+
+    for enc_name in result_df["name"]:
+        decrypted = f.decrypt(enc_name.encode()).decode()
+        assert decrypted in sample_df["name"].values
+
+    for enc_email in result_df["email"]:
+        decrypted = f.decrypt(enc_email.encode()).decode()
+        assert decrypted in sample_df["email"].values
+
+
+def test_encrypt_numeric_columns_applied_correctly(sample_df, encrypt_mixed_types_config):
+    """
+    AC1: Tests that encryption handles numeric data types correctly.
+
+    Scenario: The system applies encryption to numeric fields (id, age, salary)
+    Given: A structured dataset with integer and float columns
+    And: A valid encryption configuration for numeric fields
+    When: The participant triggers the execution
+    Then: Numeric values must be converted to strings and encrypted
+    And: Original numeric values should be recoverable via decryption
+    """
+    clear_vault_key("test_numeric_key")
+
+    result_df, metrics = run_encrypt_op(encrypt_mixed_types_config, sample_df.copy())
+
+    # Verify all numeric columns are now string type (encrypted)
+    assert result_df["id"].dtype == object, "Encrypted id should be object/string type"
+    assert result_df["age"].dtype == object, "Encrypted age should be object/string type"
+    assert result_df["salary"].dtype == object, "Encrypted salary should be object/string type"
+
+    # Verify original numeric values can be recovered
+    key = get_vault_key("test_numeric_key")
+    f = Fernet(key)
+
+    for enc_id in result_df["id"]:
+        decrypted = int(f.decrypt(enc_id.encode()).decode())
+        assert decrypted in sample_df["id"].values
+
+
+def test_encrypt_key_generation_on_first_use(sample_df, encrypt_single_column_config):
+    """
+    AC1: Tests that encryption key is automatically generated and stored in Vault.
+
+    Scenario: First-time encryption generates a key automatically
+    Given: A structured dataset with valid configuration
+    And: No encryption key exists in Vault for the specified key_name
+    When: The participant triggers the execution
+    Then: The system must generate a new Fernet key
+    And: Store it in Vault at the specified path
+    And: Use it for encryption
+    """
+    clear_vault_key("test_email_key")
+
+    # Verify key doesn't exist before encryption
+    with pytest.raises(InvalidPath):
+        get_vault_key("test_email_key")
+
+    result_df, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+
+    # Verify key was created
+    key = get_vault_key("test_email_key")
+    assert key is not None, "Encryption key should be created in Vault"
+    assert len(key) == 44, "Fernet key should be 44 bytes (base64 encoded 32 bytes)"
+
+    # Verify the key works for decryption
+    f = Fernet(key)
+    for enc_email in result_df["email"]:
+        decrypted = f.decrypt(enc_email.encode()).decode()
+        assert decrypted in sample_df["email"].values
+
+
+def test_encrypt_uses_existing_vault_key(sample_df, encrypt_single_column_config):
+    """
+    AC1: Tests that encryption uses an existing key from Vault if present.
+
+    Scenario: Encryption reuses existing key for consistent pseudonymisation
+    Given: A structured dataset
+    And: An encryption key already exists in Vault
+    When: The participant triggers the execution
+    Then: The system must use the existing key (not generate a new one)
+    And: The key stored in Vault is unchanged after the second run (ciphertexts are not compared)
+    """
+    clear_vault_key("test_email_key")
+
+    # First encryption - generates key
+    result_df_1, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+    key_1 = get_vault_key("test_email_key")
+
+    # Second encryption - should use same key
+    result_df_2, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+    key_2 = get_vault_key("test_email_key")
+
+    # Verify same key is used
+    assert key_1 == key_2, "Encryption should reuse existing Vault key"
+
+
+# ----------------------- AC2: Invalid Execution Handling ------------------------------------
+
+
+def test_encrypt_missing_column_error(encrypt_single_column_config):
+    """
+    AC2: Tests graceful error handling when a specified column doesn't exist.
+
+    Scenario: The system aborts gracefully when column is missing
+    Given: A structured dataset
+    And: A configuration specifying a non-existent column
+    When: The participant triggers the execution
+    Then: The system must raise a clear ValueError
+    And: The error message must indicate which columns are missing
+    """
+    df_missing_column = pd.DataFrame(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "age": [25, 30, 35],
+            # Missing 'email' column
+        }
+    )
+
+    with pytest.raises(ValueError) as exc_info:
+        run_encrypt_op(encrypt_single_column_config, df_missing_column)
+
+    assert "not present in the DataFrame" in str(
+        exc_info.value
+    ), "Error message should indicate missing columns"
+    assert "email" in str(exc_info.value), "Error message should mention the missing 'email' column"
+
+
+def test_encrypt_empty_dataframe_handled(encrypt_single_column_config):
+    """
+    AC2: Tests graceful handling of empty DataFrame input.
+
+    Scenario: The system processes empty DataFrame without errors
+    Given: An empty structured dataset (no rows)
+    And: A valid encryption configuration
+    When: The participant triggers the execution
+    Then: The system must return an empty DataFrame with correct schema
+    And: No errors should be raised
+    """
+    clear_vault_key("test_email_key")
+
+    empty_df = pd.DataFrame(columns=["id", "name", "email", "age", "salary", "department"])
+
+    result_df, metrics = run_encrypt_op(encrypt_single_column_config, empty_df)
+
+    assert len(result_df) == 0, "Result should be empty"
+    assert "email" in result_df.columns, "Email column should exist in schema"
+
+
+def test_encrypt_vault_connection_error():
+    """
+    AC2: Tests error handling when Vault is unreachable.
+
+    Scenario: The system fails gracefully when Vault is unavailable
+    Given: A structured dataset with valid configuration
+    When: Vault service is unreachable or misconfigured
+    Then: The system must raise a clear error
+    And: The error message must indicate the Vault connection issue
+
+    Note: Vault is not contacted. hvac.Client is patched with a mock whose
+    read_secret_version call raises; the op is expected to surface it as ValueError.
+    """
+    # Create a mock client that raises an exception when accessing Vault
+    mock_client_instance = MagicMock()
+    mock_client_instance.secrets.kv.v2.read_secret_version.side_effect = Exception(
+        "Simulated Vault connection error"
+    )
+
+    with patch("hvac.Client", return_value=mock_client_instance):
+        df = pd.DataFrame(
+            {
+                "id": [1],
+                "name": ["Test"],
+                "email": ["test@example.com"],
+                "age": [30],
+                "salary": [50000.0],
+                "department": ["IT"],
+            }
+        )
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        type="encrypt", columns=["email"], key_name="test_email_key"
+                    )
+                )
+            ]
+        )
+        with pytest.raises(ValueError) as exc_info:
+            run_encrypt_op(config, df)
+
+        error_message = str(exc_info.value)
+        assert (
+            "Simulated Vault connection error" in error_message
+        ), "Error should indicate Vault connection issue"
+
+
+def test_encrypt_null_values_handled(encrypt_single_column_config):
+    """
+    AC2: Tests handling of NULL/NaN values in encrypted columns.
+
+    Scenario: The system handles null values appropriately
+    Given: A structured dataset with NULL values in the column to encrypt
+    And: A valid encryption configuration
+    When: The participant triggers the execution
+    Then: The system must process null values (encrypt "nan" string or handle appropriately)
+    And: Not raise an exception
+    """
+    clear_vault_key("test_email_key")
+
+    df_with_nulls = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "name": ["Alice", "Bob", "Charlie", "David"],
+            "email": ["alice@example.com", None, "charlie@example.com", pd.NA],
+            "age": [25, 30, 35, 40],
+            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
+            "department": ["HR", "IT", "Finance", "IT"],
+        }
+    )
+
+    result_df, metrics = run_encrypt_op(encrypt_single_column_config, df_with_nulls)
+
+    # Verify execution completed without errors
+    assert result_df is not None
+    assert len(result_df) == 4
+
+    # Verify null values were processed (encrypted as string "None" or "nan")
+    key = get_vault_key("test_email_key")
+    f = Fernet(key)
+
+    # The null values get converted to string "None" or "nan" before encryption
+    for enc_email in result_df["email"]:
+        decrypted = f.decrypt(enc_email.encode()).decode()
+        # Decrypted value should be original or string representation of null
+        assert decrypted in [
+            "alice@example.com",
+            "charlie@example.com",
+            "None",
+            "nan",
+            "<NA>",
+        ]
+
+
+def test_encrypt_duplicate_column_configuration_error():
+    """
+    AC2: Tests that duplicate columns across techniques are rejected.
+
+    Scenario: Configuration validation prevents duplicate column assignments
+    Given: A configuration that assigns the same column to multiple techniques
+    When: The configuration is validated
+    Then: The system must raise a ValueError during configuration creation
+    And: The error message must indicate duplicate column assignment
+    """
+    with pytest.raises(ValueError) as exc_info:
+        AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(type="encrypt", columns=["email"], key_name="key1")
+                ),
+                PseudoTechniqueConfig(
+                    technique=HashConfig(
+                        type="hash",
+                        columns=["email"],  # Duplicate column
+                        algorithm="sha256",
+                    )
+                ),
+            ]
+        )
+
+    assert "Duplicate column" in str(
+        exc_info.value
+    ), "Error should indicate duplicate column configuration"
+
+
+# ------------------ AC3: DataFrame Input and Output Compliance ------------------------------
+
+
+def test_encrypt_dataframe_input_output_format(sample_df, encrypt_single_column_config):
+    """
+    AC3: Tests that input and output are both pandas DataFrames.
+
+    Scenario: The system accepts DataFrame input and returns DataFrame output
+    Given: A structured dataset as pandas DataFrame
+    And: A valid encryption configuration
+    When: The participant triggers the execution
+    Then: The system must return a pandas DataFrame
+    And: The DataFrame structure must be preserved
+    """
+    clear_vault_key("test_email_key")
+
+    result_df, metrics = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+
+    # Verify output is a DataFrame
+    assert isinstance(result_df, pd.DataFrame), "Output must be a pandas DataFrame"
+
+    # Verify DataFrame structure preserved
+    assert list(result_df.columns) == list(sample_df.columns), "Column names should be preserved"
+    assert len(result_df) == len(sample_df), "Row count should be preserved"
+
+
+def test_encrypt_data_types_transformed_correctly(sample_df, encrypt_mixed_types_config):
+    """
+    AC3: Tests that data types are transformed appropriately after encryption.
+
+    Scenario: Encrypted columns change to string type
+    Given: A structured dataset with various data types (int, float, str)
+    And: An encryption configuration for multiple columns
+    When: The participant triggers the execution
+    Then: All encrypted columns must be of type object/string
+    And: This transformation is valid and consistent with encryption technique
+    """
+    clear_vault_key("test_numeric_key")
+
+    # Store original types
+    original_types = sample_df.dtypes.to_dict()
+
+    result_df, _ = run_encrypt_op(encrypt_mixed_types_config, sample_df.copy())
+
+    # Verify encrypted columns are now object/string type
+    assert result_df["id"].dtype == object, "Encrypted integer column should become object type"
+    assert result_df["age"].dtype == object, "Encrypted integer column should become object type"
+    assert result_df["salary"].dtype == object, "Encrypted float column should become object type"
+
+    # Verify data types changed (not same as original)
+    assert result_df["id"].dtype != original_types["id"], "Data type should change after encryption"
+
+
+def test_encrypt_unchanged_columns_preserved(sample_df, encrypt_with_unchanged_columns_config):
+    """
+    AC3: Columns outside the encryption configuration must pass through untouched.
+
+    Scenario: Non-encrypted columns remain identical
+    Given: A structured dataset with multiple columns
+    And: An encryption configuration for only one column (email)
+    When: The participant triggers the execution
+    Then: Columns not specified (id, name, age, salary, department) must remain unchanged
+    And: Each must satisfy Series.equals against the corresponding input column
+    """
+    clear_vault_key("test_partial_key")
+
+    out_df, _ = run_encrypt_op(encrypt_with_unchanged_columns_config, sample_df.copy())
+
+    untouched = (
+        ("id", "ID"),
+        ("name", "Name"),
+        ("age", "Age"),
+        ("salary", "Salary"),
+        ("department", "Department"),
+    )
+    for column, label in untouched:
+        assert out_df[column].equals(sample_df[column]), f"{label} column should remain unchanged"
+
+    # The configured column, by contrast, must no longer match the input
+    email_unchanged = out_df["email"].equals(sample_df["email"])
+    assert not email_unchanged, "Email column should be encrypted (changed)"
+
+
+def test_encrypt_schema_consistency(sample_df, encrypt_multiple_columns_config):
+    """
+    AC3: The encrypted DataFrame must expose exactly the input's column schema.
+
+    Scenario: Output DataFrame has consistent schema
+    Given: A structured dataset
+    And: A multi-column encryption configuration
+    When: The participant triggers the execution
+    Then: Output DataFrame must have same column names as input
+    And: Column order must be preserved
+    And: No columns should be added or removed
+    """
+    clear_vault_key("test_multi_key")
+
+    out_df, _ = run_encrypt_op(encrypt_multiple_columns_config, sample_df.copy())
+    expected_columns = list(sample_df.columns)
+    actual_columns = list(out_df.columns)
+
+    # Same names, compared as ordered lists
+    assert actual_columns == expected_columns, "Column names must be identical"
+
+    # Position-by-position check so a failure names the offending slot
+    for position, expected in enumerate(expected_columns):
+        actual = actual_columns[position]
+        assert actual == expected, f"Column order should be preserved at position {position}"
+
+    assert len(actual_columns) == len(expected_columns), "Number of columns should remain the same"
+
+
+def test_encrypt_index_preservation(sample_df, encrypt_single_column_config):
+    """
+    AC3: Tests that DataFrame index is preserved after encryption.
+
+    Scenario: DataFrame index remains unchanged
+    Given: A structured dataset re-labelled with a non-default index (10, 20, 30, ...)
+    And: A valid encryption configuration
+    When: The participant triggers the execution
+    Then: The output DataFrame must preserve the original index
+    And: No extraneous index column should be added
+    """
+    clear_vault_key("test_email_key")
+
+    # Derive the custom labels from the fixture length so the test does not
+    # silently depend on sample_df having exactly five rows.
+    sample_df_with_index = sample_df.copy()
+    sample_df_with_index.index = [10 * (pos + 1) for pos in range(len(sample_df))]
+    expected_index = list(sample_df_with_index.index)
+
+    result_df, _ = run_encrypt_op(encrypt_single_column_config, sample_df_with_index)
+
+    # Compare against the labels captured before the op ran
+    assert list(result_df.index) == expected_index, "DataFrame index should be preserved"
+
+
+# ------------- AC4: Execution Audit & Logging - Positive Scenario ---------------------------
+
+
+def test_encrypt_successful_execution_logging(sample_df, encrypt_single_column_config):
+    """
+    AC4: Tests that successful execution produces appropriate logs/metadata.
+
+    Scenario: Successful pseudonymisation execution is logged
+    Given: A structured dataset with valid configuration
+    When: The participant triggers the execution
+    And: The execution completes successfully
+    Then: The system must return metrics output
+    And: The metrics output must carry a dict value free of input email values
+
+    Note: Dagster automatically logs:
+    - Timestamp of execution (run start/end times)
+    - Workflow run identifier (run_id)
+    - Configuration parameters (captured in op_config)
+    - Success status (run status in Dagster UI)
+
+    This test validates the op returns proper outputs for Dagster to log.
+    """
+    clear_vault_key("test_email_key")
+
+    op_config_dict = config_to_dagster_dict(encrypt_single_column_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    # Read the run id from the op context before the op is invoked
+    run_id = context.run_id
+
+    # Invoke the op directly with the context built above
+    result_df, metrics = anonymize_pseudonymize_structured(context, df=sample_df.copy())
+
+    # Verify outputs for logging
+    assert result_df is not None, "Data output should be present for logging"
+    assert metrics is not None, "Metrics output should be present for logging"
+    assert isinstance(metrics.value, dict), "Metrics should be a dict"
+
+    # Verify run context is available (Dagster provides this automatically)
+    assert run_id is not None, "Run ID should be available for audit logging"
+
+    # Verify configuration is captured (can be logged)
+    assert "used_function" in op_config_dict, "Configuration should be captured for audit"
+    # In Dagster format, technique is nested under the discriminator key
+    technique_config = op_config_dict["used_function"][0]["technique"]
+    assert "encrypt" in technique_config, "Encrypt technique should be present"
+    assert (
+        technique_config["encrypt"]["key_name"] == "test_email_key"
+    ), "Key name should be logged (but not key value)"
+
+    # Verify no plaintext email from the input appears in the stringified metrics
+    metrics_str = str(metrics.value)
+    for email in sample_df["email"]:
+        assert email not in metrics_str, "PII values should not appear in metrics/logs"
+
+
+def test_encrypt_configuration_parameters_logged(sample_df, encrypt_multiple_columns_config):
+    """
+    AC4: Tests that configuration parameters are properly captured for audit.
+
+    Scenario: Configuration details are available for compliance logging
+    Given: A multi-column encryption configuration
+    When: The participant triggers the execution
+    Then: The system must capture configuration parameters including:
+    - Selected technique (encrypt)
+    - Columns to encrypt
+    - Key name (but not key value)
+    And: These parameters should be accessible for audit logging
+    """
+    clear_vault_key("test_multi_key")
+
+    op_config_dict = config_to_dagster_dict(encrypt_multiple_columns_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    result_df, metrics = anonymize_pseudonymize_structured(context, df=sample_df.copy())
+
+    # Verify configuration details are captured
+    technique_config = op_config_dict["used_function"][0]["technique"]
+    assert "encrypt" in technique_config, "Encrypt technique should be present"
+    assert set(technique_config["encrypt"]["columns"]) == {"name", "email"}
+    assert technique_config["encrypt"]["key_name"] == "test_multi_key"
+
+    # Key must not leak into config; only the lookup is guarded so the assert can fail
+    config_str = str(op_config_dict)
+    try:
+        key = get_vault_key("test_multi_key")
+    except Exception:
+        key = None  # Key might not exist yet
+    assert (
+        key is None or key.decode() not in config_str
+    ), "Encryption key value should never be in logged configuration"
+
+
+# ------------- AC5: Execution Audit & Logging - Negative Scenario ---------------------------
+
+
+def test_encrypt_failed_execution_logging(encrypt_single_column_config):
+    """
+    AC5: Tests that failed execution provides error details for audit.
+
+    Scenario: Failed pseudonymisation execution is logged with error details
+    Given: A structured dataset that lacks the configured 'email' column
+    When: The participant triggers the execution
+    And: The execution fails (e.g., missing column)
+    Then: The system must raise an exception with clear error message
+    And: The error message should indicate the failure reason
+    And: Configuration parameters should still be accessible for audit
+    And: No PII should be exposed in error messages
+    """
+    df_missing_column = pd.DataFrame(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            # Missing 'email' column - will cause failure
+        }
+    )
+
+    op_config_dict = config_to_dagster_dict(encrypt_single_column_config)
+    context = build_op_context(op_config=op_config_dict)
+    run_id = context.run_id
+
+    # Execute and capture failure
+    with pytest.raises(ValueError) as exc_info:
+        # Need to consume the generator to trigger execution
+        list(anonymize_pseudonymize_structured(context, df=df_missing_column))
+
+    # The message must name both the failure reason and the missing column
+    error_message = str(exc_info.value)
+    assert (
+        "not present in the DataFrame" in error_message
+    ), "Error message should explain failure reason"
+    assert "email" in error_message, "Error message should mention the problematic column"
+
+    # Verify run context is available for failure logging
+    assert run_id is not None, "Run ID should be available for failure audit"
+
+    # Verify configuration is still accessible for audit
+    assert op_config_dict is not None, "Configuration should be accessible for failure audit"
+
+    # The 'name' cell values stand in for PII; none may appear in the message
+    for name in ["Alice", "Bob", "Charlie"]:
+        assert name not in error_message, "PII values should not appear in error messages"
+
+
+def test_encrypt_stack_trace_available_on_failure(encrypt_single_column_config):
+    """
+    AC5: Tests that stack trace is available for debugging failed executions.
+
+    Scenario: Failed execution provides stack trace for troubleshooting
+    Given: A configuration that will cause failure
+    When: The execution fails
+    Then: Python exception with stack trace should be raised
+    And: Stack trace should be available for logging (Dagster captures this)
+    And: Stack trace should not contain PII values (NOTE: not asserted by this test)
+    """
+    df_missing_column = pd.DataFrame({"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]})
+
+    try:
+        run_encrypt_op(encrypt_single_column_config, df_missing_column)
+        pytest.fail("Should have raised ValueError")
+    except ValueError:
+        # format_exc() renders the exception currently being handled
+        import traceback
+
+        stack_trace = traceback.format_exc()
+
+        assert "ValueError" in stack_trace, "Exception type should be in stack trace"
+        assert (
+            "not present in the DataFrame" in stack_trace
+        ), "Error message should be in stack trace"
+
+        # Either the ops module file name or the op's name must show up in a frame
+        assert (
+            "ops.py" in stack_trace or "anonymize_pseudonymize_structured" in stack_trace
+        ), "Stack trace should indicate error location"
+
+
+def test_encrypt_vault_error_logged_appropriately(sample_df):
+    """
+    AC5: Tests that Vault-related errors are logged with appropriate detail.
+
+    Scenario: Vault connection/authentication errors are captured
+    Given: A configuration with invalid Vault setup
+    When: The execution attempts to access Vault
+    And: Vault access fails
+    Then: A ValueError carrying the underlying Vault error text must be raised
+    And: The error should indicate the Vault-related nature of the failure
+
+    Note: This test validates error handling structure; actual Vault errors
+    depend on Vault availability.
+    """
+    # Mock client whose KV v2 read_secret_version call raises a generic Exception
+    mock_client_instance = MagicMock()
+    mock_client_instance.secrets.kv.v2.read_secret_version.side_effect = Exception(
+        "Simulated Vault authentication error"
+    )
+
+    with patch("hvac.Client", return_value=mock_client_instance):
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        type="encrypt", columns=["email"], key_name="test_email_key"
+                    )
+                )
+            ]
+        )
+        with pytest.raises(ValueError) as exc_info:
+            run_encrypt_op(config, sample_df)
+
+        error_message = str(exc_info.value)
+        assert (
+            "Simulated Vault authentication error" in error_message
+        ), "Error should indicate Vault-related failure"
+
+
+# --------------- Additional Edge Cases & Integration Tests ----------------------------------
+
+
+def test_encrypt_large_dataset_performance(encrypt_single_column_config):
+    """
+    Additional test: Validates encryption works with larger datasets.
+
+    Tests that a 1000-row frame is processed and that every email value changes.
+    """
+    clear_vault_key("test_email_key")
+
+    # Create a larger dataset (1000 rows)
+    large_df = pd.DataFrame(
+        {
+            "id": range(1000),
+            "name": [f"Person{i}" for i in range(1000)],
+            "email": [f"person{i}@example.com" for i in range(1000)],
+            "age": [25 + (i % 50) for i in range(1000)],
+            "salary": [50000.0 + (i * 100) for i in range(1000)],
+            "department": ["HR", "IT", "Finance"] * 333 + ["HR"],
+        }
+    )
+
+    # Snapshot plaintext for an element-wise check (Series.equals passes if one row changed)
+    original_emails = large_df["email"].copy()
+    result_df, _ = run_encrypt_op(encrypt_single_column_config, large_df)
+
+    assert len(result_df) == 1000, "All rows should be processed"
+    unchanged = result_df["email"].to_numpy() == original_emails.to_numpy()
+    assert not unchanged.any(), "All email values should be encrypted"
+
+
+def test_encrypt_special_characters_in_data(encrypt_single_column_config):
+    """
+    Additional test: Validates encryption handles special characters correctly.
+
+    Tests that unicode, accented and emoji values decrypt back to the input, row by row.
+    """
+    clear_vault_key("test_email_key")
+
+    df_special = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "name": ["Müller", "José", "李明", "🙂 John"],
+            "email": [
+                "test@müller.de",
+                "josé@example.com",
+                "李明@example.cn",
+                "emoji@😀.com",
+            ],
+            "age": [25, 30, 35, 40],
+            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
+            "department": ["HR", "IT", "Finance", "IT"],
+        }
+    )
+
+    # Save original values for comparison (tolist() already returns a new list)
+    original_emails = df_special["email"].tolist()
+
+    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_special)
+
+    # Ordered comparison: a set would hide reordered or duplicated rows
+    key = get_vault_key("test_email_key")
+    f = Fernet(key)
+
+    decrypted_emails = [f.decrypt(enc.encode()).decode() for enc in result_df["email"]]
+    assert (
+        decrypted_emails == original_emails
+    ), "Special characters should be preserved through encryption/decryption"
+
+
+def test_encrypt_deterministic_within_session(sample_df, encrypt_single_column_config):
+    """
+    Additional test: Validates encryption produces consistent results with same key.
+
+    Note: Fernet tokens embed a random IV plus a timestamp, so it's NOT deterministic.
+    This test validates that decryption recovers the original value consistently.
+    """
+    clear_vault_key("test_email_key")
+
+    # First encryption
+    result_df_1, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+
+    # Get the key used
+    key = get_vault_key("test_email_key")
+    f = Fernet(key)
+
+    # Verify first encryption decrypts correctly
+    decrypted_1 = [f.decrypt(enc.encode()).decode() for enc in result_df_1["email"]]
+    assert decrypted_1 == sample_df["email"].tolist(), "Decryption should recover original values"
+
+    # Second run without clearing the key; tokens differ because each gets a fresh IV
+    result_df_2, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
+
+    # Verify second encryption also decrypts correctly
+    decrypted_2 = [f.decrypt(enc.encode()).decode() for enc in result_df_2["email"]]
+    assert (
+        decrypted_2 == sample_df["email"].tolist()
+    ), "Decryption should consistently recover original values"
+
+    # Note: the two ciphertext columns must differ (random IV, and possibly timestamp)
+    assert not result_df_1["email"].equals(
+        result_df_2["email"]
+    ), "Fernet encryption includes timestamp, so outputs differ"
+
+
+def test_encrypt_empty_string_values(encrypt_single_column_config):
+    """
+    Additional test: An empty-string email must survive an encrypt/decrypt round trip.
+    """
+    clear_vault_key("test_email_key")
+
+    frame = pd.DataFrame(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "", "Charlie"],
+            "email": ["alice@example.com", "", "charlie@example.com"],
+            "age": [25, 30, 35],
+            "salary": [50000.0, 60000.0, 70000.0],
+            "department": ["HR", "IT", "Finance"],
+        }
+    )
+
+    encrypted_df, _ = run_encrypt_op(encrypt_single_column_config, frame)
+
+    # Decrypt every token with the key fetched for "test_email_key"
+    cipher = Fernet(get_vault_key("test_email_key"))
+    recovered = []
+    for token in encrypted_df["email"]:
+        recovered.append(cipher.decrypt(token.encode()).decode())
+    assert "" in recovered, "Empty strings should be encrypted and recoverable"
+
+
+@pytest.mark.edge_case
+def test_encrypt_very_long_strings(encrypt_single_column_config):
+    """
+    Edge case: Encryption of very long string values (e.g., 10KB+)
+
+    Validates that a 10,000-character value decrypts back to the exact input.
+    """
+    clear_vault_key("test_email_key")
+
+    # Create DataFrame with very long strings
+    long_string = "x" * 10000  # 10,000 ASCII characters
+    df_long_strings = pd.DataFrame(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "email": [
+                f"{long_string}@example.com",
+                "bob@example.com",
+                "charlie@example.com",
+            ],
+            "age": [25, 30, 35],
+            "salary": [50000.0, 60000.0, 70000.0],
+            "department": ["HR", "IT", "Finance"],
+        }
+    )
+
+    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_long_strings)
+
+    # Only row 0 holds the long value; decrypt it and compare with the plaintext
+    key = get_vault_key("test_email_key")
+    f = Fernet(key)
+    decrypted = f.decrypt(result_df.loc[0, "email"].encode()).decode()
+    assert (
+        decrypted == f"{long_string}@example.com"
+    ), "Very long strings should be encrypted and recoverable"
+
+
+@pytest.mark.edge_case
+def test_encrypt_column_with_all_identical_values(encrypt_single_column_config):
+    """
+    Edge case: Encryption when all values in a column are identical
+
+    Validates that encryption produces different outputs for identical inputs
+    (Fernet draws a fresh random IV per token; the timestamp alone would not do it).
+    """
+    clear_vault_key("test_email_key")
+
+    df_identical = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4, 5],
+            "name": ["Alice"] * 5,
+            "email": ["same@example.com"] * 5,  # All identical
+            "age": [30] * 5,
+            "salary": [60000.0] * 5,
+            "department": ["IT"] * 5,
+        }
+    )
+
+    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_identical)
+
+    # Verify all encrypted values are unique (due to Fernet's per-token random IV)
+    encrypted_values = result_df["email"].tolist()
+    assert (
+        len(set(encrypted_values)) == 5
+    ), "Fernet should produce unique ciphertexts even for identical plaintexts"
+
+    # Verify all decrypt to same original value
+    key = get_vault_key("test_email_key")
+    f = Fernet(key)
+    decrypted_values = [f.decrypt(enc.encode()).decode() for enc in encrypted_values]
+    assert all(
+        val == "same@example.com" for val in decrypted_values
+    ), "All encrypted values should decrypt to same original"
+
+
+@pytest.mark.edge_case
+def test_encrypt_whitespace_only_values(encrypt_single_column_config):
+    """
+    Edge case: whitespace-only emails must decrypt back to the exact same whitespace
+    """
+    clear_vault_key("test_email_key")
+
+    frame = pd.DataFrame(
+        {
+            "id": [1, 2, 3],
+            "name": ["Alice", "Bob", "Charlie"],
+            "email": ["   ", "\t\t", "\n\n"],  # spaces, tabs, newlines
+            "age": [25, 30, 35],
+            "salary": [50000.0, 60000.0, 70000.0],
+            "department": ["HR", "IT", "Finance"],
+        }
+    )
+
+    # Snapshot the plaintext as a plain list before the frame is handed to the op
+    plaintext_emails = frame["email"].tolist()
+
+    encrypted_df, _ = run_encrypt_op(encrypt_single_column_config, frame)
+
+    # Decrypt each token with the key fetched for "test_email_key"
+    cipher = Fernet(get_vault_key("test_email_key"))
+    tokens = encrypted_df["email"].tolist()
+
+    # Pairs are matched by row position; !r in the message mirrors repr()
+    # so invisible characters are readable in a failure report.
+    for plain, token in zip(plaintext_emails, tokens):
+        recovered = cipher.decrypt(token.encode()).decode()
+        message = f"Whitespace value {plain!r} should be preserved, but got {recovered!r}"
+        assert recovered == plain, message
+
+
+@pytest.mark.edge_case
+@pytest.mark.parametrize(
+    "column_type,test_values",
+    [
+        ("integer", [1, 2, 3, 4, 5]),
+        ("float", [1.1, 2.2, 3.3, 4.4, 5.5]),
+        ("string", ["a", "b", "c", "d", "e"]),
+    ],
+)
+def test_encrypt_various_data_types(column_type, test_values):
+    """
+    Parameterized test: encrypting an int, float or str column yields object dtype
+    """
+    clear_vault_key("test_type_key")
+
+    df = pd.DataFrame(
+        {
+            "id": range(len(test_values)),
+            "test_column": test_values,
+            "name": ["Person"] * len(test_values),
+            "email": ["test@example.com"] * len(test_values),
+            "age": [30] * len(test_values),
+            "salary": [60000.0] * len(test_values),
+            "department": ["IT"] * len(test_values),
+        }
+    )
+
+    config = AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt", columns=["test_column"], key_name="test_type_key"
+                )
+            )
+        ]
+    )
+
+    result_df, _ = run_encrypt_op(config, df)
+
+    # Verify encryption occurred (values changed to strings)
+    assert (
+        result_df["test_column"].dtype == object
+    ), f"Encrypted {column_type} should become object type"
+
+    # Decrypted text is compared with str(original), whatever the input type was
+    key = get_vault_key("test_type_key")
+    f = Fernet(key)
+    for idx, orig_val in enumerate(test_values):
+        decrypted = f.decrypt(result_df.loc[idx, "test_column"].encode()).decode()
+        assert decrypted == str(
+            orig_val
+        ), f"Decrypted value should match original {column_type} value"
diff --git a/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py b/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
new file mode 100644
index 0000000..8d6a3cc
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
@@ -0,0 +1,853 @@
+"""
+Test suite for field-level pseudonymisation operations on unstructured data.
+
+This test suite validates the pseudonymisation of unstructured text with PII detection,
+covering the following Acceptance Criteria:
+
+## Test Coverage Summary
+
+### Acceptance Criteria Coverage:
+- AC1 (Pseudonymisation and Retention Applied Correctly): 8 tests
+- AC2 (Invalid Execution Handling): 5 tests
+- AC3 (Execution Audit & Logging - Positive Scenario): 3 tests
+- AC4 (Execution Audit & Logging - Negative Scenario): 4 tests
+- Additional Coverage: 3 tests
+
+### Test Pattern:
+- Each test uses build_op_context with config_to_dagster_dict for configuration
+- Tests validate dual outputs (data, metrics)
+- Vault access is mocked for isolation
+- Tests validate Scrubadub automatic PII detection
+- Tests ensure placeholder replacement for unconfigured PII
+"""
+
+import pytest
+import re
+from dagster import build_op_context
+from unittest.mock import patch, MagicMock
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
+    AnonymisePseudonymizeUnstructuredConfig,
+    EncryptConfig,
+    RetainConfig,
+    PseudoTechniqueConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.config_models import PIIEntityEnum, LanguageEnum
+from template_code_location.field_level_pseudo_anonymisation.unstructured_ops import (
+    anonymize_pseudonymize_unstructured,
+)
+
+from .conftest import clear_vault_key
+
+
+def config_to_dagster_dict_unstructured(config):
+    """Build the Dagster op_config dict for an unstructured pseudonymisation config."""
+    config_dict = {"language": config.language.value, "used_function": []}
+    entries = config_dict["used_function"]
+
+    for wrapper in config.used_function:
+        technique = wrapper.technique
+        payload = technique.model_dump()
+
+        # Replace the dumped pii entries with the enum member names
+        if "pii" in payload:
+            payload["pii"] = [member.name for member in technique.pii]
+
+        # The technique's type becomes the key that wraps the remaining
+        # fields, so it is dropped from the payload itself.
+        payload.pop("type", None)
+        entries.append({"technique": {technique.type: payload}})
+
+    return config_dict
+
+
+def run_unstructured_op(config, text):
+    """
+    Invoke the unstructured pseudonymisation op on ``text`` inside a test op context.
+
+    Returns:
+        tuple: (result_text: str, metrics_markdown: str)
+    """
+    op_config = config_to_dagster_dict_unstructured(config)
+    outputs = anonymize_pseudonymize_unstructured(build_op_context(op_config=op_config), text=text)
+    text_output, metrics_output = outputs
+    # Hand back the payloads carried by the two Output objects
+    return text_output.value, metrics_output.value
+
+
+def parse_metrics_markdown(metrics_md: str) -> dict:
+    """
+    Parse markdown metrics into structured dict for easier testing.
+
+    Args:
+        metrics_md: Markdown metrics string from op output
+
+    Returns:
+        dict with keys: total_pii_detected, pii_by_type, techniques_applied, language
+    """
+    result = {
+        "total_pii_detected": 0,
+        "pii_by_type": {},
+        "techniques_applied": {},
+        "language": "",
+    }
+
+    # Extract total PII detected
+    total_match = re.search(r"\*\*Total PII Detected\*\*:\s*(\d+)", metrics_md)
+    if total_match:
+        result["total_pii_detected"] = int(total_match.group(1))
+
+    # Extract language
+    lang_match = re.search(r"\*\*Language\*\*:\s*(\w+)", metrics_md)
+    if lang_match:
+        result["language"] = lang_match.group(1)
+
+    # Extract PII by type from table; the last row may end the text without a newline
+    pii_table_section = re.search(
+        r"### PII by Type\n\| Entity Type \| Count \|\n\|[^\n]+\n((?:\|[^\n]+(?:\n|$))+)",
+        metrics_md,
+    )
+    if pii_table_section:
+        for line in pii_table_section.group(1).strip().split("\n"):
+            parts = [p.strip() for p in line.split("|") if p.strip()]
+            if len(parts) == 2:
+                entity_type, count = parts
+                result["pii_by_type"][entity_type] = int(count)
+
+    # Extract techniques applied; the final bullet may likewise lack a trailing newline
+    tech_section = re.search(r"### Techniques Applied\n((?:- \*\*[^\n]+(?:\n|$))+)", metrics_md)
+    if tech_section:
+        for line in tech_section.group(1).strip().split("\n"):
+            tech_match = re.match(r"-\s*\*\*(.+?)\*\*:\s*(.+)", line)
+            if tech_match:
+                pii_type, technique = tech_match.groups()
+                result["techniques_applied"][pii_type] = technique
+
+    return result
+
+
+# -------------------------------- Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def sample_text_en():
+    """English text with a name, company, email, phone number, street address and SSN."""
+    return """
+    John Smith works at Acme Corporation. His email is john.smith@example.com
+    and his phone number is +1-555-123-4567. He lives in New York City at
+    123 Main Street, Apartment 4B. His SSN is 123-45-6789.
+    """
+
+
+@pytest.fixture
+def sample_text_multi_person():
+    """Two-sentence text mentioning five full person names (Alice Johnson ... David Wilson)."""
+    return """
+    The meeting included Alice Johnson, Bob Williams, and Charlie Brown.
+    They discussed the project with Maria Garcia and David Wilson.
+    """
+
+
+@pytest.fixture
+def sample_text_mixed_pii():
+    """Contact-card style text with a name, email, phone number and URL (used by AC1 tests)."""
+    return """
+    Contact Information:
+    Name: Dr. Emily Watson
+    Email: emily.watson@hospital.com
+    Phone: +44-20-7946-0958
+    Website: https://patient-portal.hospital.com/records
+    """
+
+
+@pytest.fixture
+def encrypt_person_config():
+    """English config with one technique: encrypt PERSON using key "test_person_key"."""
+    person_encryption = EncryptConfig(
+        type="encrypt",
+        pii=[PIIEntityEnum.PERSON],
+        key_name="test_person_key",
+    )
+    techniques = [PseudoTechniqueConfig(technique=person_encryption)]
+
+    # A single technique entry; no other PII type is configured here
+    return AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=techniques,
+    )
+
+
+@pytest.fixture
+def retain_person_config():
+    """English config with one technique: a "retain" entry for PERSON entities."""
+    keep_person = RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON])
+    techniques = [PseudoTechniqueConfig(technique=keep_person)]
+    return AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=techniques,
+    )
+
+
+@pytest.fixture
+def mixed_technique_config():
+    """English config: encrypt PERSON and EMAIL, retain PHONE_NUMBERS (AC1 testing)."""
+    # Technique 1: encryption under the "test_mixed_key" key name
+    encrypt_identity = EncryptConfig(
+        type="encrypt",
+        pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+        key_name="test_mixed_key",
+    )
+
+    # Technique 2: retention
+    keep_phone = RetainConfig(type="retain", pii=[PIIEntityEnum.PHONE_NUMBERS])
+
+    techniques = [encrypt_identity, keep_phone]
+    return AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[PseudoTechniqueConfig(technique=item) for item in techniques],
+    )
+
+
+# ================================================================================================
+# AC1: Pseudonymisation and Retention Are Applied Correctly
+# ================================================================================================
+
+
+def test_ac1_encrypt_configured_pii_types(sample_text_mixed_pii, encrypt_person_config):
+    """AC1: Test that configured PII types are encrypted correctly."""
+    clear_vault_key("test_person_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_config, sample_text_mixed_pii)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify person name is encrypted (not in plaintext)
+    assert "Emily Watson" not in result_text, "Configured PERSON PII should be encrypted"
+
+    # The output text is expected to carry a "{encrypt:" marker for encrypted spans
+    assert "{encrypt:" in result_text, "Encrypted token should be present in result"
+
+    # Parsed metrics must report a non-zero total and a PERSON row in the type table
+    assert metrics["total_pii_detected"] > 0, "System should detect PII entities"
+    assert "PERSON" in metrics["pii_by_type"], "PERSON type should be in detected PII"
+
+    # Verify text structure is preserved (surrounding text intact)
+    assert "Contact Information:" in result_text, "Non-PII text structure should be preserved"
+
+
+def test_ac1_retain_configured_pii_unchanged(sample_text_multi_person):
+    """AC1: Retained PII stays in plaintext (config mirrors the retain_person_config fixture)."""
+    retain_config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON]))
+        ],
+    )
+
+    result_text, metrics_md = run_unstructured_op(retain_config, sample_text_multi_person)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Only two of the five names in the sample text are checked here
+    assert "Alice Johnson" in result_text, "Retained PERSON PII should remain unchanged"
+    assert "Bob Williams" in result_text, "Retained PERSON PII should remain unchanged"
+
+    # The techniques list parsed from the metrics must map PERSON to "retain" (any case)
+    assert (
+        "retain" in metrics["techniques_applied"].get("PERSON", "").lower()
+    ), "Retain technique should be recorded for PERSON type"
+
+
+def test_ac1_unconfigured_pii_replaced_with_placeholders(sample_text_mixed_pii):
+    """AC1: Test that unconfigured PII types are replaced with placeholders."""
+    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_person_only_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_person_only_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
+
+    # Verify person is encrypted (configured)
+    assert "Emily Watson" not in result_text, "Configured PERSON should be encrypted"
+
+    # Verify unconfigured PII types have placeholders
+    assert (
+        "{{" in result_text and "}}" in result_text
+    ), "Unconfigured PII should be replaced with placeholders"
+
+    # Verify original unconfigured PII values are not in result
+    assert (
+        "emily.watson@hospital.com" not in result_text
+    ), "Unconfigured EMAIL should be replaced with placeholder"
+
+    # Verify placeholder format
+    assert (
+        "{{EMAIL}}" in result_text or "{{URL}}" in result_text
+    ), "Placeholders should indicate entity type"
+
+
+def test_ac1_mixed_techniques_applied_correctly(sample_text_mixed_pii, mixed_technique_config):
+    """AC1: Test that multiple techniques (encrypt, retain) are applied correctly."""
+    clear_vault_key("test_mixed_key")
+
+    result_text, metrics_md = run_unstructured_op(mixed_technique_config, sample_text_mixed_pii)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify encrypted PII types (PERSON, EMAIL)
+    assert "Emily Watson" not in result_text, "Configured PERSON should be encrypted"
+    assert "emily.watson@hospital.com" not in result_text, "Configured EMAIL should be encrypted"
+
+    # Verify retained PII type (PHONE_NUMBERS)
+    assert "+44-20-7946-0958" in result_text, "Configured PHONE_NUMBERS should be retained"
+
+    # Verify metrics reflect different techniques
+    assert (
+        "encrypt" in metrics["techniques_applied"].get("PERSON", "").lower()
+    ), "Encrypt technique should be applied to PERSON"
+    assert (
+        "encrypt" in metrics["techniques_applied"].get("EMAIL", "").lower()
+    ), "Encrypt technique should be applied to EMAIL"
+    assert (
+        "retain" in metrics["techniques_applied"].get("PHONE_NUMBERS", "").lower()
+    ), "Retain technique should be applied to PHONE_NUMBERS"
+
+
+def test_ac1_multiple_instances_same_pii_type(sample_text_multi_person, encrypt_person_config):
+    """AC1: Test that all instances of a configured PII type are processed."""
+    clear_vault_key("test_person_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_config, sample_text_multi_person)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify all person names are encrypted
+    person_names = [
+        "Alice Johnson",
+        "Bob Williams",
+        "Charlie Brown",
+        "Maria Garcia",
+        "David Wilson",
+    ]
+    for name in person_names:
+        assert name not in result_text, f"All PERSON instances should be encrypted: {name}"
+
+    # Verify metrics count multiple instances
+    assert metrics["pii_by_type"].get("PERSON", 0) >= len(
+        person_names
+    ), f"Should detect at least {len(person_names)} PERSON entities"
+
+
+def test_ac1_empty_text_returns_empty(encrypt_person_config):
+    """AC1: Test that an empty string input is rejected with a ValueError mentioning 'empty'."""
+    clear_vault_key("test_person_key")
+
+    with pytest.raises(ValueError) as exc_info:
+        run_unstructured_op(encrypt_person_config, "")
+
+    assert "empty" in str(exc_info.value).lower(), "Error should indicate empty input"
+
+
+def test_ac1_text_without_pii_remains_unchanged():
+    """AC1: Test that text without any PII remains unchanged after processing."""
+    no_pii_text = """
+    The weather today is sunny with a high of 25 degrees Celsius.
+    The conference starts at 9:00 AM in Room 301.
+    """
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_no_pii_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_no_pii_key")
+
+    result_text, metrics_md = run_unstructured_op(config, no_pii_text)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    assert result_text.strip() == no_pii_text.strip(), "Text without PII should remain unchanged"
+    assert metrics["total_pii_detected"] == 0, "No PII should be detected"
+
+
+def test_ac1_placeholder_format_indicates_entity_type(sample_text_mixed_pii):
+    """AC1: Test that placeholders for unconfigured PII indicate the entity type."""
+    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_placeholder_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_placeholder_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify placeholder format (scrubadub uses {{TYPE}} format)
+    placeholder_pattern = r"\{\{[A-Z_]+\}\}"
+    placeholders = re.findall(placeholder_pattern, result_text)
+
+    assert (
+        len(placeholders) > 0
+    ), "Result should contain entity-type placeholders for unconfigured PII"
+
+    # Verify metrics track which PII types were detected
+    assert len(metrics["pii_by_type"]) > 0, "Metrics should list detected PII types"
+
+
+# ================================================================================================
+# AC2: Invalid Execution Handling
+# ================================================================================================
+
+
+def test_ac2_graceful_abort_on_scrubadub_failure():
+    """AC2: Test graceful abort when the PII detection engine (Scrubadub) fails."""
+    text = "Test user John Smith with email john@example.com"
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_abort_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_abort_key")
+
+    # Mock Scrubadub to fail; the target is addressed through the consolidated
+    # template_code_location namespace rather than the old top-level package
+    ops_module = "template_code_location.field_level_pseudo_anonymisation.unstructured_ops"
+    with patch(f"{ops_module}.scrubadub.Scrubber") as mock_scrubber_class:
+        mock_scrubber = MagicMock()
+        mock_scrubber.iter_filth.side_effect = RuntimeError("Scrubadub internal error")
+        mock_scrubber_class.return_value = mock_scrubber
+
+        with pytest.raises(RuntimeError) as exc_info:
+            run_unstructured_op(config, text)
+
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "pii" in error_msg
+            or "detection" in error_msg
+            or "scrubadub" in error_msg
+            or "failed" in error_msg
+        ), "Error message should indicate PII detection failure"
+
+
+def test_ac2_graceful_abort_on_encryption_failure(sample_text_en):
+    """AC2: Test graceful abort when an encryption technique fails during execution."""
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_encrypt_fail_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_encrypt_fail_key")
+
+    # Mock encrypt in the techniques module, addressed via the consolidated namespace
+    encrypt_path = (
+        "template_code_location.field_level_pseudo_anonymisation"
+        ".techniques.anonymisation_pseudonymisation_techniques.encrypt"
+    )
+    with patch(encrypt_path) as mock_encrypt:
+        mock_encrypt.side_effect = Exception("Encryption algorithm failure")
+
+        with pytest.raises(RuntimeError) as exc_info:
+            run_unstructured_op(config, sample_text_en)
+
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "encrypt" in error_msg or "failed" in error_msg or "technique" in error_msg
+        ), "Error message should indicate encryption failure"
+
+
+def test_ac2_null_text_input_raises_error(encrypt_person_config):
+    """AC2: Test that a null (None) text input is rejected with an error."""
+    clear_vault_key("test_person_key")
+
+    # Dagster will raise DagsterTypeCheckDidNotPass before op executes
+    from dagster import DagsterTypeCheckDidNotPass
+
+    with pytest.raises((ValueError, DagsterTypeCheckDidNotPass, TypeError)):
+        run_unstructured_op(encrypt_person_config, None)
+
+
+def test_ac2_invalid_language_configuration():
+    """AC2: Test that an unsupported language in the config raises a validation error."""
+    # This should fail at config creation due to Pydantic validation
+    with pytest.raises((ValueError, TypeError)):
+        AnonymisePseudonymizeUnstructuredConfig(
+            language="invalid_lang",  # Should fail Pydantic validation
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        type="encrypt", pii=[PIIEntityEnum.PERSON], key_name="test_key"
+                    )
+                )
+            ],
+        )
+
+
+def test_ac2_very_large_text_processing():
+    """AC2: Test that very large text inputs are processed successfully without memory errors."""
+    # Create large text with repeated PII patterns
+    large_text = (
+        """
+    John Smith works at company. Email: john.smith@example.com.
+    """
+        * 1000
+    )  # ~60KB of text with repeated PII
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_large_text_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_large_text_key")
+
+    result_text, metrics_md = run_unstructured_op(config, large_text)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify processing completed
+    assert result_text is not None, "Large text should be processed successfully"
+    assert len(result_text) > 0, "Result should not be empty"
+    assert metrics["total_pii_detected"] > 0, "PII should be detected in large text"
+
+
+# ================================================================================================
+# AC3: Execution Audit & Logging - Positive Scenario
+# ================================================================================================
+
+
+def test_ac3_successful_execution_logs_timestamp_and_run_id(sample_text_en, encrypt_person_config):
+    """AC3: Test that successful execution context contains a run ID for logging."""
+    clear_vault_key("test_person_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    # Capture run context
+    run_id = context.run_id
+
+    # Execute operation
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_en)
+
+    # Verify run identifier is available for logging
+    assert run_id is not None, "Run ID must be available for audit logging"
+
+    # Verify outputs are returned (for Dagster to log)
+    assert result_text is not None, "Result text should be available for logging"
+    assert metrics is not None, "Metrics should be available for logging"
+
+
+def test_ac3_successful_execution_logs_configuration_parameters(
+    sample_text_en, mixed_technique_config
+):
+    """AC3: Test that the used configuration is accessible for logging on success."""
+    clear_vault_key("test_mixed_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(mixed_technique_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_en)
+
+    # Verify configuration is captured and accessible
+    assert "used_function" in op_config_dict, "Configuration must be accessible for logging"
+    assert len(op_config_dict["used_function"]) == 2, "Multiple techniques should be captured"
+
+    # Verify techniques are logged
+    techniques = [func["technique"] for func in op_config_dict["used_function"]]
+    assert any(
+        "encrypt" in str(tech) for tech in techniques
+    ), "Encrypt technique should be in configuration"
+    assert any(
+        "retain" in str(tech) for tech in techniques
+    ), "Retain technique should be in configuration"
+
+    # Verify metrics contain technique information (in markdown string)
+    metrics_str = metrics.value
+    assert (
+        "Techniques Applied" in metrics_str
+    ), "Applied techniques should be in metrics for logging"
+
+
+def test_ac3_successful_execution_logs_no_raw_pii(sample_text_mixed_pii, encrypt_person_config):
+    """AC3: Test that logs and metrics from a successful run do not contain raw PII."""
+    clear_vault_key("test_person_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_mixed_pii)
+
+    # Verify raw PII values are not in metrics
+    metrics_str = metrics.value
+
+    sensitive_values = ["Emily Watson", "emily.watson@hospital.com", "+44-20-7946-0958"]
+
+    for pii_value in sensitive_values:
+        assert (
+            pii_value not in metrics_str
+        ), f"Raw PII value should not appear in metrics: {pii_value}"
+
+    # Verify configuration logs do not contain raw PII
+    config_str = str(op_config_dict)
+    for pii_value in sensitive_values:
+        assert (
+            pii_value not in config_str
+        ), f"Raw PII value should not appear in configuration logs: {pii_value}"
+
+
+# ================================================================================================
+# AC4: Execution Audit & Logging - Negative Scenario
+# ================================================================================================
+
+
+def test_ac4_failed_execution_logs_error_details():
+    """AC4: Negative execution should surface clear error details (encryption key failure)."""
+    text = "Test user John Smith"
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_fail_log_key",
+                )
+            )
+        ],
+    )
+    clear_vault_key("test_fail_log_key")
+    ctx = build_op_context(op_config=config_to_dagster_dict_unstructured(config))
+
+    # Patch the key retrieval used inside unstructured_ops to force failure; the
+    # target is addressed through the consolidated template_code_location namespace
+    ops_module = "template_code_location.field_level_pseudo_anonymisation.unstructured_ops"
+    key_error = RuntimeError("Encryption key retrieval failed")
+    with patch(f"{ops_module}.create_get_encryption_key", side_effect=key_error):
+        with pytest.raises(RuntimeError) as exc_info:
+            # Consume the generator to trigger execution and raise the exception
+            list(anonymize_pseudonymize_unstructured(ctx, text=text))
+
+        msg = str(exc_info.value).lower()
+        assert "key" in msg and "failed" in msg, "Error message should mention key failure"
+
+
+def test_ac4_failed_execution_logs_configuration_used():
+    """AC4: Test that the attempted configuration is available for logging on failure."""
+    text = "Test data with person John Doe"
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_config_fail_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_config_fail_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(config)
+    context = build_op_context(op_config=op_config_dict)
+
+    # Mock _initialize_scrubber to fail; the target is addressed through the
+    # consolidated template_code_location namespace
+    ops_module = "template_code_location.field_level_pseudo_anonymisation.unstructured_ops"
+    with patch(f"{ops_module}._initialize_scrubber") as mock_init_scrubber:
+        mock_init_scrubber.side_effect = Exception("Scrubber module not available")
+
+        with pytest.raises(Exception) as exc_info:
+            list(anonymize_pseudonymize_unstructured(context, text=text))
+
+        # Verify configuration is still accessible despite failure
+        assert op_config_dict is not None, "Configuration must be accessible for failure audit"
+        assert (
+            "used_function" in op_config_dict
+        ), "Technique configuration should be available for diagnosis"
+
+        # Verify error was raised with proper message
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "pii" in error_msg
+            or "detection" in error_msg
+            or "failed" in error_msg
+            or "scrubber" in error_msg
+            or "module" in error_msg
+        ), "Error should indicate detection/processing failed"
+
+
+def test_ac4_failed_execution_logs_failure_reason():
+    """AC4: Test that the reason for a failure is clearly indicated in the error message."""
+    text = "User: Alice Smith, Email: alice@example.com"
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_failure_reason_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_failure_reason_key")
+
+    # Mock key retrieval function to fail; the target is addressed through the
+    # consolidated template_code_location namespace
+    ops_module = "template_code_location.field_level_pseudo_anonymisation.unstructured_ops"
+    with patch(f"{ops_module}.create_get_encryption_key") as mock_get_key:
+        mock_get_key.side_effect = RuntimeError("Vault connection timeout")
+
+        with pytest.raises(RuntimeError) as exc_info:
+            run_unstructured_op(config, text)
+
+        # Verify failure reason is in error message
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "encrypt" in error_msg
+            or "key" in error_msg
+            or "timeout" in error_msg
+            or "failed" in error_msg
+        ), "Error should indicate key retrieval/encryption failure"
+
+
+# ================================================================================================
+# Additional Tests - Edge Cases and Integration
+# ================================================================================================
+
+
+def test_multi_language_support_italian():
+    """Additional test: Verify that Italian text is processed correctly."""
+    italian_text = """
+    Il dottor Marco Rossi lavora presso l'ospedale.
+    Email: marco.rossi@ospedale.it
+    Telefono: +39-06-12345678
+    """
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.it,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_italian_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_italian_key")
+
+    result_text, metrics_md = run_unstructured_op(config, italian_text)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify processing occurred
+    assert result_text != italian_text, "Italian text should be processed"
+    assert metrics["total_pii_detected"] > 0, "PII should be detected in Italian text"
+
+
+def test_special_characters_in_text():
+    """Additional test: Verify handling of text with special Unicode characters."""
+    special_text = """
+    User: João da Silva 🇧🇷
+    Email: joão@empresa.com.br
+    Message: "Hello, World!" — Testing special chars: €, £, ¥, ©, ®
+    """
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.pt,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_special_chars_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_special_chars_key")
+
+    result_text, metrics_md = run_unstructured_op(config, special_text)
+
+    # Verify processing completed without encoding errors
+    assert result_text is not None, "Special characters should not cause processing failure"
+    assert len(result_text) > 0, "Result should not be empty"
+
+
+def test_deterministic_encryption_within_session(sample_text_en, encrypt_person_config):
+    """Additional test: Verify encryption format consistency across runs."""
+    clear_vault_key("test_person_key")
+
+    result1, metrics_md1 = run_unstructured_op(encrypt_person_config, sample_text_en)
+    result2, metrics_md2 = run_unstructured_op(encrypt_person_config, sample_text_en)
+
+    # Both should have encryption tokens
+    assert "{encrypt:" in result1, "First run should produce encrypted tokens"
+    assert "{encrypt:" in result2, "Second run should produce encrypted tokens"
+
+    # Verify consistent PII detection
+    metrics1 = parse_metrics_markdown(metrics_md1)
+    metrics2 = parse_metrics_markdown(metrics_md2)
+
+    assert (
+        metrics1["total_pii_detected"] == metrics2["total_pii_detected"]
+    ), "PII detection should be consistent across runs"
+
+    # Verify token format is consistent (Fernet base64 pattern)
+    token_pattern = r"\{encrypt:gAAAAAB[A-Za-z0-9+/=_-]+\}"
+    tokens1 = re.findall(token_pattern, result1)
+    tokens2 = re.findall(token_pattern, result2)
+
+    assert len(tokens1) == len(tokens2), "Same number of encryption tokens should be generated"
diff --git a/tests/field_level_pseudo_anonymisation/test_jobs.py b/tests/field_level_pseudo_anonymisation/test_jobs.py
new file mode 100644
index 0000000..616c3d5
--- /dev/null
+++ b/tests/field_level_pseudo_anonymisation/test_jobs.py
@@ -0,0 +1,58 @@
+from template_code_location.field_level_pseudo_anonymisation.jobs import (
+    anonymize_pseudonymize_structured_job,
+    anonymize_pseudonymize_structured_job_s3,
+    depseudonymize_structured_job,
+    depseudonymize_structured_job_s3,
+    anonymize_pseudonymize_unstructured_job_s3,
+    anonymize_pseudonymize_unstructured_job,
+    depseudonymize_unstructured_job_s3,
+    depseudonymize_unstructured_job
+)
+
+
+def test_anonymize_pseudonymize_structured_job_is_callable():
+    """Test anonymize_pseudonymize_structured_job is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_structured_job)
+    assert hasattr(anonymize_pseudonymize_structured_job, 'execute_in_process')
+
+
+def test_anonymize_pseudonymize_structured_job_s3_is_callable():
+    """Test anonymize_pseudonymize_structured_job_s3 is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_structured_job_s3)
+    assert hasattr(anonymize_pseudonymize_structured_job_s3, 'execute_in_process')
+
+
+def test_depseudonymize_structured_job_is_callable():
+    """Test depseudonymize_structured_job is a valid Dagster job"""
+    assert callable(depseudonymize_structured_job)
+    assert hasattr(depseudonymize_structured_job, 'execute_in_process')
+
+
+def test_depseudonymize_structured_job_s3_is_callable():
+    """Test depseudonymize_structured_job_s3 is a valid Dagster job"""
+    assert callable(depseudonymize_structured_job_s3)
+    assert hasattr(depseudonymize_structured_job_s3, 'execute_in_process')
+
+
+def test_anonymize_pseudonymize_unstructured_job_is_callable():
+    """Test anonymize_pseudonymize_unstructured_job is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_unstructured_job)
+    assert hasattr(anonymize_pseudonymize_unstructured_job, 'execute_in_process')
+
+
+def test_anonymize_pseudonymize_unstructured_job_s3_is_callable():
+    """Test anonymize_pseudonymize_unstructured_job_s3 is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_unstructured_job_s3)
+    assert hasattr(anonymize_pseudonymize_unstructured_job_s3, 'execute_in_process')
+
+
+def test_depseudonymize_unstructured_job_is_callable():
+    """Test depseudonymize_unstructured_job is a valid Dagster job"""
+    assert callable(depseudonymize_unstructured_job)
+    assert hasattr(depseudonymize_unstructured_job, 'execute_in_process')
+
+
+def test_depseudonymize_unstructured_job_s3_is_callable():
+    """Test depseudonymize_unstructured_job_s3 is a valid Dagster job"""
+    assert callable(depseudonymize_unstructured_job_s3)
+    assert hasattr(depseudonymize_unstructured_job_s3, 'execute_in_process')

From 49f3afd6abbd7d60ac23654402d2d01f574af42e Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Fri, 24 Apr 2026 18:42:44 +0200
Subject: [PATCH 03/15] docs(SIMPL-24642): update Development Guide to reflect
 consolidated structure

---
 documents/Development Guide.md | 39 ++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/documents/Development Guide.md b/documents/Development Guide.md
index 0f140ad..23c60d7 100644
--- a/documents/Development Guide.md	
+++ b/documents/Development Guide.md	
@@ -9,18 +9,35 @@ By following a *code-first approach*, developers ensure consistency, traceabilit
 Development must always begin in a local environment. This allows developers to rapidly iterate, test business logic, and validate DAG (Directed Acyclic Graph) structures without impacting production data.
 
 ### 2.1 Project Layout
-To ensure compatibility with the Simpl-Open platform, every Dagster code location must adhere to the following directory structure:
+This repository (`template-code-location`) serves as the **single consolidated code location** for all data services workflows. It contains the jobs, ops, and configurations previously spread across `data-processing`, `dataframe-level-anonymisation`, and `field-level-pseudo-anonymisation`.
+
 ```text
-project-root/
-├── dagster_code_location/
-│   ├── jobs/           # Executable workflows
-│   ├── ops/            # Individual functional units (business logic)
-│   ├── resources/      # External connections (Object storage, APIs, etc...)
-│   └── repository.py   # Central entry point for the code location
-├── tests/              # Unit and integration tests
-├── Dockerfile          # Containerization instructions
-├── pyproject.toml      # Dependency management (Poetry/Pip/UV)
-└── README.md           # Documentation
+template-code-location/
+├── src/
+│   └── template_code_location/
+│       ├── repository.py                  # Unified entry point (all jobs/sensors/resources)
+│       ├── data_processing/               # Data cleaning & transformation ops/jobs
+│       │   ├── config_models/
+│       │   ├── jobs.py
+│       │   └── ops.py
+│       ├── dataframe_level_anonymisation/  # k-anonymity, l-diversity, t-closeness
+│       │   ├── config_models/
+│       │   ├── jobs.py
+│       │   ├── ops.py
+│       │   └── utils.py
+│       ├── field_level_pseudo_anonymisation/  # Field-level encryption/hashing/redaction
+│       │   ├── config_models/
+│       │   ├── techniques/
+│       │   ├── jobs.py
+│       │   ├── ops.py
+│       │   ├── unstructured_ops.py
+│       │   └── utils.py
+│       ├── jobs/                           # Template example jobs
+│       └── ops/                            # Template example ops
+├── tests/                                  # All tests (migrated from source repos)
+├── Dockerfile
+├── pyproject.toml
+└── README.md
 ```
 
 ### 2.2 Code Examples (Ops, Jobs, and Definitions)

From 0847026b3243a4d4d8179b68b5b20339c0d6c765 Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Fri, 24 Apr 2026 19:14:36 +0200
Subject: [PATCH 04/15] fix: loosen numpy>=2.0.1 to resolve anjana dependency
 conflict

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3b2741f..4c6f2dc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ dependencies = [
     # Data processing
     "pandas>=2.1.4",
     "pyarrow>=23.0",
-    "numpy>=2.4",
+    "numpy>=2.0.1",
     "lxml>=6.0",
     "xmltodict>=1.0",
     "rdflib>=7.6",

From bdfbe3d3102227e0859f655372dfadc2be976d31 Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Mon, 27 Apr 2026 18:18:38 +0200
Subject: [PATCH 05/15] change pip to uv and update dependencies

---
 Dockerfile            | 64 +++++++++++++++++++++++++++++++++++++++----
 pipeline.variables.sh |  2 +-
 pyproject.toml        | 26 +++++++++++++-----
 3 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index fd4e780..0c997fb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,15 +1,67 @@
 FROM python:3.12-slim-bookworm
 
-# Install git for git-based dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
+# --- Install uv (pinned for reproducibility) ---
+COPY --from=ghcr.io/astral-sh/uv:0.10.8 /uv /uvx /bin/
 
 WORKDIR /app
 
-COPY pyproject.toml .
-COPY src/ src/
+# Create non-root user with explicit UID/GID 1000
+RUN addgroup --gid 1000 appgroup && \
+    adduser --uid 1000 --gid 1000 --disabled-password --gecos "" appuser
 
-# Install the package and all dependencies
-RUN pip install --no-cache-dir .
+# Install system dependencies:
+#   - git: required to fetch util-services from GitLab (tool.uv.sources)
+#   - build-essential / gcc / g++ / python3-dev / cmake: native extensions
+#     (scrubadub-spacy → spaCy, pycanon, etc.)
+#   - curl: optional healthcheck / runtime tooling
+RUN apt-get update && apt-get upgrade -y \
+    && apt-get install -y --no-install-recommends \
+    build-essential=12.9 \
+    cmake=3.25.1-1 \
+    gcc=4:12.2.0-3 \
+    g++=4:12.2.0-3 \
+    python3-dev=3.11.2-1+b1 \
+    git=1:2.39.5-0+deb12u3 \
+    curl=7.88.1-10+deb12u14 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && rm -rf /tmp/* \
+    && rm -rf /var/tmp/*
+
+# Pre-own /app so appuser can write to it
+RUN chown -R appuser:appgroup /app
+
+# Copy project metadata and source
+COPY pyproject.toml .
+COPY src/ ./src/
+
+# uv environment knobs:
+#   UV_COMPILE_BYTECODE  → compile .pyc files at install time for faster cold start
+#   UV_LINK_MODE=copy    → copy from the uv cache instead of linking (the cache mount is a separate filesystem)
+#   UV_SYSTEM_PYTHON=1   → install into the system Python (no extra venv needed)
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+ENV UV_SYSTEM_PYTHON=1
+
+# Install the project and all dependencies, respecting [tool.uv.sources]
+# (git source for util-services and pytorch-cpu index for torch)
+# BuildKit cache mount keeps the uv package cache across builds
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install .
+
+ENV PYTHONPATH="/app/src"
+
+# Make /app writable for the non-root user (e.g. spaCy model downloads)
+RUN chown -R 1000:1000 /app && chmod -R u+w /app
+
+# Provide a real home directory for appuser
+RUN mkdir -p /home/appuser && chown -R 1000:1000 /home/appuser
+ENV HOME=/home/appuser
+
+USER appuser
+
+# Sanity-check: fail the build (not container start-up) if the dagster CLI cannot run as appuser
+RUN dagster --version
 
 EXPOSE 4000
 
diff --git a/pipeline.variables.sh b/pipeline.variables.sh
index 3292612..4a3f9c4 100644
--- a/pipeline.variables.sh
+++ b/pipeline.variables.sh
@@ -1 +1 @@
-PROJECT_VERSION_NUMBER="0.0.1"
\ No newline at end of file
+PROJECT_VERSION_NUMBER="0.1.0"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 4c6f2dc..7897316 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
     "lxml>=6.0",
     "xmltodict>=1.0",
     "rdflib>=7.6",
-    "openpyxl",
+    "openpyxl>=3.1.0",
     "xlrd>=2.0.1",
     "tabulate==0.8.10",
     "pyspellchecker>=0.8.4",
@@ -35,14 +35,26 @@ dependencies = [
     "pycanon==1.0.1.post2",
     "anjana>=1.0.0",
     # Field-level pseudo-anonymisation
-    "scrubadub",
-    "scrubadub_spacy",
-    "hvac",
-    "cryptography",
-    # Util services (git dependency)
-    "util-services @ git+https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git@v0.4.1",
+    "scrubadub>=2.0.0",
+    "scrubadub_spacy>=1.0.0",
+    "hvac>=2.0.0",
+    "cryptography>=42.0.0",
+    # Util services — resolved via [tool.uv.sources] (git)
+    "util-services",
 ]
 
+[tool.uv]
+exclude-dependencies = ["transformers", "spacy-transformers"]
+
+[tool.uv.sources]
+torch = { index = "pytorch-cpu" }
+util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "feature/SIMPL-24631" }
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
 [project.optional-dependencies]
 dev = [
     "pytest>=8.0.0",

From b58e399130691ef93869b9d2003dbb45d4de4c5b Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Mon, 27 Apr 2026 18:52:34 +0200
Subject: [PATCH 06/15] update data processing jobs to use structured data
 functions

---
 .../data_processing/jobs.py                   | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/template_code_location/data_processing/jobs.py b/src/template_code_location/data_processing/jobs.py
index 54fb939..674e3a1 100644
--- a/src/template_code_location/data_processing/jobs.py
+++ b/src/template_code_location/data_processing/jobs.py
@@ -1,8 +1,8 @@
 from dagster import job
 from util_services.util_ops import (
     preview_dataframe,
-    read_csv_from_s3,
-    write_csv_to_s3,
+    read_structured_from_s3,
+    write_df_to_s3,
 )
 from .ops import (
     remove_duplicates,
@@ -21,10 +21,10 @@ from .ops import (
     "resource_type": "RD_DATA"
 })
 def remove_duplicates_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = remove_duplicates(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 
@@ -33,10 +33,10 @@ def remove_duplicates_job_s3():
     "resource_type": "RD_DATA"
 })
 def fill_missing_values_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = fill_missing_values(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 
@@ -45,10 +45,10 @@ def fill_missing_values_job_s3():
     "resource_type": "RD_DATA"
 })
 def standardize_categorical_values_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = standardize_categorical_values(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 
@@ -57,10 +57,10 @@ def standardize_categorical_values_job_s3():
     "resource_type": "RD_DATA"
 })
 def correct_typos_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = correct_typos(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 @job(tags={
@@ -68,10 +68,10 @@ def correct_typos_job_s3():
     "resource_type": "RD_DATA"
 })
 def normalize_numeric_min_max_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = normalize_numeric_min_max(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 @job(tags={
@@ -79,10 +79,10 @@ def normalize_numeric_min_max_job_s3():
     "resource_type": "RD_DATA"
 })
 def normalize_datetime_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = normalize_datetime(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 @job(tags={
@@ -90,10 +90,10 @@ def normalize_datetime_job_s3():
     "resource_type": "RD_DATA"
 })
 def normalize_coordinates_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = normalize_coordinates(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 @job(tags={
@@ -101,10 +101,10 @@ def normalize_coordinates_job_s3():
     "resource_type": "RD_DATA"
 })
 def add_global_aggregations_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = add_global_aggregations(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)
 
 @job(tags={
@@ -112,8 +112,8 @@ def add_global_aggregations_job_s3():
     "resource_type": "RD_DATA"
 })
 def filter_dataset_job_s3():
-    org_df = read_csv_from_s3()
+    org_df = read_structured_from_s3()
     anon_df = filter_dataset(org_df)
     preview_dataframe(org_df)
-    write_csv_to_s3(anon_df)
+    write_df_to_s3(anon_df)
     preview_dataframe(anon_df)

From 1fc7c7864a5a4f896fae45a2a2386496a9f154dd Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Wed, 29 Apr 2026 15:33:18 +0200
Subject: [PATCH 07/15] fix: update tabulate

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7897316..ba3c8c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "rdflib>=7.6",
     "openpyxl>=3.1.0",
     "xlrd>=2.0.1",
-    "tabulate==0.8.10",
+    "tabulate>=0.9",
     "pyspellchecker>=0.8.4",
     "PyGeodesy>=24.6.11",
     # Validation

From bba5b99420a5c79fc3a6e9ac3e51ce5e29c395cf Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Tue, 5 May 2026 16:37:38 +0200
Subject: [PATCH 08/15] expose data_processing_job for test

---
 src/template_code_location/repository.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/template_code_location/repository.py b/src/template_code_location/repository.py
index cf97606..f825e85 100644
--- a/src/template_code_location/repository.py
+++ b/src/template_code_location/repository.py
@@ -36,8 +36,11 @@ from template_code_location.field_level_pseudo_anonymisation.jobs import (
     depseudonymize_unstructured_job_s3,
 )
 
+from template_code_location.jobs import data_processing_job
+
 defs = Definitions(
     jobs=[
+        data_processing_job,
         # Data processing
         remove_duplicates_job_s3,
         fill_missing_values_job_s3,

From f0cac061b8e4af5b13bff3f559c4d15e7ed8135e Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Tue, 5 May 2026 16:48:47 +0200
Subject: [PATCH 09/15] update util-services

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ba3c8c7..de7ac13 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ exclude-dependencies = ["transformers", "spacy-transformers"]
 
 [tool.uv.sources]
 torch = { index = "pytorch-cpu" }
-util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "feature/SIMPL-24631" }
+util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.5.0" }
 
 [[tool.uv.index]]
 name = "pytorch-cpu"

From 2e6e78855290354e69c5645b53dbbb29ff9ddbde Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Tue, 5 May 2026 17:07:07 +0200
Subject: [PATCH 10/15] rename field-level ops and jobs

---
 .../field_level_pseudo_anonymisation/jobs.py  | 64 +++++++++----------
 src/template_code_location/repository.py      | 16 ++---
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
index 56baf11..0f39cfb 100644
--- a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
+++ b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
@@ -3,13 +3,13 @@ from util_services.util_ops import (
     preview_dataframe,
     read_structured_to_df,
     write_df_to_local,
-    write_string_to_txt,
-    read_txt_to_string,
-    preview_txt,
+    write_string_to_unstructured,
+    read_unstructured_to_string,
+    preview_unstructured,
     read_structured_from_s3,
     write_df_to_s3,
-    read_txt_from_s3,
-    write_text_to_s3,
+    read_unstructured_from_s3,
+    write_unstructured_to_s3,
 )
 from .ops import (
     anonymize_pseudonymize_structured,
@@ -23,7 +23,7 @@ from .unstructured_ops import (
 @job(tags={
     "business_operation": "ANONYMISATION_PSEUDONYMISATION"
 })
-def anonymize_pseudonymize_structured_job():
+def anonymise_pseudonymise_structured_job():
     df = read_structured_to_df()
     preview_dataframe(df)
     df_anon, metrics = anonymize_pseudonymize_structured(df)
@@ -35,7 +35,7 @@ def anonymize_pseudonymize_structured_job():
     "business_operation": "ANONYMISATION_PSEUDONYMISATION",
     "resource_type": "RD_DATA"
 })
-def anonymize_pseudonymize_structured_job_s3():
+def anonymise_pseudonymise_structured_job_s3():
     df = read_structured_from_s3()
     preview_dataframe(df)
     df_anon, metrics = anonymize_pseudonymize_structured(df)
@@ -46,7 +46,7 @@ def anonymize_pseudonymize_structured_job_s3():
 @job(tags={
     "business_operation": "DEPSEUDONYMISATION"
 })
-def depseudonymize_structured_job():
+def depseudonymise_structured_job():
     df = read_structured_to_df()
     preview_dataframe(df)
     df_anon, metrics = depseudonymize_structured(df)
@@ -58,7 +58,7 @@ def depseudonymize_structured_job():
     "business_operation": "DEPSEUDONYMISATION",
     "resource_type": "RD_DATA"
 })
-def depseudonymize_structured_job_s3():
+def depseudonymise_structured_job_s3():
     df = read_structured_from_s3()
     preview_dataframe(df)
     df_anon, metrics = depseudonymize_structured(df)
@@ -69,7 +69,7 @@ def depseudonymize_structured_job_s3():
 @job(tags={
     "business_operation": "ANONYMISATION_PSEUDONYMISATION"
 })
-def anonymize_pseudonymize_depseudonymize_structured_job():
+def anonymise_pseudonymise_depseudonymise_structured_job():
     df = read_structured_to_df()
     preview_dataframe(df)
     df_pseduo, metrics = anonymize_pseudonymize_structured(df)
@@ -81,46 +81,46 @@ def anonymize_pseudonymize_depseudonymize_structured_job():
 @job(tags={
     "business_operation": "ANONYMISATION_PSEUDONYMISATION"
 })
-def anonymize_pseudonymize_unstructured_job():
-    text = read_txt_to_string()
-    preview_txt(text)
+def anonymise_pseudonymise_unstructured_job():
+    text = read_unstructured_to_string()
+    preview_unstructured(text)
     text_anon, metrics = anonymize_pseudonymize_unstructured(text)
-    preview_txt(text_anon)
-    preview_txt(metrics)
-    write_string_to_txt(text_anon)
+    preview_unstructured(text_anon)
+    preview_unstructured(metrics)
+    write_string_to_unstructured(text_anon)
 
 
 @job(tags={
     "business_operation": "ANONYMISATION_PSEUDONYMISATION",
     "resource_type": "RD_DATA"
 })
-def anonymize_pseudonymize_unstructured_job_s3():
-    text = read_txt_from_s3()
-    preview_txt(text)
+def anonymise_pseudonymise_unstructured_job_s3():
+    text = read_unstructured_from_s3()
+    preview_unstructured(text)
     text_anon, metrics = anonymize_pseudonymize_unstructured(text)
-    preview_txt(text_anon)
-    preview_txt(metrics)
-    write_text_to_s3(text_anon)
+    preview_unstructured(text_anon)
+    preview_unstructured(metrics)
+    write_unstructured_to_s3(text_anon)
 
 
 @job(tags={
     "business_operation": "DEPSEUDONYMISATION"
 })
-def depseudonymize_unstructured_job():
-    text = read_txt_to_string()
-    preview_txt(text)
+def depseudonymise_unstructured_job():
+    text = read_unstructured_to_string()
+    preview_unstructured(text)
     text_anon, metrics = depseudonymize_unstructured(text)
-    preview_txt(text_anon)
-    write_string_to_txt(text_anon)
+    preview_unstructured(text_anon)
+    write_string_to_unstructured(text_anon)
 
 
 @job(tags={
     "business_operation": "DEPSEUDONYMISATION",
     "resource_type": "RD_DATA"
 })
-def depseudonymize_unstructured_job_s3():
-    text = read_txt_from_s3()
-    preview_txt(text)
+def depseudonymise_unstructured_job_s3():
+    text = read_unstructured_from_s3()
+    preview_unstructured(text)
     text_anon, metrics = depseudonymize_unstructured(text)
-    preview_txt(text_anon)
-    write_text_to_s3(text_anon)
+    preview_unstructured(text_anon)
+    write_unstructured_to_s3(text_anon)
diff --git a/src/template_code_location/repository.py b/src/template_code_location/repository.py
index f825e85..d19d6fd 100644
--- a/src/template_code_location/repository.py
+++ b/src/template_code_location/repository.py
@@ -30,10 +30,10 @@ from template_code_location.dataframe_level_anonymisation.jobs import (
 
 # Field-level pseudo-anonymisation jobs
 from template_code_location.field_level_pseudo_anonymisation.jobs import (
-    anonymize_pseudonymize_structured_job_s3,
-    depseudonymize_structured_job_s3,
-    anonymize_pseudonymize_unstructured_job_s3,
-    depseudonymize_unstructured_job_s3,
+    anonymise_pseudonymise_structured_job_s3,
+    depseudonymise_structured_job_s3,
+    anonymise_pseudonymise_unstructured_job_s3,
+    depseudonymise_unstructured_job_s3,
 )
 
 from template_code_location.jobs import data_processing_job
@@ -57,10 +57,10 @@ defs = Definitions(
         t_closeness_job_s3,
         read_write_semistructured_job_s3,
         # Field-level pseudo-anonymisation
-        anonymize_pseudonymize_structured_job_s3,
-        depseudonymize_structured_job_s3,
-        anonymize_pseudonymize_unstructured_job_s3,
-        depseudonymize_unstructured_job_s3,
+        anonymise_pseudonymise_structured_job_s3,
+        depseudonymise_structured_job_s3,
+        anonymise_pseudonymise_unstructured_job_s3,
+        depseudonymise_unstructured_job_s3,
     ],
     sensors=[notify_success, notify_failure, notify_canceled],
     resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},

From 733a38e128ec350dac562099474b6c40a6c85f0a Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Tue, 5 May 2026 17:26:47 +0200
Subject: [PATCH 11/15] fix: correct import path for data_processing_job

---
 src/template_code_location/repository.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/template_code_location/repository.py b/src/template_code_location/repository.py
index d19d6fd..1d0be85 100644
--- a/src/template_code_location/repository.py
+++ b/src/template_code_location/repository.py
@@ -36,7 +36,7 @@ from template_code_location.field_level_pseudo_anonymisation.jobs import (
     depseudonymise_unstructured_job_s3,
 )
 
-from template_code_location.jobs import data_processing_job
+from template_code_location.jobs.jobs import data_processing_job
 
 defs = Definitions(
     jobs=[

From 004bcd5c01007b79469a56ff7659d12e6042856d Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Wed, 6 May 2026 10:58:17 +0200
Subject: [PATCH 12/15] change to import from modules

---
 pyproject.toml                                |    7 +
 .../data_processing/__init__.py               |    0
 .../data_processing/config_models/__init__.py |   18 -
 .../aggregation_configuration.py              |   25 -
 .../columns_select_configuration.py           |   17 -
 ...coordinates_normalization_configuration.py |   22 -
 .../config_models/fill_missing_config.py      |    9 -
 .../config_models/filter_configuration.py     |   52 -
 .../spell_check_configuration.py              |    8 -
 .../data_processing/jobs.py                   |  119 --
 .../data_processing/ops.py                    |  256 ----
 .../dataframe_level_anonymisation/__init__.py |    0
 .../config_models/__init__.py                 |   13 -
 .../config_models/base_config.py              |   33 -
 .../config_models/hierarchies.py              |   18 -
 .../k_anonymity_configuration.py              |   11 -
 .../l_diversity_configuration.py              |    8 -
 .../t_closeness_configuration.py              |    8 -
 .../dataframe_level_anonymisation/jobs.py     |   86 --
 .../dataframe_level_anonymisation/ops.py      |  187 ---
 .../dataframe_level_anonymisation/utils.py    |   19 -
 .../__init__.py                               |    0
 .../config_models/__init__.py                 |   28 -
 .../config_models/languages.py                |   72 --
 .../config_models/pii_entities.py             |   24 -
 .../config_models/structured_config.py        |  110 --
 .../config_models/unstructured_config.py      |  115 --
 .../field_level_pseudo_anonymisation/jobs.py  |  126 --
 .../field_level_pseudo_anonymisation/ops.py   |   77 --
 .../techniques/__init__.py                    |    3 -
 ...onymisation_pseudonymisation_techniques.py |   42 -
 .../depseudonymisation_techniques.py          |    9 -
 .../unstructured_ops.py                       |  428 -------
 .../field_level_pseudo_anonymisation/utils.py |   32 -
 src/template_code_location/repository.py      |    6 +-
 tests/__init__.py                             |    1 -
 tests/data_processing/__init__.py             |    1 -
 tests/data_processing/conftest.py             |   53 -
 tests/data_processing/conftest_utils.py       |    7 -
 tests/data_processing/test_config_models.py   |  202 ---
 tests/data_processing/test_integration.py     |  185 ---
 tests/data_processing/test_jobs.py            |   56 -
 tests/data_processing/test_ops.py             |  700 -----------
 .../dataframe_level_anonymisation/__init__.py |    1 -
 .../config_models/__init__.py                 |    1 -
 .../config_models/test_base_config.py         |   54 -
 .../config_models/test_hierarchies.py         |   48 -
 .../config_models/test_k_anonymity_config.py  |   41 -
 .../config_models/test_l_diversity_config.py  |   44 -
 .../config_models/test_t_closeness_config.py  |   56 -
 .../test_jobs.py                              |   44 -
 .../dataframe_level_anonymisation/test_ops.py |  230 ----
 .../test_utils.py                             |   70 --
 .../__init__.py                               |    1 -
 .../conftest.py                               |  444 -------
 .../test_config_models_coverage.py            |  633 ----------
 .../test_decrypt_structured.py                | 1090 ----------------
 .../test_decrypt_unstructured.py              |  288 -----
 .../test_encrypt_structured.py                | 1119 -----------------
 .../test_encrypt_unstructured.py              |  853 -------------
 .../test_jobs.py                              |   58 -
 61 files changed, 10 insertions(+), 8258 deletions(-)
 delete mode 100644 src/template_code_location/data_processing/__init__.py
 delete mode 100644 src/template_code_location/data_processing/config_models/__init__.py
 delete mode 100644 src/template_code_location/data_processing/config_models/aggregation_configuration.py
 delete mode 100644 src/template_code_location/data_processing/config_models/columns_select_configuration.py
 delete mode 100644 src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py
 delete mode 100644 src/template_code_location/data_processing/config_models/fill_missing_config.py
 delete mode 100644 src/template_code_location/data_processing/config_models/filter_configuration.py
 delete mode 100644 src/template_code_location/data_processing/config_models/spell_check_configuration.py
 delete mode 100644 src/template_code_location/data_processing/jobs.py
 delete mode 100644 src/template_code_location/data_processing/ops.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/__init__.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/jobs.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/ops.py
 delete mode 100644 src/template_code_location/dataframe_level_anonymisation/utils.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/__init__.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/jobs.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/ops.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py
 delete mode 100644 src/template_code_location/field_level_pseudo_anonymisation/utils.py
 delete mode 100644 tests/__init__.py
 delete mode 100644 tests/data_processing/__init__.py
 delete mode 100644 tests/data_processing/conftest.py
 delete mode 100644 tests/data_processing/conftest_utils.py
 delete mode 100644 tests/data_processing/test_config_models.py
 delete mode 100644 tests/data_processing/test_integration.py
 delete mode 100644 tests/data_processing/test_jobs.py
 delete mode 100644 tests/data_processing/test_ops.py
 delete mode 100644 tests/dataframe_level_anonymisation/__init__.py
 delete mode 100644 tests/dataframe_level_anonymisation/config_models/__init__.py
 delete mode 100644 tests/dataframe_level_anonymisation/config_models/test_base_config.py
 delete mode 100644 tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
 delete mode 100644 tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
 delete mode 100644 tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
 delete mode 100644 tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
 delete mode 100644 tests/dataframe_level_anonymisation/test_jobs.py
 delete mode 100644 tests/dataframe_level_anonymisation/test_ops.py
 delete mode 100644 tests/dataframe_level_anonymisation/test_utils.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/__init__.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/conftest.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
 delete mode 100644 tests/field_level_pseudo_anonymisation/test_jobs.py

diff --git a/pyproject.toml b/pyproject.toml
index de7ac13..5eb1ab4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,10 @@ dependencies = [
     "cryptography>=42.0.0",
     # Util services — resolved via [tool.uv.sources] (git)
     "util-services",
+    # Code location packages — resolved via [tool.uv.sources] (git)
+    "data-processing",
+    "dataframe-level-anonymisation",
+    "field-level-pseudo-anonymisation",
 ]
 
 [tool.uv]
@@ -49,6 +53,9 @@ exclude-dependencies = ["transformers", "spacy-transformers"]
 [tool.uv.sources]
 torch = { index = "pytorch-cpu" }
 util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.5.0" }
+data-processing = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/data-processing.git", branch = "feature/SIMPL-24642" }
+dataframe-level-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/dataframe-level-anonymisation.git", branch = "feature/SIMPL-24642" }
+field-level-pseudo-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/field-level-pseudo-anonymisation.git", branch = "feature/SIMPL-24642" }
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
diff --git a/src/template_code_location/data_processing/__init__.py b/src/template_code_location/data_processing/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/template_code_location/data_processing/config_models/__init__.py b/src/template_code_location/data_processing/config_models/__init__.py
deleted file mode 100644
index 5833cab..0000000
--- a/src/template_code_location/data_processing/config_models/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""Configuration models for data processing."""
-
-from .columns_select_configuration import ColumnsSelectConfiguration
-from .fill_missing_config import FillMissingConfiguration
-from .spell_check_configuration import SpellCheckConfiguration
-from .coordinates_normalization_configuration import CoordinatesNormalizationConfiguration
-from .aggregation_configuration import AggregationConfiguration
-from .filter_configuration import DatasetFilterConfiguration, FilterCondition
-
-__all__ = [
-    "ColumnsSelectConfiguration",
-    "FillMissingConfiguration",
-    "SpellCheckConfiguration",
-    "CoordinatesNormalizationConfiguration",
-    "AggregationConfiguration",
-    "FilterCondition",
-    "DatasetFilterConfiguration"
-]
diff --git a/src/template_code_location/data_processing/config_models/aggregation_configuration.py b/src/template_code_location/data_processing/config_models/aggregation_configuration.py
deleted file mode 100644
index 553740f..0000000
--- a/src/template_code_location/data_processing/config_models/aggregation_configuration.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from typing import List
-
-from pydantic import Field, field_validator
-
-from .columns_select_configuration import ColumnsSelectConfiguration
-
-
-class AggregationConfiguration(ColumnsSelectConfiguration):
-
-    operation: str = Field(
-        default="sum",
-        description="Aggregation operations: sum, mean, min, max, count"
-    )
-
-    @field_validator("operation")
-    @classmethod
-    def validate_operations(cls, value):
-        allowed = {"sum", "mean", "min", "max", "count"}
-        if value not in allowed:
-            raise ValueError(
-                f"Invalid aggregation operation '{value}'. "
-                f"Allowed values: {allowed}"
-            )
-
-        return value
diff --git a/src/template_code_location/data_processing/config_models/columns_select_configuration.py b/src/template_code_location/data_processing/config_models/columns_select_configuration.py
deleted file mode 100644
index 658450d..0000000
--- a/src/template_code_location/data_processing/config_models/columns_select_configuration.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from typing import List
-from pydantic import Field,field_validator
-from dagster import Config
-
-
-class ColumnsSelectConfiguration(Config):
-    columns: List[str] = Field(
-        default=["Name"], description="List of columns to process."
-    )
-
-    @field_validator("columns")
-    @classmethod
-    def ensure_unique_columns(cls, v: List[str]) -> List[str]:
-
-        unique_values = list(dict.fromkeys(v)) 
-            
-        return unique_values
diff --git a/src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py b/src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py
deleted file mode 100644
index 64342e4..0000000
--- a/src/template_code_location/data_processing/config_models/coordinates_normalization_configuration.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from typing import Optional
-
-from pydantic import Field, model_validator
-from dagster import Config
-
-
-class CoordinatesNormalizationConfiguration(Config):
-    latColumn: Optional[str] = Field(
-        default="lat", description="Latitude column name"
-    )
-    lonColumn: Optional[str] = Field(
-        default="lon", description="Longitude column name"
-    )
-
-    @model_validator(mode="before")
-    @classmethod
-    def replace_nulls_with_defaults(cls, values):
-        if values.get("latColumn") is None:
-            values["latColumn"] = "lat"
-        if values.get("lonColumn") is None:
-            values["lonColumn"] = "lon"
-        return values
diff --git a/src/template_code_location/data_processing/config_models/fill_missing_config.py b/src/template_code_location/data_processing/config_models/fill_missing_config.py
deleted file mode 100644
index 4c9e5b2..0000000
--- a/src/template_code_location/data_processing/config_models/fill_missing_config.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from typing import Dict
-from dagster import Config
-from pydantic import Field
-
-
-class FillMissingConfiguration(Config):
-    fill_map: Dict[str, str] = Field(
-        default={"Age": "UNKNOWN_AGE"}, description="Missing values filling map."
-    )
diff --git a/src/template_code_location/data_processing/config_models/filter_configuration.py b/src/template_code_location/data_processing/config_models/filter_configuration.py
deleted file mode 100644
index 86bde37..0000000
--- a/src/template_code_location/data_processing/config_models/filter_configuration.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from enum import Enum
-import operator
-from typing import List, Literal, Callable
-from pydantic import Field, model_validator
-from dagster import Config
-import pandas as pd
-
-class FilterOperator(str, Enum):
-    EQ = "=="
-    NE = "!="
-    LT = "<"
-    LE = "<="
-    GT = ">"
-    GE = ">="
-
-    @property
-    def function(self) -> Callable:
-        mapping = {
-            FilterOperator.EQ: operator.eq,
-            FilterOperator.NE: operator.ne,
-            FilterOperator.LT: operator.lt,
-            FilterOperator.LE: operator.le,
-            FilterOperator.GT: operator.gt,
-            FilterOperator.GE: operator.ge,
-        }
-        return mapping[self]
-
-class FilterCondition(Config):
-    column: str = Field(..., description="Name of the column to filter")
-    type: Literal["string", "numeric"] = Field(..., description="Column type (string or numeric)")
-    value: str = Field(..., description="Value to compare against")
-    op: FilterOperator = Field(default=FilterOperator.EQ, description="Operator to apply (string supports only EQ and NE)")
-
-    @model_validator(mode="after")
-    def check_operator_compatibility(self) -> "FilterCondition":
-        if self.type == "string" and self.op not in [FilterOperator.EQ, FilterOperator.NE]:
-            raise ValueError(
-                f"Invalid operator '{self.op.name}' for type 'string'. "
-                "Only EQ (==) and NE (!=) are allowed."
-            )
-        return self
-
-    def apply(self, df: pd.DataFrame) -> pd.Series:
-        val = float(self.value) if self.type == "numeric" else self.value
-        return self.op.function(df[self.column], val)
-
-class DatasetFilterConfiguration(Config):
-    conditions: List[FilterCondition] = Field(
-        default=[],
-        description="List of filter conditions to apply on the dataset. "
-                    "String columns support only 'EQ' and 'NE', numeric columns also support 'LT', 'LE', 'GT' and 'GE'."
-    )
diff --git a/src/template_code_location/data_processing/config_models/spell_check_configuration.py b/src/template_code_location/data_processing/config_models/spell_check_configuration.py
deleted file mode 100644
index 7a12f87..0000000
--- a/src/template_code_location/data_processing/config_models/spell_check_configuration.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from typing import Literal
-from pydantic import Field
-
-from .columns_select_configuration import ColumnsSelectConfiguration
-
-
-class SpellCheckConfiguration(ColumnsSelectConfiguration):
-    language: Literal["en", "es", "it", "fr", "pt", "de", "nl"] = Field(default="en", description="Language to use in the SpellChecker module.")
diff --git a/src/template_code_location/data_processing/jobs.py b/src/template_code_location/data_processing/jobs.py
deleted file mode 100644
index 674e3a1..0000000
--- a/src/template_code_location/data_processing/jobs.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from dagster import job
-from util_services.util_ops import (
-    preview_dataframe,
-    read_structured_from_s3,
-    write_df_to_s3,
-)
-from .ops import (
-    remove_duplicates,
-    fill_missing_values,
-    standardize_categorical_values,
-    correct_typos,
-    normalize_numeric_min_max,
-    normalize_datetime,
-    normalize_coordinates,
-    add_global_aggregations,
-    filter_dataset
-)
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def remove_duplicates_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = remove_duplicates(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def fill_missing_values_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = fill_missing_values(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def standardize_categorical_values_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = standardize_categorical_values(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def correct_typos_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = correct_typos(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def normalize_numeric_min_max_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = normalize_numeric_min_max(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def normalize_datetime_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = normalize_datetime(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def normalize_coordinates_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = normalize_coordinates(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def add_global_aggregations_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = add_global_aggregations(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-@job(tags={
-    "business_operation": "PROCESSING",
-    "resource_type": "RD_DATA"
-})
-def filter_dataset_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df = filter_dataset(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
diff --git a/src/template_code_location/data_processing/ops.py b/src/template_code_location/data_processing/ops.py
deleted file mode 100644
index e380cb8..0000000
--- a/src/template_code_location/data_processing/ops.py
+++ /dev/null
@@ -1,256 +0,0 @@
-import pandas as pd
-from dagster import Out, op
-from spellchecker import SpellChecker
-
-from template_code_location.data_processing.config_models import (
-    AggregationConfiguration,
-    ColumnsSelectConfiguration,
-    CoordinatesNormalizationConfiguration,
-    FillMissingConfiguration,
-    SpellCheckConfiguration,
-    DatasetFilterConfiguration
-)
-
-
-def _parse_dms_to_decimal(value):
-    """Parse a DMS (degrees-minutes-seconds) string to decimal degrees using PyGeodesy.
-
-    Supported formats include (but are not limited to):
-        - 40°26'46"N / 40°26′46″N
-        - 40 26 46 N
-        - 40:26:46N
-        - 40d26m46sN
-        - -40.446  (already decimal – returned as-is)
-
-    Returns None if parsing fails.
-    """
-    from pygeodesy.dms import parseDMS
-
-    if pd.isna(value):
-        return None
-
-    text = str(value).strip()
-    if not text:
-        return None
-
-    try:
-        return float(parseDMS(text))
-    except (ValueError, TypeError):
-        try:
-            return float(text)
-        except (ValueError, TypeError):
-            return None
-
-
-@op(out={"data": Out()})
-def remove_duplicates(context, df: pd.DataFrame):
-    """Remove duplicate rows from the input DataFrame."""
-    logger = context.log
-
-    before = df.shape[0]
-
-    df = df.drop_duplicates()
-
-    after = df.shape[0]
-
-    logger.info(f"Removed {before - after} duplicate rows")
-
-    return df
-
-@op(out={"data": Out()})
-def fill_missing_values(context, config: FillMissingConfiguration, df: pd.DataFrame):
-    """Fill missing values in the DataFrame according to the configured column-to-value mapping."""
-    logger = context.log
-
-    logger.info(f"Filling missing values: {config.fill_map}")
-
-    return df.fillna(config.fill_map)
-
-@op(out={"data": Out()})
-def standardize_categorical_values(context, config: ColumnsSelectConfiguration, df: pd.DataFrame):
-    """Standardize categorical values in selected columns by trimming whitespace and converting text to lowercase."""
-    logger = context.log
-
-    for col in config.columns:
-        if col not in df.columns:
-            logger.warning(f"Column '{col}' not found in DataFrame, skipping.")
-            continue
-
-        original = df[col]
-
-        standardized = (
-            df[col]
-            .fillna("")
-            .astype(str)
-            .str.strip()
-            .str.lower()
-        )
-
-        changed_count = (original != standardized).sum()
-        df[col] = standardized
-
-        logger.info(f"Standardized '{col}' column – {changed_count} values modified")
-
-    return df
-
-@op(out={"data": Out()})
-def correct_typos(context, config: SpellCheckConfiguration, df: pd.DataFrame):
-    """Correct spelling mistakes in the specified text columns."""
-    logger = context.log
-
-    for column in config.columns:
-        if column not in df.columns:
-            logger.warning(f"Column '{column}' not found in DataFrame, skipping.")
-            continue
-
-        spell = SpellChecker(language=config.language)
-
-        original = df[column].astype(str)
-        corrected = original.apply(lambda x, spell_checker=spell: spell_checker.correction(x) if x else x)
-
-        changed_count = (original != corrected).sum()
-        logger.info(f"Corrected typos in '{column}' – {changed_count} values modified")
-
-        df[column] = corrected
-
-    return df
-
-@op(out={"data": Out()})
-def normalize_datetime(context, config: ColumnsSelectConfiguration, df: pd.DataFrame):
-    logger = context.log
-
-    for col in config.columns:
-        if col not in df.columns:
-            logger.warning(f"Column '{col}' not found, skipping normalization.")
-            continue
-
-        normalized = pd.to_datetime(df[col], utc=True, format="mixed", dayfirst=True, errors="coerce")
-
-        if normalized.notna().sum() == 0:
-            logger.warning(
-                f"Column '{col}' has no normalizable datetime values, skipping."
-            )
-            continue
-
-        iso_col = f"{col}_iso"
-
-        formatted = normalized.dt.strftime("%Y-%m-%dT%H:%M:%SZ").fillna("")
-        non_empty = formatted[formatted != ""]
-        if len(non_empty) > 0 and non_empty.str.startswith("1970-01-01").all():
-            logger.warning(
-                f"Column '{col}' all normalized values are '1970-01-01', likely bad input — skipping."
-            )
-            continue
-
-        df[iso_col] = formatted
-
-        logger.info(f"Normalized datetime column '{col}' into '{iso_col}'")
-
-    return df
-
-@op(out={"data": Out()})
-def normalize_numeric_min_max(context, config: ColumnsSelectConfiguration, df: pd.DataFrame):
-    logger = context.log
-
-    for col in config.columns:
-        if col not in df.columns:
-            logger.warning(f"Column '{col}' not found, skipping normalization.")
-            continue
-
-        min_val = df[col].min()
-        max_val = df[col].max()
-
-        if min_val == max_val:
-            logger.warning(f"Column '{col}' has constant values, skipping normalization.")
-            continue
-
-        df[col + "_norm"] = (df[col] - min_val) / (max_val - min_val)
-        logger.info(f"Normalized numeric column '{col}'")
-
-    return df
-
-@op(out={"data": Out()})
-def normalize_coordinates(context, config: CoordinatesNormalizationConfiguration, df: pd.DataFrame):
-    logger = context.log
-
-    lat = config.latColumn
-    lon = config.lonColumn
-
-    for col in [lat, lon]:
-        if pd.api.types.is_numeric_dtype(df[col]):
-            logger.info(f"Column '{col}' is numeric — coercing directly")
-            df[col] = pd.to_numeric(df[col], errors="coerce")
-        else:
-            logger.info(f"Column '{col}' is non-numeric — parsing as DMS with PyGeodesy")
-            df[col] = df[col].apply(_parse_dms_to_decimal)
-
-    invalid_lat = df[lat].isnull().sum()
-    invalid_lon = df[lon].isnull().sum()
-    logger.info(f"Found {invalid_lat} invalid latitudes and {invalid_lon} invalid longitudes")
-
-    df[lat] = df[lat].round(4)
-    df[lon] = df[lon].round(4)
-
-    before_filter_rows = len(df)
-    df = df[(df[lat].between(-90, 90)) & (df[lon].between(-180, 180))]
-    after_filter_rows = len(df)
-    logger.info(f"Filtered coordinates out of range: removed {before_filter_rows - after_filter_rows} rows")
-
-    logger.info(f"Coordinate normalization completed: resulting dataframe has {after_filter_rows} rows")
-
-    return df
-
-@op(out={"data": Out()})
-def add_global_aggregations(context, config: AggregationConfiguration, df: pd.DataFrame):
-    logger = context.log
-
-    group_by_cols = []
-    
-    for col in config.columns:
-        if col not in df.columns:
-            logger.warning(f"Column '{col}' not found, skipping aggregation.")
-            continue
-        group_by_cols.append(col)
-    
-    if config.operation not in {"sum", "mean", "min", "max", "count"}:
-        logger.warning(f"Unsupported aggregation '{config.operation}'")
-    
-    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
-    cols_to_keep = list(set(numeric_cols + group_by_cols))
-    df = df[[c for c in cols_to_keep if c in df.columns]]
-    df = df.groupby(group_by_cols).agg(config.operation).reset_index()
-    return df
-
-@op(out={"data": Out()})
-def filter_dataset(context, config: DatasetFilterConfiguration, df: pd.DataFrame):
-    logger = context.log
-    total_rows_before = len(df)
-    
-    logger.info(f"Starting dataset filtering: initial dataframe has {total_rows_before} rows")
-
-    combined_mask = pd.Series([True] * total_rows_before, index=df.index)
-
-    for condition in config.conditions:
-        if condition.column not in df.columns:
-            logger.warning(f"Column '{condition.column}' not found, skipping filtering.")
-            continue
-        if df[condition.column].isna().all():
-            logger.warning(f"Column '{condition.column}' is empty (all NaN), skipping filtering.")
-            continue
-        try:
-            current_mask = condition.apply(df)
-            combined_mask &= current_mask
-            
-            logger.info(f"Applied filter: {condition.column} {condition.op.value} '{condition.value}'")
-        except Exception as e:
-            logger.error(f"Error applying filter on column '{condition.column}': {e}")
-
-    filtered_df = df[combined_mask]
-    total_rows_after = len(filtered_df)
-    
-    logger.info(
-        f"Filtering completed: {total_rows_after} rows remain "
-        f"(removed {total_rows_before - total_rows_after} rows in total)"
-    )
-
-    return filtered_df
diff --git a/src/template_code_location/dataframe_level_anonymisation/__init__.py b/src/template_code_location/dataframe_level_anonymisation/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py b/src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py
deleted file mode 100644
index 0f490b5..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/config_models/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-"""Configuration models for dataframe-level anonymization."""
-
-from .k_anonymity_configuration import KAnonymityConfiguration
-from .l_diversity_configuration import LDiversityConfiguration
-from .t_closeness_configuration import TClosenessConfiguration
-from .base_config import BaseConfiguration
-
-__all__ = [
-    "BaseConfiguration",
-    "KAnonymityConfiguration",
-    "LDiversityConfiguration",
-    "TClosenessConfiguration",
-]
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py b/src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py
deleted file mode 100644
index 4abf451..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/config_models/base_config.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from typing import Dict, List
-from dagster import Config
-from pydantic import Field, field_validator, model_validator
-
-
-class BaseConfiguration(Config):
-    ident: List[str] = Field(default=["Name"], description="List of identifier column names.")
-    quasi_identifiers: List[str] = Field(default=["Age"], description="List of quasi-identifier column names.")
-    supp_level: float = Field(default=50.0, ge=0.0, le=100.0, description="Max suppression allowed (0–100).")
-    generalisation_hierarchies: Dict[str, str] = Field(
-        default={"Age": "simpl_age"}, description="Hierarchies used to generalize quasi-identifiers."
-    )
-
-    @field_validator("quasi_identifiers")
-    def validate_quasi_identifiers(cls, value):
-        if not value:
-            raise ValueError("At least one quasi-identifier must be provided.")
-        return value
-
-    @field_validator("ident")
-    def validate_ident(cls, value):
-        if not value:
-            raise ValueError("At least one identifier must be provided.")
-        return value
-
-    @model_validator(mode="after")
-    def check_no_overlap(self):
-        ident = set(self.ident)
-        quasi = set(self.quasi_identifiers)
-        overlap = ident & quasi
-        if overlap:
-            raise ValueError(f"Fields cannot be both identifiers and quasi-identifiers: {overlap}")
-        return self
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py b/src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py
deleted file mode 100644
index 65105a0..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/config_models/hierarchies.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from anjana.anonymity.utils import utils
-
-simpl_age = {
-    0: [age for age in range(0, 100)],
-    1: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 5),
-    2: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 10),
-    3: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 20),
-    4: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 100),
-}
-simpl_age2 = {
-    0: [age for age in range(0, 100)],
-    1: utils.generate_intervals([age for age in range(0, 100)], 0, 100, 5),
-}
-simpl_gender = {0: ["M", "F", "O"], 1: ["*", "*", "*"]}
-
-
-def get_all_hierarchies():
-    return {name: obj for name, obj in globals().items() if isinstance(obj, dict)}
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py b/src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py
deleted file mode 100644
index 0ddd88f..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/config_models/k_anonymity_configuration.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from typing import List
-from pydantic import Field
-
-from .base_config import BaseConfiguration
-
-
-class KAnonymityConfiguration(BaseConfiguration):
-    k: int = Field(default=3, ge=2, description="Desired level of k-anonymity (must be >= 2).")
-    sensitive_attributes: List[str] = Field(
-        default=["Disease"], description="List of sensitive attribute column names."
-    )
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py b/src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py
deleted file mode 100644
index c764f1d..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/config_models/l_diversity_configuration.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from pydantic import Field
-from .base_config import BaseConfiguration
-
-
-class LDiversityConfiguration(BaseConfiguration):
-    k: int = Field(default=2, ge=2, description="Desired level of k-anonymity (must be >= 2).")
-    l: int = Field(default=3, ge=1, description="L-diversity level (must be >= 1)")
-    sensitive_attribute: str = Field(default="Disease", description="Sensitive attribute name.")
diff --git a/src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py b/src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py
deleted file mode 100644
index 4461539..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/config_models/t_closeness_configuration.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from pydantic import Field
-from .base_config import BaseConfiguration
-
-
-class TClosenessConfiguration(BaseConfiguration):
-    k: int = Field(default=2, ge=2, description="Desired level of k-anonymity (must be >= 2).")
-    t: float = Field(default=0.5, ge=0.0, le=1.0, description="Maximum t-distance threshold.")
-    sensitive_attribute: str = Field(default="Disease", description="Sensitive attribute name.")
diff --git a/src/template_code_location/dataframe_level_anonymisation/jobs.py b/src/template_code_location/dataframe_level_anonymisation/jobs.py
deleted file mode 100644
index 35c76f7..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/jobs.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from dagster import job
-from util_services.util_ops import (
-    preview_dataframe,
-    read_structured_to_df,
-    write_df_to_local,
-    read_structured_from_s3,
-    write_df_to_s3,
-    write_semistructured_to_s3,
-    read_semistructured_from_s3
-)
-
-from .ops import apply_k_anonymity, apply_l_diversity, apply_t_closeness
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION"
-})
-def k_anonymity_job():
-    org_df = read_structured_to_df()
-    anon_df, _ = apply_k_anonymity(org_df)
-    preview_dataframe(org_df)
-    write_df_to_local(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION"
-})
-def l_diversity_job():
-    org_df = read_structured_to_df()
-    anon_df, _ = apply_l_diversity(org_df)
-    preview_dataframe(org_df)
-    write_df_to_local(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION"
-})
-def t_closeness_job():
-    org_df = read_structured_to_df()
-    anon_df, _ = apply_t_closeness(org_df)
-    preview_dataframe(org_df)
-    write_df_to_local(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def k_anonymity_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df, _ = apply_k_anonymity(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def l_diversity_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df, _ = apply_l_diversity(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def t_closeness_job_s3():
-    org_df = read_structured_from_s3()
-    anon_df, _ = apply_t_closeness(org_df)
-    preview_dataframe(org_df)
-    write_df_to_s3(anon_df)
-    preview_dataframe(anon_df)
-
-@job()
-def read_write_semistructured_job_s3():
-    semistruct_data = read_semistructured_from_s3()
-    write_semistructured_to_s3(semistruct_data)
diff --git a/src/template_code_location/dataframe_level_anonymisation/ops.py b/src/template_code_location/dataframe_level_anonymisation/ops.py
deleted file mode 100644
index 93682bf..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/ops.py
+++ /dev/null
@@ -1,187 +0,0 @@
-import json
-from textwrap import dedent
-
-import pandas as pd
-from anjana.anonymity import k_anonymity, l_diversity, t_closeness
-from dagster import (
-    DagsterInvalidInvocationError,
-    MarkdownMetadataValue,
-    Out,
-    Output,
-    get_dagster_logger,
-    op,
-)
-from pycanon import anonymity
-
-from template_code_location.dataframe_level_anonymisation.config_models import (
-    KAnonymityConfiguration,
-    LDiversityConfiguration,
-    TClosenessConfiguration,
-)
-from template_code_location.dataframe_level_anonymisation.config_models.hierarchies import get_all_hierarchies
-
-
-def _calc_dataframe_metrics(df_anon, df_org, quasi_identifiers, sensitive_atttributes):
-    # --- Metrics ---
-    # Anonymization metrics
-    k_anon = anonymity.k_anonymity(df_anon, quasi_identifiers)
-    l_div = anonymity.l_diversity(df_anon, quasi_identifiers, sensitive_atttributes, True)
-    t_clos = anonymity.t_closeness(df_anon, quasi_identifiers, sensitive_atttributes, True)
-
-    # Data Utilization metrics
-    supression_rate = 1 - len(df_anon) / len(df_org)
-    grouped = df_anon.groupby(quasi_identifiers)
-    mean_equivalence_class_size = len(df_anon) / len(grouped) if len(grouped) else 0
-
-    # flake8: noqa
-    anon_report = dedent(
-        f"""
-        ### Anonymization & Data Utilization Metrics
-
-        | Metric | Value | Description |
-        |--------|-------|-------------|
-        | **k-anonymity** | `k = {k_anon}` | Minimum number of records sharing the same quasi-identifier values. |
-        | **l-diversity** | `l = {l_div}` | Diversity of sensitive attributes within each equivalence class. |
-        | **t-closeness** | `t = {round(t_clos, 2)}` | Distance between sensitive attribute distribution in a group and the overall dataset. |
-        | **Suppression rate** | `{round(supression_rate, 2)}` | Fraction of records or attributes suppressed to meet privacy requirements. |
-        | **Mean equivalence class size** | `{round(mean_equivalence_class_size, 2)}` | Average size of equivalence classes for quasi-identifiers, indicates data grouping. |
-    """
-    )
-    # flake8: enable
-    metrics = {
-        "k_anon": k_anon,
-        "l_div": l_div,
-        "t_clos": t_clos,
-        "supp_rate": supression_rate,
-        "mean_equivalence_class": mean_equivalence_class_size,
-    }
-    return anon_report, metrics
-
-
-def _validate_and_get_hierarchies(config, df: pd.DataFrame):
-    hierarchies = get_all_hierarchies()
-
-    # Dataset smaller than k
-    if len(df) < config.k:
-        raise DagsterInvalidInvocationError(
-            f"Cannot apply k-anonymity: dataset has {len(df)} records, but k={config.k}"
-        )
-
-    # Missing or incomplete generalisation hierarchies
-    for qi in config.quasi_identifiers:
-        if qi not in config.generalisation_hierarchies or not config.generalisation_hierarchies[qi]:
-            raise DagsterInvalidInvocationError(
-                f"Generalisation hierarchy for quasi-identifier '{qi}' is missing or incomplete"
-            )
-        if config.generalisation_hierarchies[qi] not in hierarchies:
-            raise DagsterInvalidInvocationError(
-                f"Generalisation hierarchy '{config.generalisation_hierarchies[qi]}' is missing in the code basis"
-            )
-
-    hier = {
-        qi: hierarchies[config.generalisation_hierarchies[qi]] for qi in config.quasi_identifiers
-    }
-    return hier
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def apply_k_anonymity(context, config: KAnonymityConfiguration, df: pd.DataFrame):
-    
-    hier = _validate_and_get_hierarchies(config, df)
-
-    data_anon = k_anonymity(
-        df, config.ident, config.quasi_identifiers, config.k, config.supp_level, hier
-    )
-    if "index" in data_anon.columns and "index" not in df.columns:
-        data_anon.drop(columns="index", inplace=True)
-    anon_report, metrics = _calc_dataframe_metrics(
-        data_anon, df, config.quasi_identifiers, config.sensitive_attributes
-    )
-    yield Output(
-        value=data_anon,
-        metadata={
-            "metric_report": MarkdownMetadataValue(anon_report),
-            "metric_json": json.dumps(metrics),
-        },
-        output_name="data",
-    )
-    yield Output(value=metrics, output_name="metrics")
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def apply_l_diversity(context, config: LDiversityConfiguration, df: pd.DataFrame):
-
-    hier = _validate_and_get_hierarchies(config, df)
-
-    data_anon = l_diversity(
-        df,
-        config.ident,
-        config.quasi_identifiers,
-        config.sensitive_attribute,
-        config.k,
-        config.l,
-        config.supp_level,
-        hier,
-    )
-    if data_anon.empty:
-        raise DagsterInvalidInvocationError(
-            "Could not tranform the data to l-diversity, empty dataset returned!"
-        )
-    anon_report, metrics = _calc_dataframe_metrics(
-        data_anon, df, config.quasi_identifiers, [config.sensitive_attribute]
-    )
-    yield Output(
-        value=data_anon,
-        metadata={
-            "metric_report": MarkdownMetadataValue(anon_report),
-            "metric_json": json.dumps(metrics),
-        },
-        output_name="data",
-    )
-    yield Output(value=metrics, output_name="metrics")
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def apply_t_closeness(context, config: TClosenessConfiguration, df: pd.DataFrame):
-    
-    hier = _validate_and_get_hierarchies(config, df)
-
-    try:
-        data_anon = t_closeness(
-            df,
-            config.ident,
-            config.quasi_identifiers,
-            config.sensitive_attribute,
-            config.k,
-            config.t,
-            config.supp_level,
-            hier,
-        )
-    except ValueError as e:
-        if "Cannot be quasi-identifiers" in str(e):
-            raise DagsterInvalidInvocationError(
-                f"T-closeness failed: k-anonymity parameter = {config.k} is too small "
-                f"for existing hierarchies of {config.quasi_identifiers} in inner k-anonymity call."
-            )
-        else:
-            # Re-raise other ValueError types with context
-            raise DagsterInvalidInvocationError(f"T-closeness failed with error: {str(e)}")
-
-    if data_anon.empty:
-        raise DagsterInvalidInvocationError(
-            f"Could not transform the data to t-closeness, empty dataset returned! "
-            f"This may indicate that the t-closeness constraint (t={config.t}) is too strict for the given data."
-        )
-
-    anon_report, metrics = _calc_dataframe_metrics(
-        data_anon, df, config.quasi_identifiers, [config.sensitive_attribute]
-    )
-    yield Output(
-        value=data_anon,
-        metadata={
-            "metric_report": MarkdownMetadataValue(anon_report),
-            "metric_json": json.dumps(metrics),
-        },
-        output_name="data",
-    )
-    yield Output(value=metrics, output_name="metrics")
diff --git a/src/template_code_location/dataframe_level_anonymisation/utils.py b/src/template_code_location/dataframe_level_anonymisation/utils.py
deleted file mode 100644
index c233c4e..0000000
--- a/src/template_code_location/dataframe_level_anonymisation/utils.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import numpy as np
-
-
-def parse_value_list(values):
-    return [int(v) if isinstance(v, str) and v.isdigit() else v for v in values]
-
-
-# Hierarchy normalization for Anjana
-def normalize_hierarchy_levels(hierarchy_dict):
-    normalized = {}
-    for column, levels in hierarchy_dict.items():
-        normalized[column] = {}
-        for level_str, mapping_list in levels.items():
-            level = int(level_str)
-            if level == 0:
-                normalized[column][level] = np.array(parse_value_list(mapping_list))
-            else:
-                normalized[column][level] = mapping_list
-    return normalized
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/__init__.py b/src/template_code_location/field_level_pseudo_anonymisation/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py
deleted file mode 100644
index 60944be..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/config_models/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from .structured_config import (  # noqa: F401
-    HashConfig,
-    EncryptConfig,
-    RedactConfig,
-    ReplaceConfig,
-    PseudoTechniqueConfig,
-    AnonymisePseudonymizeStructuredConfig,
-    DecryptConfig,
-    DepseudoTechniqueConfig,
-    DepseudonymizeStructuredConfig,
-)
-
-from .unstructured_config import (  # noqa: F401, F811
-    HashConfig,
-    EncryptConfig,
-    RedactConfig,
-    ReplaceConfig,
-    RetainConfig,
-    PseudoTechniqueConfig,
-    AnonymisePseudonymizeUnstructuredConfig,
-    DecryptConfig,
-    DepseudoTechniqueConfig,
-    DepseudonymizeUnstructuredConfig,
-)
-
-from .languages import SupportedLanguages, LanguageEnum  # noqa: F401
-
-from .pii_entities import PIIEntityEnum, PII_MAPPING  # noqa: F401
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py
deleted file mode 100644
index e3ba89e..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/config_models/languages.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from enum import Enum
-from typing import ClassVar
-
-
-class SupportedLanguages:
-    LANGUAGES: ClassVar[dict[str, str]] = {
-        "hr": "hr_HR",  # Croatian
-        "da": "da_DK",  # Danish
-        "nl": "nl_NL",  # Dutch
-        "en": "en_US",  # English
-        "fi": "fi_FI",  # Finnish
-        "fr": "fr_FR",  # French
-        "de": "de_DE",  # German
-        "el": "el_GR",  # Greek
-        "it": "it_IT",  # Italian
-        "lt": "lt_LT",  # Lithuanian
-        "pl": "pl_PL",  # Polish
-        "pt": "pt_PT",  # Portuguese
-        "ro": "ro_RO",  # Romanian
-        "sl": "sl_SI",  # Slovenian
-        "es": "es_ES",  # Spanish
-        "sv": "sv_SE",  # Swedish
-    }
-    LANGUAGE_MODELS = {
-        "en": "en_core_web_sm",
-        "it": "it_core_news_sm",
-        "de": "de_core_news_sm",
-        "fr": "fr_core_news_sm",
-        "es": "es_core_news_sm",
-        "nl": "nl_core_news_sm",
-        "da": "da_core_news_sm",
-        "sv": "sv_core_news_sm",
-        "fi": "fi_core_news_sm",
-        "pl": "pl_core_news_sm",
-        "el": "el_core_news_sm",
-        "hr": "hr_core_news_sm",
-        "lt": "lt_core_news_sm",
-        "pt": "pt_core_news_sm",
-        "ro": "ro_core_news_sm",
-        "sl": "sl_core_news_sm",
-    }
-
-    @classmethod
-    def codes(cls) -> list[str]:
-        return list(cls.LANGUAGES.keys())
-
-    @classmethod
-    def get_locale(cls, code: str) -> str:
-        return cls.LANGUAGES[code]
-
-    @classmethod
-    def get_language_model(cls, code: str) -> str:
-        return cls.LANGUAGE_MODELS[code]
-
-
-class LanguageEnum(str, Enum):
-    hr = "hr"
-    da = "da"
-    nl = "nl"
-    en = "en"
-    fi = "fi"
-    fr = "fr"
-    de = "de"
-    el = "el"
-    it = "it"
-    lt = "lt"
-    pl = "pl"
-    pt = "pt"
-    ro = "ro"
-    sl = "sl"
-    es = "es"
-    sv = "sv"
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py
deleted file mode 100644
index e730b6d..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/config_models/pii_entities.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from enum import Enum
-
-
-class PIIEntityEnum(str, Enum):
-    PERSON = "Person"
-    EMAIL = "Email"
-    CREDIT_CARD = "Credit card"
-    DATE_OF_BIRTH = "Date of birth"
-    URL = "URLs"
-    PHONE_NUMBERS = "Phone numbers"
-    CREDENTIALS = "Credentials"
-    X_SOCIAL = "X (formally known as Twitter) username"
-
-
-PII_MAPPING: dict[PIIEntityEnum, str] = {
-    PIIEntityEnum.PERSON: "NameFilth",
-    PIIEntityEnum.EMAIL: "EmailFilth",
-    PIIEntityEnum.CREDIT_CARD: "CreditCardFilth",
-    PIIEntityEnum.DATE_OF_BIRTH: "DateOfBirthFilth",
-    PIIEntityEnum.URL: "UrlFilth",
-    PIIEntityEnum.PHONE_NUMBERS: "PhoneFilth",
-    PIIEntityEnum.CREDENTIALS: "CredentialFilth",
-    PIIEntityEnum.X_SOCIAL: "TwitterFilth",
-}
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py
deleted file mode 100644
index af8abf6..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/config_models/structured_config.py
+++ /dev/null
@@ -1,110 +0,0 @@
-from typing import List, Literal, Optional, Union
-
-from dagster import Config
-from pydantic import Field as PydanticField, model_validator, field_validator
-  
-
-class HashConfig(Config):
-    type: Literal["hash"] = "hash"
-    columns: List[str] = PydanticField(default=["example_column"], description="Columns to hash")
-    algorithm: str = PydanticField(default="sha256", description="Hashing algorithm")
-
-class EncryptConfig(Config):
-    type: Literal["encrypt"] = "encrypt"
-    columns: List[str] = PydanticField(default=["example_column"], description="Columns to encrypt")
-    key_name: str = PydanticField(default="my_key", description="Key identifier used for encryption")
-
-class RedactConfig(Config):
-    type: Literal["redact"] = "redact"
-    columns: List[str] = PydanticField(default=["example_column"], description="Columns to redact")
-
-class ReplaceConfig(Config):
-    type: Literal["replace"] = "replace"
-    columns: List[str] = PydanticField(default=["example_column"], description="Columns to replace")
-    new_value: str = PydanticField(default="REPLACED", description="Replacement value")
-
-class PseudoTechniqueConfig(Config):
-    technique: Union[HashConfig, EncryptConfig, RedactConfig, ReplaceConfig] = PydanticField(
-        default={"hash": HashConfig().model_dump(exclude={"type"})},
-        discriminator="type"
-    )
-
-
-class AnonymisePseudonymizeStructuredConfig(Config):
-    used_function: List[PseudoTechniqueConfig] = PydanticField(
-        default=[{"technique": {"hash": HashConfig().model_dump(exclude={"type"})}}],
-        description=("List of functions to be used on column"),
-    )
-
-    @model_validator(mode="after")
-    def ensure_unique_columns(self):
-        column_to_techniques = self._collect_column_to_techniques()
-        duplicates = {
-            col: techs for col, techs in column_to_techniques.items() if len(techs) > 1
-        }
-
-        if duplicates:
-            formatted = "; ".join(
-                f"{col} -> {', '.join(techs)}" for col, techs in duplicates.items()
-            )
-            raise ValueError(f"Duplicate column(s) across techniques not allowed:\n{formatted}")
-
-        return self
-
-    def _collect_column_to_techniques(self):
-        """Extract column-to-techniques mapping from used_function list."""
-        column_to_techniques = {}
-        for f in self.used_function:
-            technique_type, cols = self._extract_technique_and_columns(f)
-            for col in cols:
-                column_to_techniques.setdefault(col, []).append(technique_type)
-        return column_to_techniques
-
-    def _extract_technique_and_columns(self, item):
-        """Extract technique type and columns list from a PseudoTechniqueConfig item (dict or model instance)."""
-        if isinstance(item, dict):
-            tech = item.get("technique") or {}
-            if isinstance(tech, dict):
-                if "type" in tech:
-                    return tech.get("type"), tech.get("columns") or []
-                elif len(tech) == 1:
-                    # variant-key mapping: {'hash': {...}}
-                    technique_type, inner = next(iter(tech.items()))
-                    return technique_type, inner.get("columns") or []
-            return None, []
-        else:
-            # item is a PseudoTechniqueConfig instance
-            technique_type = item.technique.type
-            cols = getattr(item.technique, "columns", [])
-            return technique_type, cols
-    
-class DecryptConfig(Config):
-    type: Literal["decrypt"] = "decrypt"
-    columns: List[str] = PydanticField(default=["example_column"], description="Columns to decrypt")
-    key_name: str = PydanticField(default="my_key", description="Key identifier used for decryption")
-    
-class DepseudoTechniqueConfig(Config):
-    technique: DecryptConfig = PydanticField(default={"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})})
-
-
-class DepseudonymizeStructuredConfig(Config):
-    used_function: List[DepseudoTechniqueConfig] = PydanticField(
-        default=[{"technique": {"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})}}],
-        description=("Decryption functions to be used on column"),
-    )
-
-    @field_validator("used_function", mode="before")
-    def _normalize_depseudo_used_function(cls, v):
-        normalized = []
-        for item in v:
-            if isinstance(item, dict):
-                normalized.append(DepseudoTechniqueConfig.model_validate(item))
-            else:
-                normalized.append(item)
-        return normalized
-
-    @model_validator(mode="after")
-    def ensure_unique_columns(self):
-        # For depseudonymize, we don't have per-column uniqueness constraints,
-        # but keep a no-op validator to preserve API parity.
-        return self
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py b/src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py
deleted file mode 100644
index abea0b0..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/config_models/unstructured_config.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from typing import List, Literal, Optional, Union
-
-from dagster import Config
-from pydantic import Field as PydanticField, model_validator, field_validator
-from .languages import LanguageEnum
-from .pii_entities import PIIEntityEnum
-
-
-class HashConfig(Config):
-    type: Literal["hash"] = "hash"
-    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to hash")
-    algorithm: str = PydanticField(default="sha256", description="Hashing algorithm")
-
-class EncryptConfig(Config):
-    type: Literal["encrypt"] = "encrypt"
-    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to encrypt")
-    key_name: str = PydanticField(default="my_key", description="Key identifier used for encryption")
-
-
-class RedactConfig(Config):
-    type: Literal["redact"] = "redact"
-    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to redact")
-
-class ReplaceConfig(Config):
-    type: Literal["replace"] = "replace"
-    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to replace")
-    new_value: str = PydanticField(default="REPLACED", description="Replacement value")
-
-class RetainConfig(Config):
-    type: Literal["retain"] = "retain"
-    pii: List[PIIEntityEnum] = PydanticField(default=[PIIEntityEnum.EMAIL.name], description="PII entities to retain")
-
-class PseudoTechniqueConfig(Config):
-    technique: Union[HashConfig, EncryptConfig, RedactConfig, ReplaceConfig, RetainConfig] = PydanticField(
-        default={"hash": HashConfig().model_dump(exclude={"type"})},
-        discriminator="type"
-    )
-
-class AnonymisePseudonymizeUnstructuredConfig(Config):
-    language: LanguageEnum = PydanticField(
-        default=LanguageEnum.en,
-        description="Language code (must be one of: hr, da, nl, en, fi, fr, de, el, it, lt, pl, pt, ro, sl, es, sv)"
-
-    )
-    used_function: List[PseudoTechniqueConfig] = PydanticField(
-        default=[{"technique": {"hash": HashConfig().model_dump(exclude={"type"})}}],
-        description=("List of functions to be used on PIIs"),
-    )
-
-    @field_validator("used_function", mode="before")
-    def _normalize_used_function(cls, v):
-        normalized = []
-        for item in v:
-            if isinstance(item, dict):
-                normalized.append(PseudoTechniqueConfig.model_validate(item))
-            else:
-                normalized.append(item)
-        return normalized
-
-    @model_validator(mode="after")
-    def ensure_unique_pii(self):
-        pii_to_techniques = self._collect_pii_to_techniques()
-        duplicates = {
-            pii: techs for pii, techs in pii_to_techniques.items() if len(techs) > 1
-        }
-
-        if duplicates:
-            formatted = "; ".join(
-                f"{pii} -> {', '.join(techs)}" for pii, techs in duplicates.items()
-            )
-            raise ValueError(f"Duplicate PII(s) across techniques not allowed:\n{formatted}")
-
-        return self
-
-    def _collect_pii_to_techniques(self):
-        """Extract PII-to-techniques mapping from used_function list."""
-        pii_to_techniques = {}
-        for f in self.used_function:
-            technique_type, piis = self._extract_technique_and_pii(f)
-            for pii in piis:
-                pii_to_techniques.setdefault(pii, []).append(technique_type)
-        return pii_to_techniques
-
-    def _extract_technique_and_pii(self, item):
-        """Extract technique type and PII list from a PseudoTechniqueConfig item (dict or model instance)."""
-        if isinstance(item, dict):
-            tech = item.get("technique") or {}
-            if isinstance(tech, dict):
-                if "type" in tech:
-                    return tech.get("type"), tech.get("pii") or tech.get("columns") or []
-                elif len(tech) == 1:
-                    # variant-key mapping: {'hash': {...}}
-                    technique_type, inner = next(iter(tech.items()))
-                    return technique_type, inner.get("pii") or inner.get("columns") or []
-            return None, []
-        else:
-            # item is a PseudoTechniqueConfig instance
-            technique_type = item.technique.type
-            piis = getattr(item.technique, "pii", []) or getattr(item.technique, "columns", [])
-            return technique_type, piis
-    
-class DecryptConfig(Config):
-    type: Literal["decrypt"] = "decrypt"
-    key_name: str = PydanticField(default="my_key", description="Key identifier used for decryption")
-    
-class DepseudoTechniqueConfig(Config):
-    technique: DecryptConfig = PydanticField(
-        default={"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})},
-    )
-
-class DepseudonymizeUnstructuredConfig(Config):
-    used_function: List[DepseudoTechniqueConfig] = PydanticField(
-        default=[{"technique": {"type": "decrypt", **DecryptConfig().model_dump(exclude={"type"})}}],
-        description=("Decryption function"),
-    )
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
deleted file mode 100644
index 0f39cfb..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py
+++ /dev/null
@@ -1,126 +0,0 @@
-from dagster import job
-from util_services.util_ops import (
-    preview_dataframe,
-    read_structured_to_df,
-    write_df_to_local,
-    write_string_to_unstructured,
-    read_unstructured_to_string,
-    preview_unstructured,
-    read_structured_from_s3,
-    write_df_to_s3,
-    read_unstructured_from_s3,
-    write_unstructured_to_s3,
-)
-from .ops import (
-    anonymize_pseudonymize_structured,
-    depseudonymize_structured,
-)
-from .unstructured_ops import (
-    anonymize_pseudonymize_unstructured,
-    depseudonymize_unstructured,
-)
-
-@job(tags={
-    "business_operation": "ANONYMISATION_PSEUDONYMISATION"
-})
-def anonymise_pseudonymise_structured_job():
-    df = read_structured_to_df()
-    preview_dataframe(df)
-    df_anon, metrics = anonymize_pseudonymize_structured(df)
-    preview_dataframe(df_anon)
-    write_df_to_local(df_anon)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION_PSEUDONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def anonymise_pseudonymise_structured_job_s3():
-    df = read_structured_from_s3()
-    preview_dataframe(df)
-    df_anon, metrics = anonymize_pseudonymize_structured(df)
-    preview_dataframe(df_anon)
-    write_df_to_s3(df_anon)
-
-
-@job(tags={
-    "business_operation": "DEPSEUDONYMISATION"
-})
-def depseudonymise_structured_job():
-    df = read_structured_to_df()
-    preview_dataframe(df)
-    df_anon, metrics = depseudonymize_structured(df)
-    preview_dataframe(df_anon)
-    write_df_to_local(df_anon)
-
-
-@job(tags={
-    "business_operation": "DEPSEUDONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def depseudonymise_structured_job_s3():
-    df = read_structured_from_s3()
-    preview_dataframe(df)
-    df_anon, metrics = depseudonymize_structured(df)
-    preview_dataframe(df_anon)
-    write_df_to_s3(df_anon)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION_PSEUDONYMISATION"
-})
-def anonymise_pseudonymise_depseudonymise_structured_job():
-    df = read_structured_to_df()
-    preview_dataframe(df)
-    df_pseduo, metrics = anonymize_pseudonymize_structured(df)
-    preview_dataframe(df_pseduo)
-    df_depseduo, metrics = depseudonymize_structured(df_pseduo)
-    preview_dataframe(df_depseduo)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION_PSEUDONYMISATION"
-})
-def anonymise_pseudonymise_unstructured_job():
-    text = read_unstructured_to_string()
-    preview_unstructured(text)
-    text_anon, metrics = anonymize_pseudonymize_unstructured(text)
-    preview_unstructured(text_anon)
-    preview_unstructured(metrics)
-    write_string_to_unstructured(text_anon)
-
-
-@job(tags={
-    "business_operation": "ANONYMISATION_PSEUDONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def anonymise_pseudonymise_unstructured_job_s3():
-    text = read_unstructured_from_s3()
-    preview_unstructured(text)
-    text_anon, metrics = anonymize_pseudonymize_unstructured(text)
-    preview_unstructured(text_anon)
-    preview_unstructured(metrics)
-    write_unstructured_to_s3(text_anon)
-
-
-@job(tags={
-    "business_operation": "DEPSEUDONYMISATION"
-})
-def depseudonymise_unstructured_job():
-    text = read_unstructured_to_string()
-    preview_unstructured(text)
-    text_anon, metrics = depseudonymize_unstructured(text)
-    preview_unstructured(text_anon)
-    write_string_to_unstructured(text_anon)
-
-
-@job(tags={
-    "business_operation": "DEPSEUDONYMISATION",
-    "resource_type": "RD_DATA"
-})
-def depseudonymise_unstructured_job_s3():
-    text = read_unstructured_from_s3()
-    preview_unstructured(text)
-    text_anon, metrics = depseudonymize_unstructured(text)
-    preview_unstructured(text_anon)
-    write_unstructured_to_s3(text_anon)
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/ops.py b/src/template_code_location/field_level_pseudo_anonymisation/ops.py
deleted file mode 100644
index a485ff9..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/ops.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import pandas as pd
-import numpy as np
-from dagster import Out, Output, op
-from cryptography.fernet import InvalidToken
-from template_code_location.field_level_pseudo_anonymisation.config_models import (
-    AnonymisePseudonymizeStructuredConfig,
-    DepseudonymizeStructuredConfig,
-)
-from template_code_location.field_level_pseudo_anonymisation.techniques import (
-    anonymisation_pseudonymisation_techniques as anon_pseudo_funcs,
-)
-import template_code_location.field_level_pseudo_anonymisation.techniques.depseudonymisation_techniques as depseudo_funcs
-from .utils import create_get_encryption_key
-
-
-def _apply_column_wise_function(config, df, funcs):
-    for used_function in config.used_function:
-        func_name = used_function.technique.type
-        columns = used_function.technique.columns
-        func = getattr(funcs, func_name)
-        params = used_function.technique.model_dump()
-        del params["type"]
-        del params["columns"]
-
-        if func_name in ["encrypt", "decrypt"]:
-            key_name = used_function.technique.key_name
-            del params["key_name"]
-            params["key"] = create_get_encryption_key(func_name, key_name)
-
-        missing = [col for col in columns if col not in df.columns]
-        if missing:
-            raise ValueError(
-                f"The following columns required by technique '{func_name}' "
-                f"are not present in the DataFrame: {', '.join(missing)}"
-            )
-
-        # Skip processing if DataFrame is empty
-        if len(df) == 0:
-            continue
-
-        for column in columns:
-            try:
-                vectorized_func = np.vectorize(lambda x: func(x, **params))
-                df[column] = vectorized_func(df[column].to_numpy())
-            except InvalidToken:
-                raise ValueError(
-                    f"Invalid Fernet token while decrypting column '{column}' "
-                    f"using key '{key_name}'. The data may not be encrypted "
-                    f"or the key may be incorrect. "
-                )
-    return df
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def anonymize_pseudonymize_structured(
-    context, config: AnonymisePseudonymizeStructuredConfig, df: pd.DataFrame
-):
-
-    df = _apply_column_wise_function(config, df, anon_pseudo_funcs)
-    yield Output(
-        value=df,
-        metadata={},
-        output_name="data",
-    )
-    yield Output(value={}, output_name="metrics")
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def depseudonymize_structured(context, config: DepseudonymizeStructuredConfig, df: pd.DataFrame):
-
-    df = _apply_column_wise_function(config, df, depseudo_funcs)
-    yield Output(
-        value=df,
-        metadata={},
-        output_name="data",
-    )
-    yield Output(value={}, output_name="metrics")
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py b/src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py
deleted file mode 100644
index 128c371..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/techniques/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .anonymisation_pseudonymisation_techniques import hash, redact, replace, encrypt  # noqa: F401
-
-from .depseudonymisation_techniques import decrypt  # noqa: F401
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py b/src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py
deleted file mode 100644
index ce15613..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/techniques/anonymisation_pseudonymisation_techniques.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import hashlib
-from cryptography.fernet import Fernet
-
-
-def hash(value: str, algorithm: str = "sha256") -> str:
-    """
-    Hash the value using the specified algorithm (default: SHA-256).
-    """
-    value = str(value)
-    hash_func = hashlib.new(algorithm)
-    hash_func.update(value.encode("utf-8"))
-    return hash_func.hexdigest()
-
-
-def redact(value: str) -> str:
-    """
-    Redact the column and return an empty string
-    """
-    return ""
-
-
-def replace(value: str, new_value) -> str:
-    """
-    Replace the value column with the provided value
-    """
-    return new_value
-
-
-def encrypt(value: str, key: bytes) -> str:
-    """
-    Encrypt the value using the provided Fernet key.
-    """
-    value = str(value)
-    f = Fernet(key)
-    return f.encrypt(value.encode()).decode()
-
-
-def retain(value: str) -> str:
-    """
-    Retain the original value without any changes.
-    """
-    return value
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py b/src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py
deleted file mode 100644
index 4e0937c..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/techniques/depseudonymisation_techniques.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from cryptography.fernet import Fernet
-
-
-def decrypt(value: str, key: bytes) -> str:
-    """
-    Decrypt a string using the provided Fernet key.
-    """
-    f = Fernet(key)
-    return f.decrypt(value.encode()).decode()
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py b/src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py
deleted file mode 100644
index f8f0ffe..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/unstructured_ops.py
+++ /dev/null
@@ -1,428 +0,0 @@
-import importlib
-import importlib.abc
-import importlib.machinery
-import re
-import sys
-import types
-
-
-# ---------------------------------------------------------------------------
-# Stub out the `transformers` and `spacy_transformers` packages before any
-# other import triggers spaCy's entry-point scan or scrubadub_spacy's runtime
-# import of spacy_transformers.pipeline_component.
-# ---------------------------------------------------------------------------
-_STUB_PACKAGES = ("transformers", "spacy_transformers")
-
-
-class _StubModule(types.ModuleType):
-    """Module that returns a dummy class for any attribute access."""
-
-    def __getattr__(self, name: str):
-        return type(name, (), {})
-
-
-class _StubFinder(importlib.abc.MetaPathFinder):
-    """Intercept any import under the stubbed packages and return a stub module."""
-
-    def find_spec(self, fullname, path=None, target=None):  # noqa: ANN001
-        for pkg in _STUB_PACKAGES:
-            if fullname == pkg or fullname.startswith(pkg + "."):
-                return importlib.machinery.ModuleSpec(fullname, _StubLoader())
-        return None
-
-
-class _StubLoader(importlib.abc.Loader):
-    def create_module(self, spec):  # noqa: ANN001
-        mod = _StubModule(spec.name)
-        mod.__path__ = []  # mark as package
-        mod.__spec__ = spec
-        return mod
-
-    def exec_module(self, module):  # noqa: ANN001
-        pass
-
-
-# Install the finder once, before scrubadub / spacy are imported.
-if not any(isinstance(f, _StubFinder) for f in sys.meta_path):
-    sys.meta_path.insert(0, _StubFinder())
-# ---------------------------------------------------------------------------
-
-
-import scrubadub  # noqa: E402
-import scrubadub_spacy  # noqa: E402
-from cryptography.fernet import InvalidToken  # noqa: E402
-from dagster import Out, Output, get_dagster_logger, op  # noqa: E402
-from scrubadub.detectors import RegexDetector  # noqa: E402
-from scrubadub.filth import CredentialFilth, NameFilth  # noqa: E402
-
-from template_code_location.field_level_pseudo_anonymisation.techniques import (
-    anonymisation_pseudonymisation_techniques as anon_pseudo_funcs,
-)
-from template_code_location.field_level_pseudo_anonymisation.techniques import (
-    depseudonymisation_techniques as depseudo_funcs,
-)
-
-from .config_models import (
-    PII_MAPPING,
-    AnonymisePseudonymizeUnstructuredConfig,
-    DepseudonymizeUnstructuredConfig,
-    PIIEntityEnum,
-    PseudoTechniqueConfig,
-    SupportedLanguages,
-)
-from .utils import create_get_encryption_key
-
-
-def _initialize_scrubber(language: str) -> scrubadub.Scrubber:
-    class SIMPLCredentialDetector(RegexDetector):
-        """
-        Remove username/password combinations from dirty ``text``.
-        """
-
-        filth_cls = CredentialFilth
-        name = "credential"
-        autoload = True
-
-        regex = re.compile(
-            r"""
-            (?:username|login|u:)\s*(?::\s*)?
-            (?P<username>[\w.\-@+]+)
-            [\s\S]{0,500}?
-            (?:password|pw|p:)\s*(?::\s*)?
-            (?P<password>[^\s]+)
-        """,
-            re.MULTILINE | re.VERBOSE | re.IGNORECASE,
-        )
-
-    locale = SupportedLanguages.get_locale(language)
-    scrubber = scrubadub.Scrubber(locale=locale)
-
-    model_name = SupportedLanguages.get_language_model(language)
-    spacy_detector = scrubadub_spacy.detectors.SpacyEntityDetector(model=model_name)
-    spacy_detector.named_entities = {
-        "PERSON",
-        "PER",
-        "ORG",
-        "persName",
-        "PRS",
-    }  # Need to set it after the constructor because scrubadub_spacy uses upper on all entries
-    spacy_detector.filth_cls_map["persName"] = NameFilth  # Required because PL uses persName
-    spacy_detector.filth_cls_map["PRS"] = NameFilth  # Required for swedish that uses PRS
-    scrubber.add_detector(spacy_detector)
-    if language in ["en", "de"]:
-        scrubber.add_detector(
-            scrubadub.detectors.DateOfBirthDetector
-        )  # add optional data of birth detector
-    scrubber.remove_detector(
-        scrubadub.detectors.CredentialDetector
-    )  # remove the not so great credentials detector and replace with custom SIMPL one
-    scrubber.add_detector(SIMPLCredentialDetector())
-    return scrubber
-
-
-def _map_filth_to_pii_enum(filth) -> PIIEntityEnum | None:
-    cls_name = filth.__class__.__name__
-    for pii_enum, filth_name in PII_MAPPING.items():
-        if filth_name == cls_name:
-            return pii_enum
-    return None
-
-
-def _get_metrics(metrics_dict: dict, language: str) -> str:
-    # Format metrics as Markdown table
-    metrics_report = f"""
-## PII Anonymization Report
-
-### Summary
-- **Total PII Detected**: {metrics_dict['total_pii_detected']}
-- **Original Length**: {metrics_dict['text_length_original']} chars
-- **Anonymized Length**: {metrics_dict['text_length_anonymised']} chars
-- **Language**: {language}
-
-### PII by Type
-| Entity Type | Count |
-|-------------|-------|
-"""
-    for pii_type, count in metrics_dict["pii_by_type"].items():
-        metrics_report += f"| {pii_type} | {count} |\n"
-
-    metrics_report += "\n### Techniques Applied\n"
-    for pii, technique in metrics_dict["techniques_applied"].items():
-        metrics_report += f"- **{pii}**: {technique}\n"
-
-    return metrics_report
-
-
-def _build_metrics_dict(
-    pii_counts: dict[str, int],
-    text: str,
-    anon_text: str,
-    technique_map: dict[PIIEntityEnum, PseudoTechniqueConfig],
-) -> dict:
-    metrics_dict = {
-        "total_pii_detected": sum(pii_counts.values()),
-        "pii_by_type": pii_counts,
-        "text_length_original": len(text),
-        "text_length_anonymised": len(anon_text),
-        "techniques_applied": {
-            pii.name: technique_map[pii].technique.type for pii in technique_map.keys()
-        },
-    }
-
-    return metrics_dict
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def anonymize_pseudonymize_unstructured(
-    context, config: AnonymisePseudonymizeUnstructuredConfig, text: str
-):
-    logger = get_dagster_logger()
-
-    if text is None or not text.strip():
-        raise ValueError("Input text cannot be None or empty")
-
-    logger.debug(
-        f"Starting unstructured PII anonymization | lang={config.language.value} "
-        f"| input_chars={len(text)}"
-    )
-
-    # --- Filth detection ---
-    try:
-        scrubber = _initialize_scrubber(config.language.value)
-        filths = list(scrubber.iter_filth(text))
-        logger.info(f"Detected {len(filths)} potential PII entities before filtering.")
-    except Exception as e:
-        logger.error(f"Scrubber initialization/detection failed | lang={config.language.value}")
-        raise RuntimeError(f"PII detection failed for language '{config.language.value}'") from e
-
-    # --- Build technique routing map ---
-    technique_map = _build_technique_map(config)
-    logger.debug(
-        "Technique map constructed: "
-        + ", ".join(f"{pii.name}->{cfg.technique.type}" for pii, cfg in technique_map.items())
-    )
-
-    replacements = []
-    key_cache = {}
-    pii_counts = {}
-
-    # --- Process filths ---
-    for idx, filth in enumerate(filths, start=1):
-        pii_enum = _map_filth_to_pii_enum(filth)
-
-        if pii_enum is None:
-            logger.debug(f"[{idx}] Skipping unknown filth class={filth.__class__.__name__}")
-            continue
-
-        start_idx, end_idx = _extract_span(filth, logger, idx)
-        if start_idx is None:
-            continue
-
-        original_value = text[start_idx:end_idx]
-        technique_cfg = technique_map.get(pii_enum)
-
-        # No technique configured
-        if technique_cfg is None:
-            _handle_missing_technique(
-                pii_enum,
-                start_idx,
-                end_idx,
-                text,
-                pii_counts,
-                replacements,
-                logger,
-                idx,
-            )
-            continue
-
-        # Apply configured technique
-        t = technique_cfg.technique
-        params = _prepare_params(t, key_cache, idx, logger)
-        replacement = _apply_technique(original_value, t.type, params, pii_enum, idx, logger)
-
-        replacements.append((start_idx, end_idx, replacement))
-        pii_counts[pii_enum.name] = pii_counts.get(pii_enum.name, 0) + 1
-
-    # --- Apply replacements ---
-    anon_text = _apply_replacements(text, replacements, logger)
-
-    logger.info(f"Anonymisation completed, total PII counts: {pii_counts}")
-
-    metrics_report = _get_metrics(
-        _build_metrics_dict(pii_counts, text, anon_text, technique_map),
-        config.language.value,
-    )
-
-    yield Output(value=anon_text, output_name="data")
-    yield Output(value=metrics_report, output_name="metrics")
-
-
-@op(out={"data": Out(), "metrics": Out()})
-def depseudonymize_unstructured(context, config: DepseudonymizeUnstructuredConfig, input_text: str):
-
-    input_restored, metrics = _apply_depseudonimisation_function(config, input_text, depseudo_funcs)
-    yield Output(
-        value=input_restored,
-        metadata={},
-        output_name="data",
-    )
-    yield Output(value=metrics, output_name="metrics")
-
-
-def _apply_depseudonimisation_function(config, input_text: str, funcs_module):
-    """
-    Searches and depseudonymizes text segments formatted as:
-        {technique:pseudonymized_value}
-    """
-
-    total_depseudo_count = 0
-    depseudonimized_text = input_text  # Initialize with input text
-
-    # Loop through each depseudonymisation technique defined in the config
-    for used_function in config.used_function:
-        func_name = used_function.technique.type
-        func = getattr(funcs_module, func_name)
-        pseudo_anon_func = ""
-
-        # Prepare parameters
-        params = used_function.technique.model_dump()
-        del params["type"]
-
-        if func_name == "decrypt":
-            key_name = used_function.technique.key_name
-            del params["key_name"]
-            pseudo_anon_func = "encrypt"
-            params["key"] = create_get_encryption_key(func_name, key_name)
-
-        # Regex pattern for this technique, e.g. {encrypt:...}
-        pattern = rf"\{{{pseudo_anon_func}:([^}}]+)\}}"
-
-        def replace_match(match):
-            nonlocal total_depseudo_count
-            pseudovalue = match.group(1)
-            total_depseudo_count += 1
-            try:
-                return func(pseudovalue, **params)
-            except InvalidToken:
-                raise ValueError(
-                    f"Invalid Fernet token while decrypting value using key '{key_name}'. "
-                    f"The data may not be encrypted or the key may be incorrect."
-                )
-            except Exception as e:
-                raise RuntimeError(f"Error during depseudonymisation with '{func_name}': {e}")
-
-        # Apply replacements for this technique
-        depseudonimized_text = re.sub(pattern, replace_match, depseudonimized_text)
-
-    yield depseudonimized_text
-    yield {"total_depseudo_count": total_depseudo_count}
-
-
-def _build_technique_map(config):
-    technique_map = {}
-    for func_cfg in config.used_function:
-        for pii in func_cfg.technique.pii:
-            technique_map[pii] = func_cfg
-    return technique_map
-
-
-def _extract_span(filth, logger, idx):
-    start_idx = getattr(filth, "beg", getattr(filth, "start", None))
-    end_idx = getattr(filth, "end", None)
-    if start_idx is None or end_idx is None:
-        logger.debug(f"[{idx}] Filth missing span attributes; skipping.")
-        return None, None
-    return start_idx, end_idx
-
-
-def _handle_missing_technique(
-    pii_enum, start_idx, end_idx, text, pii_counts, replacements, logger, idx
-):
-    original_value = text[start_idx:end_idx]
-    logger.debug(
-        f"[{idx}] PII={pii_enum.name} span=({start_idx},{end_idx}) value={original_value} "
-        f"- No technique configured, using placeholder"
-    )
-    placeholder = f"{{{{{pii_enum.name}}}}}"
-    replacements.append((start_idx, end_idx, placeholder))
-    pii_counts[pii_enum.name] = pii_counts.get(pii_enum.name, 0) + 1
-
-
-def _prepare_params(t, key_cache, idx, logger):
-    params = t.model_dump()
-    del params["type"]
-    del params["pii"]
-
-    if t.type == "encrypt":
-        try:
-            if t.key_name not in key_cache:
-                logger.debug(
-                    f"[{idx}] Retrieving/generating Vault key name={t.key_name} for encryption"
-                )
-                key_cache[t.key_name] = create_get_encryption_key("encrypt", t.key_name)
-            params["key"] = key_cache[t.key_name]
-            del params["key_name"]
-            logger.debug(f"[{idx}] Encryption key prepared")
-        except Exception as e:
-            raise RuntimeError(
-                f"Encryption key retrieval failed for key '{t.key_name}': {type(e).__name__}"
-            ) from e
-
-    return params
-
-
-def _apply_technique(original_value, t_type, params, pii_enum, idx, logger):
-    try:
-        func = getattr(anon_pseudo_funcs, t_type)
-        replacement = func(original_value, **params)
-
-        if t_type == "encrypt":
-            replacement = f"{{encrypt:{replacement}}}"
-
-        logger.debug(f"[{idx}] {t_type.capitalize()} complete")
-        return replacement
-
-    except AttributeError:
-        logger.warning(f"[{idx}] Technique '{t_type}' not recognized; inserting placeholder.")
-        return f"{{UNIMPL_{t_type}_{pii_enum.name}}}"
-
-    except Exception as e:
-        raise RuntimeError(
-            f"Technique '{t_type}' failed for PII type '{pii_enum.name}': {type(e).__name__}"
-        ) from e
-
-
-def _apply_replacements(text, replacements, logger):
-    if not replacements:
-        logger.info("No PII detected; returning original text.")
-        return text
-
-    logger.debug(f"Applying {len(replacements)} replacements to text body.")
-    replacements.sort(key=lambda r: r[0])
-
-    # Detect overlaps
-    for i in range(len(replacements) - 1):
-        if replacements[i][1] > replacements[i + 1][0]:
-            logger.warning(
-                f"Overlapping PII detected at positions "
-                f"({replacements[i][0]},{replacements[i][1]}) "
-                f"and ({replacements[i+1][0]},{replacements[i+1][1]}). "
-                f"Using first match."
-            )
-            replacements[i + 1] = (
-                replacements[i][1],
-                replacements[i + 1][1],
-                replacements[i + 1][2],
-            )
-
-    result_parts = []
-    last = 0
-    for start, end, repl in replacements:
-        if start < last:
-            continue
-        result_parts.append(text[last:start])
-        result_parts.append(repl)
-        last = end
-
-    result_parts.append(text[last:])
-    return "".join(result_parts)
diff --git a/src/template_code_location/field_level_pseudo_anonymisation/utils.py b/src/template_code_location/field_level_pseudo_anonymisation/utils.py
deleted file mode 100644
index 25ebd75..0000000
--- a/src/template_code_location/field_level_pseudo_anonymisation/utils.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import os
-import hvac
-from hvac.exceptions import InvalidPath
-from cryptography.fernet import Fernet
-
-
-def create_get_encryption_key(func_name: str, key_name: str) -> bytes:
-    client = hvac.Client(url=os.getenv("OPENBAO_URL"), token=os.getenv("OPENBAO_TOKEN"))
-
-    secret_folder = os.getenv("ENCRYPTION_KEYS_PATH")
-    secret_path = f"{secret_folder}/{key_name}" if secret_folder else key_name
-    mount_point = os.getenv("ENCRYPTION_KEYS_MOUNT_POINT")
-
-    try:
-        secret_response = client.secrets.kv.v2.read_secret_version(
-            path=secret_path, mount_point=mount_point
-        )
-        key_value = secret_response["data"]["data"]["value"]
-
-    except InvalidPath:
-        if func_name == "encrypt":
-            new_key = Fernet.generate_key().decode()
-            client.secrets.kv.v2.create_or_update_secret(
-                path=secret_path, mount_point=mount_point, secret={"value": new_key}
-            )
-            key_value = new_key
-        else:
-            raise ValueError(f"Fernet key '{key_name}' not found in Vault for decrypt.")
-    except Exception as e:
-        raise ValueError(f"Error while reading Fernet key '{key_name}': {e}")
-
-    return key_value.encode()
diff --git a/src/template_code_location/repository.py b/src/template_code_location/repository.py
index 1d0be85..94f3746 100644
--- a/src/template_code_location/repository.py
+++ b/src/template_code_location/repository.py
@@ -8,7 +8,7 @@ from util_services.sensors import (
 from util_services.custom_json_logger import simpl_json_logger
 
 # Data processing jobs
-from template_code_location.data_processing.jobs import (
+from data_processing.jobs import (
     remove_duplicates_job_s3,
     fill_missing_values_job_s3,
     standardize_categorical_values_job_s3,
@@ -21,7 +21,7 @@ from template_code_location.data_processing.jobs import (
 )
 
 # Dataframe-level anonymisation jobs
-from template_code_location.dataframe_level_anonymisation.jobs import (
+from dataframe_level_anonymisation.jobs import (
     k_anonymity_job_s3,
     l_diversity_job_s3,
     t_closeness_job_s3,
@@ -29,7 +29,7 @@ from template_code_location.dataframe_level_anonymisation.jobs import (
 )
 
 # Field-level pseudo-anonymisation jobs
-from template_code_location.field_level_pseudo_anonymisation.jobs import (
+from field_level_pseudo_anonymisation.jobs import (
     anonymise_pseudonymise_structured_job_s3,
     depseudonymise_structured_job_s3,
     anonymise_pseudonymise_unstructured_job_s3,
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/data_processing/__init__.py b/tests/data_processing/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/tests/data_processing/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/data_processing/conftest.py b/tests/data_processing/conftest.py
deleted file mode 100644
index 9eda2af..0000000
--- a/tests/data_processing/conftest.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Pytest configuration and shared fixtures."""
-
-import pytest
-import pandas as pd
-from unittest.mock import MagicMock, patch
-import sys
-from dagster import build_op_context
-
-# Mock external dependencies that might not be available in test environment
-sys.modules['spellchecker'] = MagicMock()
-
-
-@pytest.fixture
-def mock_context():
-    """Create a mock Dagster context for testing operations."""
-    context = build_op_context()
-    return context
-
-
-@pytest.fixture
-def sample_dataframe():
-    """Create a sample DataFrame for testing."""
-    return pd.DataFrame({
-        'Name': ['John Doe', 'jane smith', 'John Doe', 'bob johnson', 'John Doe'],
-        'Age': [25, 30, 25, None, 25],
-        'City': ['New York', 'los angeles', 'New York', 'chicago', 'New York'],
-        'Status': ['Active', 'INACTIVE', 'Active', 'penDing', 'Active']
-    })
-
-
-@pytest.fixture
-def sample_dataframe_with_typos():
-    """Create a sample DataFrame with typos for spell checking."""
-    return pd.DataFrame({
-        'Name': ['jon doe', 'jane smith', 'bob jonson'],
-        'Description': ['developer', 'analst', 'enginer']
-    })
-
-
-@pytest.fixture
-def empty_dataframe():
-    """Create an empty DataFrame."""
-    return pd.DataFrame()
-
-
-@pytest.fixture
-def dataframe_with_missing_values():
-    """Create a DataFrame with various missing values."""
-    return pd.DataFrame({
-        'Column1': [1, None, 3, None, 5],
-        'Column2': ['a', 'b', None, 'd', None],
-        'Column3': [None, None, None, None, None]
-    })
diff --git a/tests/data_processing/conftest_utils.py b/tests/data_processing/conftest_utils.py
deleted file mode 100644
index 19d2f59..0000000
--- a/tests/data_processing/conftest_utils.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""Configuration utilities for testing."""
-
-import os
-import sys
-
-# Add src directory to path for imports
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
diff --git a/tests/data_processing/test_config_models.py b/tests/data_processing/test_config_models.py
deleted file mode 100644
index 989054f..0000000
--- a/tests/data_processing/test_config_models.py
+++ /dev/null
@@ -1,202 +0,0 @@
-"""Unit tests for configuration models."""
-
-import pytest
-from pydantic import ValidationError
-from template_code_location.data_processing.config_models import (
-    FillMissingConfiguration,
-    ColumnsSelectConfiguration,
-    SpellCheckConfiguration,
-    AggregationConfiguration
-)
-
-
-class TestColumnsSelectConfiguration:
-    """Tests for ColumnsSelectConfiguration."""
-
-    def test_default_columns(self):
-        """Test default columns configuration."""
-        config = ColumnsSelectConfiguration()
-        assert config.columns == ['Name']
-
-    def test_custom_columns(self):
-        """Test custom columns configuration."""
-        config = ColumnsSelectConfiguration(columns=['Col1', 'Col2', 'Col3'])
-        assert config.columns == ['Col1', 'Col2', 'Col3']
-
-    def test_empty_columns_list(self):
-        """Test with empty columns list."""
-        config = ColumnsSelectConfiguration(columns=[])
-        assert config.columns == []
-
-    def test_single_column(self):
-        """Test with a single column."""
-        config = ColumnsSelectConfiguration(columns=['SingleCol'])
-        assert config.columns == ['SingleCol']
-
-    def test_columns_with_special_characters(self):
-        """Test columns with special characters."""
-        config = ColumnsSelectConfiguration(columns=['Col-1', 'Col_2', 'Col.3'])
-        assert config.columns == ['Col-1', 'Col_2', 'Col.3']
-
-    def test_duplicate_columns_are_removed(self):
-        """Verifica che i duplicati vengano rimossi mantenendo l'ordine (grazie a dict.fromkeys)."""
-        config = ColumnsSelectConfiguration(columns=['A', 'B', 'A', 'C', 'B'])
-        
-        assert config.columns == ['A', 'B', 'C']
-
-    def test_duplicate_default_behavior(self):
-        """Verifica che anche input estremi vengano gestiti correttamente."""
-        config = ColumnsSelectConfiguration(columns=['Name', 'Name', 'Name'])
-        assert config.columns == ['Name']
-
-
-class TestFillMissingConfiguration:
-    """Tests for FillMissingConfiguration."""
-
-    def test_default_fill_map(self):
-        """Test default fill map configuration."""
-        config = FillMissingConfiguration()
-        
-        assert config.fill_map == {'Age': 'UNKNOWN_AGE'}
-
-    def test_custom_fill_map(self):
-        """Test custom fill map configuration."""
-        fill_map = {'Age': '0', 'Name': 'UNKNOWN', 'City': 'N/A'}
-        config = FillMissingConfiguration(fill_map=fill_map)
-        
-        assert config.fill_map == fill_map
-
-    def test_empty_fill_map(self):
-        """Test with empty fill map."""
-        config = FillMissingConfiguration(fill_map={})
-        
-        assert config.fill_map == {}
-
-    def test_fill_map_with_numeric_values(self):
-        """Test fill map with numeric string values."""
-        fill_map = {'Age': '0', 'Score': '-1', 'Count': '999'}
-        config = FillMissingConfiguration(fill_map=fill_map)
-        
-        assert config.fill_map == fill_map
-
-    def test_fill_map_with_string_values(self):
-        """Test fill map with string values."""
-        fill_map = {'Name': 'Unknown', 'Email': 'no-email'}
-        config = FillMissingConfiguration(fill_map=fill_map)
-        
-        assert config.fill_map == fill_map
-
-    def test_fill_map_mixed_types(self):
-        """Test fill map with mixed value types (all strings)."""
-        fill_map = {'IntCol': '0', 'StrCol': 'Unknown', 'FloatCol': '0.0'}
-        config = FillMissingConfiguration(fill_map=fill_map)
-        
-        assert config.fill_map == fill_map
-
-
-class TestSpellCheckConfiguration:
-    """Tests for SpellCheckConfiguration."""
-
-    def test_default_spell_check_config(self):
-        """Test default spell check configuration."""
-        config = SpellCheckConfiguration()
-        
-        assert config.columns == ['Name']
-        assert config.language == 'en'
-
-    def test_custom_spell_check_config(self):
-        """Test custom spell check configuration."""
-        config = SpellCheckConfiguration(
-            columns=['Description', 'Notes'],
-            language='es'
-        )
-        
-        assert config.columns == ['Description', 'Notes']
-        assert config.language == 'es'
-
-    def test_spell_check_all_languages(self):
-        """Test spell check with all supported languages."""
-        supported_languages = ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl']
-        
-        for lang in supported_languages:
-            config = SpellCheckConfiguration(language=lang)
-            assert config.language == lang
-
-    def test_spell_check_invalid_language(self):
-        """Test spell check with invalid language."""
-        with pytest.raises(ValidationError):
-            SpellCheckConfiguration(language='invalid')
-
-    def test_spell_check_multiple_columns(self):
-        """Test spell check with multiple columns."""
-        columns = ['Col1', 'Col2', 'Col3', 'Col4']
-        config = SpellCheckConfiguration(columns=columns)
-        
-        assert config.columns == columns
-
-    def test_spell_check_empty_columns(self):
-        """Test spell check with empty columns list."""
-        config = SpellCheckConfiguration(columns=[])
-        
-        assert config.columns == []
-        assert config.language == 'en'
-
-    def test_spell_check_inheritance(self):
-        """Test that SpellCheckConfiguration inherits from ColumnsSelectConfiguration."""
-        config = SpellCheckConfiguration()
-        
-        assert isinstance(config, ColumnsSelectConfiguration)
-        assert hasattr(config, 'columns')
-        assert hasattr(config, 'language')
-
-    @pytest.mark.parametrize("language", ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl'])
-    def test_spell_check_languages_parametrized(self, language):
-        """Test spell check with parametrized languages."""
-        config = SpellCheckConfiguration(language=language)
-        assert config.language == language
-
-class TestAggregationConfiguration:
-    """Tests for AggregationConfiguration."""
-
-    def test_aggregation_default_config(self):
-        """Test default aggregation configuration."""
-        config = AggregationConfiguration()
-        
-        assert config.columns == ['Name']
-        assert config.operation == 'sum'
-
-    @pytest.mark.parametrize("op", ["sum", "mean", "min", "max", "count"])
-    def test_aggregation_valid_operations(self, op):
-        """Test all allowed aggregation operations."""
-        config = AggregationConfiguration(operation=op)
-        assert config.operation == op
-
-    def test_aggregation_invalid_operation(self):
-        """Test that an invalid operation raises a ValidationError."""
-        with pytest.raises(ValidationError) as excinfo:
-            AggregationConfiguration(operation="invalid_op")
-        
-        assert "Invalid aggregation operation 'invalid_op'" in str(excinfo.value)
-
-    def test_aggregation_custom_columns(self):
-        """Test aggregation with custom columns."""
-        config = AggregationConfiguration(columns=['Price', 'Quantity'], operation='mean')
-        
-        assert config.columns == ['Price', 'Quantity']
-        assert config.operation == 'mean'
-
-    def test_aggregation_inheritance(self):
-        """Test that AggregationConfiguration inherits from ColumnsSelectConfiguration."""
-        config = AggregationConfiguration()
-        
-        assert isinstance(config, ColumnsSelectConfiguration)
-        assert hasattr(config, 'columns')
-        assert hasattr(config, 'operation')
-
-    def test_aggregation_model_dump(self):
-        """Test that model_dump contains all expected fields (useful for the Dagster op)."""
-        config = AggregationConfiguration(columns=['Value'], operation='max')
-        dump = config.model_dump()
-        
-        assert dump['columns'] == ['Value']
-        assert dump['operation'] == 'max'
diff --git a/tests/data_processing/test_integration.py b/tests/data_processing/test_integration.py
deleted file mode 100644
index c9d01eb..0000000
--- a/tests/data_processing/test_integration.py
+++ /dev/null
@@ -1,185 +0,0 @@
-"""Integration tests for data processing jobs."""
-
-import pytest
-import pandas as pd
-from unittest.mock import patch, MagicMock
-from template_code_location.data_processing.ops import (
-    remove_duplicates,
-    fill_missing_values,
-    standardize_categorical_values,
-    correct_typos
-)
-from template_code_location.data_processing.config_models import (
-    FillMissingConfiguration,
-    ColumnsSelectConfiguration,
-    SpellCheckConfiguration
-)
-
-
-class TestPipelineIntegration:
-    """Integration tests for data processing pipeline."""
-
-    def test_pipeline_remove_duplicates_then_standardize(self, mock_context):
-        """Test pipeline: remove duplicates then standardize."""
-        df = pd.DataFrame({
-            'Name': ['  JOHN DOE  ', 'jane smith', '  JOHN DOE  ', 'bob johnson'],
-            'City': ['NEW YORK', 'los angeles', 'NEW YORK', 'chicago']
-        })
-        
-        # Step 1: Remove duplicates
-        df_no_dupes = remove_duplicates(mock_context, df)
-        assert df_no_dupes.shape[0] == 3
-        
-        # Step 2: Standardize
-        config = ColumnsSelectConfiguration(columns=['Name', 'City'])
-        df_standardized = standardize_categorical_values(mock_context, config, df_no_dupes)
-        
-        assert df_standardized['Name'].iloc[0] == 'john doe'
-        assert df_standardized['City'].iloc[0] == 'new york'
-
-    def test_pipeline_fill_missing_then_standardize(self, mock_context):
-        """Test pipeline: fill missing values then standardize."""
-        df = pd.DataFrame({
-            'Category': ['  ACTIVE  ', None, '  PENDING  '],
-            'Value': ['1', '2', None]
-        })
-        
-        # Step 1: Fill missing values
-        fill_config = FillMissingConfiguration(fill_map={'Value': '0'})
-        df_filled = fill_missing_values(mock_context, fill_config, df)
-        
-        # Step 2: Standardize
-        std_config = ColumnsSelectConfiguration(columns=['Category'])
-        df_standardized = standardize_categorical_values(mock_context, std_config, df_filled)
-        
-        assert df_standardized['Category'].iloc[0] == 'active'
-        assert df_filled['Value'].iloc[2] == '0'
-
-    def test_pipeline_all_operations(self, mock_context):
-        """Test complete pipeline with all operations."""
-        df = pd.DataFrame({
-            'Name': ['  john doe  ', 'JANE SMITH', '  john doe  ', None],
-            'Value': ['1', None, '1', '2']
-        })
-        
-        # Step 1: Remove duplicates
-        df = remove_duplicates(mock_context, df)
-        assert df.shape[0] == 3
-        
-        # Step 2: Fill missing
-        fill_config = FillMissingConfiguration(fill_map={'Value': '0'})
-        df = fill_missing_values(mock_context, fill_config, df)
-        assert df['Value'].isna().sum() == 0
-        
-        # Step 3: Standardize
-        std_config = ColumnsSelectConfiguration(columns=['Name'])
-        df = standardize_categorical_values(mock_context, std_config, df)
-        
-        assert df['Name'].iloc[0] == 'john doe'
-
-    def test_pipeline_with_large_dataset(self, mock_context):
-        """Test pipeline performance with larger dataset."""
-        # Create larger dataset
-        size = 1000
-        df = pd.DataFrame({
-            'ID': list(range(size)),
-            'Name': ['User_' + str(i % 50) for i in range(size)],
-            'Status': ['ACTIVE', 'INACTIVE', 'PENDING'] * (size // 3) + ['ACTIVE'] * (size % 3),
-            'Score': [i % 100 for i in range(size)]
-        })
-        
-        # Add some duplicates
-        df = pd.concat([df, df.head(100)], ignore_index=True)
-        
-        # Process
-        df_cleaned = remove_duplicates(mock_context, df)
-        
-        assert df_cleaned.shape[0] == 1000
-        assert df_cleaned.shape[1] == 4
-
-
-class TestErrorHandling:
-    """Tests for error handling and edge cases."""
-
-    def test_operation_with_corrupted_data(self, mock_context):
-        """Test operations with corrupted/unusual data."""
-        df = pd.DataFrame({
-            'Col': [float('nan'), float('inf'), -float('inf'), 0, 1, 2]
-        })
-        
-        # Should handle special float values
-        result = remove_duplicates(mock_context, df)
-        assert result.shape[0] > 0
-
-    def test_operation_preserves_index(self, mock_context):
-        """Test that index is handled correctly."""
-        df = pd.DataFrame(
-            {'Col': [1, 2, 1, 3]},
-            index=['a', 'b', 'c', 'd']
-        )
-        
-        result = remove_duplicates(mock_context, df)
-        # Index may be reset, so just check shape
-        assert result.shape[0] == 3
-
-    def test_standardize_with_unicode_characters(self, mock_context):
-        """Test standardization with unicode characters."""
-        df = pd.DataFrame({
-            'Name': ['José', 'François', 'Müller']
-        })
-        
-        config = ColumnsSelectConfiguration(columns=['Name'])
-        result = standardize_categorical_values(mock_context, config, df)
-        
-        # Should handle unicode correctly
-        assert result.shape[0] == 3
-
-    def test_fill_with_same_key_multiple_times(self, mock_context):
-        """Test filling when fill_map has multiple entries."""
-        df = pd.DataFrame({
-            'A': ['1', None, '3'],
-            'B': [None, None, 'c'],
-            'C': [None, '2', None]
-        })
-        
-        config = FillMissingConfiguration(fill_map={
-            'A': '-1',
-            'B': 'EMPTY',
-            'C': '0'
-        })
-        
-        result = fill_missing_values(mock_context, config, df)
-        
-        assert result.loc[1, 'A'] == '-1'
-        assert result.loc[0, 'B'] == 'EMPTY'
-        assert result.loc[0, 'C'] == '0'
-
-
-class TestDataTypePreservation:
-    """Tests to ensure data types are preserved appropriately."""
-
-    def test_remove_duplicates_preserves_dtypes(self, mock_context):
-        """Test that remove_duplicates preserves column data types."""
-        df = pd.DataFrame({
-            'int32': pd.array([1, 2, 1], dtype='int32'),
-            'float64': pd.array([1.5, 2.5, 1.5], dtype='float64'),
-            'str': ['a', 'b', 'a']
-        })
-        
-        result = remove_duplicates(mock_context, df)
-        
-        assert result['int32'].dtype == df['int32'].dtype
-        assert result['float64'].dtype == df['float64'].dtype
-
-    def test_fill_missing_preserves_column_types_where_possible(self, mock_context):
-        """Test that fill_missing handles type preservation."""
-        df = pd.DataFrame({
-            'A': pd.array(['1', None, '3'], dtype='string'),
-            'B': ['x', 'y', 'z']
-        })
-        
-        config = FillMissingConfiguration(fill_map={'A': '0'})
-        result = fill_missing_values(mock_context, config, df)
-        
-        assert result['A'].loc[1] == '0'
-        assert result['B'].dtype == df['B'].dtype
diff --git a/tests/data_processing/test_jobs.py b/tests/data_processing/test_jobs.py
deleted file mode 100644
index 5373f7c..0000000
--- a/tests/data_processing/test_jobs.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from template_code_location.data_processing.jobs import (
-    remove_duplicates_job_s3,
-    fill_missing_values_job_s3,
-    standardize_categorical_values_job_s3,
-    correct_typos_job_s3,
-    normalize_numeric_min_max_job_s3,
-    normalize_datetime_job_s3,
-    normalize_coordinates_job_s3,
-    add_global_aggregations_job_s3
-)
-
-
-def test_remove_duplicates_job_s3_is_callable():
-    """Test remove_duplicates_job_s3 is a valid Dagster job"""
-    assert callable(remove_duplicates_job_s3)
-    assert hasattr(remove_duplicates_job_s3, 'execute_in_process')
-
-
-def test_fill_missing_values_job_s3_is_callable():
-    """Test fill_missing_values_job_s3 is a valid Dagster job"""
-    assert callable(fill_missing_values_job_s3)
-    assert hasattr(fill_missing_values_job_s3, 'execute_in_process')
-
-
-def test_standardize_categorical_values_job_s3_is_callable():
-    """Test standardize_categorical_values_job_s3 is a valid Dagster job"""
-    assert callable(standardize_categorical_values_job_s3)
-    assert hasattr(standardize_categorical_values_job_s3, 'execute_in_process')
-
-
-def test_correct_typos_job_s3_is_callable():
-    """Test correct_typos_job_s3 is a valid Dagster job"""
-    assert callable(correct_typos_job_s3)
-    assert hasattr(correct_typos_job_s3, 'execute_in_process')
-
-
-def test_normalize_numeric_min_max_job_s3_is_callable():
-    """Test normalize_numeric_min_max_job_s3 is a valid Dagster job"""
-    assert callable(normalize_numeric_min_max_job_s3)
-    assert hasattr(normalize_numeric_min_max_job_s3, 'execute_in_process')
-
-
-def test_normalize_datetime_job_s3_is_callable():
-    """Test normalize_datetime_job_s3 is a valid Dagster job"""
-    assert callable(normalize_datetime_job_s3)
-    assert hasattr(normalize_datetime_job_s3, 'execute_in_process')
-
-def test_normalize_coordinates_job_s3_is_callable():
-    """Test normalize_coordinates_job_s3 is a valid Dagster job"""
-    assert callable(normalize_coordinates_job_s3)
-    assert hasattr(normalize_coordinates_job_s3, 'execute_in_process')
-
-def test_add_global_aggregations_job_s3_is_callable():
-    """Test add_global_aggregations_job_s3 is a valid Dagster job"""
-    assert callable(add_global_aggregations_job_s3)
-    assert hasattr(add_global_aggregations_job_s3, 'execute_in_process')
diff --git a/tests/data_processing/test_ops.py b/tests/data_processing/test_ops.py
deleted file mode 100644
index def913b..0000000
--- a/tests/data_processing/test_ops.py
+++ /dev/null
@@ -1,700 +0,0 @@
-"""Unit tests for data processing operations."""
-
-import pytest
-import pandas as pd
-from template_code_location.data_processing.ops import (
-    remove_duplicates,
-    fill_missing_values,
-    standardize_categorical_values,
-    correct_typos,
-    normalize_datetime,
-    normalize_numeric_min_max,
-    normalize_coordinates,
-    add_global_aggregations
-)
-from template_code_location.data_processing.config_models import (
-    FillMissingConfiguration,
-    ColumnsSelectConfiguration,
-    SpellCheckConfiguration,
-    AggregationConfiguration,
-    CoordinatesNormalizationConfiguration
-)
-
-
-class TestRemoveDuplicates:
-    """Tests for the remove_duplicates operation."""
-
-    def test_remove_duplicates_basic(self, mock_context, sample_dataframe):
-        """Test basic duplicate removal."""
-        result = remove_duplicates(mock_context, sample_dataframe)
-        
-        # Should have 3 unique rows (john doe appears 3x, jane smith 1x, bob johnson 1x)
-        assert result.shape[0] == 3
-        assert len(result) < len(sample_dataframe)
-
-    def test_remove_duplicates_no_duplicates(self, mock_context):
-        """Test remove_duplicates when there are no duplicates."""
-        df = pd.DataFrame({
-            'A': [1, 2, 3],
-            'B': ['x', 'y', 'z']
-        })
-        result = remove_duplicates(mock_context, df)
-        
-        assert result.shape[0] == 3
-        pd.testing.assert_frame_equal(result, df)
-
-    def test_remove_duplicates_all_duplicates(self, mock_context):
-        """Test remove_duplicates when all rows are identical."""
-        df = pd.DataFrame({
-            'A': [1, 1, 1],
-            'B': ['x', 'x', 'x']
-        })
-        result = remove_duplicates(mock_context, df)
-        
-        assert result.shape[0] == 1
-
-    def test_remove_duplicates_empty_dataframe(self, mock_context, empty_dataframe):
-        """Test remove_duplicates with empty DataFrame."""
-        result = remove_duplicates(mock_context, empty_dataframe)
-        
-        assert result.shape[0] == 0
-        assert result.shape[1] == 0
-
-    def test_remove_duplicates_preserves_data_types(self, mock_context):
-        """Test that remove_duplicates preserves data types."""
-        df = pd.DataFrame({
-            'int_col': [1, 2, 1],
-            'str_col': ['a', 'b', 'a'],
-            'float_col': [1.5, 2.5, 1.5]
-        })
-        result = remove_duplicates(mock_context, df)
-        
-        assert result['int_col'].dtype == df['int_col'].dtype
-        assert result['str_col'].dtype == df['str_col'].dtype
-        assert result['float_col'].dtype == df['float_col'].dtype
-
-
-class TestFillMissingValues:
-    """Tests for the fill_missing_values operation."""
-
-    def test_fill_missing_values_basic(self, mock_context, dataframe_with_missing_values):
-        """Test basic missing value filling."""
-        config = FillMissingConfiguration(fill_map={'Column1': '0', 'Column2': 'N/A'})
-        result = fill_missing_values(mock_context, config, dataframe_with_missing_values)
-        
-        # Check that no NaN values remain
-        assert result['Column1'].isna().sum() == 0
-        assert result['Column2'].isna().sum() == 0
-
-    def test_fill_missing_values_with_different_values(self, mock_context):
-        """Test filling with different replacement values."""
-        df = pd.DataFrame({
-            'A': [1, None, 3],
-            'B': [None, 'b', 'c']
-        })
-        config = FillMissingConfiguration(fill_map={'A': '-1', 'B': 'UNKNOWN'})
-        result = fill_missing_values(mock_context, config, df)
-        
-        assert result.loc[1, 'A'] == '-1'
-        assert result.loc[0, 'B'] == 'UNKNOWN'
-
-    def test_fill_missing_values_partial_columns(self, mock_context):
-        """Test filling only specified columns."""
-        df = pd.DataFrame({
-            'A': [1, None, 3],
-            'B': [None, 'b', 'c']
-        })
-        config = FillMissingConfiguration(fill_map={'A': '999'})
-        result = fill_missing_values(mock_context, config, df)
-        
-        assert result.loc[1, 'A'] == '999'
-        assert pd.isna(result.loc[0, 'B'])  # B should still have NaN
-
-    def test_fill_missing_values_no_missing(self, mock_context):
-        """Test when there are no missing values."""
-        df = pd.DataFrame({
-            'A': ['1', '2', '3'],
-            'B': ['a', 'b', 'c']
-        })
-        config = FillMissingConfiguration(fill_map={'A': '0'})
-        result = fill_missing_values(mock_context, config, df)
-        
-        pd.testing.assert_frame_equal(result, df)
-
-    def test_fill_missing_values_empty_dataframe(self, mock_context, empty_dataframe):
-        """Test with empty DataFrame."""
-        config = FillMissingConfiguration(fill_map={})
-        result = fill_missing_values(mock_context, config, empty_dataframe)
-        
-        assert result.shape[0] == 0
-
-
-class TestStandardizeCategoricalValues:
-    """Tests for the standardize_categorical_values operation."""
-
-    def test_standardize_categorical_basic(self, mock_context, sample_dataframe):
-        """Test basic categorical standardization."""
-        config = ColumnsSelectConfiguration(columns=['Name', 'City', 'Status'])
-        result = standardize_categorical_values(mock_context, config, sample_dataframe)
-        
-        # Check that values are lowercase and stripped
-        assert result['Name'].iloc[0] == 'john doe'
-        assert result['City'].iloc[1] == 'los angeles'
-        assert result['Status'].iloc[1] == 'inactive'
-
-    def test_standardize_categorical_single_column(self, mock_context):
-        """Test standardization on a single column."""
-        df = pd.DataFrame({
-            'City': ['  NEW YORK  ', 'LOS ANGELES', '  chicago  ']
-        })
-        config = ColumnsSelectConfiguration(columns=['City'])
-        result = standardize_categorical_values(mock_context, config, df)
-        
-        assert result['City'].iloc[0] == 'new york'
-        assert result['City'].iloc[1] == 'los angeles'
-        assert result['City'].iloc[2] == 'chicago'
-
-    def test_standardize_categorical_missing_column(self, mock_context, sample_dataframe):
-        """Test with non-existent column (should skip)."""
-        config = ColumnsSelectConfiguration(columns=['NonExistent', 'Name'])
-        result = standardize_categorical_values(mock_context, config, sample_dataframe)
-        
-        # Should process 'Name' column without error
-        assert result['Name'].iloc[0] == 'john doe'
-
-    def test_standardize_categorical_with_missing_values(self, mock_context):
-        """Test standardization with missing values."""
-        df = pd.DataFrame({
-            'Category': ['  ACTIVE  ', None, '  pending  ']
-        })
-        config = ColumnsSelectConfiguration(columns=['Category'])
-        result = standardize_categorical_values(mock_context, config, df)
-        
-        assert result['Category'].iloc[0] == 'active'
-        assert result['Category'].iloc[1] == ''
-        assert result['Category'].iloc[2] == 'pending'
-
-    def test_standardize_categorical_empty_dataframe(self, mock_context, empty_dataframe):
-        """Test with empty DataFrame."""
-        config = ColumnsSelectConfiguration(columns=['A', 'B'])
-        result = standardize_categorical_values(mock_context, config, empty_dataframe)
-        
-        assert result.shape[0] == 0
-
-    def test_standardize_categorical_numeric_columns(self, mock_context):
-        """Test that numeric columns are converted to strings."""
-        df = pd.DataFrame({
-            'NumCol': [1, 2, 3]
-        })
-        config = ColumnsSelectConfiguration(columns=['NumCol'])
-        result = standardize_categorical_values(mock_context, config, df)
-        
-        assert result['NumCol'].iloc[0] == '1'
-        assert isinstance(result['NumCol'].iloc[0], str)
-
-
-class TestCorrectTypos:
-    """Tests for the correct_typos operation."""
-
-    def test_correct_typos_basic(self, mock_context):
-        """Test basic typo correction."""
-        df = pd.DataFrame({
-            'Name': ['jon', 'jayne', 'bob']
-        })
-        config = SpellCheckConfiguration(columns=['Name'], language='en')
-        result = correct_typos(mock_context, config, df)
-        
-        # Result should have corrections applied
-        assert result.shape[0] == 3
-
-    def test_correct_typos_missing_column(self, mock_context):
-        """Test with non-existent column (should skip)."""
-        df = pd.DataFrame({
-            'Name': ['jon', 'jayne']
-        })
-        config = SpellCheckConfiguration(columns=['NonExistent'], language='en')
-        result = correct_typos(mock_context, config, df)
-        
-        # Should not raise error, just skip
-        pd.testing.assert_frame_equal(result, df)
-
-    def test_correct_typos_with_missing_values(self, mock_context):
-        """Test typo correction with missing values."""
-        df = pd.DataFrame({
-            'Text': ['helo', '', 'wrld']
-        })
-        config = SpellCheckConfiguration(columns=['Text'], language='en')
-        result = correct_typos(mock_context, config, df)
-        
-        # Empty strings should be preserved
-        assert result.loc[1, 'Text'] == ''
-
-    def test_correct_typos_empty_dataframe(self, mock_context, empty_dataframe):
-        """Test with empty DataFrame."""
-        config = SpellCheckConfiguration(columns=['A'], language='en')
-        result = correct_typos(mock_context, config, empty_dataframe)
-        
-        assert result.shape[0] == 0
-
-    def test_correct_typos_different_languages(self, mock_context):
-        """Test typo correction with different languages."""
-        df = pd.DataFrame({
-            'Text': ['ciao', 'mondo']
-        })
-        
-        for lang in ['en', 'es', 'it']:
-            config = SpellCheckConfiguration(columns=['Text'], language=lang)
-            result = correct_typos(mock_context, config, df)
-            
-            # Should process without error
-            assert result.shape[0] == 2
-
-    def test_correct_typos_numeric_values(self, mock_context):
-        """Test typo correction on numeric values converted to strings."""
-        df = pd.DataFrame({
-            'Values': [123, 456, 789]
-        })
-        config = SpellCheckConfiguration(columns=['Values'], language='en')
-        result = correct_typos(mock_context, config, df)
-        
-        # Numeric values should be converted to string and processed
-        assert result.shape[0] == 3
-
-class TestNormalizeDatetime:
-    """Tests for the normalize_datetime operation."""
-
-    def test_normalize_datetime_basic(self, mock_context):
-        """Test basic datetime normalization to ISO format."""
-        df = pd.DataFrame({
-            'date_col': ['2023-01-01 10:00:00', '2023-12-31T23:59:59']
-        })
-        
-        config = ColumnsSelectConfiguration(columns=['date_col'])
-        
-        result = normalize_datetime(mock_context, config, df.copy())
-
-        assert 'date_col_iso' in result.columns
-        assert result['date_col_iso'].iloc[0] == '2023-01-01T10:00:00Z'
-        assert result['date_col_iso'].iloc[1] == '2023-12-31T23:59:59Z'
-
-    def test_normalize_datetime_missing_column(self, mock_context, sample_dataframe):
-        """Test behavior when a configured column is missing in the DataFrame."""
-        config = ColumnsSelectConfiguration(columns=['non_existent_column'])
-        
-        result = normalize_datetime(mock_context, config, sample_dataframe.copy())
-
-        pd.testing.assert_frame_equal(result, sample_dataframe)
-
-    def test_normalize_datetime_unparseable_values(self, mock_context):
-        """Test column with values that cannot be parsed as dates."""
-        df = pd.DataFrame({
-            'invalid_col': ['not-a-date', 'completely-random-text']
-        })
-        config = ColumnsSelectConfiguration(columns=['invalid_col'])
-        
-        result = normalize_datetime(mock_context, config, df.copy())
-
-        assert 'invalid_col_iso' not in result.columns
-
-    def test_normalize_datetime_mixed_and_nulls(self, mock_context):
-        """Test column with mixed valid dates, invalid dates, and NaNs."""
-        df = pd.DataFrame({
-            'mixed_col': ['2023-05-01', None, 'invalid-date']
-        })
-        config = ColumnsSelectConfiguration(columns=['mixed_col'])
-        
-        result = normalize_datetime(mock_context, config, df.copy())
-
-        assert 'mixed_col_iso' in result.columns
-        assert result['mixed_col_iso'].iloc[0] == '2023-05-01T00:00:00Z'
-        
-        assert result['mixed_col_iso'].iloc[1] == ""
-        assert result['mixed_col_iso'].iloc[2] == ""
-
-    def test_normalize_datetime_empty_dataframe(self, mock_context, empty_dataframe):
-        """Test with an empty DataFrame."""
-        config = ColumnsSelectConfiguration(columns=['some_col'])
-        
-        result = normalize_datetime(mock_context, config, empty_dataframe)
-        
-        assert result.empty
-
-    def test_normalize_datetime_epoch_only(self, mock_context, capsys):
-        """If parsing a column yields only the Unix epoch date, it should be skipped."""
-        df = pd.DataFrame({
-            'weird_col': ['0', 0, '0000', '']
-        })
-
-        config = ColumnsSelectConfiguration(columns=['weird_col'])
-
-        result = normalize_datetime(mock_context, config, df.copy())
-
-        assert 'weird_col_iso' not in result.columns
-
-        captured = capsys.readouterr()
-        assert "all normalized values are '1970-01-01'" in captured.err
-
-    def test_normalize_datetime_all_1970_skipped(self, mock_context, capsys):
-        """If all formatted values are '1970-01-01', the column should be skipped with a warning."""
-        df = pd.DataFrame({
-            'ts_col': ['1970-01-01 05:30:00', '1970-01-01 12:00:00']
-        })
-
-        config = ColumnsSelectConfiguration(columns=['ts_col'])
-
-        result = normalize_datetime(mock_context, config, df.copy())
-
-        assert 'ts_col_iso' not in result.columns
-
-        captured = capsys.readouterr()
-        assert "all normalized values are '1970-01-01'" in captured.err
-
-    def test_normalize_datetime_integer_age_column_skipped(self, mock_context, capsys):
-        """If an integer column like 'age' is passed, all values become 1970-01-01 and should be skipped."""
-        df = pd.DataFrame({
-            'age': [66, 45, 40, 43, 20, 26, 69, 21, 46]
-        })
-
-        config = ColumnsSelectConfiguration(columns=['age'])
-
-        result = normalize_datetime(mock_context, config, df.copy())
-
-        assert 'age_iso' not in result.columns
-
-        captured = capsys.readouterr()
-        assert "all normalized values are '1970-01-01'" in captured.err
-
-class TestNormalizeNumericMinMax:
-    """Tests for the normalize_numeric_min_max operation."""
-
-    def test_normalize_numeric_basic(self, mock_context):
-        """Test standard min-max normalization between 0 and 1."""
-        df = pd.DataFrame({
-            'score': [10, 20, 30, 40, 50]
-        })
-        config = ColumnsSelectConfiguration(columns=['score'])
-        
-        result = normalize_numeric_min_max(mock_context, config, df.copy())
-
-        assert 'score_norm' in result.columns
-        assert result['score_norm'].min() == 0.0
-        assert result['score_norm'].max() == 1.0
-        
-        assert result['score_norm'].iloc[2] == 0.5
-
-    def test_normalize_numeric_missing_column(self, mock_context):
-        """Test skipping of non-existent columns."""
-        df = pd.DataFrame({'existing': [1, 2, 3]})
-        config = ColumnsSelectConfiguration(columns=['missing_col'])
-        
-        result = normalize_numeric_min_max(mock_context, config, df.copy())
-
-        assert 'missing_col_norm' not in result.columns
-
-    def test_normalize_numeric_constant_values(self, mock_context):
-        """Test skipping when min == max to avoid division by zero."""
-        df = pd.DataFrame({
-            'constant': [10, 10, 10]
-        })
-        config = ColumnsSelectConfiguration(columns=['constant'])
-        
-        result = normalize_numeric_min_max(mock_context, config, df.copy())
-
-        assert 'constant_norm' not in result.columns
-
-    def test_normalize_numeric_with_nans(self, mock_context):
-        """Test normalization with NaN values (pandas min/max ignore NaNs by default)."""
-        df = pd.DataFrame({
-            'with_nans': [10, None, 50]
-        })
-        config = ColumnsSelectConfiguration(columns=['with_nans'])
-        
-        result = normalize_numeric_min_max(mock_context, config, df.copy())
-
-        assert 'with_nans_norm' in result.columns
-        assert result['with_nans_norm'].iloc[0] == 0.0
-        assert result['with_nans_norm'].iloc[2] == 1.0
-        assert pd.isna(result['with_nans_norm'].iloc[1])
-
-    def test_normalize_numeric_multiple_columns(self, mock_context):
-        """Test processing multiple columns in one call."""
-        df = pd.DataFrame({
-            'A': [1, 2],
-            'B': [10, 20]
-        })
-        config = ColumnsSelectConfiguration(columns=['A', 'B'])
-        
-        result = normalize_numeric_min_max(mock_context, config, df.copy())
-
-        assert 'A_norm' in result.columns
-        assert 'B_norm' in result.columns
-
-class TestNormalizeCoordinates:
-    """Tests for the normalize_coordinates operation."""
-
-    def test_normalize_coordinates_basic(self, mock_context):
-        """Test rounding and basic coordinate normalization."""
-        df = pd.DataFrame({
-            'lat': [45.123456, 46.0],
-            'lon': [9.123456, 10.0]
-        })
-        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
-        
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert result['lat'].iloc[0] == 45.1235
-        assert result['lon'].iloc[0] == 9.1235
-        
-        assert len(result) == 2
-
-    def test_normalize_coordinates_filtering(self, mock_context):
-        """Test filtering of out-of-range coordinates."""
-        df = pd.DataFrame({
-            'lat': [45.0, 100.0, -91.0, 0.0],  # 100 e -91 sono out of range
-            'lon': [9.0, 0.0, 0.0, 200.0]      # 200 è out of range
-        })
-        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
-        
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert len(result) == 1
-        assert result['lat'].iloc[0] == 45.0
-
-    def test_normalize_coordinates_invalid_types(self, mock_context):
-        """Test conversion of strings to numeric and handling of NaNs."""
-        df = pd.DataFrame({
-            'lat': ["45.5", "invalid", None],
-            'lon': ["9.5", "10.0", "11.0"]
-        })
-        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
-        
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert len(result) == 1
-        assert isinstance(result['lat'].iloc[0], float)
-
-    def test_normalize_coordinates_empty_df(self, mock_context, empty_dataframe):
-        """Test with an empty DataFrame."""
-        
-        df = pd.DataFrame(columns=['lat', 'lon'])
-        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
-        
-        result = normalize_coordinates(mock_context, config, df)
-        
-        assert len(result) == 0
-        assert result.empty
-
-    def test_normalize_coordinates_default_config(self, mock_context):
-        """Test that normalize_coordinates uses default 'lat'/'lon' columns when no config is provided."""
-        df = pd.DataFrame({
-            'lat': [45.123456, 46.0],
-            'lon': [9.123456, 10.0]
-        })
-        config = CoordinatesNormalizationConfiguration()
-
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert result['lat'].iloc[0] == 45.1235
-        assert result['lon'].iloc[0] == 9.1235
-        assert len(result) == 2
-
-    def test_normalize_coordinates_null_config_values(self, mock_context):
-        """Test that null lat/lon column names fall back to defaults ('lat'/'lon')."""
-        df = pd.DataFrame({
-            'lat': [45.123456, 46.0],
-            'lon': [9.123456, 10.0]
-        })
-        config = CoordinatesNormalizationConfiguration(latColumn=None, lonColumn=None)
-
-        assert config.latColumn == "lat"
-        assert config.lonColumn == "lon"
-
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert result['lat'].iloc[0] == 45.1235
-        assert result['lon'].iloc[0] == 9.1235
-        assert len(result) == 2
-
-    def test_normalize_coordinates_dms_degree_symbol(self, mock_context):
-        """Test DMS parsing with degree/minute/second symbols like 40°26'46\"N."""
-        df = pd.DataFrame({
-            'lat': ["40°26'46\"N", "51°30'26\"N"],
-            'lon': ["79°58'56\"W", "0°7'39\"W"]
-        })
-        config = CoordinatesNormalizationConfiguration(
-            latColumn='lat', lonColumn='lon'
-        )
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert len(result) == 2
-        # 40°26'46"N ≈ 40.4461
-        assert abs(result['lat'].iloc[0] - 40.4461) < 0.001
-        # 79°58'56"W ≈ -79.9822
-        assert abs(result['lon'].iloc[0] - (-79.9822)) < 0.001
-
-    def test_normalize_coordinates_dms_spaced_format(self, mock_context):
-        """Test DMS parsing with space-separated format like '40 26 46 N'."""
-        df = pd.DataFrame({
-            'lat': ["40 26 46 N"],
-            'lon': ["79 58 56 W"]
-        })
-        config = CoordinatesNormalizationConfiguration(
-            latColumn='lat', lonColumn='lon'
-        )
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert len(result) == 1
-        assert abs(result['lat'].iloc[0] - 40.4461) < 0.001
-        assert abs(result['lon'].iloc[0] - (-79.9822)) < 0.001
-
-    def test_normalize_coordinates_dms_already_decimal(self, mock_context):
-        """Test that string columns with decimal values are auto-parsed correctly."""
-        df = pd.DataFrame({
-            'lat': ["45.5", "46.0"],
-            'lon': ["9.5", "10.0"]
-        })
-        config = CoordinatesNormalizationConfiguration(
-            latColumn='lat', lonColumn='lon'
-        )
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert len(result) == 2
-        assert result['lat'].iloc[0] == 45.5
-        assert result['lon'].iloc[0] == 9.5
-
-    def test_normalize_coordinates_dms_mixed_valid_invalid(self, mock_context):
-        """Test auto-detection with a mix of valid DMS, valid decimal, and unparseable values."""
-        df = pd.DataFrame({
-            'lat': ["40°26'46\"N", "not_a_coord", "51.5"],
-            'lon': ["79°58'56\"W", "10.0", "0.1"]
-        })
-        config = CoordinatesNormalizationConfiguration(
-            latColumn='lat', lonColumn='lon'
-        )
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        # Row with "not_a_coord" for lat should be dropped (NaN lat)
-        assert len(result) == 2
-
-    def test_normalize_coordinates_dms_out_of_range(self, mock_context):
-        """Test that DMS-parsed coordinates outside valid range are filtered out."""
-        df = pd.DataFrame({
-            'lat': ["91°0'0\"N", "45°0'0\"N"],
-            'lon': ["0°0'0\"E", "9°0'0\"E"]
-        })
-        config = CoordinatesNormalizationConfiguration(
-            latColumn='lat', lonColumn='lon'
-        )
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        # First row has lat=91° which is out of [-90, 90]
-        assert len(result) == 1
-        assert abs(result['lat'].iloc[0] - 45.0) < 0.001
-
-    def test_normalize_coordinates_dms_south_and_east(self, mock_context):
-        """Test DMS parsing with south latitude and east longitude."""
-        df = pd.DataFrame({
-            'lat': ["33°51'54\"S"],
-            'lon': ["151°12'36\"E"]
-        })
-        config = CoordinatesNormalizationConfiguration(
-            latColumn='lat', lonColumn='lon'
-        )
-        result = normalize_coordinates(mock_context, config, df.copy())
-
-        assert len(result) == 1
-        # 33°51'54"S ≈ -33.865
-        assert result['lat'].iloc[0] < 0
-        assert abs(result['lat'].iloc[0] - (-33.865)) < 0.001
-        # 151°12'36"E ≈ 151.21
-        assert result['lon'].iloc[0] > 0
-        assert abs(result['lon'].iloc[0] - 151.21) < 0.01
-
-    def test_normalize_coordinates_autodetect_numeric_vs_dms(self, mock_context):
-        """Test that numeric columns are coerced directly while string columns are parsed as DMS."""
-        # Numeric columns — should go through pd.to_numeric path
-        df_numeric = pd.DataFrame({
-            'lat': [45.123456, 46.0],
-            'lon': [9.123456, 10.0]
-        })
-        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
-        result_numeric = normalize_coordinates(mock_context, config, df_numeric.copy())
-
-        assert result_numeric['lat'].iloc[0] == 45.1235
-        assert len(result_numeric) == 2
-
-        # String DMS columns — should go through _parse_dms_to_decimal path
-        df_dms = pd.DataFrame({
-            'lat': ["40°26'46\"N"],
-            'lon': ["79°58'56\"W"]
-        })
-        result_dms = normalize_coordinates(mock_context, config, df_dms.copy())
-
-        assert len(result_dms) == 1
-        assert abs(result_dms['lat'].iloc[0] - 40.4461) < 0.001
-
-class TestAddGlobalAggregations:
-    """Tests for the add_global_aggregations operation."""
-
-    def test_add_global_aggregations_success(self, mock_context):
-        """Test a successful group by and aggregation."""
-        df = pd.DataFrame({
-            'category': ['A', 'A', 'B'],
-            'value': [10, 20, 100],
-            'ignored_str': ['x', 'y', 'z']
-        })
-        
-        config = AggregationConfiguration(
-            columns=['category'], 
-            operation='sum'
-        )
-        
-        result = add_global_aggregations(mock_context, config, df.copy())
-
-        assert len(result) == 2 
-        assert result.loc[result['category'] == 'A', 'value'].values[0] == 30
-        assert result.loc[result['category'] == 'B', 'value'].values[0] == 100
-        assert 'ignored_str' not in result.columns
-        mock_context.log.info.assert_called()
-
-    def test_add_global_aggregations_missing_column(self, mock_context):
-        """Test skipping a column that does not exist in the dataframe."""
-        df = pd.DataFrame({'value': [1, 2, 3]})
-        config = AggregationConfiguration(
-            columns=['missing_col'], 
-            operation='count'
-        )
-
-        result = add_global_aggregations(mock_context, config, df.copy())
-
-        mock_context.log.warning.assert_any_call("Column 'missing_col' not found, skipping aggregation.")
-        assert len(result) == 1
-
-    def test_add_global_aggregations_unsupported_op(self, mock_context):
-        """Test the warning when an unsupported operation is provided."""
-        df = pd.DataFrame({'category': ['A'], 'value': [1]})
-        
-        config = AggregationConfiguration(
-            columns=['category'], 
-            operation='unsupported' 
-        )
-        
-        with pytest.raises(Exception):
-            add_global_aggregations(mock_context, config, df.copy())
-            
-        mock_context.log.warning.assert_any_call("Unsupported aggregation 'unsupported'")
-
-    def test_add_global_aggregations_only_numeric_kept(self, mock_context):
-        """Verify that non-numeric and non-grouping columns are dropped."""
-        df = pd.DataFrame({
-            'group': ['A', 'A'],
-            'num': [1, 2],
-            'text': ['hello', 'world']
-        })
-        config = AggregationConfiguration(columns=['group'], operation='mean')
-
-        result = add_global_aggregations(mock_context, config, df.copy())
-
-        assert 'text' not in result.columns
-        assert 'num' in result.columns
-        assert 'group' in result.columns
diff --git a/tests/dataframe_level_anonymisation/__init__.py b/tests/dataframe_level_anonymisation/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/tests/dataframe_level_anonymisation/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/dataframe_level_anonymisation/config_models/__init__.py b/tests/dataframe_level_anonymisation/config_models/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/tests/dataframe_level_anonymisation/config_models/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/dataframe_level_anonymisation/config_models/test_base_config.py b/tests/dataframe_level_anonymisation/config_models/test_base_config.py
deleted file mode 100644
index 92e599b..0000000
--- a/tests/dataframe_level_anonymisation/config_models/test_base_config.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import pytest
-from pydantic import ValidationError
-
-from template_code_location.dataframe_level_anonymisation.config_models.base_config import BaseConfiguration
-
-
-def test_valid_configuration_with_overrides():
-    cfg = BaseConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        supp_level=10.0,
-        generalisation_hierarchies={"age": "age_hierarchy"},
-    )
-    assert cfg.ident == ["id"]
-    assert cfg.quasi_identifiers == ["age"]
-    assert cfg.supp_level == 10.0
-    assert cfg.generalisation_hierarchies == {"age": "age_hierarchy"}
-
-
-def test_default_values_are_loaded():
-    cfg = BaseConfiguration()
-    assert cfg.ident == ["Name"]
-    assert cfg.quasi_identifiers == ["Age"]
-    assert cfg.supp_level == 50.0
-    assert cfg.generalisation_hierarchies == {"Age": "simpl_age"}
-
-
-def test_missing_ident_raises_error():
-    with pytest.raises(ValidationError):
-        BaseConfiguration(
-            ident=[]
-        )
-
-
-def test_missing_quasi_ident_raises_error():
-    with pytest.raises(ValidationError):
-        BaseConfiguration(
-            quasi_identifiers=[]
-        )
-
-
-def test_overlap_between_ident_and_quasi_identifiers():
-    with pytest.raises(ValidationError):
-        BaseConfiguration(
-            ident=["age"],
-            quasi_identifiers=["age"]
-        )
-
-
-def test_supp_level_bounds():
-    with pytest.raises(ValidationError):
-        BaseConfiguration(
-            supp_level=150.0  # fuori range
-        )
diff --git a/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py b/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
deleted file mode 100644
index c6994a9..0000000
--- a/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from template_code_location.dataframe_level_anonymisation.config_models.hierarchies import (
-    simpl_age,
-    simpl_age2,
-    simpl_gender,
-    get_all_hierarchies,
-)
-
-
-def test_simpl_age_structure():
-    assert isinstance(simpl_age, dict)
-    assert 0 in simpl_age
-    assert isinstance(simpl_age[0], list)
-    # verify first level contains 100 ages
-    assert len(simpl_age[0]) == 100
-    assert simpl_age[0][0] == 0
-    assert simpl_age[0][-1] == 99
-
-
-def test_simpl_age2_structure():
-    assert isinstance(simpl_age2, dict)
-    assert 0 in simpl_age2
-    assert 1 in simpl_age2
-    assert isinstance(simpl_age2[0], list)
-    assert isinstance(simpl_age2[1], list)
-
-
-def test_simpl_gender_structure():
-    assert isinstance(simpl_gender, dict)
-    assert 0 in simpl_gender
-    assert 1 in simpl_gender
-    assert simpl_gender[0] == ["M", "F", "O"]
-    assert simpl_gender[1] == ["*", "*", "*"]
-
-
-def test_get_all_hierarchies():
-    hier = get_all_hierarchies()
-
-    # the function should return dicts only
-    assert isinstance(hier, dict)
-
-    # ensure expected dicts are included
-    assert "simpl_age" in hier
-    assert "simpl_age2" in hier
-    assert "simpl_gender" in hier
-
-    # ensure the values returned are references to the actual dicts
-    assert hier["simpl_age"] is simpl_age
-    assert hier["simpl_gender"] is simpl_gender
diff --git a/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py b/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
deleted file mode 100644
index ef6e2c8..0000000
--- a/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import pytest
-from pydantic import ValidationError
-
-from template_code_location.dataframe_level_anonymisation.config_models.k_anonymity_configuration import (
-    KAnonymityConfiguration,
-)
-
-
-def test_valid_k_anonymity_config_with_overrides():
-    cfg = KAnonymityConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        supp_level=5.0,
-        generalisation_hierarchies={"age": "age_hier"},
-        k=3,
-        sensitive_attributes=["disease"],
-    )
-    assert cfg.k == 3
-    assert cfg.sensitive_attributes == ["disease"]
-    assert cfg.generalisation_hierarchies == {"age": "age_hier"}
-
-
-def test_default_values_are_loaded():
-    cfg = KAnonymityConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        generalisation_hierarchies={"age": "age_hier"}
-    )
-    assert cfg.k == 3
-    assert cfg.sensitive_attributes == ["Disease"]
-
-
-def test_invalid_k_value_raises_error():
-    with pytest.raises(ValidationError):
-        KAnonymityConfiguration(
-            ident=["id"],
-            quasi_identifiers=["age"],
-            generalisation_hierarchies={"age": "age_hier"},
-            k=1,  # invalid, must be >= 2
-            sensitive_attributes=["disease"],
-        )
diff --git a/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py b/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
deleted file mode 100644
index c94db3e..0000000
--- a/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import pytest
-from pydantic import ValidationError
-
-from template_code_location.dataframe_level_anonymisation.config_models.l_diversity_configuration import (
-    LDiversityConfiguration,
-)
-
-
-def test_valid_l_diversity_config_with_overrides():
-    cfg = LDiversityConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        supp_level=5.0,
-        generalisation_hierarchies={"age": "age_hier"},
-        k=3,
-        l=2,
-        sensitive_attribute="disease",
-    )
-    assert cfg.k == 3
-    assert cfg.l == 2
-    assert cfg.sensitive_attribute == "disease"
-
-
-def test_default_values_are_loaded():
-    cfg = LDiversityConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        generalisation_hierarchies={"age": "age_hier"}
-    )
-    assert cfg.k == 2
-    assert cfg.l == 3
-    assert cfg.sensitive_attribute == "Disease"
-
-
-def test_invalid_l_value_raises_error():
-    with pytest.raises(ValidationError):
-        LDiversityConfiguration(
-            ident=["id"],
-            quasi_identifiers=["age"],
-            generalisation_hierarchies={"age": "age_hier"},
-            k=3,
-            l=0,  # invalid, must be >= 1
-            sensitive_attribute="disease",
-        )
diff --git a/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py b/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
deleted file mode 100644
index 615bd27..0000000
--- a/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import pytest
-from pydantic import ValidationError
-
-from template_code_location.dataframe_level_anonymisation.config_models.t_closeness_configuration import (
-    TClosenessConfiguration,
-)
-
-
-def test_valid_t_closeness_config_with_overrides():
-    cfg = TClosenessConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        supp_level=5.0,
-        generalisation_hierarchies={"age": "age_hier"},
-        k=3,
-        t=0.4,
-        sensitive_attribute="disease",
-    )
-    assert cfg.k == 3
-    assert cfg.t == 0.4
-    assert cfg.sensitive_attribute == "disease"
-
-
-def test_default_values_are_loaded():
-    cfg = TClosenessConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        generalisation_hierarchies={"age": "age_hier"}
-    )
-    assert cfg.k == 2
-    assert cfg.t == 0.5
-    assert cfg.sensitive_attribute == "Disease"
-
-
-def test_invalid_t_value_low():
-    with pytest.raises(ValidationError):
-        TClosenessConfiguration(
-            ident=["id"],
-            quasi_identifiers=["age"],
-            generalisation_hierarchies={"age": "age_hier"},
-            k=3,
-            t=-0.1,  # invalid
-            sensitive_attribute="disease",
-        )
-
-
-def test_invalid_t_value_high():
-    with pytest.raises(ValidationError):
-        TClosenessConfiguration(
-            ident=["id"],
-            quasi_identifiers=["age"],
-            generalisation_hierarchies={"age": "age_hier"},
-            k=3,
-            t=2.0,  # invalid > 1
-            sensitive_attribute="disease",
-        )
diff --git a/tests/dataframe_level_anonymisation/test_jobs.py b/tests/dataframe_level_anonymisation/test_jobs.py
deleted file mode 100644
index f890e2d..0000000
--- a/tests/dataframe_level_anonymisation/test_jobs.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from template_code_location.dataframe_level_anonymisation.jobs import (
-    k_anonymity_job,
-    l_diversity_job,
-    t_closeness_job,
-    k_anonymity_job_s3,
-    l_diversity_job_s3,
-    t_closeness_job_s3
-)
-
-
-def test_k_anonymity_job_is_callable():
-    """Test k_anonymity_job is a valid Dagster job"""
-    assert callable(k_anonymity_job)
-    assert hasattr(k_anonymity_job, 'execute_in_process')
-
-
-def test_l_diversity_job_is_callable():
-    """Test l_diversity_job is a valid Dagster job"""
-    assert callable(l_diversity_job)
-    assert hasattr(l_diversity_job, 'execute_in_process')
-
-
-def test_t_closeness_job_is_callable():
-    """Test t_closeness_job is a valid Dagster job"""
-    assert callable(t_closeness_job)
-    assert hasattr(t_closeness_job, 'execute_in_process')
-
-
-def test_k_anonymity_job_s3_is_callable():
-    """Test k_anonymity_job_s3 is a valid Dagster job"""
-    assert callable(k_anonymity_job_s3)
-    assert hasattr(k_anonymity_job_s3, 'execute_in_process')
-
-
-def test_l_diversity_job_s3_is_callable():
-    """Test l_diversity_job_s3 is a valid Dagster job"""
-    assert callable(l_diversity_job_s3)
-    assert hasattr(l_diversity_job_s3, 'execute_in_process')
-
-
-def test_t_closeness_job_s3_is_callable():
-    """Test t_closeness_job_s3 is a valid Dagster job"""
-    assert callable(t_closeness_job_s3)
-    assert hasattr(t_closeness_job_s3, 'execute_in_process')
diff --git a/tests/dataframe_level_anonymisation/test_ops.py b/tests/dataframe_level_anonymisation/test_ops.py
deleted file mode 100644
index 90c01aa..0000000
--- a/tests/dataframe_level_anonymisation/test_ops.py
+++ /dev/null
@@ -1,230 +0,0 @@
-import pytest
-import pandas as pd
-from unittest.mock import patch
-from dagster import DagsterInvalidInvocationError, build_op_context
-
-from template_code_location.dataframe_level_anonymisation.ops import (
-    apply_k_anonymity,
-    apply_l_diversity,
-    apply_t_closeness,
-)
-from template_code_location.dataframe_level_anonymisation.config_models import (
-    KAnonymityConfiguration,
-    LDiversityConfiguration,
-    TClosenessConfiguration,
-)
-
-
-# ---------------------------
-# Fixtures
-# ---------------------------
-@pytest.fixture
-def fake_df():
-    return pd.DataFrame({"id": [1, 2], "age": [30, 40]})
-
-
-@pytest.fixture
-def k_config():
-    return KAnonymityConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        sensitive_attributes=["age"],
-        k=2,
-        supp_level=0.0,
-        generalisation_hierarchies={"age": "simpl_age"},
-    )
-
-
-@pytest.fixture
-def l_config():
-    return LDiversityConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        sensitive_attribute="age",
-        k=2,
-        l=1,
-        supp_level=0.0,
-        generalisation_hierarchies={"age": "simpl_age"},
-    )
-
-
-@pytest.fixture
-def t_config():
-    return TClosenessConfiguration(
-        ident=["id"],
-        quasi_identifiers=["age"],
-        sensitive_attribute="age",
-        k=2,
-        t=0.5,
-        supp_level=0.0,
-        generalisation_hierarchies={"age": "simpl_age"},
-    )
-
-
-@pytest.fixture
-def op_context():
-    return build_op_context()
-
-
-# ---------------------------
-# Helper for patching external functions
-# ---------------------------
-@pytest.fixture(autouse=True)
-def patch_external_ops():
-    with (
-        patch(
-            "dataframe_level_anonymisation.ops.get_all_hierarchies",
-            return_value={"simpl_age": {0: [30, 40]}},
-        ),
-        patch(
-            "dataframe_level_anonymisation.ops.k_anonymity",
-            return_value=pd.DataFrame({"id": [1, 2], "age": [30, 40]}),
-        ),
-        patch(
-            "dataframe_level_anonymisation.ops.l_diversity",
-            return_value=pd.DataFrame({"id": [1, 2], "age": [30, 40]}),
-        ),
-        patch(
-            "dataframe_level_anonymisation.ops.t_closeness",
-            return_value=pd.DataFrame({"id": [1, 2], "age": [30, 40]}),
-        ),
-    ):
-        yield
-
-
-# ---------------------------
-# Tests for apply_k_anonymity
-# ---------------------------
-def test_apply_k_anonymity_outputs(op_context, k_config, fake_df):
-    results = list(apply_k_anonymity(op_context, k_config, fake_df))
-    assert len(results) == 2
-
-    data_output = results[0].value
-    metrics_output = results[1].value
-
-    # Check types
-    assert isinstance(data_output, pd.DataFrame)
-    assert isinstance(metrics_output, dict)
-    assert "k_anon" in metrics_output
-    assert "l_div" in metrics_output
-    assert "t_clos" in metrics_output
-
-
-# ---------------------------
-# Tests for apply_l_diversity
-# ---------------------------
-def test_apply_l_diversity_outputs(op_context, l_config, fake_df):
-    results = list(apply_l_diversity(op_context, l_config, fake_df))
-    assert len(results) == 2
-
-    data_output = results[0].value
-    metrics_output = results[1].value
-
-    assert isinstance(data_output, pd.DataFrame)
-    assert isinstance(metrics_output, dict)
-    assert "k_anon" in metrics_output
-    assert "l_div" in metrics_output
-    assert "t_clos" in metrics_output
-
-
-def test_apply_l_diversity_empty_raises(op_context, l_config):
-    with patch("dataframe_level_anonymisation.ops.l_diversity", return_value=pd.DataFrame()):
-
-        with pytest.raises(DagsterInvalidInvocationError):
-            list(apply_l_diversity(op_context, l_config, pd.DataFrame({"id": [1], "age": [30]})))
-
-
-# ---------------------------
-# Tests for apply_t_closeness
-# ---------------------------
-def test_apply_t_closeness_outputs(op_context, t_config, fake_df):
-    results = list(apply_t_closeness(op_context, t_config, fake_df))
-    assert len(results) == 2
-
-    data_output = results[0].value
-    metrics_output = results[1].value
-
-    assert isinstance(data_output, pd.DataFrame)
-    assert isinstance(metrics_output, dict)
-    assert "k_anon" in metrics_output
-    assert "l_div" in metrics_output
-    assert "t_clos" in metrics_output
-
-
-def test_apply_t_closeness_empty_raises(op_context, t_config):
-    with patch("dataframe_level_anonymisation.ops.t_closeness", return_value=pd.DataFrame()):
-        with pytest.raises(DagsterInvalidInvocationError):
-            list(apply_t_closeness(op_context, t_config, pd.DataFrame({"id": [1], "age": [30]})))
-
-
-# ---------------------------
-# Additional tests for _validate_and_get_hierarchies
-# ---------------------------
-def test_validate_hierarchies_dataset_too_small(k_config):
-    small_df = pd.DataFrame({"id": [1], "age": [30]})
-    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
-
-    with pytest.raises(DagsterInvalidInvocationError):
-        _validate_and_get_hierarchies(k_config, small_df)
-
-
-def test_validate_hierarchies_missing_hierarchy(k_config, fake_df):
-    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
-
-    bad_config = k_config.model_copy(update={"generalisation_hierarchies": {}})
-
-    with pytest.raises(DagsterInvalidInvocationError):
-        _validate_and_get_hierarchies(bad_config, fake_df)
-
-
-def test_validate_hierarchies_hierarchy_not_in_code(k_config, fake_df):
-    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
-
-    with patch("dataframe_level_anonymisation.ops.get_all_hierarchies", return_value={}):
-        with pytest.raises(DagsterInvalidInvocationError):
-            _validate_and_get_hierarchies(k_config, fake_df)
-
-
-# ---------------------------
-# Additional tests for _calc_dataframe_metrics
-# ---------------------------
-def test_calc_dataframe_metrics_basic():
-    from template_code_location.dataframe_level_anonymisation.ops import _calc_dataframe_metrics
-
-    df_org = pd.DataFrame({"age": [30, 40], "id": [1, 2]})
-    df_anon = df_org.copy()
-
-    with (
-        patch("dataframe_level_anonymisation.ops.anonymity.k_anonymity", return_value=2),
-        patch("dataframe_level_anonymisation.ops.anonymity.l_diversity", return_value=1),
-        patch("dataframe_level_anonymisation.ops.anonymity.t_closeness", return_value=0.1),
-    ):
-
-        report, metrics = _calc_dataframe_metrics(df_anon, df_org, ["age"], ["age"])
-
-        assert "k-anonymity" in report
-        assert metrics["k_anon"] == 2
-        assert metrics["l_div"] == 1
-        assert metrics["t_clos"] == 0.1
-
-
-# ---------------------------
-# Tests for apply_t_closeness exception branches
-# ---------------------------
-def test_apply_t_closeness_value_error_quasi_identifiers(op_context, t_config, fake_df):
-    """Covers the branch where ValueError contains 'Cannot be quasi-identifiers'."""
-    with patch(
-        "dataframe_level_anonymisation.ops.t_closeness",
-        side_effect=ValueError("Cannot be quasi-identifiers invalid"),
-    ):
-        with pytest.raises(DagsterInvalidInvocationError):
-            list(apply_t_closeness(op_context, t_config, fake_df))
-
-
-def test_apply_t_closeness_value_error_other_message(op_context, t_config, fake_df):
-    """Covers the branch where ValueError is raised but message does NOT contain that substring."""
-    with patch(
-        "dataframe_level_anonymisation.ops.t_closeness", side_effect=ValueError("Some other error")
-    ):
-        with pytest.raises(DagsterInvalidInvocationError):
-            list(apply_t_closeness(op_context, t_config, fake_df))
diff --git a/tests/dataframe_level_anonymisation/test_utils.py b/tests/dataframe_level_anonymisation/test_utils.py
deleted file mode 100644
index 3fa1841..0000000
--- a/tests/dataframe_level_anonymisation/test_utils.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import numpy as np
-
-from template_code_location.dataframe_level_anonymisation.utils import (
-    parse_value_list,
-    normalize_hierarchy_levels,
-)
-
-
-# ------------------------------------
-# Tests for parse_value_list
-# ------------------------------------
-def test_parse_value_list_all_strings_digits():
-    values = ["1", "2", "3"]
-    assert parse_value_list(values) == [1, 2, 3]
-
-
-def test_parse_value_list_mixed_values():
-    values = ["1", 2, "abc", "5"]
-    assert parse_value_list(values) == [1, 2, "abc", 5]
-
-
-def test_parse_value_list_no_digits():
-    values = ["a", "b", "c"]
-    assert parse_value_list(values) == ["a", "b", "c"]
-
-
-# ------------------------------------
-# Tests for normalize_hierarchy_levels
-# ------------------------------------
-def test_normalize_hierarchy_levels_level_0_converted_to_numpy_array():
-    hierarchy = {"age": {"0": ["1", "2", "3"], "1": ["0-10", "11-20"]}}
-
-    normalized = normalize_hierarchy_levels(hierarchy)
-
-    assert "age" in normalized
-    assert 0 in normalized["age"]
-    assert isinstance(normalized["age"][0], np.ndarray)
-    assert normalized["age"][0].tolist() == [1, 2, 3]  # converted via parse_value_list
-    assert normalized["age"][1] == ["0-10", "11-20"]  # untouched
-
-
-def test_normalize_hierarchy_levels_multiple_columns():
-    hierarchy = {"age": {"0": ["10", "20"]}, "gender": {"0": ["M", "F"], "1": ["*"]}}
-
-    normalized = normalize_hierarchy_levels(hierarchy)
-
-    # First column
-    assert isinstance(normalized["age"][0], np.ndarray)
-    assert normalized["age"][0].tolist() == [10, 20]
-
-    # Second column
-    assert isinstance(normalized["gender"][0], np.ndarray)
-    assert normalized["gender"][0].tolist() == ["M", "F"]
-    assert normalized["gender"][1] == ["*"]
-
-
-def test_normalize_hierarchy_levels_mixed_digit_non_digit_at_level_0():
-    hierarchy = {"test": {"0": ["1", "x", "3"]}}
-
-    normalized = normalize_hierarchy_levels(hierarchy)
-
-    assert isinstance(normalized["test"][0], np.ndarray)
-    assert normalized["test"][0].tolist() == ["1", "x", "3"]
-
-
-def test_normalize_hierarchy_levels_empty_mapping():
-    hierarchy = {"col": {}}
-    normalized = normalize_hierarchy_levels(hierarchy)
-
-    assert normalized == {"col": {}}
diff --git a/tests/field_level_pseudo_anonymisation/__init__.py b/tests/field_level_pseudo_anonymisation/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/tests/field_level_pseudo_anonymisation/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/field_level_pseudo_anonymisation/conftest.py b/tests/field_level_pseudo_anonymisation/conftest.py
deleted file mode 100644
index ee54069..0000000
--- a/tests/field_level_pseudo_anonymisation/conftest.py
+++ /dev/null
@@ -1,444 +0,0 @@
-"""
-Shared pytest fixtures and helpers for field-level pseudonymisation tests.
-
-This module provides:
-- Mock Vault client for testing without real Vault connections
-- Sample data fixtures
-- Configuration fixtures for encryption/decryption operations
-- Helper functions for running ops and managing test Vault storage
-"""
-
-import pandas as pd
-import pytest
-from dagster import build_op_context
-from cryptography.fernet import Fernet
-from hvac.exceptions import InvalidPath, Forbidden
-from unittest.mock import patch, MagicMock
-
-from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
-    AnonymisePseudonymizeStructuredConfig,
-    DepseudonymizeStructuredConfig,
-    EncryptConfig,
-    DecryptConfig,
-    PseudoTechniqueConfig,
-    DepseudoTechniqueConfig,
-)
-from template_code_location.field_level_pseudo_anonymisation.ops import (
-    anonymize_pseudonymize_structured,
-    depseudonymize_structured,
-)
-
-
-# -------------------------------- Mock Vault Storage ----------------------------------------
-
-# In-memory Vault simulation for tests
-_test_vault_storage = {}
-_test_vault_access_control = {}  # For simulating access control
-
-
-@pytest.fixture(autouse=True)
-def mock_vault_client():
-    """
-    Auto-use fixture that mocks the hvac.Client to avoid real Vault connections.
-    Uses an in-memory dict to simulate Vault storage for tests.
-    Includes access control simulation for AC3.
-    """
-    global _test_vault_storage, _test_vault_access_control
-    _test_vault_storage = {}  # Reset storage before each test
-    _test_vault_access_control = {}  # Reset access control
-
-    def mock_read_secret(path, mount_point):
-        """Mock reading secret from Vault with access control"""
-        full_path = f"{mount_point}/{path}"
-
-        # Check access control first
-        if full_path in _test_vault_access_control:
-            if not _test_vault_access_control[full_path]:
-                raise Forbidden(f"Access denied to secret: {full_path}")
-
-        if full_path not in _test_vault_storage:
-            raise InvalidPath(f"Secret not found: {full_path}")
-        return {"data": {"data": {"value": _test_vault_storage[full_path]}}}
-
-    def mock_create_or_update_secret(path, mount_point, secret):
-        """Mock creating/updating secret in Vault"""
-        full_path = f"{mount_point}/{path}"
-        _test_vault_storage[full_path] = secret["value"]
-
-    def mock_delete_metadata(path, mount_point):
-        """Mock deleting secret from Vault"""
-        full_path = f"{mount_point}/{path}"
-        if full_path in _test_vault_storage:
-            del _test_vault_storage[full_path]
-        if full_path in _test_vault_access_control:
-            del _test_vault_access_control[full_path]
-
-    with patch("hvac.Client") as mock_client_class:
-        mock_instance = MagicMock()
-        mock_instance.secrets.kv.v2.read_secret_version.side_effect = mock_read_secret
-        mock_instance.secrets.kv.v2.create_or_update_secret.side_effect = (
-            mock_create_or_update_secret
-        )
-        mock_instance.secrets.kv.v2.delete_metadata_and_all_versions.side_effect = (
-            mock_delete_metadata
-        )
-        mock_client_class.return_value = mock_instance
-        yield mock_instance
-
-
-# -------------------------------- Sample Data Fixtures ----------------------------------------
-
-
-@pytest.fixture
-def sample_df():
-    """
-    Fixture providing a sample structured dataset with PII data.
-    Represents typical data that requires pseudonymisation and restoration.
-    """
-    return pd.DataFrame(
-        {
-            "id": [1, 2, 3, 4, 5],
-            "name": [
-                "Alice Smith",
-                "Bob Jones",
-                "Charlie Brown",
-                "David Wilson",
-                "Eva Garcia",
-            ],
-            "email": [
-                "alice@example.com",
-                "bob@example.com",
-                "charlie@example.com",
-                "david@example.com",
-                "eva@example.com",
-            ],
-            "ssn": [
-                "123-45-6789",
-                "234-56-7890",
-                "345-67-8901",
-                "456-78-9012",
-                "567-89-0123",
-            ],
-            "age": [25, 30, 35, 40, 45],
-            "salary": [50000.0, 60000.0, 70000.0, 80000.0, 90000.0],
-            "department": ["HR", "IT", "Finance", "IT", "HR"],
-        }
-    )
-
-
-# -------------------------------- Configuration Fixtures ----------------------------------------
-
-
-@pytest.fixture
-def encrypt_config_single_field():
-    """
-    Configuration for encrypting a single field (email).
-    Used to create pseudonymised data for restoration tests.
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def decrypt_config_single_field():
-    """
-    Configuration for decrypting a single field (email).
-    Used to restore original values.
-    """
-    return DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def encrypt_config_multiple_fields():
-    """
-    Configuration for encrypting multiple fields (name, email, ssn).
-    Tests restoration of multiple sensitive fields.
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    columns=["name", "email", "ssn"],
-                    key_name="test_restoration_key_multi",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def decrypt_config_multiple_fields():
-    """
-    Configuration for decrypting multiple fields (name, email, ssn).
-    """
-    return DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["name", "email", "ssn"],
-                    key_name="test_restoration_key_multi",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def encrypt_config_partial_fields():
-    """
-    Configuration for encrypting only some fields (email, ssn).
-    Tests partial restoration scenarios.
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    columns=["email", "ssn"],
-                    key_name="test_restoration_key_partial",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def decrypt_config_partial_fields():
-    """
-    Configuration for decrypting only some fields (email, ssn).
-    """
-    return DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email", "ssn"],
-                    key_name="test_restoration_key_partial",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def authorized_multi_key_scenario():
-    """
-    Fixture for testing multi-key authorization scenarios.
-    Sets up two keys: one authorized, one denied.
-    """
-    clear_vault_key("authorized_key")
-    clear_vault_key("unauthorized_key")
-
-    # Create authorized key by generating it
-    authorized_key = Fernet.generate_key().decode()
-    set_vault_key("authorized_key", authorized_key)
-
-    # Create unauthorized key and deny access
-    unauthorized_key = Fernet.generate_key().decode()
-    set_vault_key("unauthorized_key", unauthorized_key)
-    deny_vault_access("unauthorized_key")
-
-    yield {"authorized": "authorized_key", "unauthorized": "unauthorized_key"}
-
-    # Cleanup
-    clear_vault_key("authorized_key")
-    clear_vault_key("unauthorized_key")
-
-
-@pytest.fixture
-def large_dataset():
-    """
-    Fixture providing a large dataset (10,000 rows) for performance testing.
-    Reusable across multiple performance tests.
-    """
-    return pd.DataFrame(
-        {
-            "id": range(1, 10001),
-            "email": [f"user{i}@example.com" for i in range(1, 10001)],
-            "name": [f"User {i}" for i in range(1, 10001)],
-            "ssn": [f"{i:03d}-{i:02d}-{i:04d}" for i in range(1, 10001)],
-            "age": [20 + (i % 50) for i in range(1, 10001)],
-            "salary": [30000.0 + (i * 10) for i in range(1, 10001)],
-            "department": [["HR", "IT", "Finance", "Sales"][i % 4] for i in range(1, 10001)],
-        }
-    )
-
-
-@pytest.fixture(scope="session")
-def vault_test_keys():
-    """
-    Session-scoped fixture to pre-generate test keys for faster test execution.
-    Avoids repeated key generation in each test.
-    """
-    keys = {f"test_key_{i}": Fernet.generate_key().decode() for i in range(10)}
-
-    return keys
-
-
-@pytest.fixture
-def cleanup_test_keys(request):
-    """
-    Fixture to automatically cleanup test keys after each test.
-    Use with: @pytest.mark.usefixtures("cleanup_test_keys")
-    """
-    yield
-
-    # Cleanup all test keys from mock Vault
-    test_keys = [k for k in _test_vault_storage.keys() if "test_" in k]
-    for key in test_keys:
-        _test_vault_storage.pop(key, None)
-
-
-# -------------------------------- Helper Functions ----------------------------------------
-
-
-def config_to_dagster_dict(config):
-    """
-    Convert Pydantic config to Dagster-compatible dictionary.
-
-    For AnonymisePseudonymizeStructuredConfig (uses discriminated Union):
-        Pydantic v2 outputs: {'technique': {'type': 'encrypt', 'columns': [...], 'key_name': '...'}}
-        Dagster expects: {'technique': {'encrypt': {'columns': [...], 'key_name': '...'}}}
-
-    For DepseudonymizeStructuredConfig (direct DecryptConfig, no Union):
-        Pydantic v2 outputs:
-        {'technique': {'type': 'decrypt', 'columns': [...], 'key_name': '...'}}
-        Dagster expects: Same flat structure with 'type' field
-
-    Args:
-        config: Pydantic config instance
-            (AnonymisePseudonymizeStructuredConfig or
-            DepseudonymizeStructuredConfig)
-
-    Returns:
-        dict: Dagster-compatible configuration dictionary
-    """
-    from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
-        AnonymisePseudonymizeStructuredConfig,
-    )
-
-    config_dict = config.model_dump()
-
-    # Only convert discriminated unions for AnonymisePseudonymizeStructuredConfig
-    # DepseudonymizeStructuredConfig uses direct DecryptConfig (no discriminated union)
-    if isinstance(config, AnonymisePseudonymizeStructuredConfig):
-        if "used_function" in config_dict:
-            for func_config in config_dict["used_function"]:
-                if "technique" in func_config:
-                    technique = func_config["technique"]
-                    # Pydantic outputs flat dict with 'type' field for discriminated unions
-                    if isinstance(technique, dict) and "type" in technique:
-                        # Extract the type discriminator
-                        technique_type = technique["type"]
-                        # Create nested structure without the 'type' field
-                        technique_data = {k: v for k, v in technique.items() if k != "type"}
-                        # Nest under the discriminator key for Dagster
-                        func_config["technique"] = {technique_type: technique_data}
-
-    return config_dict
-
-
-def run_encrypt_op(config, df):
-    """
-    Helper function to execute the anonymize_pseudonymize_structured op.
-
-    Args:
-        config: AnonymisePseudonymizeStructuredConfig instance
-        df: Input pandas DataFrame
-
-    Returns:
-        tuple: (result_df, metrics) - Output DataFrame and metrics dict
-    """
-    context = build_op_context(op_config=config_to_dagster_dict(config))
-    result_df, metrics = anonymize_pseudonymize_structured(context, df=df)
-    return result_df.value, metrics.value
-
-
-def run_decrypt_op(config, df):
-    """
-    Helper function to execute the depseudonymize_structured op.
-
-    Args:
-        config: DepseudonymizeStructuredConfig instance
-        df: Input pandas DataFrame
-
-    Returns:
-        tuple: (result_df, metrics) - Output DataFrame and metrics dict
-    """
-    context = build_op_context(op_config=config_to_dagster_dict(config))
-    result_df, metrics = depseudonymize_structured(context, df=df)
-    return result_df.value, metrics.value
-
-
-def clear_vault_key(key_name: str):
-    """
-    Helper function to clear a key from the simulated Vault storage for test isolation.
-
-    Args:
-        key_name: Name of the key to delete from Vault
-    """
-    full_path = f"secret/PseudonymKeys/{key_name}"
-    if full_path in _test_vault_storage:
-        del _test_vault_storage[full_path]
-    if full_path in _test_vault_access_control:
-        del _test_vault_access_control[full_path]
-
-
-def set_vault_key(key_name: str, key_value: str):
-    """
-    Helper function to set a key in the simulated Vault storage.
-
-    Args:
-        key_name: Name of the key
-        key_value: Value of the key (Fernet key as string)
-    """
-    full_path = f"secret/PseudonymKeys/{key_name}"
-    _test_vault_storage[full_path] = key_value
-
-
-def deny_vault_access(key_name: str):
-    """
-    Helper function to deny access to a key for authorization testing (AC3).
-
-    Args:
-        key_name: Name of the key to deny access to
-    """
-    full_path = f"secret/PseudonymKeys/{key_name}"
-    _test_vault_access_control[full_path] = False
-
-
-def get_vault_key(key_name: str) -> bytes:
-    """
-    Helper function to retrieve a key from the simulated Vault storage.
-
-    Args:
-        key_name: Name of the key to retrieve
-
-    Returns:
-        bytes: The encryption key
-    """
-    full_path = f"secret/PseudonymKeys/{key_name}"
-    if full_path not in _test_vault_storage:
-        raise InvalidPath(f"Key not found: {key_name}")
-    return _test_vault_storage[full_path].encode()
diff --git a/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py b/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
deleted file mode 100644
index 010b9a6..0000000
--- a/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
+++ /dev/null
@@ -1,633 +0,0 @@
-import pytest
-from pydantic import ValidationError
-
-from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
-    AnonymisePseudonymizeStructuredConfig,
-    DepseudonymizeStructuredConfig,
-    PseudoTechniqueConfig,
-    DepseudoTechniqueConfig,
-    HashConfig,
-    EncryptConfig,
-    RedactConfig,
-    ReplaceConfig,
-    DecryptConfig,
-)
-from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
-    AnonymisePseudonymizeUnstructuredConfig,
-    DepseudonymizeUnstructuredConfig,
-    PseudoTechniqueConfig as UnstructuredPseudoTechniqueConfig,
-    DepseudoTechniqueConfig as UnstructuredDepseudoTechniqueConfig,
-    HashConfig as UnstructuredHashConfig,
-    EncryptConfig as UnstructuredEncryptConfig,
-    RedactConfig as UnstructuredRedactConfig,
-    ReplaceConfig as UnstructuredReplaceConfig,
-    RetainConfig,
-    DecryptConfig as UnstructuredDecryptConfig,
-)
-from template_code_location.field_level_pseudo_anonymisation.config_models.languages import LanguageEnum
-from template_code_location.field_level_pseudo_anonymisation.config_models.pii_entities import PIIEntityEnum
-
-
-# ==================== Structured Config Tests ====================
-
-class TestStructuredConfigValidators:
-    """Tests for structured_config.py validators and validators."""
-    
-    def test_ensure_unique_columns_valid_single_technique(self):
-        """Test that single technique with single column passes validation."""
-        config = AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(
-                        columns=["email"],
-                        key_name="key1"
-                    )
-                )
-            ]
-        )
-        assert config is not None
-        assert len(config.used_function) == 1
-    
-    def test_ensure_unique_columns_valid_multiple_techniques_different_columns(self):
-        """Test that multiple techniques with different columns passes validation."""
-        config = AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(
-                        columns=["email"],
-                        key_name="key1"
-                    )
-                ),
-                PseudoTechniqueConfig(
-                    technique=HashConfig(
-                        columns=["ssn"],
-                        algorithm="sha256"
-                    )
-                )
-            ]
-        )
-        assert config is not None
-        assert len(config.used_function) == 2
-    
-    def test_ensure_unique_columns_duplicate_columns_same_technique(self):
-        """Test that duplicate columns in different techniques raises error."""
-        with pytest.raises(ValueError) as exc_info:
-            AnonymisePseudonymizeStructuredConfig(
-                used_function=[
-                    PseudoTechniqueConfig(
-                        technique=EncryptConfig(
-                            columns=["email"],
-                            key_name="key1"
-                        )
-                    ),
-                    PseudoTechniqueConfig(
-                        technique=HashConfig(
-                            columns=["email"],
-                            algorithm="sha256"
-                        )
-                    )
-                ]
-            )
-        assert "Duplicate column" in str(exc_info.value)
-        assert "email" in str(exc_info.value)
-    
-    def test_ensure_unique_columns_multiple_duplicates(self):
-        """Test error message with multiple duplicate columns."""
-        with pytest.raises(ValueError) as exc_info:
-            AnonymisePseudonymizeStructuredConfig(
-                used_function=[
-                    PseudoTechniqueConfig(
-                        technique=EncryptConfig(
-                            columns=["email", "phone"],
-                            key_name="key1"
-                        )
-                    ),
-                    PseudoTechniqueConfig(
-                        technique=HashConfig(
-                            columns=["email", "phone"],
-                            algorithm="sha256"
-                        )
-                    )
-                ]
-            )
-        error_msg = str(exc_info.value)
-        assert "Duplicate column" in error_msg
-        assert "email" in error_msg
-        assert "phone" in error_msg
-    
-    def test_collect_column_to_techniques_single_technique(self):
-        """Test _collect_column_to_techniques with single technique."""
-        config = AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(
-                        columns=["email", "phone"],
-                        key_name="key1"
-                    )
-                )
-            ]
-        )
-        mapping = config._collect_column_to_techniques()
-        assert mapping == {
-            "email": ["encrypt"],
-            "phone": ["encrypt"]
-        }
-    
-    def test_extract_technique_and_columns_dict_with_type_field(self):
-        """Test _extract_technique_and_columns with dict containing 'type' field."""
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(
-            {
-                "technique": {
-                    "type": "encrypt",
-                    "columns": ["email", "ssn"],
-                    "key_name": "test_key"
-                }
-            }
-        )
-        assert technique_type == "encrypt"
-        assert columns == ["email", "ssn"]
-    
-    def test_extract_technique_and_columns_dict_with_variant_mapping(self):
-        """Test _extract_technique_and_columns with variant-key mapping {'hash': {...}}."""
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(
-            {
-                "technique": {
-                    "encrypt": {
-                        "columns": ["ssn"],
-                        "key_name": "test_key"
-                    }
-                }
-            }
-        )
-        assert technique_type == "encrypt"
-        assert columns == ["ssn"]
-    
-    def test_extract_technique_and_columns_model_instance(self):
-        """Test _extract_technique_and_columns with PseudoTechniqueConfig model instance."""
-        pseudo_config = PseudoTechniqueConfig(
-            technique=RedactConfig(columns=["address"])
-        )
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(pseudo_config)
-        assert technique_type == "redact"
-        assert columns == ["address"]
-    
-    def test_extract_technique_and_columns_empty_dict(self):
-        """Test _extract_technique_and_columns with empty dict."""
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(
-            {"technique": {}}
-        )
-        assert technique_type is None
-        assert columns == []
-    
-    def test_extract_technique_and_columns_none_technique(self):
-        """Test _extract_technique_and_columns with None technique."""
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(
-            {"technique": None}
-        )
-        assert technique_type is None
-        assert columns == []
-    
-    def test_extract_technique_and_columns_missing_columns_key(self):
-        """Test _extract_technique_and_columns when 'columns' key is missing."""
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(
-            {
-                "technique": {
-                    "type": "encrypt",
-                    "key_name": "test_key"
-                }
-            }
-        )
-        assert technique_type == "encrypt"
-        assert columns == []
-    
-    def test_extract_technique_and_columns_model_without_columns_attr(self):
-        """Test _extract_technique_and_columns with model instance missing columns attribute."""
-        pseudo_config = PseudoTechniqueConfig(
-            technique=ReplaceConfig(columns=["old_value"], new_value="NEW")
-        )
-        config = AnonymisePseudonymizeStructuredConfig()
-        technique_type, columns = config._extract_technique_and_columns(pseudo_config)
-        assert technique_type == "replace"
-        assert columns == ["old_value"]
-
-
-class TestStructuredDepseudonymizeConfig:
-    """Tests for DepseudonymizeStructuredConfig."""
-    
-    def test_depseudonymize_config_normalize_used_function_with_dict(self):
-        """Test _normalize_depseudo_used_function with dict input."""
-        config = DepseudonymizeStructuredConfig(
-            used_function=[
-                {
-                    "technique": {
-                        "type": "decrypt",
-                        "columns": ["email"],
-                        "key_name": "key1"
-                    }
-                }
-            ]
-        )
-        assert len(config.used_function) == 1
-        assert isinstance(config.used_function[0], DepseudoTechniqueConfig)
-        assert config.used_function[0].technique.type == "decrypt"
-    
-    def test_depseudonymize_config_normalize_used_function_with_model(self):
-        """Test _normalize_depseudo_used_function with model instance."""
-        depseudo_tech = DepseudoTechniqueConfig(
-            technique=DecryptConfig(
-                columns=["email"],
-                key_name="key1"
-            )
-        )
-        config = DepseudonymizeStructuredConfig(
-            used_function=[depseudo_tech]
-        )
-        assert len(config.used_function) == 1
-        assert config.used_function[0] is depseudo_tech
-    
-    def test_depseudonymize_config_ensure_unique_columns_no_op(self):
-        """Test that ensure_unique_columns is a no-op for depseudonymize."""
-        # For depseudonymize, there's no per-column uniqueness constraint
-        config = DepseudonymizeStructuredConfig(
-            used_function=[
-                DepseudoTechniqueConfig(
-                    technique=DecryptConfig(
-                        columns=["email"],
-                        key_name="key1"
-                    )
-                ),
-                DepseudoTechniqueConfig(
-                    technique=DecryptConfig(
-                        columns=["email"],
-                        key_name="key2"
-                    )
-                )
-            ]
-        )
-        # Should not raise - no-op validator
-        assert config is not None
-
-
-# ==================== Unstructured Config Tests ====================
-
-class TestUnstructuredConfigValidators:
-    """Tests for unstructured_config.py validators."""
-    
-    def test_normalize_used_function_with_dict(self):
-        """Test _normalize_used_function with dict input."""
-        config = AnonymisePseudonymizeUnstructuredConfig(
-            language=LanguageEnum.en,
-            used_function=[
-                {
-                    "technique": {
-                        "encrypt": {
-                            "pii": [PIIEntityEnum.EMAIL.value],
-                            "key_name": "key1"
-                        }
-                    }
-                }
-            ]
-        )
-        assert len(config.used_function) == 1
-    
-    def test_normalize_used_function_with_model(self):
-        """Test _normalize_used_function with model instance."""
-        pseudo_tech = UnstructuredPseudoTechniqueConfig(
-            technique=UnstructuredEncryptConfig(
-                pii=[PIIEntityEnum.EMAIL.value],
-                key_name="key1"
-            )
-        )
-        config = AnonymisePseudonymizeUnstructuredConfig(
-            language=LanguageEnum.en,
-            used_function=[pseudo_tech]
-        )
-        assert len(config.used_function) == 1
-    
-    def test_ensure_unique_pii_valid_different_pii_types(self):
-        """Test that different PII types pass validation."""
-        config = AnonymisePseudonymizeUnstructuredConfig(
-            language=LanguageEnum.en,
-            used_function=[
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredEncryptConfig(
-                        pii=[PIIEntityEnum.EMAIL.value],
-                        key_name="key1"
-                    )
-                ),
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredHashConfig(
-                        pii=[PIIEntityEnum.PERSON.value],
-                        algorithm="sha256"
-                    )
-                )
-            ]
-        )
-        assert config is not None
-        assert len(config.used_function) == 2
-    
-    def test_ensure_unique_pii_duplicate_pii_types(self):
-        """Test that duplicate PII types raise error."""
-        with pytest.raises(ValueError) as exc_info:
-            AnonymisePseudonymizeUnstructuredConfig(
-                language=LanguageEnum.en,
-                used_function=[
-                    UnstructuredPseudoTechniqueConfig(
-                        technique=UnstructuredEncryptConfig(
-                            pii=[PIIEntityEnum.EMAIL.value],
-                            key_name="key1"
-                        )
-                    ),
-                    UnstructuredPseudoTechniqueConfig(
-                        technique=UnstructuredHashConfig(
-                            pii=[PIIEntityEnum.EMAIL.value],
-                            algorithm="sha256"
-                        )
-                    )
-                ]
-            )
-        assert "Duplicate PII" in str(exc_info.value)
-        # Error message shows PIIEntityEnum.EMAIL (the enum repr) rather than the value
-        assert "EMAIL" in str(exc_info.value)
-    
-    def test_collect_pii_to_techniques_single_technique(self):
-        """Test _collect_pii_to_techniques with single technique."""
-        config = AnonymisePseudonymizeUnstructuredConfig(
-            language=LanguageEnum.en,
-            used_function=[
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredEncryptConfig(
-                        pii=[PIIEntityEnum.EMAIL.value, PIIEntityEnum.PERSON.value],
-                        key_name="key1"
-                    )
-                )
-            ]
-        )
-        mapping = config._collect_pii_to_techniques()
-        assert mapping == {
-            PIIEntityEnum.EMAIL.value: ["encrypt"],
-            PIIEntityEnum.PERSON.value: ["encrypt"]
-        }
-    
-    def test_extract_technique_and_pii_dict_with_type_field(self):
-        """Test _extract_technique_and_pii with dict containing 'type' field."""
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(
-            {
-                "technique": {
-                    "type": "encrypt",
-                    "pii": [PIIEntityEnum.EMAIL.value],
-                    "key_name": "test_key"
-                }
-            }
-        )
-        assert technique_type == "encrypt"
-        assert piis == [PIIEntityEnum.EMAIL.value]
-    
-    def test_extract_technique_and_pii_dict_with_variant_mapping(self):
-        """Test _extract_technique_and_pii with variant-key mapping."""
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(
-            {
-                "technique": {
-                    "hash": {
-                        "pii": [PIIEntityEnum.PERSON.value],
-                        "algorithm": "sha256"
-                    }
-                }
-            }
-        )
-        assert technique_type == "hash"
-        assert piis == [PIIEntityEnum.PERSON.value]
-    
-    def test_extract_technique_and_pii_dict_fallback_to_columns(self):
-        """Test _extract_technique_and_pii fallback to 'columns' key when 'pii' is missing."""
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(
-            {
-                "technique": {
-                    "type": "redact",
-                    "columns": ["fallback_col"]
-                }
-            }
-        )
-        assert technique_type == "redact"
-        assert piis == ["fallback_col"]
-    
-    def test_extract_technique_and_pii_model_instance(self):
-        """Test _extract_technique_and_pii with model instance."""
-        pseudo_tech = UnstructuredPseudoTechniqueConfig(
-            technique=UnstructuredRedactConfig(
-                pii=[PIIEntityEnum.EMAIL.value]
-            )
-        )
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(pseudo_tech)
-        assert technique_type == "redact"
-        assert piis == [PIIEntityEnum.EMAIL.value]
-    
-    def test_extract_technique_and_pii_model_with_getattr_fallback(self):
-        """Test _extract_technique_and_pii model with getattr fallback to columns."""
-        # Create a mock-like scenario where pii attribute doesn't exist
-        pseudo_tech = UnstructuredPseudoTechniqueConfig(
-            technique=RetainConfig(pii=[PIIEntityEnum.PERSON.value])
-        )
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(pseudo_tech)
-        assert technique_type == "retain"
-        assert piis == [PIIEntityEnum.PERSON.value]
-    
-    def test_extract_technique_and_pii_empty_dict(self):
-        """Test _extract_technique_and_pii with empty dict."""
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(
-            {"technique": {}}
-        )
-        assert technique_type is None
-        assert piis == []
-    
-    def test_extract_technique_and_pii_missing_pii_key(self):
-        """Test _extract_technique_and_pii when 'pii' key is missing."""
-        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
-        technique_type, piis = config._extract_technique_and_pii(
-            {
-                "technique": {
-                    "type": "encrypt",
-                    "key_name": "test_key"
-                }
-            }
-        )
-        assert technique_type == "encrypt"
-        assert piis == []
-
-
-class TestUnstructuredDepseudonymizeConfig:
-    """Tests for DepseudonymizeUnstructuredConfig."""
-    
-    def test_depseudonymize_unstructured_config_default(self):
-        """Test default DepseudonymizeUnstructuredConfig."""
-        config = DepseudonymizeUnstructuredConfig()
-        assert config is not None
-        assert len(config.used_function) >= 1
-    
-    def test_depseudonymize_unstructured_config_with_custom_function(self):
-        """Test DepseudonymizeUnstructuredConfig with custom function."""
-        config = DepseudonymizeUnstructuredConfig(
-            used_function=[
-                UnstructuredDepseudoTechniqueConfig(
-                    technique=UnstructuredDecryptConfig(
-                        key_name="custom_key"
-                    )
-                )
-            ]
-        )
-        assert len(config.used_function) == 1
-        assert config.used_function[0].technique.key_name == "custom_key"
-
-
-class TestLanguageSupport:
-    """Tests for language configuration support."""
-    
-    def test_all_supported_languages(self):
-        """Test that all supported languages can be set."""
-        supported_languages = [
-            LanguageEnum.hr, LanguageEnum.da, LanguageEnum.nl, LanguageEnum.en,
-            LanguageEnum.fi, LanguageEnum.fr, LanguageEnum.de, LanguageEnum.el,
-            LanguageEnum.it, LanguageEnum.lt, LanguageEnum.pl, LanguageEnum.pt,
-            LanguageEnum.ro, LanguageEnum.sl, LanguageEnum.es, LanguageEnum.sv
-        ]
-        
-        for lang in supported_languages:
-            config = AnonymisePseudonymizeUnstructuredConfig(language=lang)
-            assert config.language == lang
-    
-    def test_default_language_is_english(self):
-        """Test that default language is English."""
-        config = AnonymisePseudonymizeUnstructuredConfig()
-        assert config.language == LanguageEnum.en
-
-
-class TestTechniqueConfigDefaults:
-    """Tests for technique config defaults."""
-    
-    def test_hash_config_default_algorithm(self):
-        """Test HashConfig default algorithm."""
-        config = HashConfig()
-        assert config.algorithm == "sha256"
-        assert config.type == "hash"
-    
-    def test_encrypt_config_defaults(self):
-        """Test EncryptConfig defaults."""
-        config = EncryptConfig()
-        assert config.type == "encrypt"
-        assert config.key_name == "my_key"
-    
-    def test_redact_config_defaults(self):
-        """Test RedactConfig defaults."""
-        config = RedactConfig()
-        assert config.type == "redact"
-    
-    def test_replace_config_defaults(self):
-        """Test ReplaceConfig defaults."""
-        config = ReplaceConfig()
-        assert config.type == "replace"
-        assert config.new_value == "REPLACED"
-    
-    def test_decrypt_config_defaults(self):
-        """Test DecryptConfig defaults."""
-        config = DecryptConfig()
-        assert config.type == "decrypt"
-        assert config.key_name == "my_key"
-    
-    def test_unstructured_retain_config_defaults(self):
-        """Test RetainConfig defaults."""
-        config = RetainConfig()
-        assert config.type == "retain"
-
-
-class TestPseudoTechniqueConfigDefaults:
-    """Tests for PseudoTechniqueConfig defaults."""
-    
-    def test_pseudo_technique_default_to_hash(self):
-        """Test PseudoTechniqueConfig defaults to hash technique."""
-        config = PseudoTechniqueConfig()
-        # For Dagster Config, technique may be a dict with the discriminator structure
-        if isinstance(config.technique, dict):
-            # Check if it has hash configuration
-            assert "hash" in config.technique or config.technique.get("type") == "hash"
-        else:
-            assert config.technique.type == "hash"
-    
-    def test_unstructured_pseudo_technique_default_to_hash(self):
-        """Test UnstructuredPseudoTechniqueConfig defaults to hash technique."""
-        config = UnstructuredPseudoTechniqueConfig()
-        # For Dagster Config, technique may be a dict with the discriminator structure
-        if isinstance(config.technique, dict):
-            # Check if it has hash configuration
-            assert "hash" in config.technique or config.technique.get("type") == "hash"
-        else:
-            assert config.technique.type == "hash"
-
-
-class TestConfigModelIntegration:
-    """Integration tests for config models."""
-    
-    def test_structured_config_with_all_technique_types(self):
-        """Test structured config with all technique types."""
-        config = AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=HashConfig(columns=["col1"])
-                ),
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(columns=["col2"], key_name="k1")
-                ),
-                PseudoTechniqueConfig(
-                    technique=RedactConfig(columns=["col3"])
-                ),
-                PseudoTechniqueConfig(
-                    technique=ReplaceConfig(columns=["col4"], new_value="X")
-                )
-            ]
-        )
-        assert len(config.used_function) == 4
-        techniques = {f.technique.type for f in config.used_function}
-        assert techniques == {"hash", "encrypt", "redact", "replace"}
-    
-    def test_unstructured_config_with_all_technique_types(self):
-        """Test unstructured config with all technique types."""
-        config = AnonymisePseudonymizeUnstructuredConfig(
-            language=LanguageEnum.en,
-            used_function=[
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredHashConfig(pii=[PIIEntityEnum.EMAIL.value])
-                ),
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredEncryptConfig(
-                        pii=[PIIEntityEnum.PERSON.value],
-                        key_name="k1"
-                    )
-                ),
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredRedactConfig(pii=[PIIEntityEnum.PHONE_NUMBERS.value])
-                ),
-                UnstructuredPseudoTechniqueConfig(
-                    technique=UnstructuredReplaceConfig(
-                        pii=[PIIEntityEnum.CREDIT_CARD.value],
-                        new_value="X"
-                    )
-                ),
-                UnstructuredPseudoTechniqueConfig(
-                    technique=RetainConfig(pii=[PIIEntityEnum.DATE_OF_BIRTH.value])
-                )
-            ]
-        )
-        assert len(config.used_function) == 5
-        techniques = {f.technique.type for f in config.used_function}
-        assert techniques == {"hash", "encrypt", "redact", "replace", "retain"}
diff --git a/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py b/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
deleted file mode 100644
index 9ed013a..0000000
--- a/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
+++ /dev/null
@@ -1,1090 +0,0 @@
-"""
-Test suite for data restoration (depseudonymization) operations.
-
-This test suite validates the data restoration feature against the following Acceptance Criteria:
-
-## Test Coverage Summary
-
-### Acceptance Criteria Coverage:
-- AC1 (Data Restoration with Valid Key): 7 tests
-- AC2 (Restoration Denial - Missing Key): 3 tests
-- AC3 (Restoration Denial - Unauthorized Access): 2 tests
-- AC4 (Restoration Denial - Invalid Key): 3 tests
-- Additional Coverage: 3 tests
-
-### Test Pattern:
-- Each test uses build_op_context with .model_dump() for configuration
-- Tests validate dual outputs (data, metrics)
-- Tests verify complete restoration of original values
-- Tests validate security controls and error handling
-
-"""
-
-import pandas as pd
-import pytest
-from cryptography.fernet import Fernet
-
-from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
-    AnonymisePseudonymizeStructuredConfig,
-    DepseudonymizeStructuredConfig,
-    EncryptConfig,
-    DecryptConfig,
-    PseudoTechniqueConfig,
-    DepseudoTechniqueConfig,
-)
-
-# Import helper functions (fixtures are auto-discovered by pytest)
-from .conftest import (
-    run_encrypt_op,
-    run_decrypt_op,
-    clear_vault_key,
-    set_vault_key,
-    deny_vault_access,
-    get_vault_key,
-)
-
-
-# -------------------------------- Test Markers Configuration --------------------------------
-
-# Register custom markers
-pytest.mark.slow = pytest.mark.slow
-pytest.mark.security = pytest.mark.security
-pytest.mark.edge_case = pytest.mark.edge_case
-pytest.mark.integration = pytest.mark.integration
-
-
-# ---------------------- AC1: Data Restoration with Valid Key --------------------------------
-
-
-def test_ac1_restore_single_encrypted_field_with_valid_key(
-    sample_df, encrypt_config_single_field, decrypt_config_single_field
-):
-    """
-    AC1: Data Restoration using Secret Management Tool-Stored Decryption Key
-
-    Scenario: Restore encrypted field with a valid key
-    Given: A pseudonymised dataset with encrypted email field
-    And: A valid decryption key stored in secret management tool
-    And: The participant provided the field that needs to be restored (email)
-    And: The participant is authorized
-    When: The participant requests data restoration
-    And: Provides the correct key name
-    Then: The system retrieves the key from secret management tool
-    And: Decrypts the dataset accurately
-    And: All original values are restored
-    And: A success message is presented to the user (via successful return)
-    And: The result is presented to the user
-    """
-    # Clear any existing test key
-    clear_vault_key("test_restoration_key_single")
-
-    # Step 1: Encrypt the data (pseudonymisation phase)
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Verify encryption occurred
-    assert not encrypted_df["email"].equals(sample_df["email"]), "Email field should be encrypted"
-
-    # Verify key was created in Vault
-    key = get_vault_key("test_restoration_key_single")
-    assert key is not None, "Encryption key should exist in Vault"
-
-    # Step 2: Restore the data (depseudonymisation phase)
-    restored_df, metrics = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
-
-    # Verify restoration succeeded
-    assert restored_df is not None, "Restored DataFrame should not be None"
-    assert metrics is not None, "Metrics should not be None"
-
-    # Verify all original values are restored exactly
-    assert restored_df["email"].equals(
-        sample_df["email"]
-    ), "Email field should be restored to original values"
-
-    # Verify each individual value
-    for idx, (original, restored) in enumerate(zip(sample_df["email"], restored_df["email"])):
-        assert (
-            original == restored
-        ), f"Row {idx}: Original '{original}' should match restored '{restored}'"
-
-    # Verify row count preserved
-    assert len(restored_df) == len(sample_df), "Row count should be preserved during restoration"
-
-    # Verify non-encrypted columns remain unchanged
-    assert restored_df["name"].equals(
-        sample_df["name"]
-    ), "Non-encrypted fields should remain unchanged"
-    assert restored_df["age"].equals(
-        sample_df["age"]
-    ), "Non-encrypted fields should remain unchanged"
-    assert restored_df["department"].equals(
-        sample_df["department"]
-    ), "Non-encrypted fields should remain unchanged"
-
-
-def test_ac1_restore_multiple_encrypted_fields_with_valid_key(
-    sample_df, encrypt_config_multiple_fields, decrypt_config_multiple_fields
-):
-    """
-    AC1: Data Restoration of multiple encrypted fields with a valid key
-
-    Scenario: Restore multiple encrypted fields (name, email, ssn) with a valid key
-    Given: A pseudonymised dataset with multiple encrypted fields
-    And: A valid decryption key stored in secret management tool
-    And: The participant provided the fields that need to be restored
-    When: The participant requests data restoration
-    Then: All specified fields are decrypted accurately
-    And: All original values are restored
-    """
-    clear_vault_key("test_restoration_key_multi")
-
-    # Encrypt multiple fields
-    encrypted_df, _ = run_encrypt_op(encrypt_config_multiple_fields, sample_df.copy())
-
-    # Verify all specified fields were encrypted
-    assert not encrypted_df["name"].equals(sample_df["name"]), "Name should be encrypted"
-    assert not encrypted_df["email"].equals(sample_df["email"]), "Email should be encrypted"
-    assert not encrypted_df["ssn"].equals(sample_df["ssn"]), "SSN should be encrypted"
-
-    # Restore all encrypted fields
-    restored_df, _ = run_decrypt_op(decrypt_config_multiple_fields, encrypted_df.copy())
-
-    # Verify all fields restored to original values
-    assert restored_df["name"].equals(
-        sample_df["name"]
-    ), "Name field should be restored to original values"
-    assert restored_df["email"].equals(
-        sample_df["email"]
-    ), "Email field should be restored to original values"
-    assert restored_df["ssn"].equals(
-        sample_df["ssn"]
-    ), "SSN field should be restored to original values"
-
-    # Verify non-encrypted columns remain unchanged
-    assert restored_df["age"].equals(
-        sample_df["age"]
-    ), "Non-encrypted fields should remain unchanged"
-    assert restored_df["salary"].equals(
-        sample_df["salary"]
-    ), "Non-encrypted fields should remain unchanged"
-
-
-def test_ac1_restore_partial_fields_leaves_others_encrypted(
-    sample_df, encrypt_config_multiple_fields
-):
-    """
-    AC1: Partial restoration - participant specifies only some fields to restore
-
-    Scenario: Restore only selected fields while leaving others encrypted
-    Given: A pseudonymised dataset with multiple encrypted fields (name, email, ssn)
-    And: The participant specifies only some fields to restore (e.g., only email)
-    When: The participant requests partial restoration
-    Then: Only the specified fields are decrypted
-    And: Other encrypted fields remain encrypted
-    """
-    clear_vault_key("test_restoration_key_multi")
-
-    # Encrypt multiple fields
-    encrypted_df, _ = run_encrypt_op(encrypt_config_multiple_fields, sample_df.copy())
-
-    # Create config to restore only email field
-    partial_decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],  # Only restore email
-                    key_name="test_restoration_key_multi",
-                )
-            )
-        ]
-    )
-
-    # Restore only email field
-    restored_df, _ = run_decrypt_op(partial_decrypt_config, encrypted_df.copy())
-
-    # Verify email is restored
-    assert restored_df["email"].equals(
-        sample_df["email"]
-    ), "Email field should be restored to original values"
-
-    # Verify other fields remain encrypted (different from original)
-    assert not restored_df["name"].equals(sample_df["name"]), "Name field should remain encrypted"
-    assert not restored_df["ssn"].equals(sample_df["ssn"]), "SSN field should remain encrypted"
-
-
-def test_ac1_restore_preserves_data_types(sample_df):
-    """
-    AC1: Data restoration preserves original data types for all fields
-
-    Scenario: Restore encrypted numeric and string fields
-    Given: A dataset with mixed data types (strings, integers, floats)
-    When: Fields are encrypted and then restored
-    Then: Original data types are preserved after restoration
-    """
-    # Create config to encrypt mixed types
-    encrypt_config = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    columns=["name", "age", "salary"],
-                    key_name="test_restoration_types",
-                )
-            )
-        ]
-    )
-
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["name", "age", "salary"],
-                    key_name="test_restoration_types",
-                )
-            )
-        ]
-    )
-
-    clear_vault_key("test_restoration_types")
-
-    # Encrypt and restore
-    encrypted_df, _ = run_encrypt_op(encrypt_config, sample_df.copy())
-    restored_df, _ = run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    # Verify values are restored (as strings due to encryption/decryption)
-    # Note: Fernet encryption/decryption converts everything to strings
-    # This is expected behavior - original types are preserved via string representation
-    assert (
-        restored_df["name"].tolist() == sample_df["name"].tolist()
-    ), "String values should be restored"
-    assert (
-        restored_df["age"].tolist() == sample_df["age"].astype(str).tolist()
-    ), "Integer values should be restored as strings"
-    assert (
-        restored_df["salary"].tolist() == sample_df["salary"].astype(str).tolist()
-    ), "Float values should be restored as strings"
-
-
-def test_ac1_restore_empty_dataframe(encrypt_config_single_field, decrypt_config_single_field):
-    """
-    AC1: Edge case - restore an empty dataset
-
-    Scenario: Attempt to restore an empty pseudonymised dataset
-    Given: An empty DataFrame with correct schema
-    When: Restoration is attempted
-    Then: Operation completes successfully without errors
-    And: Returns an empty DataFrame
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Create empty DataFrame with same schema
-    empty_df = pd.DataFrame(columns=["id", "name", "email", "ssn", "age", "salary", "department"])
-
-    # Encrypt (should handle empty DataFrame)
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, empty_df.copy())
-
-    # Restore (should also handle empty DataFrame)
-    restored_df, metrics = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
-
-    # Verify empty DataFrame returned
-    assert len(restored_df) == 0, "Restored DataFrame should be empty"
-    assert list(restored_df.columns) == list(empty_df.columns), "Column schema should be preserved"
-
-
-def test_ac1_restore_with_special_characters(
-    encrypt_config_single_field, decrypt_config_single_field
-):
-    """
-    AC1: Data restoration with special characters and edge case values
-
-    Scenario: Restore data containing special characters, unicode, etc.
-    Given: A dataset with special characters in string fields
-    When: Data is encrypted and then restored
-    Then: All special characters are preserved accurately
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Create DataFrame with special characters
-    special_df = pd.DataFrame(
-        {
-            "id": [1, 2, 3, 4],
-            "name": ["José García", "François Müller", "李明", "O'Brien"],
-            "email": [
-                "josé@example.com",
-                "françois@example.com",
-                "li@example.cn",
-                "o'brien@example.ie",
-            ],
-            "ssn": ["123-45-6789", "234-56-7890", "345-67-8901", "456-78-9012"],
-            "age": [25, 30, 35, 40],
-            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
-            "department": ["HR", "IT", "Finance", "IT"],
-        }
-    )
-
-    # Encrypt and restore
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, special_df.copy())
-    restored_df, _ = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
-
-    # Verify special characters preserved
-    assert restored_df["email"].equals(
-        special_df["email"]
-    ), "Special characters should be preserved during restoration"
-
-    for idx, (original, restored) in enumerate(zip(special_df["email"], restored_df["email"])):
-        assert (
-            original == restored
-        ), f"Row {idx}: Special characters in '{original}' should be preserved"
-
-
-# ------------------- AC2: Restoration Denial when Key is Missing ----------------------------
-
-
-def test_ac2_restore_fails_when_key_missing(sample_df, encrypt_config_single_field):
-    """
-    AC2: Restoration Denial when Decryption Key is missing
-
-    Scenario: Attempt to restore encrypted fields when decryption key is missing
-    Given: A pseudonymised dataset
-    And: The decryption key is missing from Vault
-    And: The participant provides the correct key name
-    When: The participant attempts to restore the data
-    Then: The system fails the restoration request
-    And: Logs the failed key retrieval for auditing (via exception)
-    And: An error message is presented to the user
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt data first
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Delete the key from Vault to simulate missing key
-    clear_vault_key("test_restoration_key_single")
-
-    # Create decrypt config with missing key
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-    # Attempt restoration - should fail with clear error
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    # Verify error message is informative
-    error_message = str(exc_info.value)
-    assert (
-        "not found" in error_message.lower() or "decrypt" in error_message.lower()
-    ), "Error message should indicate key not found for decrypt operation"
-    assert (
-        "test_restoration_key_single" in error_message
-    ), "Error message should include the key name for auditing"
-
-
-def test_ac2_restore_fails_with_nonexistent_key_name(sample_df, encrypt_config_single_field):
-    """
-    AC2: Restoration fails when using a key name that never existed
-
-    Scenario: Attempt to restore with a key name that was never created
-    Given: A pseudonymised dataset
-    And: A key name that does not exist in Vault
-    When: The participant attempts to restore the data
-    Then: The system fails the restoration request with appropriate error
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt data with one key
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Try to decrypt with a different, non-existent key
-    decrypt_config_wrong_key = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt", columns=["email"], key_name="nonexistent_key_name"
-                )
-            )
-        ]
-    )
-
-    # Attempt restoration - should fail
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config_wrong_key, encrypted_df.copy())
-
-    error_message = str(exc_info.value)
-    assert "not found" in error_message.lower(), "Error message should indicate key not found"
-
-
-def test_ac2_restore_fails_when_key_corrupted(sample_df, encrypt_config_single_field):
-    """
-    AC2: Restoration Denial when Decryption Key is corrupted
-
-    Scenario: Attempt to restore when key is corrupted in Vault
-    Given: A pseudonymised dataset
-    And: The decryption key is corrupted (invalid format)
-    When: The participant attempts to restore the data
-    Then: The system fails the restoration request
-    And: An appropriate error message is presented
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt data first
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Corrupt the key by replacing it with invalid data
-    set_vault_key("test_restoration_key_single", "corrupted_invalid_key_data")
-
-    # Create decrypt config
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-    # Attempt restoration - should fail due to corrupted key
-    with pytest.raises(Exception) as exc_info:
-        run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    # Should raise either ValueError or Fernet-related exception
-    assert "Fernet" in str(type(exc_info.value)) or "ValueError" in str(
-        type(exc_info.value)
-    ), "Should raise Fernet or ValueError for corrupted key"
-
-
-# ------------- AC3: Restoration Denial when Access is Unauthorized --------------------------
-
-
-def test_ac3_restore_fails_when_access_unauthorized(sample_df, encrypt_config_single_field):
-    """
-    AC3: Restoration Denial when Decryption Key access is unauthorized
-
-    Scenario: Attempt to restore encrypted fields without authorization
-    Given: A pseudonymised dataset
-    And: A decryption key in secret management tool
-    And: The participant is not authorized to access the key
-    When: The participant attempts to restore the data
-    Then: The system denies the participant access to the key
-    And: The system denies the initiation of the restoration process
-    And: The system logs the unauthorized access attempt (via exception)
-    And: An appropriate error message is presented to the user
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt data first
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Set access control to deny access
-    deny_vault_access("test_restoration_key_single")
-
-    # Create decrypt config
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-    # Attempt restoration - should fail with ValueError (wrapping Forbidden)
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    # Verify error indicates access denial
-    error_message = str(exc_info.value)
-    assert (
-        "access denied" in error_message.lower() or "error while reading" in error_message.lower()
-    ), "Error message should indicate access denial or error reading key"
-    assert (
-        "test_restoration_key_single" in error_message
-    ), "Error message should include the key name for auditing"
-
-
-def test_ac3_restore_multiple_keys_with_mixed_authorization(sample_df):
-    """
-    AC3: Restoration with mixed authorization - some keys authorized, others not
-
-    Scenario: Attempt to restore multiple fields where user has access to some keys but not others
-    Given: A pseudonymised dataset with multiple encrypted fields using different keys
-    And: The participant is authorized for some keys but not others
-    When: The participant attempts to restore all fields
-    Then: The system denies access when unauthorized key is encountered
-    """
-    # Encrypt email with one key, ssn with another
-    encrypt_config_multi_keys = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["email"], key_name="authorized_key"
-                )
-            )
-        ]
-    )
-
-    clear_vault_key("authorized_key")
-    clear_vault_key("unauthorized_key")
-
-    # Encrypt data
-    encrypted_df, _ = run_encrypt_op(encrypt_config_multi_keys, sample_df.copy())
-
-    # Manually encrypt another field with different key (simulating separate encryption)
-    encrypt_config_ssn = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["ssn"], key_name="unauthorized_key"
-                )
-            )
-        ]
-    )
-    encrypted_df, _ = run_encrypt_op(encrypt_config_ssn, encrypted_df.copy())
-
-    # Deny access to unauthorized_key
-    deny_vault_access("unauthorized_key")
-
-    # Try to decrypt both fields
-    decrypt_config_both = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt", columns=["email"], key_name="authorized_key"
-                )
-            ),
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt", columns=["ssn"], key_name="unauthorized_key"
-                )
-            ),
-        ]
-    )
-
-    # Should fail when trying to access unauthorized_key with ValueError (wrapping Forbidden)
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config_both, encrypted_df.copy())
-
-    # Verify error indicates access issue with unauthorized key
-    error_message = str(exc_info.value)
-    assert (
-        "access denied" in error_message.lower() or "error while reading" in error_message.lower()
-    ), "Error message should indicate access denial"
-    assert "unauthorized_key" in error_message, "Error message should mention the unauthorized key"
-
-
-# ------------------- AC4: Restoration Denial when Key is Invalid ----------------------------
-
-
-def test_ac4_restore_fails_with_wrong_key(sample_df):
-    """
-    AC4: Restoration Denial when Decryption Key is invalid
-
-    Scenario: Attempt to restore encrypted fields with a key that doesn't match the encryption key
-    Given: A pseudonymised dataset encrypted with key A
-    And: A different valid decryption key B is stored in secret management tool
-    And: The participant provides key B (which is not the correct key)
-    And: Key B does not correspond to the fields to be restored
-    When: The participant attempts to restore the data
-    Then: The system fails the restoration request
-    And: Logs the failed decryption attempt for auditing (via exception)
-    And: An error message is presented to the user
-    """
-    # Encrypt with one key
-    encrypt_config_key_a = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["email"], key_name="encryption_key_a"
-                )
-            )
-        ]
-    )
-
-    clear_vault_key("encryption_key_a")
-    clear_vault_key("encryption_key_b")
-
-    # Encrypt data with key A
-    encrypted_df, _ = run_encrypt_op(encrypt_config_key_a, sample_df.copy())
-
-    # Generate a different valid key B in Vault
-    different_key = Fernet.generate_key().decode()
-    set_vault_key("encryption_key_b", different_key)
-
-    # Try to decrypt with key B (wrong key)
-    decrypt_config_key_b = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt", columns=["email"], key_name="encryption_key_b"
-                )
-            )
-        ]
-    )
-
-    # Attempt restoration - should fail with InvalidToken or ValueError
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config_key_b, encrypted_df.copy())
-
-    # Verify error message indicates decryption failure
-    error_message = str(exc_info.value)
-    assert (
-        "invalid" in error_message.lower() or "token" in error_message.lower()
-    ), "Error message should indicate invalid token or decryption failure"
-    assert (
-        "encryption_key_b" in error_message
-    ), "Error message should include the key name for auditing"
-
-
-def test_ac4_restore_fails_with_key_from_different_field(sample_df):
-    """
-    AC4: Restoration fails when using a key intended for a different field
-
-    Scenario: Attempt to restore field A using the key for field B
-    Given: A dataset with multiple fields encrypted with different keys
-    And: The participant provides the key for field B to decrypt field A
-    When: The participant attempts to restore field A
-    Then: The system fails the restoration request
-    """
-    # Encrypt email and ssn with different keys
-    encrypt_config_email = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(type="encrypt", columns=["email"], key_name="email_key")
-            )
-        ]
-    )
-
-    encrypt_config_ssn = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(type="encrypt", columns=["ssn"], key_name="ssn_key")
-            )
-        ]
-    )
-
-    clear_vault_key("email_key")
-    clear_vault_key("ssn_key")
-
-    # Encrypt both fields
-    encrypted_df, _ = run_encrypt_op(encrypt_config_email, sample_df.copy())
-    encrypted_df, _ = run_encrypt_op(encrypt_config_ssn, encrypted_df.copy())
-
-    # Try to decrypt email field using ssn_key
-    decrypt_config_wrong_field = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],  # Trying to decrypt email
-                    key_name="ssn_key",  # But using ssn's key
-                )
-            )
-        ]
-    )
-
-    # Should fail with InvalidToken
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config_wrong_field, encrypted_df.copy())
-
-    error_message = str(exc_info.value)
-    assert (
-        "invalid" in error_message.lower() or "token" in error_message.lower()
-    ), "Error message should indicate invalid token"
-
-
-def test_ac4_restore_fails_with_tampered_encrypted_data(sample_df, encrypt_config_single_field):
-    """
-    AC4: Restoration fails when encrypted data has been tampered with
-
-    Scenario: Attempt to restore encrypted data that has been modified
-    Given: A pseudonymised dataset
-    And: Some encrypted values have been tampered with
-    And: The correct decryption key is provided
-    When: The participant attempts to restore the data
-    Then: The system fails the restoration for tampered values
-    And: An appropriate error message is presented
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt data
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Tamper with encrypted data (modify one encrypted value)
-    encrypted_df.loc[0, "email"] = "tampered_invalid_encrypted_data"
-
-    # Create decrypt config
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-    # Attempt restoration - should fail on tampered data
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    error_message = str(exc_info.value)
-    assert (
-        "invalid" in error_message.lower() or "token" in error_message.lower()
-    ), "Error message should indicate invalid token due to tampering"
-
-
-# ---------------- Additional Edge Cases and Integration Tests -------------------------------
-
-
-def test_integration_full_cycle_encrypt_decrypt_multiple_operations(sample_df):
-    """
-    Integration test: Full cycle of multiple encrypt/decrypt operations
-
-    Scenario: Complex workflow with multiple encryption and restoration operations
-    Given: A dataset
-    When: Multiple fields are encrypted at different times
-    And: Fields are restored in different orders
-    Then: All operations complete successfully
-    And: Final restored data matches original
-    """
-    # Phase 1: Encrypt email
-    encrypt_config_1 = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(type="encrypt", columns=["email"], key_name="key_1")
-            )
-        ]
-    )
-    clear_vault_key("key_1")
-    encrypted_df_1, _ = run_encrypt_op(encrypt_config_1, sample_df.copy())
-
-    # Phase 2: Encrypt name and ssn
-    encrypt_config_2 = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(type="encrypt", columns=["name", "ssn"], key_name="key_2")
-            )
-        ]
-    )
-    clear_vault_key("key_2")
-    encrypted_df_2, _ = run_encrypt_op(encrypt_config_2, encrypted_df_1.copy())
-
-    # Phase 3: Restore email first
-    decrypt_config_1 = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", columns=["email"], key_name="key_1")
-            )
-        ]
-    )
-    restored_df_1, _ = run_decrypt_op(decrypt_config_1, encrypted_df_2.copy())
-    assert restored_df_1["email"].equals(sample_df["email"]), "Email should be restored"
-
-    # Phase 4: Restore name and ssn
-    decrypt_config_2 = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", columns=["name", "ssn"], key_name="key_2")
-            )
-        ]
-    )
-    restored_df_2, _ = run_decrypt_op(decrypt_config_2, restored_df_1.copy())
-
-    # Verify all fields restored
-    assert restored_df_2["email"].equals(sample_df["email"]), "Email should remain restored"
-    assert restored_df_2["name"].equals(sample_df["name"]), "Name should be restored"
-    assert restored_df_2["ssn"].equals(sample_df["ssn"]), "SSN should be restored"
-
-
-def test_restore_with_null_values(encrypt_config_single_field, decrypt_config_single_field):
-    """
-    Edge case: Restoration of dataset with null/NaN values
-
-    Scenario: Dataset contains null values in encrypted fields
-    Given: A dataset with null values in fields to be encrypted
-    When: Data is encrypted and then restored
-    Then: Null values are handled appropriately
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Create DataFrame with null values
-    df_with_nulls = pd.DataFrame(
-        {
-            "id": [1, 2, 3, 4],
-            "name": ["Alice", "Bob", None, "David"],
-            "email": [
-                "alice@example.com",
-                None,
-                "charlie@example.com",
-                "david@example.com",
-            ],
-            "ssn": ["123-45-6789", "234-56-7890", "345-67-8901", None],
-            "age": [25, 30, 35, 40],
-            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
-            "department": ["HR", "IT", "Finance", "IT"],
-        }
-    )
-
-    # Note: Encryption of NaN/None values will convert them to string "nan" or "None"
-    # This is expected behavior - Fernet encryption requires string input
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, df_with_nulls.copy())
-    restored_df, _ = run_decrypt_op(decrypt_config_single_field, encrypted_df.copy())
-
-    # Verify non-null values are restored correctly
-    assert restored_df.loc[0, "email"] == "alice@example.com"
-    assert restored_df.loc[2, "email"] == "charlie@example.com"
-    assert restored_df.loc[3, "email"] == "david@example.com"
-
-
-def test_restore_large_dataset_performance():
-    """
-    Performance test: Restoration of large dataset
-
-    Scenario: Restore a large dataset with many rows
-    Given: A large dataset with 10,000 rows
-    When: Data is encrypted and then restored
-    Then: Operation completes without errors or timeout
-    And: All values are restored correctly
-    """
-    # Create large dataset
-    large_df = pd.DataFrame(
-        {
-            "id": range(1, 10001),
-            "email": [f"user{i}@example.com" for i in range(1, 10001)],
-            "name": [f"User {i}" for i in range(1, 10001)],
-            "ssn": [f"{i:03d}-{i:02d}-{i:04d}" for i in range(1, 10001)],
-            "age": [20 + (i % 50) for i in range(1, 10001)],
-            "salary": [30000 + (i * 10) for i in range(1, 10001)],
-            "department": [["HR", "IT", "Finance", "Sales"][i % 4] for i in range(1, 10001)],
-        }
-    )
-
-    encrypt_config = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["email"], key_name="test_large_dataset"
-                )
-            )
-        ]
-    )
-
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt", columns=["email"], key_name="test_large_dataset"
-                )
-            )
-        ]
-    )
-
-    clear_vault_key("test_large_dataset")
-
-    # Encrypt and restore
-    encrypted_df, _ = run_encrypt_op(encrypt_config, large_df.copy())
-    restored_df, _ = run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    # Verify sample of values
-    assert len(restored_df) == 10000, "Should restore all 10,000 rows"
-    assert restored_df["email"].equals(large_df["email"]), "All emails should be restored"
-
-    # Spot check specific values
-    assert restored_df.loc[0, "email"] == "user1@example.com"
-    assert restored_df.loc[5000, "email"] == "user5001@example.com"
-    assert restored_df.loc[9999, "email"] == "user10000@example.com"
-
-
-@pytest.mark.edge_case
-@pytest.mark.security
-def test_restore_after_key_rotation(sample_df, encrypt_config_single_field):
-    """
-    AC4: Restoration fails after key rotation (key changed in Vault)
-
-    Scenario: Key is rotated in Vault after encryption
-    Given: Data encrypted with key version 1
-    And: Key is rotated to version 2 in Vault
-    When: Participant attempts to restore using new key version
-    Then: Restoration fails with clear error message
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt with original key
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Rotate key (replace with new key)
-    new_key = Fernet.generate_key().decode()
-    set_vault_key("test_restoration_key_single", new_key)
-
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-    # Should fail - key mismatch
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    assert (
-        "invalid" in str(exc_info.value).lower() or "decrypt" in str(exc_info.value).lower()
-    ), "Should indicate invalid token due to key rotation"
-
-
-@pytest.mark.edge_case
-def test_restore_partially_encrypted_column(sample_df, encrypt_config_single_field):
-    """
-    Edge case: Attempt to restore column where only some rows are encrypted
-
-    Scenario: Column has mixed encrypted/plaintext values (data corruption scenario)
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # Encrypt data
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Corrupt by replacing some encrypted values with plaintext
-    encrypted_df.loc[0, "email"] = "plaintext@example.com"
-    encrypted_df.loc[2, "email"] = "another_plaintext@example.com"
-
-    decrypt_config = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(
-                    type="decrypt",
-                    columns=["email"],
-                    key_name="test_restoration_key_single",
-                )
-            )
-        ]
-    )
-
-    # Should fail on plaintext values
-    with pytest.raises(ValueError) as exc_info:
-        run_decrypt_op(decrypt_config, encrypted_df.copy())
-
-    assert (
-        "invalid" in str(exc_info.value).lower() or "decrypt" in str(exc_info.value).lower()
-    ), "Should indicate invalid token for plaintext values"
-
-
-@pytest.mark.edge_case
-def test_restore_with_missing_column_in_encrypted_data(
-    sample_df, encrypt_config_single_field, decrypt_config_single_field
-):
-    """
-    AC2: Restoration fails when specified column doesn't exist in encrypted dataset
-    """
-    clear_vault_key("test_restoration_key_single")
-
-    # First encrypt the sample data to create the key
-    encrypted_df, _ = run_encrypt_op(encrypt_config_single_field, sample_df.copy())
-
-    # Create encrypted DataFrame missing the 'email' column
-    incomplete_df = pd.DataFrame(
-        {
-            "id": [1, 2, 3],
-            "name": ["Alice", "Bob", "Charlie"],
-            # Missing 'email' column that decrypt config expects
-            "age": [25, 30, 35],
-            "salary": [50000.0, 60000.0, 70000.0],
-            "department": ["HR", "IT", "Finance"],
-        }
-    )
-
-    with pytest.raises((ValueError, KeyError)) as exc_info:
-        run_decrypt_op(decrypt_config_single_field, incomplete_df)
-
-    error_msg = str(exc_info.value)
-    assert (
-        "email" in error_msg or "not present" in error_msg or "not found" in error_msg
-    ), f"Error should indicate missing column, got: {error_msg}"
-
-
-@pytest.mark.integration
-def test_restore_with_multiple_encryption_keys(sample_df):
-    """
-    Integration test: Restore data encrypted with multiple different keys
-
-    Scenario: Different fields encrypted with different keys
-    Given: name encrypted with key_a, email encrypted with key_b
-    When: Participant provides both keys for restoration
-    Then: Both fields are restored correctly
-    """
-    clear_vault_key("key_a")
-    clear_vault_key("key_b")
-
-    # Encrypt name with key_a
-    encrypt_config_name = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(type="encrypt", columns=["name"], key_name="key_a")
-            )
-        ]
-    )
-
-    # Encrypt email with key_b
-    encrypt_config_email = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(type="encrypt", columns=["email"], key_name="key_b")
-            )
-        ]
-    )
-
-    # Encrypt both fields
-    df_encrypted = sample_df.copy()
-    df_encrypted, _ = run_encrypt_op(encrypt_config_name, df_encrypted)
-    df_encrypted, _ = run_encrypt_op(encrypt_config_email, df_encrypted)
-
-    # Decrypt name with key_a
-    decrypt_config_name = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", columns=["name"], key_name="key_a")
-            )
-        ]
-    )
-
-    # Decrypt email with key_b
-    decrypt_config_email = DepseudonymizeStructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", columns=["email"], key_name="key_b")
-            )
-        ]
-    )
-
-    # Restore both fields
-    df_restored = df_encrypted.copy()
-    df_restored, _ = run_decrypt_op(decrypt_config_name, df_restored)
-    df_restored, _ = run_decrypt_op(decrypt_config_email, df_restored)
-
-    # Verify both fields restored
-    assert df_restored["name"].equals(sample_df["name"]), "Name field should be restored with key_a"
-    assert df_restored["email"].equals(
-        sample_df["email"]
-    ), "Email field should be restored with key_b"
diff --git a/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py b/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
deleted file mode 100644
index 1ce8585..0000000
--- a/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
+++ /dev/null
@@ -1,288 +0,0 @@
-"""
-Test suite for data restoration (depseudonymisation) of unstructured text.
-
-## Test Coverage Summary
-
-### Acceptance Criteria Coverage:
-- AC1 (Data Restoration with Valid Key): 2 tests
-- AC2 (Restoration Denial - Missing Key): 1 test
-- AC3 (Restoration Denial - Unauthorized Access): 1 test
-- AC4 (Restoration Denial - Invalid Key): 1 test
-- Additional Coverage: 2 tests (edge cases)
-
-### Test Pattern:
-- Each test uses build_op_context with .model_dump() for configuration
-- Tests validate dual outputs (data, metrics)
-- Tests verify complete restoration of original text
-- Tests validate security controls and error handling
-- Tests use descriptive names mapping to AC scenarios
-
-"""
-
-import pytest
-from unittest.mock import patch
-from cryptography.fernet import Fernet
-from dagster import build_op_context
-
-from src.field_level_pseudo_anonymisation.unstructured_ops import (
-    depseudonymize_unstructured,
-)
-from src.field_level_pseudo_anonymisation.config_models.unstructured_config import (
-    DepseudonymizeUnstructuredConfig,
-    DecryptConfig,
-    DepseudoTechniqueConfig,
-)
-
-
-@pytest.fixture
-def fernet_key() -> bytes:
-    """Generate a valid Fernet key for encryption in tests."""
-    return Fernet.generate_key()
-
-
-@pytest.fixture
-def encrypted_text_data(fernet_key: bytes) -> dict:
-    """
-    Create encrypted data for testing decryption.
-
-    Returns a dict with:
-    - original_text: The unencrypted text
-    - encrypted_text: Text with PII values encrypted in {encrypt:...} format
-    """
-    original_text = "My name is John Doe and my email is john.doe@example.com."
-    fernet = Fernet(fernet_key)
-    encrypted_name = fernet.encrypt(b"John Doe").decode()
-    encrypted_email = fernet.encrypt(b"john.doe@example.com").decode()
-    encrypted_text = (
-        f"My name is {{encrypt:{encrypted_name}}} and my email is {{encrypt:{encrypted_email}}}."
-    )
-    return {
-        "original_text": original_text,
-        "encrypted_text": encrypted_text,
-    }
-
-
-# ---------------------- AC1: Data Restoration with Valid Key --------------------------------
-
-
-@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
-def test_ac1_restore_encrypted_pii_entities_with_valid_key(
-    mock_create_get_key, fernet_key: bytes, encrypted_text_data: dict
-):
-    """AC1: Restore encrypted PII entities with a valid key from secret management tool."""
-    # Arrange - Mock the Vault key retrieval to return the valid key
-    mock_create_get_key.return_value = fernet_key
-    config = DepseudonymizeUnstructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="test_key"))
-        ]
-    )
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act - Request data restoration
-    result_gen = depseudonymize_unstructured(
-        context, input_text=encrypted_text_data["encrypted_text"]
-    )
-    data_output = next(result_gen)
-    metrics_output = next(result_gen)
-
-    # Assert - Verify successful restoration
-    # 1. All original values are restored exactly
-    assert (
-        data_output.value == encrypted_text_data["original_text"]
-    ), "Original text should be fully restored"
-
-    # 2. Correct output structure
-    assert data_output.output_name == "data", "Output should be named 'data'"
-
-    # 3. Metrics show correct number of restored entities
-    assert (
-        metrics_output.value["total_depseudo_count"] == 2
-    ), "Should restore 2 encrypted entities (name and email)"
-
-    # 4. System retrieved key from secret management tool
-    mock_create_get_key.assert_called_once_with("decrypt", "test_key")
-
-
-@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
-def test_ac1_restore_multiple_pii_types_with_valid_key(mock_create_get_key, fernet_key: bytes):
-    """AC1: Restore multiple encrypted PII entity types (name, email, phone) with a valid key."""
-    # Arrange - Create text with multiple PII types encrypted
-    original_text = "Contact John Doe at john.doe@example.com or call 555-1234."
-    fernet = Fernet(fernet_key)
-    encrypted_name = fernet.encrypt(b"John Doe").decode()
-    encrypted_email = fernet.encrypt(b"john.doe@example.com").decode()
-    encrypted_phone = fernet.encrypt(b"555-1234").decode()
-    encrypted_text = (
-        f"Contact {{encrypt:{encrypted_name}}} at "
-        f"{{encrypt:{encrypted_email}}} or call {{encrypt:{encrypted_phone}}}."
-    )
-
-    mock_create_get_key.return_value = fernet_key
-    config = DepseudonymizeUnstructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", key_name="multi_pii_key")
-            )
-        ]
-    )
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act
-    result_gen = depseudonymize_unstructured(context, input_text=encrypted_text)
-    data_output = next(result_gen)
-    metrics_output = next(result_gen)
-
-    # Assert
-    assert data_output.value == original_text, "All PII types should be restored"
-    assert (
-        metrics_output.value["total_depseudo_count"] == 3
-    ), "Should restore 3 encrypted entities (name, email, phone)"
-    mock_create_get_key.assert_called_once_with("decrypt", "multi_pii_key")
-
-
-# ------------------- AC2: Restoration Denial when Key is Missing ----------------------------
-
-
-@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
-def test_ac2_restoration_denial_when_key_missing(mock_create_get_key, encrypted_text_data: dict):
-    """AC2: Deny restoration when decryption key is missing from secret management tool."""
-    # Arrange - Mock Vault to indicate key is missing
-    mock_create_get_key.side_effect = ValueError(
-        "Fernet key 'non_existent_key' not found in Vault for decrypt."
-    )
-    config = DepseudonymizeUnstructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", key_name="non_existent_key")
-            )
-        ]
-    )
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act & Assert - Verify system fails the restoration request
-    with pytest.raises(
-        ValueError,
-        match="Fernet key 'non_existent_key' not found in Vault for decrypt.",
-    ) as exc_info:
-        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
-
-    # Verify error message is clear and actionable
-    assert "not found in Vault" in str(
-        exc_info.value
-    ), "Error message should indicate key is missing from Vault"
-
-    # Verify system attempted to retrieve the key (logged attempt)
-    mock_create_get_key.assert_called_once_with("decrypt", "non_existent_key")
-
-
-# ------------- AC3: Restoration Denial when Access is Unauthorized --------------------------
-
-
-@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
-def test_ac3_restoration_denial_when_unauthorized_access(
-    mock_create_get_key, encrypted_text_data: dict
-):
-    """AC3: Deny restoration when participant is not authorized to access the decryption key."""
-    # Arrange - Mock Vault to deny access
-    mock_create_get_key.side_effect = ValueError("Access denied to secret: unauthorized_key")
-    config = DepseudonymizeUnstructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(
-                technique=DecryptConfig(type="decrypt", key_name="unauthorized_key")
-            )
-        ]
-    )
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act & Assert - Verify system denies access
-    with pytest.raises(ValueError, match="Access denied to secret: unauthorized_key") as exc_info:
-        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
-
-    # Verify error message clearly indicates access denial
-    assert "Access denied" in str(
-        exc_info.value
-    ), "Error message should clearly indicate access was denied"
-
-    # Verify the unauthorized access attempt was logged (function was called)
-    mock_create_get_key.assert_called_once_with("decrypt", "unauthorized_key")
-
-
-# ------------------- AC4: Restoration Denial when Key is Invalid ----------------------------
-
-
-@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
-def test_ac4_restoration_denial_when_key_invalid(mock_create_get_key, encrypted_text_data: dict):
-    """AC4: Deny restoration when decryption key does not correspond to the encrypted fields."""
-    # Arrange - Mock Vault to return a different (wrong) key
-    invalid_key = Fernet.generate_key()  # A different, incorrect key
-    mock_create_get_key.return_value = invalid_key
-    config = DepseudonymizeUnstructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="wrong_key"))
-        ]
-    )
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act & Assert - Verify system fails the restoration
-    with pytest.raises(ValueError, match="Invalid Fernet token") as exc_info:
-        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
-
-    # Verify error message indicates decryption failure
-    assert "Invalid Fernet token" in str(
-        exc_info.value
-    ), "Error message should indicate the key is invalid for this data"
-
-    # Verify key was retrieved (system attempted decryption)
-    mock_create_get_key.assert_called_once_with("decrypt", "wrong_key")
-
-
-# -------------------------------- Additional Edge Cases ----------------------------------------
-
-
-def test_depseudonymize_unstructured_no_decrypt_config():
-    """Edge case: Text is returned unchanged when no decryption techniques are configured."""
-    # Arrange
-    original_text = "This text has no {encrypt:values} to decrypt."
-    config = DepseudonymizeUnstructuredConfig(used_function=[])  # No techniques
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act
-    result_gen = depseudonymize_unstructured(context, input_text=original_text)
-    result_output = next(result_gen)
-    metrics_output = next(result_gen)
-
-    # Assert
-    assert (
-        result_output.value == original_text
-    ), "Text should remain unchanged when no decryption is configured"
-    assert (
-        metrics_output.value["total_depseudo_count"] == 0
-    ), "Should report zero decryptions performed"
-
-
-def test_depseudonymize_unstructured_empty_text():
-    """Edge case: Empty input text is returned unchanged with zero decryptions performed."""
-    # Arrange
-    empty_text = ""
-    config = DepseudonymizeUnstructuredConfig(
-        used_function=[
-            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="test_key"))
-        ]
-    )
-    context = build_op_context(op_config=config.model_dump())
-
-    # Act
-    with patch(
-        "src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key"
-    ) as mock_key:
-        mock_key.return_value = Fernet.generate_key()
-        result_gen = depseudonymize_unstructured(context, input_text=empty_text)
-        result_output = next(result_gen)
-        metrics_output = next(result_gen)
-
-    # Assert
-    assert result_output.value == "", "Empty text should remain empty"
-    assert (
-        metrics_output.value["total_depseudo_count"] == 0
-    ), "Should report zero decryptions for empty text"
diff --git a/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py b/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
deleted file mode 100644
index b89fad3..0000000
--- a/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
+++ /dev/null
@@ -1,1119 +0,0 @@
-"""
-Test suite for field-level pseudonymisation operations (encrypt technique).
-
-This test suite covers the encryption pseudonymisation technique for structured dataframes,
-validating the following Acceptance Criteria:
-
-## Test Coverage Summary
-
-### Acceptance Criteria Coverage:
-- AC1 (Supported Technique Applied Correctly): 7 tests
-- AC2 (Invalid Execution Handling): 7 tests
-- AC3 (DataFrame Compliance): 6 tests
-- AC4 (Audit Logging - Success): 2 tests
-- AC5 (Audit Logging - Failure): 3 tests
-- Additional Coverage: 7 tests
-
-### Test Pattern:
-- Each test uses build_op_context with config_to_dagster_dict for configuration
-- Tests validate dual outputs (data, metrics)
-- Vault access is mocked for isolation
-
-"""
-
-import pandas as pd
-import pytest
-from dagster import build_op_context
-from cryptography.fernet import Fernet
-from hvac.exceptions import InvalidPath
-from unittest.mock import patch, MagicMock
-
-from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
-    AnonymisePseudonymizeStructuredConfig,
-    EncryptConfig,
-    HashConfig,
-    PseudoTechniqueConfig,
-)
-from template_code_location.field_level_pseudo_anonymisation.ops import anonymize_pseudonymize_structured
-
-# Import helper functions (fixtures are auto-discovered by pytest)
-from .conftest import (
-    run_encrypt_op,
-    clear_vault_key,
-    get_vault_key,
-    config_to_dagster_dict,
-)
-
-
-# -------------------------------- Test Markers Configuration --------------------------------
-
-# Register custom markers
-pytest.mark.slow = pytest.mark.slow
-pytest.mark.security = pytest.mark.security
-pytest.mark.edge_case = pytest.mark.edge_case
-
-
-# -------------------------------- Test-Specific Fixtures ----------------------------------------
-
-
-@pytest.fixture
-def encrypt_single_column_config():
-    """
-    Configuration for encrypting a single column (email).
-    Tests basic encryption functionality.
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["email"], key_name="test_email_key"
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def encrypt_multiple_columns_config():
-    """
-    Configuration for encrypting multiple columns (name, email).
-    Tests encryption across multiple fields.
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["name", "email"], key_name="test_multi_key"
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def encrypt_mixed_types_config():
-    """
-    Configuration for encrypting columns with different data types.
-    Tests that encryption handles type conversion (int, float -> string).
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    columns=["id", "age", "salary"],
-                    key_name="test_numeric_key",
-                )
-            )
-        ]
-    )
-
-
-@pytest.fixture
-def encrypt_with_unchanged_columns_config():
-    """
-    Configuration that encrypts some columns while leaving others unchanged.
-    Tests AC3 requirement for unchanged column preservation.
-    """
-    return AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["email"], key_name="test_partial_key"
-                )
-            )
-        ]
-    )
-
-
-# -------------------------------- Test-Specific Fixtures ----------------------------------------
-
-
-def test_encrypt_single_column_applied_correctly(sample_df, encrypt_single_column_config):
-    """
-    AC1: Tests that encryption is applied correctly to a single column.
-
-    Scenario: The system applies encryption to the 'email' field
-    Given: A structured dataset with an email column
-    And: A valid encryption configuration for the email field
-    When: The participant triggers the execution
-    Then: The email field must be transformed with Fernet encryption
-    And: The encrypted values must be different from the original values
-    And: The encrypted values must be valid Fernet tokens (decodable)
-    """
-    # Clear any existing test key
-    clear_vault_key("test_email_key")
-
-    result_df, metrics = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-
-    # Verify output structure
-    assert result_df is not None, "Result DataFrame should not be None"
-    assert metrics is not None, "Metrics should not be None"
-
-    # Verify email column is encrypted (values changed)
-    assert not result_df["email"].equals(
-        sample_df["email"]
-    ), "Email column should be encrypted (values should change)"
-
-    # Verify all encrypted values are different from originals
-    for orig, enc in zip(sample_df["email"], result_df["email"]):
-        assert orig != enc, f"Original value '{orig}' should be encrypted"
-
-    # Verify encrypted values are valid Fernet tokens (can be decrypted)
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-    for enc_value in result_df["email"]:
-        decrypted = f.decrypt(enc_value.encode()).decode()
-        assert (
-            decrypted in sample_df["email"].values
-        ), f"Decrypted value '{decrypted}' should match an original email"
-
-    # Verify row count is preserved
-    assert len(result_df) == len(sample_df), "Row count should be preserved"
-
-
-def test_encrypt_multiple_columns_applied_correctly(sample_df, encrypt_multiple_columns_config):
-    """
-    AC1: Tests that encryption is applied correctly to multiple columns.
-
-    Scenario: The system applies encryption to multiple fields (name, email)
-    Given: A structured dataset with name and email columns
-    And: A valid encryption configuration for both fields
-    When: The participant triggers the execution
-    Then: Both fields must be transformed with Fernet encryption
-    And: Each field uses the same encryption key (as specified)
-    """
-    clear_vault_key("test_multi_key")
-
-    result_df, metrics = run_encrypt_op(encrypt_multiple_columns_config, sample_df.copy())
-
-    # Verify both columns are encrypted
-    assert not result_df["name"].equals(sample_df["name"]), "Name column should be encrypted"
-    assert not result_df["email"].equals(sample_df["email"]), "Email column should be encrypted"
-
-    # Verify all values are encrypted
-    key = get_vault_key("test_multi_key")
-    f = Fernet(key)
-
-    for enc_name in result_df["name"]:
-        decrypted = f.decrypt(enc_name.encode()).decode()
-        assert decrypted in sample_df["name"].values
-
-    for enc_email in result_df["email"]:
-        decrypted = f.decrypt(enc_email.encode()).decode()
-        assert decrypted in sample_df["email"].values
-
-
-def test_encrypt_numeric_columns_applied_correctly(sample_df, encrypt_mixed_types_config):
-    """
-    AC1: Tests that encryption handles numeric data types correctly.
-
-    Scenario: The system applies encryption to numeric fields (id, age, salary)
-    Given: A structured dataset with integer and float columns
-    And: A valid encryption configuration for numeric fields
-    When: The participant triggers the execution
-    Then: Numeric values must be converted to strings and encrypted
-    And: Original numeric values should be recoverable via decryption
-    """
-    clear_vault_key("test_numeric_key")
-
-    result_df, metrics = run_encrypt_op(encrypt_mixed_types_config, sample_df.copy())
-
-    # Verify all numeric columns are now string type (encrypted)
-    assert result_df["id"].dtype == object, "Encrypted id should be object/string type"
-    assert result_df["age"].dtype == object, "Encrypted age should be object/string type"
-    assert result_df["salary"].dtype == object, "Encrypted salary should be object/string type"
-
-    # Verify original numeric values can be recovered
-    key = get_vault_key("test_numeric_key")
-    f = Fernet(key)
-
-    for enc_id in result_df["id"]:
-        decrypted = int(f.decrypt(enc_id.encode()).decode())
-        assert decrypted in sample_df["id"].values
-
-
-def test_encrypt_key_generation_on_first_use(sample_df, encrypt_single_column_config):
-    """
-    AC1: Tests that encryption key is automatically generated and stored in Vault.
-
-    Scenario: First-time encryption generates a key automatically
-    Given: A structured dataset with valid configuration
-    And: No encryption key exists in Vault for the specified key_name
-    When: The participant triggers the execution
-    Then: The system must generate a new Fernet key
-    And: Store it in Vault at the specified path
-    And: Use it for encryption
-    """
-    clear_vault_key("test_email_key")
-
-    # Verify key doesn't exist before encryption
-    with pytest.raises(InvalidPath):
-        get_vault_key("test_email_key")
-
-    result_df, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-
-    # Verify key was created
-    key = get_vault_key("test_email_key")
-    assert key is not None, "Encryption key should be created in Vault"
-    assert len(key) == 44, "Fernet key should be 44 bytes (base64 encoded 32 bytes)"
-
-    # Verify the key works for decryption
-    f = Fernet(key)
-    for enc_email in result_df["email"]:
-        decrypted = f.decrypt(enc_email.encode()).decode()
-        assert decrypted in sample_df["email"].values
-
-
-def test_encrypt_uses_existing_vault_key(sample_df, encrypt_single_column_config):
-    """
-    AC1: Tests that encryption uses an existing key from Vault if present.
-
-    Scenario: Encryption reuses existing key for consistent pseudonymisation
-    Given: A structured dataset
-    And: An encryption key already exists in Vault
-    When: The participant triggers the execution
-    Then: The system must use the existing key (not generate a new one)
-    And: The same input produces the same encrypted output (deterministic with same key)
-    """
-    clear_vault_key("test_email_key")
-
-    # First encryption - generates key
-    result_df_1, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-    key_1 = get_vault_key("test_email_key")
-
-    # Second encryption - should use same key
-    result_df_2, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-    key_2 = get_vault_key("test_email_key")
-
-    # Verify same key is used
-    assert key_1 == key_2, "Encryption should reuse existing Vault key"
-
-
-# ----------------------- AC2: Invalid Execution Handling ------------------------------------
-
-
-def test_encrypt_missing_column_error(encrypt_single_column_config):
-    """
-    AC2: Tests graceful error handling when a specified column doesn't exist.
-
-    Scenario: The system aborts gracefully when column is missing
-    Given: A structured dataset
-    And: A configuration specifying a non-existent column
-    When: The participant triggers the execution
-    Then: The system must raise a clear ValueError
-    And: The error message must indicate which columns are missing
-    """
-    df_missing_column = pd.DataFrame(
-        {
-            "id": [1, 2, 3],
-            "name": ["Alice", "Bob", "Charlie"],
-            "age": [25, 30, 35],
-            # Missing 'email' column
-        }
-    )
-
-    with pytest.raises(ValueError) as exc_info:
-        run_encrypt_op(encrypt_single_column_config, df_missing_column)
-
-    assert "not present in the DataFrame" in str(
-        exc_info.value
-    ), "Error message should indicate missing columns"
-    assert "email" in str(exc_info.value), "Error message should mention the missing 'email' column"
-
-
-def test_encrypt_empty_dataframe_handled(encrypt_single_column_config):
-    """
-    AC2: Tests graceful handling of empty DataFrame input.
-
-    Scenario: The system processes empty DataFrame without errors
-    Given: An empty structured dataset (no rows)
-    And: A valid encryption configuration
-    When: The participant triggers the execution
-    Then: The system must return an empty DataFrame with correct schema
-    And: No errors should be raised
-    """
-    clear_vault_key("test_email_key")
-
-    empty_df = pd.DataFrame(columns=["id", "name", "email", "age", "salary", "department"])
-
-    result_df, metrics = run_encrypt_op(encrypt_single_column_config, empty_df)
-
-    assert len(result_df) == 0, "Result should be empty"
-    assert "email" in result_df.columns, "Email column should exist in schema"
-
-
-def test_encrypt_vault_connection_error():
-    """
-    AC2: Tests error handling when Vault is unreachable.
-
-    Scenario: The system fails gracefully when Vault is unavailable
-    Given: A structured dataset with valid configuration
-    When: Vault service is unreachable or misconfigured
-    Then: The system must raise a clear error
-    And: The error message must indicate the Vault connection issue
-
-    Note: This test requires Vault to be down or uses a bad URL.
-    For testing purposes, we simulate by using invalid credentials.
-    """
-    # Create a mock client that raises an exception when accessing Vault
-    mock_client_instance = MagicMock()
-    mock_client_instance.secrets.kv.v2.read_secret_version.side_effect = Exception(
-        "Simulated Vault connection error"
-    )
-
-    with patch("hvac.Client", return_value=mock_client_instance):
-        df = pd.DataFrame(
-            {
-                "id": [1],
-                "name": ["Test"],
-                "email": ["test@example.com"],
-                "age": [30],
-                "salary": [50000.0],
-                "department": ["IT"],
-            }
-        )
-        config = AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(
-                        type="encrypt", columns=["email"], key_name="test_email_key"
-                    )
-                )
-            ]
-        )
-        with pytest.raises(ValueError) as exc_info:
-            run_encrypt_op(config, df)
-
-        error_message = str(exc_info.value)
-        assert (
-            "Simulated Vault connection error" in error_message
-        ), "Error should indicate Vault connection issue"
-
-
-def test_encrypt_null_values_handled(encrypt_single_column_config):
-    """
-    AC2: Tests handling of NULL/NaN values in encrypted columns.
-
-    Scenario: The system handles null values appropriately
-    Given: A structured dataset with NULL values in the column to encrypt
-    And: A valid encryption configuration
-    When: The participant triggers the execution
-    Then: The system must process null values (encrypt "nan" string or handle appropriately)
-    And: Not raise an exception
-    """
-    clear_vault_key("test_email_key")
-
-    df_with_nulls = pd.DataFrame(
-        {
-            "id": [1, 2, 3, 4],
-            "name": ["Alice", "Bob", "Charlie", "David"],
-            "email": ["alice@example.com", None, "charlie@example.com", pd.NA],
-            "age": [25, 30, 35, 40],
-            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
-            "department": ["HR", "IT", "Finance", "IT"],
-        }
-    )
-
-    result_df, metrics = run_encrypt_op(encrypt_single_column_config, df_with_nulls)
-
-    # Verify execution completed without errors
-    assert result_df is not None
-    assert len(result_df) == 4
-
-    # Verify null values were processed (encrypted as string "None" or "nan")
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-
-    # The null values get converted to string "None" or "nan" before encryption
-    for enc_email in result_df["email"]:
-        decrypted = f.decrypt(enc_email.encode()).decode()
-        # Decrypted value should be original or string representation of null
-        assert decrypted in [
-            "alice@example.com",
-            "charlie@example.com",
-            "None",
-            "nan",
-            "<NA>",
-        ]
-
-
-def test_encrypt_duplicate_column_configuration_error():
-    """
-    AC2: Tests that duplicate columns across techniques are rejected.
-
-    Scenario: Configuration validation prevents duplicate column assignments
-    Given: A configuration that assigns the same column to multiple techniques
-    When: The configuration is validated
-    Then: The system must raise a ValueError during configuration creation
-    And: The error message must indicate duplicate column assignment
-    """
-    with pytest.raises(ValueError) as exc_info:
-        AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(type="encrypt", columns=["email"], key_name="key1")
-                ),
-                PseudoTechniqueConfig(
-                    technique=HashConfig(
-                        type="hash",
-                        columns=["email"],  # Duplicate column
-                        algorithm="sha256",
-                    )
-                ),
-            ]
-        )
-
-    assert "Duplicate column" in str(
-        exc_info.value
-    ), "Error should indicate duplicate column configuration"
-
-
-# ------------------ AC3: DataFrame Input and Output Compliance ------------------------------
-
-
-def test_encrypt_dataframe_input_output_format(sample_df, encrypt_single_column_config):
-    """
-    AC3: Tests that input and output are both pandas DataFrames.
-
-    Scenario: The system accepts DataFrame input and returns DataFrame output
-    Given: A structured dataset as pandas DataFrame
-    And: A valid encryption configuration
-    When: The participant triggers the execution
-    Then: The system must return a pandas DataFrame
-    And: The DataFrame structure must be preserved
-    """
-    clear_vault_key("test_email_key")
-
-    result_df, metrics = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-
-    # Verify output is a DataFrame
-    assert isinstance(result_df, pd.DataFrame), "Output must be a pandas DataFrame"
-
-    # Verify DataFrame structure preserved
-    assert list(result_df.columns) == list(sample_df.columns), "Column names should be preserved"
-    assert len(result_df) == len(sample_df), "Row count should be preserved"
-
-
-def test_encrypt_data_types_transformed_correctly(sample_df, encrypt_mixed_types_config):
-    """
-    AC3: Tests that data types are transformed appropriately after encryption.
-
-    Scenario: Encrypted columns change to string type
-    Given: A structured dataset with various data types (int, float, str)
-    And: An encryption configuration for multiple columns
-    When: The participant triggers the execution
-    Then: All encrypted columns must be of type object/string
-    And: This transformation is valid and consistent with encryption technique
-    """
-    clear_vault_key("test_numeric_key")
-
-    # Store original types
-    original_types = sample_df.dtypes.to_dict()
-
-    result_df, _ = run_encrypt_op(encrypt_mixed_types_config, sample_df.copy())
-
-    # Verify encrypted columns are now object/string type
-    assert result_df["id"].dtype == object, "Encrypted integer column should become object type"
-    assert result_df["age"].dtype == object, "Encrypted integer column should become object type"
-    assert result_df["salary"].dtype == object, "Encrypted float column should become object type"
-
-    # Verify data types changed (not same as original)
-    assert result_df["id"].dtype != original_types["id"], "Data type should change after encryption"
-
-
-def test_encrypt_unchanged_columns_preserved(sample_df, encrypt_with_unchanged_columns_config):
-    """
-    AC3: Tests that columns not specified for encryption remain unchanged.
-
-    Scenario: Non-encrypted columns remain identical
-    Given: A structured dataset with multiple columns
-    And: An encryption configuration for only one column (email)
-    When: The participant triggers the execution
-    Then: Columns not specified (id, name, age, salary, department) must remain unchanged
-    And: Their values and data types must be identical to the input
-    """
-    clear_vault_key("test_partial_key")
-
-    result_df, _ = run_encrypt_op(encrypt_with_unchanged_columns_config, sample_df.copy())
-
-    # Verify unchanged columns are identical
-    assert result_df["id"].equals(sample_df["id"]), "ID column should remain unchanged"
-    assert result_df["name"].equals(sample_df["name"]), "Name column should remain unchanged"
-    assert result_df["age"].equals(sample_df["age"]), "Age column should remain unchanged"
-    assert result_df["salary"].equals(sample_df["salary"]), "Salary column should remain unchanged"
-    assert result_df["department"].equals(
-        sample_df["department"]
-    ), "Department column should remain unchanged"
-
-    # Verify encrypted column is changed
-    assert not result_df["email"].equals(
-        sample_df["email"]
-    ), "Email column should be encrypted (changed)"
-
-
-def test_encrypt_schema_consistency(sample_df, encrypt_multiple_columns_config):
-    """
-    AC3: Tests that DataFrame schema is consistent and coherent.
-
-    Scenario: Output DataFrame has consistent schema
-    Given: A structured dataset
-    And: A multi-column encryption configuration
-    When: The participant triggers the execution
-    Then: Output DataFrame must have same column names as input
-    And: Column order must be preserved
-    And: No columns should be added or removed
-    """
-    clear_vault_key("test_multi_key")
-
-    result_df, _ = run_encrypt_op(encrypt_multiple_columns_config, sample_df.copy())
-
-    # Verify column names are identical
-    assert list(result_df.columns) == list(sample_df.columns), "Column names must be identical"
-
-    # Verify column order is preserved
-    for i, col in enumerate(sample_df.columns):
-        assert result_df.columns[i] == col, f"Column order should be preserved at position {i}"
-
-    # Verify no extra columns added
-    assert len(result_df.columns) == len(
-        sample_df.columns
-    ), "Number of columns should remain the same"
-
-
-def test_encrypt_index_preservation(sample_df, encrypt_single_column_config):
-    """
-    AC3: Tests that DataFrame index is preserved after encryption.
-
-    Scenario: DataFrame index remains unchanged
-    Given: A structured dataset with default index
-    And: A valid encryption configuration
-    When: The participant triggers the execution
-    Then: The output DataFrame must preserve the original index
-    And: No extraneous index column should be added
-    """
-    clear_vault_key("test_email_key")
-
-    # Set custom index to verify preservation
-    sample_df_with_index = sample_df.copy()
-    sample_df_with_index.index = [10, 20, 30, 40, 50]
-
-    result_df, _ = run_encrypt_op(encrypt_single_column_config, sample_df_with_index)
-
-    # Verify index is preserved
-    assert list(result_df.index) == list(
-        sample_df_with_index.index
-    ), "DataFrame index should be preserved"
-
-
-# ------------- AC4: Execution Audit & Logging - Positive Scenario ---------------------------
-
-
-def test_encrypt_successful_execution_logging(sample_df, encrypt_single_column_config):
-    """
-    AC4: Tests that successful execution produces appropriate logs/metadata.
-
-    Scenario: Successful pseudonymisation execution is logged
-    Given: A structured dataset with valid configuration
-    When: The participant triggers the execution
-    And: The execution completes successfully
-    Then: The system must return metrics output
-    And: Metrics should confirm successful operation
-
-    Note: Dagster automatically logs:
-    - Timestamp of execution (run start/end times)
-    - Workflow run identifier (run_id)
-    - Configuration parameters (captured in op_config)
-    - Success status (run status in Dagster UI)
-
-    This test validates the op returns proper outputs for Dagster to log.
-    """
-    clear_vault_key("test_email_key")
-
-    op_config_dict = config_to_dagster_dict(encrypt_single_column_config)
-    context = build_op_context(op_config=op_config_dict)
-
-    # Capture run context information
-    run_id = context.run_id
-
-    # Execute the operation
-    result_df, metrics = anonymize_pseudonymize_structured(context, df=sample_df.copy())
-
-    # Verify outputs for logging
-    assert result_df is not None, "Data output should be present for logging"
-    assert metrics is not None, "Metrics output should be present for logging"
-    assert isinstance(metrics.value, dict), "Metrics should be a dict"
-
-    # Verify run context is available (Dagster provides this automatically)
-    assert run_id is not None, "Run ID should be available for audit logging"
-
-    # Verify configuration is captured (can be logged)
-    assert "used_function" in op_config_dict, "Configuration should be captured for audit"
-    # In Dagster format, technique is nested under the discriminator key
-    technique_config = op_config_dict["used_function"][0]["technique"]
-    assert "encrypt" in technique_config, "Encrypt technique should be present"
-    assert (
-        technique_config["encrypt"]["key_name"] == "test_email_key"
-    ), "Key name should be logged (but not key value)"
-
-    # Verify no PII is in metrics (compliance requirement)
-    metrics_str = str(metrics.value)
-    for email in sample_df["email"]:
-        assert email not in metrics_str, "PII values should not appear in metrics/logs"
-
-
-def test_encrypt_configuration_parameters_logged(sample_df, encrypt_multiple_columns_config):
-    """
-    AC4: Tests that configuration parameters are properly captured for audit.
-
-    Scenario: Configuration details are available for compliance logging
-    Given: A multi-column encryption configuration
-    When: The participant triggers the execution
-    Then: The system must capture configuration parameters including:
-    - Selected technique (encrypt)
-    - Columns to encrypt
-    - Key name (but not key value)
-    And: These parameters should be accessible for audit logging
-    """
-    clear_vault_key("test_multi_key")
-
-    op_config_dict = config_to_dagster_dict(encrypt_multiple_columns_config)
-    context = build_op_context(op_config=op_config_dict)
-
-    result_df, metrics = anonymize_pseudonymize_structured(context, df=sample_df.copy())
-
-    # Verify configuration details are captured
-    technique_config = op_config_dict["used_function"][0]["technique"]
-    assert "encrypt" in technique_config, "Encrypt technique should be present"
-    assert set(technique_config["encrypt"]["columns"]) == {"name", "email"}
-    assert technique_config["encrypt"]["key_name"] == "test_multi_key"
-
-    # Verify encryption key itself is NOT in config (security)
-    config_str = str(op_config_dict)
-    try:
-        key = get_vault_key("test_multi_key")
-        assert (
-            key.decode() not in config_str
-        ), "Encryption key value should never be in logged configuration"
-    except Exception:
-        pass  # Key might not exist yet
-
-
-# ------------- AC5: Execution Audit & Logging - Negative Scenario ---------------------------
-
-
-def test_encrypt_failed_execution_logging(encrypt_single_column_config):
-    """
-    AC5: Tests that failed execution provides error details for audit.
-
-    Scenario: Failed pseudonymisation execution is logged with error details
-    Given: A structured dataset with valid configuration
-    When: The participant triggers the execution
-    And: The execution fails (e.g., missing column)
-    Then: The system must raise an exception with clear error message
-    And: The error message should indicate the failure reason
-    And: Configuration parameters should still be accessible for audit
-    And: No PII should be exposed in error messages
-    """
-    df_missing_column = pd.DataFrame(
-        {
-            "id": [1, 2, 3],
-            "name": ["Alice", "Bob", "Charlie"],
-            # Missing 'email' column - will cause failure
-        }
-    )
-
-    op_config_dict = config_to_dagster_dict(encrypt_single_column_config)
-    context = build_op_context(op_config=op_config_dict)
-    run_id = context.run_id
-
-    # Execute and capture failure
-    with pytest.raises(ValueError) as exc_info:
-        # Need to consume the generator to trigger execution
-        list(anonymize_pseudonymize_structured(context, df=df_missing_column))
-
-    # Verify error details are available for logging
-    error_message = str(exc_info.value)
-    assert (
-        "not present in the DataFrame" in error_message
-    ), "Error message should explain failure reason"
-    assert "email" in error_message, "Error message should mention the problematic column"
-
-    # Verify run context is available for failure logging
-    assert run_id is not None, "Run ID should be available for failure audit"
-
-    # Verify configuration is still accessible for audit
-    assert op_config_dict is not None, "Configuration should be accessible for failure audit"
-
-    # Verify no actual data values in error message (PII protection)
-    for name in ["Alice", "Bob", "Charlie"]:
-        assert name not in error_message, "PII values should not appear in error messages"
-
-
-def test_encrypt_stack_trace_available_on_failure(encrypt_single_column_config):
-    """
-    AC5: Tests that stack trace is available for debugging failed executions.
-
-    Scenario: Failed execution provides stack trace for troubleshooting
-    Given: A configuration that will cause failure
-    When: The execution fails
-    Then: Python exception with stack trace should be raised
-    And: Stack trace should be available for logging (Dagster captures this)
-    And: Stack trace should not contain PII values
-    """
-    df_missing_column = pd.DataFrame({"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]})
-
-    try:
-        run_encrypt_op(encrypt_single_column_config, df_missing_column)
-        pytest.fail("Should have raised ValueError")
-    except ValueError:
-        # Verify exception information is available
-        import traceback
-
-        stack_trace = traceback.format_exc()
-
-        assert "ValueError" in stack_trace, "Exception type should be in stack trace"
-        assert (
-            "not present in the DataFrame" in stack_trace
-        ), "Error message should be in stack trace"
-
-        # Verify stack trace contains code location
-        assert (
-            "ops.py" in stack_trace or "anonymize_pseudonymize_structured" in stack_trace
-        ), "Stack trace should indicate error location"
-
-
-def test_encrypt_vault_error_logged_appropriately(sample_df):
-    """
-    AC5: Tests that Vault-related errors are logged with appropriate detail.
-
-    Scenario: Vault connection/authentication errors are captured
-    Given: A configuration with invalid Vault setup
-    When: The execution attempts to access Vault
-    And: Vault access fails
-    Then: The system must raise an error with Vault-specific details
-    And: The error should indicate the Vault-related nature of the failure
-
-    Note: This test validates error handling structure; actual Vault errors
-    depend on Vault availability.
-    """
-    # Create a mock client that raises an exception when accessing Vault
-    mock_client_instance = MagicMock()
-    mock_client_instance.secrets.kv.v2.read_secret_version.side_effect = Exception(
-        "Simulated Vault authentication error"
-    )
-
-    with patch("hvac.Client", return_value=mock_client_instance):
-        config = AnonymisePseudonymizeStructuredConfig(
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(
-                        type="encrypt", columns=["email"], key_name="test_email_key"
-                    )
-                )
-            ]
-        )
-        with pytest.raises(ValueError) as exc_info:
-            run_encrypt_op(config, sample_df)
-
-        error_message = str(exc_info.value)
-        assert (
-            "Simulated Vault authentication error" in error_message
-        ), "Error should indicate Vault-related failure"
-
-
-# --------------- Additional Edge Cases & Integration Tests ----------------------------------
-
-
-def test_encrypt_large_dataset_performance(encrypt_single_column_config):
-    """
-    Additional test: Validates encryption works with larger datasets.
-
-    Tests that encryption scales to realistic dataset sizes without errors.
-    """
-    clear_vault_key("test_email_key")
-
-    # Create a larger dataset (1000 rows)
-    large_df = pd.DataFrame(
-        {
-            "id": range(1000),
-            "name": [f"Person{i}" for i in range(1000)],
-            "email": [f"person{i}@example.com" for i in range(1000)],
-            "age": [25 + (i % 50) for i in range(1000)],
-            "salary": [50000.0 + (i * 100) for i in range(1000)],
-            "department": ["HR", "IT", "Finance"] * 333 + ["HR"],
-        }
-    )
-
-    # Save original values for comparison
-    original_emails = large_df["email"].copy()
-
-    result_df, metrics = run_encrypt_op(encrypt_single_column_config, large_df)
-
-    assert len(result_df) == 1000, "All rows should be processed"
-    assert not result_df["email"].equals(original_emails), "All email values should be encrypted"
-
-
-def test_encrypt_special_characters_in_data(encrypt_single_column_config):
-    """
-    Additional test: Validates encryption handles special characters correctly.
-
-    Tests that encryption works with unicode, special chars, emojis, etc.
-    """
-    clear_vault_key("test_email_key")
-
-    df_special = pd.DataFrame(
-        {
-            "id": [1, 2, 3, 4],
-            "name": ["Müller", "José", "李明", "🙂 John"],
-            "email": [
-                "test@müller.de",
-                "josé@example.com",
-                "李明@example.cn",
-                "emoji@😀.com",
-            ],
-            "age": [25, 30, 35, 40],
-            "salary": [50000.0, 60000.0, 70000.0, 80000.0],
-            "department": ["HR", "IT", "Finance", "IT"],
-        }
-    )
-
-    # Save original values for comparison
-    original_emails = df_special["email"].copy().tolist()
-
-    result_df, metrics = run_encrypt_op(encrypt_single_column_config, df_special)
-
-    # Verify special characters are encrypted and recoverable
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-
-    decrypted_emails = [f.decrypt(enc.encode()).decode() for enc in result_df["email"]]
-    assert set(decrypted_emails) == set(
-        original_emails
-    ), "Special characters should be preserved through encryption/decryption"
-
-
-def test_encrypt_deterministic_within_session(sample_df, encrypt_single_column_config):
-    """
-    Additional test: Validates encryption produces consistent results with same key.
-
-    Note: Fernet encryption includes a timestamp, so it's NOT deterministic.
-    This test validates that decryption recovers the original value consistently.
-    """
-    clear_vault_key("test_email_key")
-
-    # First encryption
-    result_df_1, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-
-    # Get the key used
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-
-    # Verify first encryption decrypts correctly
-    decrypted_1 = [f.decrypt(enc.encode()).decode() for enc in result_df_1["email"]]
-    assert decrypted_1 == sample_df["email"].tolist(), "Decryption should recover original values"
-
-    # Second encryption with same key (different encrypted values due to timestamp)
-    result_df_2, _ = run_encrypt_op(encrypt_single_column_config, sample_df.copy())
-
-    # Verify second encryption also decrypts correctly
-    decrypted_2 = [f.decrypt(enc.encode()).decode() for enc in result_df_2["email"]]
-    assert (
-        decrypted_2 == sample_df["email"].tolist()
-    ), "Decryption should consistently recover original values"
-
-    # Note: Encrypted values will be different due to Fernet's timestamp
-    assert not result_df_1["email"].equals(
-        result_df_2["email"]
-    ), "Fernet encryption includes timestamp, so outputs differ"
-
-
-def test_encrypt_empty_string_values(encrypt_single_column_config):
-    """
-    Additional test: Validates encryption handles empty strings correctly.
-    """
-    clear_vault_key("test_email_key")
-
-    df_empty_strings = pd.DataFrame(
-        {
-            "id": [1, 2, 3],
-            "name": ["Alice", "", "Charlie"],
-            "email": ["alice@example.com", "", "charlie@example.com"],
-            "age": [25, 30, 35],
-            "salary": [50000.0, 60000.0, 70000.0],
-            "department": ["HR", "IT", "Finance"],
-        }
-    )
-
-    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_empty_strings)
-
-    # Verify empty strings are encrypted
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-
-    decrypted_emails = [f.decrypt(enc.encode()).decode() for enc in result_df["email"]]
-    assert "" in decrypted_emails, "Empty strings should be encrypted and recoverable"
-
-
-@pytest.mark.edge_case
-def test_encrypt_very_long_strings(encrypt_single_column_config):
-    """
-    Edge case: Encryption of very long string values (e.g., 10KB+)
-
-    Validates that Fernet encryption handles large strings without truncation.
-    """
-    clear_vault_key("test_email_key")
-
-    # Create DataFrame with very long strings
-    long_string = "x" * 10000  # 10KB string
-    df_long_strings = pd.DataFrame(
-        {
-            "id": [1, 2, 3],
-            "name": ["Alice", "Bob", "Charlie"],
-            "email": [
-                f"{long_string}@example.com",
-                "bob@example.com",
-                "charlie@example.com",
-            ],
-            "age": [25, 30, 35],
-            "salary": [50000.0, 60000.0, 70000.0],
-            "department": ["HR", "IT", "Finance"],
-        }
-    )
-
-    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_long_strings)
-
-    # Verify long string is encrypted and recoverable
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-    decrypted = f.decrypt(result_df.loc[0, "email"].encode()).decode()
-    assert (
-        decrypted == f"{long_string}@example.com"
-    ), "Very long strings should be encrypted and recoverable"
-
-
-@pytest.mark.edge_case
-def test_encrypt_column_with_all_identical_values(encrypt_single_column_config):
-    """
-    Edge case: Encryption when all values in a column are identical
-
-    Validates that encryption produces different outputs for identical inputs
-    (due to Fernet's timestamp-based nonce).
-    """
-    clear_vault_key("test_email_key")
-
-    df_identical = pd.DataFrame(
-        {
-            "id": [1, 2, 3, 4, 5],
-            "name": ["Alice"] * 5,
-            "email": ["same@example.com"] * 5,  # All identical
-            "age": [30] * 5,
-            "salary": [60000.0] * 5,
-            "department": ["IT"] * 5,
-        }
-    )
-
-    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_identical)
-
-    # Verify all encrypted values are unique (due to Fernet timestamp)
-    encrypted_values = result_df["email"].tolist()
-    assert (
-        len(set(encrypted_values)) == 5
-    ), "Fernet should produce unique ciphertexts even for identical plaintexts"
-
-    # Verify all decrypt to same original value
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-    decrypted_values = [f.decrypt(enc.encode()).decode() for enc in encrypted_values]
-    assert all(
-        val == "same@example.com" for val in decrypted_values
-    ), "All encrypted values should decrypt to same original"
-
-
-@pytest.mark.edge_case
-def test_encrypt_whitespace_only_values(encrypt_single_column_config):
-    """
-    Edge case: Encryption of whitespace-only values
-    """
-    clear_vault_key("test_email_key")
-
-    df_whitespace = pd.DataFrame(
-        {
-            "id": [1, 2, 3],
-            "name": ["Alice", "Bob", "Charlie"],
-            "email": ["   ", "\t\t", "\n\n"],  # Various whitespace
-            "age": [25, 30, 35],
-            "salary": [50000.0, 60000.0, 70000.0],
-            "department": ["HR", "IT", "Finance"],
-        }
-    )
-
-    # Store original values before encryption
-    original_emails = df_whitespace["email"].tolist()
-
-    result_df, _ = run_encrypt_op(encrypt_single_column_config, df_whitespace)
-
-    # Verify whitespace values are encrypted and recoverable
-    key = get_vault_key("test_email_key")
-    f = Fernet(key)
-    encrypted_emails = result_df["email"].tolist()
-
-    for orig_ws, enc_val in zip(original_emails, encrypted_emails):
-        decrypted = f.decrypt(enc_val.encode()).decode()
-        assert (
-            decrypted == orig_ws
-        ), f"Whitespace value {repr(orig_ws)} should be preserved, but got {repr(decrypted)}"
-
-
-@pytest.mark.edge_case
-@pytest.mark.parametrize(
-    "column_type,test_values",
-    [
-        ("integer", [1, 2, 3, 4, 5]),
-        ("float", [1.1, 2.2, 3.3, 4.4, 5.5]),
-        ("string", ["a", "b", "c", "d", "e"]),
-    ],
-)
-def test_encrypt_various_data_types(column_type, test_values):
-    """
-    Parameterized test: Encryption across different pandas data types
-    """
-    clear_vault_key("test_type_key")
-
-    df = pd.DataFrame(
-        {
-            "id": range(len(test_values)),
-            "test_column": test_values,
-            "name": ["Person"] * len(test_values),
-            "email": ["test@example.com"] * len(test_values),
-            "age": [30] * len(test_values),
-            "salary": [60000.0] * len(test_values),
-            "department": ["IT"] * len(test_values),
-        }
-    )
-
-    config = AnonymisePseudonymizeStructuredConfig(
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt", columns=["test_column"], key_name="test_type_key"
-                )
-            )
-        ]
-    )
-
-    result_df, _ = run_encrypt_op(config, df)
-
-    # Verify encryption occurred (values changed to strings)
-    assert (
-        result_df["test_column"].dtype == object
-    ), f"Encrypted {column_type} should become object type"
-
-    # Verify decryption recovers original values
-    key = get_vault_key("test_type_key")
-    f = Fernet(key)
-    for idx, orig_val in enumerate(test_values):
-        decrypted = f.decrypt(result_df.loc[idx, "test_column"].encode()).decode()
-        assert decrypted == str(
-            orig_val
-        ), f"Decrypted value should match original {column_type} value"
diff --git a/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py b/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
deleted file mode 100644
index 8d6a3cc..0000000
--- a/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
+++ /dev/null
@@ -1,853 +0,0 @@
-"""
-Test suite for field-level pseudonymisation operations on unstructured data.
-
-This test suite validates the pseudonymisation of unstructured text with PII detection,
-covering the following Acceptance Criteria:
-
-## Test Coverage Summary
-
-### Acceptance Criteria Coverage:
-- AC1 (Pseudonymisation and Retention Applied Correctly): 8 tests
-- AC2 (Invalid Execution Handling): 5 tests
-- AC3 (Execution Audit & Logging - Positive Scenario): 3 tests
-- AC4 (Execution Audit & Logging - Negative Scenario): 4 tests
-- Additional Coverage: 3 tests
-
-### Test Pattern:
-- Each test uses build_op_context with config_to_dagster_dict for configuration
-- Tests validate dual outputs (data, metrics)
-- Vault access is mocked for isolation
-- Tests validate Scrubadub automatic PII detection
-- Tests ensure placeholder replacement for unconfigured PII
-"""
-
-import pytest
-import re
-from dagster import build_op_context
-from unittest.mock import patch, MagicMock
-
-from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
-    AnonymisePseudonymizeUnstructuredConfig,
-    EncryptConfig,
-    RetainConfig,
-    PseudoTechniqueConfig,
-)
-from template_code_location.field_level_pseudo_anonymisation.config_models import PIIEntityEnum, LanguageEnum
-from template_code_location.field_level_pseudo_anonymisation.unstructured_ops import (
-    anonymize_pseudonymize_unstructured,
-)
-
-from .conftest import clear_vault_key
-
-
-def config_to_dagster_dict_unstructured(config):
-    """Convert unstructured config to Dagster format."""
-    config_dict = {"language": config.language.value, "used_function": []}
-
-    for func_config in config.used_function:
-        technique = func_config.technique
-        technique_type = technique.type
-        technique_dict = technique.model_dump()
-
-        if "pii" in technique_dict:
-            technique_dict["pii"] = [pii_enum.name for pii_enum in technique.pii]
-
-        technique_dict_without_type = {k: v for k, v in technique_dict.items() if k != "type"}
-
-        config_dict["used_function"].append(
-            {"technique": {technique_type: technique_dict_without_type}}
-        )
-
-    return config_dict
-
-
-def run_unstructured_op(config, text):
-    """
-    Helper to run unstructured pseudonymisation op.
-
-    Returns:
-        tuple: (result_text: str, metrics_markdown: str)
-    """
-    context = build_op_context(op_config=config_to_dagster_dict_unstructured(config))
-    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=text)
-
-    # Extract actual values from Output objects
-    return result_text.value, metrics.value
-
-
-def parse_metrics_markdown(metrics_md: str) -> dict:
-    """
-    Parse markdown metrics into structured dict for easier testing.
-
-    Args:
-        metrics_md: Markdown metrics string from op output
-
-    Returns:
-        dict with keys: total_pii_detected, pii_by_type, techniques_applied, language
-    """
-    result = {
-        "total_pii_detected": 0,
-        "pii_by_type": {},
-        "techniques_applied": {},
-        "language": "",
-    }
-
-    # Extract total PII detected
-    total_match = re.search(r"\*\*Total PII Detected\*\*:\s*(\d+)", metrics_md)
-    if total_match:
-        result["total_pii_detected"] = int(total_match.group(1))
-
-    # Extract language
-    lang_match = re.search(r"\*\*Language\*\*:\s*(\w+)", metrics_md)
-    if lang_match:
-        result["language"] = lang_match.group(1)
-
-    # Extract PII by type from table
-    pii_table_section = re.search(
-        r"### PII by Type\n\| Entity Type \| Count \|\n\|[^\n]+\n((?:\|[^\n]+\n)+)",
-        metrics_md,
-    )
-    if pii_table_section:
-        for line in pii_table_section.group(1).strip().split("\n"):
-            parts = [p.strip() for p in line.split("|") if p.strip()]
-            if len(parts) == 2:
-                entity_type, count = parts
-                result["pii_by_type"][entity_type] = int(count)
-
-    # Extract techniques applied
-    techniques_section = re.search(r"### Techniques Applied\n((?:- \*\*[^\n]+\n)+)", metrics_md)
-    if techniques_section:
-        for line in techniques_section.group(1).strip().split("\n"):
-            tech_match = re.match(r"-\s*\*\*(.+?)\*\*:\s*(.+)", line)
-            if tech_match:
-                pii_type, technique = tech_match.groups()
-                result["techniques_applied"][pii_type] = technique
-
-    return result
-
-
-# -------------------------------- Fixtures ----------------------------------------
-
-
-@pytest.fixture
-def sample_text_en():
-    """English text with various PII types."""
-    return """
-    John Smith works at Acme Corporation. His email is john.smith@example.com
-    and his phone number is +1-555-123-4567. He lives in New York City at
-    123 Main Street, Apartment 4B. His SSN is 123-45-6789.
-    """
-
-
-@pytest.fixture
-def sample_text_multi_person():
-    """Text with multiple person names."""
-    return """
-    The meeting included Alice Johnson, Bob Williams, and Charlie Brown.
-    They discussed the project with Maria Garcia and David Wilson.
-    """
-
-
-@pytest.fixture
-def sample_text_mixed_pii():
-    """Text with multiple PII types for AC1 comprehensive testing."""
-    return """
-    Contact Information:
-    Name: Dr. Emily Watson
-    Email: emily.watson@hospital.com
-    Phone: +44-20-7946-0958
-    Website: https://patient-portal.hospital.com/records
-    """
-
-
-@pytest.fixture
-def encrypt_person_config():
-    """Configuration to encrypt PERSON entities."""
-    return AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_person_key",
-                )
-            )
-        ],
-    )
-
-
-@pytest.fixture
-def retain_person_config():
-    """Configuration to retain PERSON entities unchanged."""
-    return AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON]))
-        ],
-    )
-
-
-@pytest.fixture
-def mixed_technique_config():
-    """Configuration with encryption and retention for AC1 testing."""
-    return AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
-                    key_name="test_mixed_key",
-                )
-            ),
-            PseudoTechniqueConfig(
-                technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PHONE_NUMBERS])
-            ),
-        ],
-    )
-
-
-# ================================================================================================
-# AC1: Pseudonymisation and Retention Are Applied Correctly
-# ================================================================================================
-
-
-def test_ac1_encrypt_configured_pii_types(sample_text_mixed_pii, encrypt_person_config):
-    """AC1: Test that configured PII types are encrypted correctly."""
-    clear_vault_key("test_person_key")
-
-    result_text, metrics_md = run_unstructured_op(encrypt_person_config, sample_text_mixed_pii)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify person name is encrypted (not in plaintext)
-    assert "Emily Watson" not in result_text, "Configured PERSON PII should be encrypted"
-
-    # Verify encryption token is present
-    assert "{encrypt:" in result_text, "Encrypted token should be present in result"
-
-    # Verify PII was detected and processed
-    assert metrics["total_pii_detected"] > 0, "System should detect PII entities"
-    assert "PERSON" in metrics["pii_by_type"], "PERSON type should be in detected PII"
-
-    # Verify text structure is preserved (surrounding text intact)
-    assert "Contact Information:" in result_text, "Non-PII text structure should be preserved"
-
-
-def test_ac1_retain_configured_pii_unchanged(sample_text_multi_person):
-    """AC1: Test that PII types marked for retention remain unchanged."""
-    retain_config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON]))
-        ],
-    )
-
-    result_text, metrics_md = run_unstructured_op(retain_config, sample_text_multi_person)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify retained PII types remain in plaintext
-    assert "Alice Johnson" in result_text, "Retained PERSON PII should remain unchanged"
-    assert "Bob Williams" in result_text, "Retained PERSON PII should remain unchanged"
-
-    # Verify technique applied is 'retain'
-    assert (
-        "retain" in metrics["techniques_applied"].get("PERSON", "").lower()
-    ), "Retain technique should be recorded for PERSON type"
-
-
-def test_ac1_unconfigured_pii_replaced_with_placeholders(sample_text_mixed_pii):
-    """AC1: Test that unconfigured PII types are replaced with placeholders."""
-    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_person_only_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_person_only_key")
-
-    result_text, metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
-
-    # Verify person is encrypted (configured)
-    assert "Emily Watson" not in result_text, "Configured PERSON should be encrypted"
-
-    # Verify unconfigured PII types have placeholders
-    assert (
-        "{{" in result_text and "}}" in result_text
-    ), "Unconfigured PII should be replaced with placeholders"
-
-    # Verify original unconfigured PII values are not in result
-    assert (
-        "emily.watson@hospital.com" not in result_text
-    ), "Unconfigured EMAIL should be replaced with placeholder"
-
-    # Verify placeholder format
-    assert (
-        "{{EMAIL}}" in result_text or "{{URL}}" in result_text
-    ), "Placeholders should indicate entity type"
-
-
-def test_ac1_mixed_techniques_applied_correctly(sample_text_mixed_pii, mixed_technique_config):
-    """AC1: Test that multiple techniques (encrypt, retain) are applied correctly."""
-    clear_vault_key("test_mixed_key")
-
-    result_text, metrics_md = run_unstructured_op(mixed_technique_config, sample_text_mixed_pii)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify encrypted PII types (PERSON, EMAIL)
-    assert "Emily Watson" not in result_text, "Configured PERSON should be encrypted"
-    assert "emily.watson@hospital.com" not in result_text, "Configured EMAIL should be encrypted"
-
-    # Verify retained PII type (PHONE_NUMBERS)
-    assert "+44-20-7946-0958" in result_text, "Configured PHONE_NUMBERS should be retained"
-
-    # Verify metrics reflect different techniques
-    assert (
-        "encrypt" in metrics["techniques_applied"].get("PERSON", "").lower()
-    ), "Encrypt technique should be applied to PERSON"
-    assert (
-        "encrypt" in metrics["techniques_applied"].get("EMAIL", "").lower()
-    ), "Encrypt technique should be applied to EMAIL"
-    assert (
-        "retain" in metrics["techniques_applied"].get("PHONE_NUMBERS", "").lower()
-    ), "Retain technique should be applied to PHONE_NUMBERS"
-
-
-def test_ac1_multiple_instances_same_pii_type(sample_text_multi_person, encrypt_person_config):
-    """AC1: Test that all instances of a configured PII type are processed."""
-    clear_vault_key("test_person_key")
-
-    result_text, metrics_md = run_unstructured_op(encrypt_person_config, sample_text_multi_person)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify all person names are encrypted
-    person_names = [
-        "Alice Johnson",
-        "Bob Williams",
-        "Charlie Brown",
-        "Maria Garcia",
-        "David Wilson",
-    ]
-    for name in person_names:
-        assert name not in result_text, f"All PERSON instances should be encrypted: {name}"
-
-    # Verify metrics count multiple instances
-    assert metrics["pii_by_type"].get("PERSON", 0) >= len(
-        person_names
-    ), f"Should detect at least {len(person_names)} PERSON entities"
-
-
-def test_ac1_empty_text_returns_empty(encrypt_person_config):
-    """AC1: Test that empty or null text input raises a ValueError."""
-    clear_vault_key("test_person_key")
-
-    with pytest.raises(ValueError) as exc_info:
-        run_unstructured_op(encrypt_person_config, "")
-
-    assert "empty" in str(exc_info.value).lower(), "Error should indicate empty input"
-
-
-def test_ac1_text_without_pii_remains_unchanged():
-    """AC1: Test that text without any PII remains unchanged after processing."""
-    no_pii_text = """
-    The weather today is sunny with a high of 25 degrees Celsius.
-    The conference starts at 9:00 AM in Room 301.
-    """
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_no_pii_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_no_pii_key")
-
-    result_text, metrics_md = run_unstructured_op(config, no_pii_text)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    assert result_text.strip() == no_pii_text.strip(), "Text without PII should remain unchanged"
-    assert metrics["total_pii_detected"] == 0, "No PII should be detected"
-
-
-def test_ac1_placeholder_format_indicates_entity_type(sample_text_mixed_pii):
-    """AC1: Test that placeholders for unconfigured PII indicate the entity type."""
-    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_placeholder_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_placeholder_key")
-
-    result_text, metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify placeholder format (scrubadub uses {{TYPE}} format)
-    placeholder_pattern = r"\{\{[A-Z_]+\}\}"
-    placeholders = re.findall(placeholder_pattern, result_text)
-
-    assert (
-        len(placeholders) > 0
-    ), "Result should contain entity-type placeholders for unconfigured PII"
-
-    # Verify metrics track which PII types were detected
-    assert len(metrics["pii_by_type"]) > 0, "Metrics should list detected PII types"
-
-
-# ================================================================================================
-# AC2: Invalid Execution Handling
-# ================================================================================================
-
-
-def test_ac2_graceful_abort_on_scrubadub_failure():
-    """AC2: Test graceful abort when the PII detection engine (Scrubadub) fails."""
-    text = "Test user John Smith with email john@example.com"
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_abort_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_abort_key")
-
-    # Mock Scrubadub to fail at the right import path
-    with patch(
-        "field_level_pseudo_anonymisation.unstructured_ops.scrubadub.Scrubber"
-    ) as mock_scrubber_class:
-        mock_scrubber = MagicMock()
-        mock_scrubber.iter_filth.side_effect = RuntimeError("Scrubadub internal error")
-        mock_scrubber_class.return_value = mock_scrubber
-
-        with pytest.raises(RuntimeError) as exc_info:
-            run_unstructured_op(config, text)
-
-        error_msg = str(exc_info.value).lower()
-        assert (
-            "pii" in error_msg
-            or "detection" in error_msg
-            or "scrubadub" in error_msg
-            or "failed" in error_msg
-        ), "Error message should indicate PII detection failure"
-
-
-def test_ac2_graceful_abort_on_encryption_failure(sample_text_en):
-    """AC2: Test graceful abort when an encryption technique fails during execution."""
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_encrypt_fail_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_encrypt_fail_key")
-
-    # Mock encrypt function at correct path - it's imported from techniques module
-    encrypt_path = (
-        "field_level_pseudo_anonymisation"
-        ".techniques.anonymisation_pseudonymisation_techniques.encrypt"
-    )
-    with patch(encrypt_path) as mock_encrypt:
-        mock_encrypt.side_effect = Exception("Encryption algorithm failure")
-
-        with pytest.raises(RuntimeError) as exc_info:
-            run_unstructured_op(config, sample_text_en)
-
-        error_msg = str(exc_info.value).lower()
-        assert (
-            "encrypt" in error_msg or "failed" in error_msg or "technique" in error_msg
-        ), "Error message should indicate encryption failure"
-
-
-def test_ac2_null_text_input_raises_error(encrypt_person_config):
-    """AC2: Test that a null (None) text input is rejected with an error."""
-    clear_vault_key("test_person_key")
-
-    # Dagster will raise DagsterTypeCheckDidNotPass before op executes
-    from dagster import DagsterTypeCheckDidNotPass
-
-    with pytest.raises((ValueError, DagsterTypeCheckDidNotPass, TypeError)):
-        run_unstructured_op(encrypt_person_config, None)
-
-
-def test_ac2_invalid_language_configuration():
-    """AC2: Test that an unsupported language in the config raises a validation error."""
-    # This should fail at config creation due to Pydantic validation
-    with pytest.raises((ValueError, TypeError)):
-        AnonymisePseudonymizeUnstructuredConfig(
-            language="invalid_lang",  # Should fail Pydantic validation
-            used_function=[
-                PseudoTechniqueConfig(
-                    technique=EncryptConfig(
-                        type="encrypt", pii=[PIIEntityEnum.PERSON], key_name="test_key"
-                    )
-                )
-            ],
-        )
-
-
-def test_ac2_very_large_text_processing():
-    """AC2: Test that very large text inputs are processed successfully without memory errors."""
-    # Create large text with repeated PII patterns
-    large_text = (
-        """
-    John Smith works at company. Email: john.smith@example.com.
-    """
-        * 1000
-    )  # ~60KB of text with repeated PII
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
-                    key_name="test_large_text_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_large_text_key")
-
-    result_text, metrics_md = run_unstructured_op(config, large_text)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify processing completed
-    assert result_text is not None, "Large text should be processed successfully"
-    assert len(result_text) > 0, "Result should not be empty"
-    assert metrics["total_pii_detected"] > 0, "PII should be detected in large text"
-
-
-# ================================================================================================
-# AC3: Execution Audit & Logging - Positive Scenario
-# ================================================================================================
-
-
-def test_ac3_successful_execution_logs_timestamp_and_run_id(sample_text_en, encrypt_person_config):
-    """AC3: Test that successful execution context contains a run ID for logging."""
-    clear_vault_key("test_person_key")
-
-    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
-    context = build_op_context(op_config=op_config_dict)
-
-    # Capture run context
-    run_id = context.run_id
-
-    # Execute operation
-    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_en)
-
-    # Verify run identifier is available for logging
-    assert run_id is not None, "Run ID must be available for audit logging"
-
-    # Verify outputs are returned (for Dagster to log)
-    assert result_text is not None, "Result text should be available for logging"
-    assert metrics is not None, "Metrics should be available for logging"
-
-
-def test_ac3_successful_execution_logs_configuration_parameters(
-    sample_text_en, mixed_technique_config
-):
-    """AC3: Test that the used configuration is accessible for logging on success."""
-    clear_vault_key("test_mixed_key")
-
-    op_config_dict = config_to_dagster_dict_unstructured(mixed_technique_config)
-    context = build_op_context(op_config=op_config_dict)
-
-    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_en)
-
-    # Verify configuration is captured and accessible
-    assert "used_function" in op_config_dict, "Configuration must be accessible for logging"
-    assert len(op_config_dict["used_function"]) == 2, "Multiple techniques should be captured"
-
-    # Verify techniques are logged
-    techniques = [func["technique"] for func in op_config_dict["used_function"]]
-    assert any(
-        "encrypt" in str(tech) for tech in techniques
-    ), "Encrypt technique should be in configuration"
-    assert any(
-        "retain" in str(tech) for tech in techniques
-    ), "Retain technique should be in configuration"
-
-    # Verify metrics contain technique information (in markdown string)
-    metrics_str = metrics.value
-    assert (
-        "Techniques Applied" in metrics_str
-    ), "Applied techniques should be in metrics for logging"
-
-
-def test_ac3_successful_execution_logs_no_raw_pii(sample_text_mixed_pii, encrypt_person_config):
-    """AC3: Test that logs and metrics from a successful run do not contain raw PII."""
-    clear_vault_key("test_person_key")
-
-    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
-    context = build_op_context(op_config=op_config_dict)
-
-    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_mixed_pii)
-
-    # Verify raw PII values are not in metrics
-    metrics_str = metrics.value
-
-    sensitive_values = ["Emily Watson", "emily.watson@hospital.com", "+44-20-7946-0958"]
-
-    for pii_value in sensitive_values:
-        assert (
-            pii_value not in metrics_str
-        ), f"Raw PII value should not appear in metrics: {pii_value}"
-
-    # Verify configuration logs do not contain raw PII
-    config_str = str(op_config_dict)
-    for pii_value in sensitive_values:
-        assert (
-            pii_value not in config_str
-        ), f"Raw PII value should not appear in configuration logs: {pii_value}"
-
-
-# ================================================================================================
-# AC4: Execution Audit & Logging - Negative Scenario
-# ================================================================================================
-
-
-def test_ac4_failed_execution_logs_error_details():
-    """AC4: Negative execution should surface clear error details (encryption key failure)."""
-    text = "Test user John Smith"
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_fail_log_key",
-                )
-            )
-        ],
-    )
-    clear_vault_key("test_fail_log_key")
-    ctx = build_op_context(op_config=config_to_dagster_dict_unstructured(config))
-
-    # Patch the key retrieval used inside unstructured_ops to force failure
-    with patch(
-        "field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key",
-        side_effect=RuntimeError("Encryption key retrieval failed"),
-    ):
-        with pytest.raises(RuntimeError) as exc_info:
-            # Consume the generator to trigger execution and raise the exception
-            list(anonymize_pseudonymize_unstructured(ctx, text=text))
-
-        msg = str(exc_info.value).lower()
-        assert "key" in msg and "failed" in msg, "Error message should mention key failure"
-
-
-def test_ac4_failed_execution_logs_configuration_used():
-    """AC4: Test that the attempted configuration is available for logging on failure."""
-    text = "Test data with person John Doe"
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_config_fail_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_config_fail_key")
-
-    op_config_dict = config_to_dagster_dict_unstructured(config)
-    context = build_op_context(op_config=op_config_dict)
-
-    # Mock _initialize_scrubber to fail
-    with patch(
-        "field_level_pseudo_anonymisation.unstructured_ops._initialize_scrubber"
-    ) as mock_init_scrubber:
-        mock_init_scrubber.side_effect = Exception("Scrubber module not available")
-
-        with pytest.raises((RuntimeError, Exception)) as exc_info:
-            list(anonymize_pseudonymize_unstructured(context, text=text))
-
-        # Verify configuration is still accessible despite failure
-        assert op_config_dict is not None, "Configuration must be accessible for failure audit"
-        assert (
-            "used_function" in op_config_dict
-        ), "Technique configuration should be available for diagnosis"
-
-        # Verify error was raised with proper message
-        error_msg = str(exc_info.value).lower()
-        assert (
-            "pii" in error_msg
-            or "detection" in error_msg
-            or "failed" in error_msg
-            or "scrubber" in error_msg
-            or "module" in error_msg
-        ), "Error should indicate detection/processing failed"
-
-
-def test_ac4_failed_execution_logs_failure_reason():
-    """AC4: Test that the reason for a failure is clearly indicated in the error message."""
-    text = "User: Alice Smith, Email: alice@example.com"
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.en,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
-                    key_name="test_failure_reason_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_failure_reason_key")
-
-    # Mock key retrieval function to fail
-    with patch(
-        "field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key"
-    ) as mock_get_key:
-        mock_get_key.side_effect = RuntimeError("Vault connection timeout")
-
-        with pytest.raises(RuntimeError) as exc_info:
-            run_unstructured_op(config, text)
-
-        # Verify failure reason is in error message
-        error_msg = str(exc_info.value).lower()
-        assert (
-            "encrypt" in error_msg
-            or "key" in error_msg
-            or "timeout" in error_msg
-            or "failed" in error_msg
-        ), "Error should indicate key retrieval/encryption failure"
-
-
-# ================================================================================================
-# Additional Tests - Edge Cases and Integration
-# ================================================================================================
-
-
-def test_multi_language_support_italian():
-    """Additional test: Verify that Italian text is processed correctly."""
-    italian_text = """
-    Il dottor Marco Rossi lavora presso l'ospedale.
-    Email: marco.rossi@ospedale.it
-    Telefono: +39-06-12345678
-    """
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.it,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON],
-                    key_name="test_italian_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_italian_key")
-
-    result_text, metrics_md = run_unstructured_op(config, italian_text)
-    metrics = parse_metrics_markdown(metrics_md)
-
-    # Verify processing occurred
-    assert result_text != italian_text, "Italian text should be processed"
-    assert metrics["total_pii_detected"] > 0, "PII should be detected in Italian text"
-
-
-def test_special_characters_in_text():
-    """Additional test: Verify handling of text with special Unicode characters."""
-    special_text = """
-    User: João da Silva 🇧🇷
-    Email: joão@empresa.com.br
-    Message: "Hello, World!" — Testing special chars: €, £, ¥, ©, ®
-    """
-
-    config = AnonymisePseudonymizeUnstructuredConfig(
-        language=LanguageEnum.pt,
-        used_function=[
-            PseudoTechniqueConfig(
-                technique=EncryptConfig(
-                    type="encrypt",
-                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
-                    key_name="test_special_chars_key",
-                )
-            )
-        ],
-    )
-
-    clear_vault_key("test_special_chars_key")
-
-    result_text, metrics_md = run_unstructured_op(config, special_text)
-
-    # Verify processing completed without encoding errors
-    assert result_text is not None, "Special characters should not cause processing failure"
-    assert len(result_text) > 0, "Result should not be empty"
-
-
-def test_deterministic_encryption_within_session(sample_text_en, encrypt_person_config):
-    """Additional test: Verify encryption format consistency across runs."""
-    clear_vault_key("test_person_key")
-
-    result1, metrics_md1 = run_unstructured_op(encrypt_person_config, sample_text_en)
-    result2, metrics_md2 = run_unstructured_op(encrypt_person_config, sample_text_en)
-
-    # Both should have encryption tokens
-    assert "{encrypt:" in result1, "First run should produce encrypted tokens"
-    assert "{encrypt:" in result2, "Second run should produce encrypted tokens"
-
-    # Verify consistent PII detection
-    metrics1 = parse_metrics_markdown(metrics_md1)
-    metrics2 = parse_metrics_markdown(metrics_md2)
-
-    assert (
-        metrics1["total_pii_detected"] == metrics2["total_pii_detected"]
-    ), "PII detection should be consistent across runs"
-
-    # Verify token format is consistent (Fernet base64 pattern)
-    token_pattern = r"\{encrypt:gAAAAAB[A-Za-z0-9+/=_-]+\}"
-    tokens1 = re.findall(token_pattern, result1)
-    tokens2 = re.findall(token_pattern, result2)
-
-    assert len(tokens1) == len(tokens2), "Same number of encryption tokens should be generated"
diff --git a/tests/field_level_pseudo_anonymisation/test_jobs.py b/tests/field_level_pseudo_anonymisation/test_jobs.py
deleted file mode 100644
index 616c3d5..0000000
--- a/tests/field_level_pseudo_anonymisation/test_jobs.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from template_code_location.field_level_pseudo_anonymisation.jobs import (
-    anonymize_pseudonymize_structured_job,
-    anonymize_pseudonymize_structured_job_s3,
-    depseudonymize_structured_job,
-    depseudonymize_structured_job_s3,
-    anonymize_pseudonymize_unstructured_job_s3,
-    anonymize_pseudonymize_unstructured_job,
-    depseudonymize_unstructured_job_s3,
-    depseudonymize_unstructured_job
-)
-
-
-def test_anonymize_pseudonymize_structured_job_is_callable():
-    """Test anonymize_pseudonymize_structured_job is a valid Dagster job"""
-    assert callable(anonymize_pseudonymize_structured_job)
-    assert hasattr(anonymize_pseudonymize_structured_job, 'execute_in_process')
-
-
-def test_anonymize_pseudonymize_structured_job_s3_is_callable():
-    """Test anonymize_pseudonymize_structured_job_s3 is a valid Dagster job"""
-    assert callable(anonymize_pseudonymize_structured_job_s3)
-    assert hasattr(anonymize_pseudonymize_structured_job_s3, 'execute_in_process')
-
-
-def test_depseudonymize_structured_job_is_callable():
-    """Test depseudonymize_structured_job is a valid Dagster job"""
-    assert callable(depseudonymize_structured_job)
-    assert hasattr(depseudonymize_structured_job, 'execute_in_process')
-
-
-def test_depseudonymize_structured_job_s3_is_callable():
-    """Test depseudonymize_structured_job_s3 is a valid Dagster job"""
-    assert callable(depseudonymize_structured_job_s3)
-    assert hasattr(depseudonymize_structured_job_s3, 'execute_in_process')
-
-
-def test_anonymize_pseudonymize_unstructured_job_is_callable():
-    """Test anonymize_pseudonymize_unstructured_job is a valid Dagster job"""
-    assert callable(anonymize_pseudonymize_unstructured_job)
-    assert hasattr(anonymize_pseudonymize_unstructured_job, 'execute_in_process')
-
-
-def test_anonymize_pseudonymize_unstructured_job_s3_is_callable():
-    """Test anonymize_pseudonymize_unstructured_job_s3 is a valid Dagster job"""
-    assert callable(anonymize_pseudonymize_unstructured_job_s3)
-    assert hasattr(anonymize_pseudonymize_unstructured_job_s3, 'execute_in_process')
-
-
-def test_depseudonymize_unstructured_job_is_callable():
-    """Test depseudonymize_unstructured_job is a valid Dagster job"""
-    assert callable(depseudonymize_unstructured_job)
-    assert hasattr(depseudonymize_unstructured_job, 'execute_in_process')
-
-
-def test_depseudonymize_unstructured_job_s3_is_callable():
-    """Test depseudonymize_unstructured_job_s3 is a valid Dagster job"""
-    assert callable(depseudonymize_unstructured_job_s3)
-    assert hasattr(depseudonymize_unstructured_job_s3, 'execute_in_process')

From 3ff92fc1134fd79871c2136b222a5caf0136e4ba Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Wed, 6 May 2026 11:57:52 +0200
Subject: [PATCH 13/15] pin code-locations to develop

---
 pyproject.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5eb1ab4..2d1fc57 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,9 +53,9 @@ exclude-dependencies = ["transformers", "spacy-transformers"]
 [tool.uv.sources]
 torch = { index = "pytorch-cpu" }
 util-services = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/util-services.git", rev = "v0.5.0" }
-data-processing = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/data-processing.git", branch = "feature/SIMPL-24642" }
-dataframe-level-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/dataframe-level-anonymisation.git", branch = "feature/SIMPL-24642" }
-field-level-pseudo-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/field-level-pseudo-anonymisation.git", branch = "feature/SIMPL-24642" }
+data-processing = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/data-processing.git", branch = "develop" }
+dataframe-level-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/dataframe-level-anonymisation.git", branch = "develop" }
+field-level-pseudo-anonymisation = { git = "https://code.europa.eu/simpl/simpl-open/development/data-services/field-level-pseudo-anonymisation.git", branch = "develop" }
 
 [[tool.uv.index]]
 name = "pytorch-cpu"

From 9aaee17d20fb4ffea9d31454f7f1885901cb16d8 Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Wed, 6 May 2026 15:12:52 +0200
Subject: [PATCH 14/15] clean dependencies

---
 pyproject.toml | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2d1fc57..f85da15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,35 +10,6 @@ requires-python = ">=3.12"
 dependencies = [
     # Dagster core
     "dagster>=1.8.13",
-    "dagster-webserver>=1.8.13",
-    "dagster-postgres>=0.24.13",
-    # Data processing
-    "pandas>=2.1.4",
-    "pyarrow>=23.0",
-    "numpy>=2.0.1",
-    "lxml>=6.0",
-    "xmltodict>=1.0",
-    "rdflib>=7.6",
-    "openpyxl>=3.1.0",
-    "xlrd>=2.0.1",
-    "tabulate>=0.9",
-    "pyspellchecker>=0.8.4",
-    "PyGeodesy>=24.6.11",
-    # Validation
-    "great_expectations>=1.16",
-    "pandera>=0.31",
-    "pydantic>=2.6.0,<3.0.0",
-    # Scraping
-    "scrapy>=2.15",
-    "BeautifulSoup4>=4.14",
-    # Anonymisation libraries
-    "pycanon==1.0.1.post2",
-    "anjana>=1.0.0",
-    # Field-level pseudo-anonymisation
-    "scrubadub>=2.0.0",
-    "scrubadub_spacy>=1.0.0",
-    "hvac>=2.0.0",
-    "cryptography>=42.0.0",
     # Util services — resolved via [tool.uv.sources] (git)
     "util-services",
     # Code location packages — resolved via [tool.uv.sources] (git)

From 9ebba755ad2302a994c8aee4388cc368d8917cee Mon Sep 17 00:00:00 2001
From: ILay <ilia.zakharchuk@t-systems.com>
Date: Wed, 6 May 2026 15:18:02 +0200
Subject: [PATCH 15/15] update Development Guide to clarify project layout and
 external dependencies

---
 documents/Development Guide.md | 97 ++++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/documents/Development Guide.md b/documents/Development Guide.md
index 23c60d7..6582768 100644
--- a/documents/Development Guide.md	
+++ b/documents/Development Guide.md	
@@ -9,81 +9,100 @@ By following a *code-first approach*, developers ensure consistency, traceabilit
 Development must always begin in a local environment. This allows developers to rapidly iterate, test business logic, and validate DAG (Directed Acyclic Graph) structures without impacting production data.
 
 ### 2.1 Project Layout
-This repository (`template-code-location`) serves as the **single consolidated code location** for all data services workflows. It contains the jobs, ops, and configurations previously spread across `data-processing`, `dataframe-level-anonymisation`, and `field-level-pseudo-anonymisation`.
+This repository (`template-code-location`) serves as the **single consolidated code location** for all data services workflows. It imports jobs and ops from three external packages (`data-processing`, `dataframe-level-anonymisation`, and `field-level-pseudo-anonymisation`) which are installed as Git dependencies, and also provides a place for custom template jobs/ops.
 
 ```text
 template-code-location/
 ├── src/
 │   └── template_code_location/
+│       ├── __init__.py
 │       ├── repository.py                  # Unified entry point (all jobs/sensors/resources)
-│       ├── data_processing/               # Data cleaning & transformation ops/jobs
-│       │   ├── config_models/
-│       │   ├── jobs.py
-│       │   └── ops.py
-│       ├── dataframe_level_anonymisation/  # k-anonymity, l-diversity, t-closeness
-│       │   ├── config_models/
-│       │   ├── jobs.py
-│       │   ├── ops.py
-│       │   └── utils.py
-│       ├── field_level_pseudo_anonymisation/  # Field-level encryption/hashing/redaction
-│       │   ├── config_models/
-│       │   ├── techniques/
-│       │   ├── jobs.py
-│       │   ├── ops.py
-│       │   ├── unstructured_ops.py
-│       │   └── utils.py
-│       ├── jobs/                           # Template example jobs
-│       └── ops/                            # Template example ops
-├── tests/                                  # All tests (migrated from source repos)
+│       ├── jobs/                           # Custom jobs specific to this code location
+│       │   ├── __init__.py
+│       │   └── jobs.py
+│       └── ops/                            # Custom ops specific to this code location
+│           ├── __init__.py
+│           └── ops.py
+├── tests/                                  # Unit & integration tests
 ├── Dockerfile
-├── pyproject.toml
+├── pyproject.toml                          # Dependencies & external package sources
 └── README.md
 ```
 
-### 2.2 Code Examples (Ops, Jobs, and Definitions)
+### 2.2 External Dependencies (Git Packages)
+
+The heavy-lifting logic lives in separate repositories, pulled in as installable Python packages via `pyproject.toml` and `[tool.uv.sources]`:
+
+| Package | Purpose | Source |
+|---------|---------|--------|
+| `data-processing` | Data cleaning & transformation jobs | Git (branch: `develop`) |
+| `dataframe-level-anonymisation` | k-anonymity, l-diversity, t-closeness | Git (branch: `develop`) |
+| `field-level-pseudo-anonymisation` | Field-level encryption/hashing/redaction | Git (branch: `develop`) |
+| `util-services` | Shared resources, sensors, and logging | Git (tag: `v0.5.0`) |
+
+These packages expose their jobs and ops, which are then imported and registered in `repository.py`.
+
+### 2.3 Code Examples (Ops, Jobs, and Definitions)
 The orchestration logic should be modular. Here is a practical example of how to construct a workflow.
 
-**1. Defining Ops (ops.py)**  
+**1. Defining Ops (`ops/ops.py`)**  
 Ops are the core units of computation. Keep them focused on a single task.
+
 ```python
 from dagster import op
 
 @op
-def fetch_raw_data() -> list:
-    """Fetches raw data from an external source."""
+def fetch_data() -> list:
+    """Fetches raw data from a source."""
     return [{"id": 1, "value": "A"}, {"id": 2, "value": "B"}]
 
 @op
 def process_data(data: list) -> dict:
-    """Transforms raw data into an aggregated format."""
-    return {"processed_count": len(data), "status": "success"}
+    """Processes raw data and returns a summary."""
+    return {"count": len(data), "status": "success"}
 ```
-**2. Assembling Jobs (jobs.py)**  
+
+**2. Assembling Jobs (`jobs/jobs.py`)**  
 Jobs link ops together to form a dependency graph (workflow).
+
 ```python
 from dagster import job
-from .ops import fetch_raw_data, process_data
+from ..ops.ops import fetch_data, process_data
 
 @job
 def data_processing_job():
-    """A workflow that fetches and processes data."""
-    raw_data = fetch_raw_data()
-    process_data(raw_data)
+    """A simple job that fetches and processes data."""
+    raw = fetch_data()
+    process_data(raw)
 ```
-**3. Registering Definitions (repository.py)**  
-This file acts as the entry point for the Simpl-Open orchestration platform to discover your code.
+
+**3. Registering Definitions (`repository.py`)**  
+This file acts as the entry point for the Simpl-Open orchestration platform to discover your code. It imports jobs from local modules as well as from external packages.
+
 ```python
 from dagster import Definitions
-from .jobs import data_processing_job
+from util_services.resources import s3_resource
+from util_services.sensors import notify_success, notify_failure, notify_canceled
+from util_services.custom_json_logger import simpl_json_logger
+
+# External package jobs
+from data_processing.jobs import remove_duplicates_job_s3, fill_missing_values_job_s3
+from dataframe_level_anonymisation.jobs import k_anonymity_job_s3, l_diversity_job_s3
+from field_level_pseudo_anonymisation.jobs import anonymize_pseudonymize_structured_job_s3
+
+# Local template jobs
+from template_code_location.jobs.jobs import data_processing_job
 
-# The platform will load this Definitions object
 defs = Definitions(
-    jobs=[data_processing_job]
-    # You can also declare schedules, sensors, and resources here
+    jobs=[data_processing_job, remove_duplicates_job_s3],  # add the remaining imported jobs here
+    sensors=[notify_success, notify_failure, notify_canceled],
+    resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},
+    loggers={"simpl": simpl_json_logger},
 )
 ```
 
-### 2.3 Best Practices & Constraints
+### 2.4 Best Practices & Constraints
+
 - **Separation of Concerns**: Keep orchestration logic (how ops connect) strictly separate from heavy business logic (which should ideally live in separate Python modules/classes).
 - **Naming Conventions**: Use snake_case for jobs and ops. Code locations should be named based on the domain they represent (e.g., inventory_sync_service).
 - **Dependency Management**: All dependencies must be explicitly declared in pyproject.toml or requirements.txt.