feat(SIMPL-24642): migrate tests from 3 source repos with updated imports

2026-04-24 18:42:07 +02:00
parent 4e0b216410
commit d14b2dfac4
26 changed files with 6280 additions and 0 deletions
--- a/tests/init.py
+++ b/tests/init.py
@@ -0,0 +1 @@
+
--- a/tests/data_processing/init.py
+++ b/tests/data_processing/init.py
@@ -0,0 +1 @@
+
--- a/tests/data_processing/conftest.py
+++ b/tests/data_processing/conftest.py
@@ -0,0 +1,53 @@
+"""Pytest configuration and shared fixtures."""
+
+import pytest
+import pandas as pd
+from unittest.mock import MagicMock, patch
+import sys
+from dagster import build_op_context
+
+# Mock external dependencies that might not be available in test environment
+sys.modules['spellchecker'] = MagicMock()
+
+
+@pytest.fixture
+def mock_context():
+    """Create a mock Dagster context for testing operations."""
+    context = build_op_context()
+    return context
+
+
+@pytest.fixture
+def sample_dataframe():
+    """Create a sample DataFrame for testing."""
+    return pd.DataFrame({
+        'Name': ['John Doe', 'jane smith', 'John Doe', 'bob johnson', 'John Doe'],
+        'Age': [25, 30, 25, None, 25],
+        'City': ['New York', 'los angeles', 'New York', 'chicago', 'New York'],
+        'Status': ['Active', 'INACTIVE', 'Active', 'penDing', 'Active']
+    })
+
+
+@pytest.fixture
+def sample_dataframe_with_typos():
+    """Create a sample DataFrame with typos for spell checking."""
+    return pd.DataFrame({
+        'Name': ['jon doe', 'jane smith', 'bob jonson'],
+        'Description': ['developer', 'analst', 'enginer']
+    })
+
+
+@pytest.fixture
+def empty_dataframe():
+    """Create an empty DataFrame."""
+    return pd.DataFrame()
+
+
+@pytest.fixture
+def dataframe_with_missing_values():
+    """Create a DataFrame with various missing values."""
+    return pd.DataFrame({
+        'Column1': [1, None, 3, None, 5],
+        'Column2': ['a', 'b', None, 'd', None],
+        'Column3': [None, None, None, None, None]
+    })
--- a/tests/data_processing/conftest_utils.py
+++ b/tests/data_processing/conftest_utils.py
@@ -0,0 +1,7 @@
+"""Configuration utilities for testing."""
+
+import os
+import sys
+
+# Add src directory to path for imports
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
--- a/tests/data_processing/test_config_models.py
+++ b/tests/data_processing/test_config_models.py
@@ -0,0 +1,202 @@
+"""Unit tests for configuration models."""
+
+import pytest
+from pydantic import ValidationError
+from template_code_location.data_processing.config_models import (
+    FillMissingConfiguration,
+    ColumnsSelectConfiguration,
+    SpellCheckConfiguration,
+    AggregationConfiguration
+)
+
+
+class TestColumnsSelectConfiguration:
+    """Tests for ColumnsSelectConfiguration."""
+
+    def test_default_columns(self):
+        """Test default columns configuration."""
+        config = ColumnsSelectConfiguration()
+        assert config.columns == ['Name']
+
+    def test_custom_columns(self):
+        """Test custom columns configuration."""
+        config = ColumnsSelectConfiguration(columns=['Col1', 'Col2', 'Col3'])
+        assert config.columns == ['Col1', 'Col2', 'Col3']
+
+    def test_empty_columns_list(self):
+        """Test with empty columns list."""
+        config = ColumnsSelectConfiguration(columns=[])
+        assert config.columns == []
+
+    def test_single_column(self):
+        """Test with a single column."""
+        config = ColumnsSelectConfiguration(columns=['SingleCol'])
+        assert config.columns == ['SingleCol']
+
+    def test_columns_with_special_characters(self):
+        """Test columns with special characters."""
+        config = ColumnsSelectConfiguration(columns=['Col-1', 'Col_2', 'Col.3'])
+        assert config.columns == ['Col-1', 'Col_2', 'Col.3']
+
+    def test_duplicate_columns_are_removed(self):
+        """Verifica che i duplicati vengano rimossi mantenendo l'ordine (grazie a dict.fromkeys)."""
+        config = ColumnsSelectConfiguration(columns=['A', 'B', 'A', 'C', 'B'])
+        
+        assert config.columns == ['A', 'B', 'C']
+
+    def test_duplicate_default_behavior(self):
+        """Verifica che anche input estremi vengano gestiti correttamente."""
+        config = ColumnsSelectConfiguration(columns=['Name', 'Name', 'Name'])
+        assert config.columns == ['Name']
+
+
+class TestFillMissingConfiguration:
+    """Tests for FillMissingConfiguration."""
+
+    def test_default_fill_map(self):
+        """Test default fill map configuration."""
+        config = FillMissingConfiguration()
+        
+        assert config.fill_map == {'Age': 'UNKNOWN_AGE'}
+
+    def test_custom_fill_map(self):
+        """Test custom fill map configuration."""
+        fill_map = {'Age': '0', 'Name': 'UNKNOWN', 'City': 'N/A'}
+        config = FillMissingConfiguration(fill_map=fill_map)
+        
+        assert config.fill_map == fill_map
+
+    def test_empty_fill_map(self):
+        """Test with empty fill map."""
+        config = FillMissingConfiguration(fill_map={})
+        
+        assert config.fill_map == {}
+
+    def test_fill_map_with_numeric_values(self):
+        """Test fill map with numeric string values."""
+        fill_map = {'Age': '0', 'Score': '-1', 'Count': '999'}
+        config = FillMissingConfiguration(fill_map=fill_map)
+        
+        assert config.fill_map == fill_map
+
+    def test_fill_map_with_string_values(self):
+        """Test fill map with string values."""
+        fill_map = {'Name': 'Unknown', 'Email': 'no-email'}
+        config = FillMissingConfiguration(fill_map=fill_map)
+        
+        assert config.fill_map == fill_map
+
+    def test_fill_map_mixed_types(self):
+        """Test fill map with mixed value types (all strings)."""
+        fill_map = {'IntCol': '0', 'StrCol': 'Unknown', 'FloatCol': '0.0'}
+        config = FillMissingConfiguration(fill_map=fill_map)
+        
+        assert config.fill_map == fill_map
+
+
+class TestSpellCheckConfiguration:
+    """Tests for SpellCheckConfiguration."""
+
+    def test_default_spell_check_config(self):
+        """Test default spell check configuration."""
+        config = SpellCheckConfiguration()
+        
+        assert config.columns == ['Name']
+        assert config.language == 'en'
+
+    def test_custom_spell_check_config(self):
+        """Test custom spell check configuration."""
+        config = SpellCheckConfiguration(
+            columns=['Description', 'Notes'],
+            language='es'
+        )
+        
+        assert config.columns == ['Description', 'Notes']
+        assert config.language == 'es'
+
+    def test_spell_check_all_languages(self):
+        """Test spell check with all supported languages."""
+        supported_languages = ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl']
+        
+        for lang in supported_languages:
+            config = SpellCheckConfiguration(language=lang)
+            assert config.language == lang
+
+    def test_spell_check_invalid_language(self):
+        """Test spell check with invalid language."""
+        with pytest.raises(ValidationError):
+            SpellCheckConfiguration(language='invalid')
+
+    def test_spell_check_multiple_columns(self):
+        """Test spell check with multiple columns."""
+        columns = ['Col1', 'Col2', 'Col3', 'Col4']
+        config = SpellCheckConfiguration(columns=columns)
+        
+        assert config.columns == columns
+
+    def test_spell_check_empty_columns(self):
+        """Test spell check with empty columns list."""
+        config = SpellCheckConfiguration(columns=[])
+        
+        assert config.columns == []
+        assert config.language == 'en'
+
+    def test_spell_check_inheritance(self):
+        """Test that SpellCheckConfiguration inherits from ColumnsSelectConfiguration."""
+        config = SpellCheckConfiguration()
+        
+        assert isinstance(config, ColumnsSelectConfiguration)
+        assert hasattr(config, 'columns')
+        assert hasattr(config, 'language')
+
+    @pytest.mark.parametrize("language", ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl'])
+    def test_spell_check_languages_parametrized(self, language):
+        """Test spell check with parametrized languages."""
+        config = SpellCheckConfiguration(language=language)
+        assert config.language == language
+
+class TestAggregationConfiguration:
+    """Tests for AggregationConfiguration."""
+
+    def test_aggregation_default_config(self):
+        """Test default aggregation configuration."""
+        config = AggregationConfiguration()
+        
+        assert config.columns == ['Name']
+        assert config.operation == 'sum'
+
+    @pytest.mark.parametrize("op", ["sum", "mean", "min", "max", "count"])
+    def test_aggregation_valid_operations(self, op):
+        """Test all allowed aggregation operations."""
+        config = AggregationConfiguration(operation=op)
+        assert config.operation == op
+
+    def test_aggregation_invalid_operation(self):
+        """Test that an invalid operation raises a ValidationError."""
+        with pytest.raises(ValidationError) as excinfo:
+            AggregationConfiguration(operation="invalid_op")
+        
+        assert "Invalid aggregation operation 'invalid_op'" in str(excinfo.value)
+
+    def test_aggregation_custom_columns(self):
+        """Test aggregation with custom columns."""
+        config = AggregationConfiguration(columns=['Price', 'Quantity'], operation='mean')
+        
+        assert config.columns == ['Price', 'Quantity']
+        assert config.operation == 'mean'
+
+    def test_aggregation_inheritance(self):
+        """Test that AggregationConfiguration inherits from ColumnsSelectConfiguration."""
+        config = AggregationConfiguration()
+        
+        assert isinstance(config, ColumnsSelectConfiguration)
+        assert hasattr(config, 'columns')
+        assert hasattr(config, 'operation')
+
+    def test_aggregation_model_dump(self):
+        """Test that model_dump contains all expected fields (useful for the Dagster op)."""
+        config = AggregationConfiguration(columns=['Value'], operation='max')
+        dump = config.model_dump()
+        
+        assert dump['columns'] == ['Value']
+        assert dump['operation'] == 'max'
--- a/tests/data_processing/test_integration.py
+++ b/tests/data_processing/test_integration.py
@@ -0,0 +1,185 @@
+"""Integration tests for data processing jobs."""
+
+import pytest
+import pandas as pd
+from unittest.mock import patch, MagicMock
+from template_code_location.data_processing.ops import (
+    remove_duplicates,
+    fill_missing_values,
+    standardize_categorical_values,
+    correct_typos
+)
+from template_code_location.data_processing.config_models import (
+    FillMissingConfiguration,
+    ColumnsSelectConfiguration,
+    SpellCheckConfiguration
+)
+
+
+class TestPipelineIntegration:
+    """Integration tests for data processing pipeline."""
+
+    def test_pipeline_remove_duplicates_then_standardize(self, mock_context):
+        """Test pipeline: remove duplicates then standardize."""
+        df = pd.DataFrame({
+            'Name': ['  JOHN DOE  ', 'jane smith', '  JOHN DOE  ', 'bob johnson'],
+            'City': ['NEW YORK', 'los angeles', 'NEW YORK', 'chicago']
+        })
+        
+        # Step 1: Remove duplicates
+        df_no_dupes = remove_duplicates(mock_context, df)
+        assert df_no_dupes.shape[0] == 3
+        
+        # Step 2: Standardize
+        config = ColumnsSelectConfiguration(columns=['Name', 'City'])
+        df_standardized = standardize_categorical_values(mock_context, config, df_no_dupes)
+        
+        assert df_standardized['Name'].iloc[0] == 'john doe'
+        assert df_standardized['City'].iloc[0] == 'new york'
+
+    def test_pipeline_fill_missing_then_standardize(self, mock_context):
+        """Test pipeline: fill missing values then standardize."""
+        df = pd.DataFrame({
+            'Category': ['  ACTIVE  ', None, '  PENDING  '],
+            'Value': ['1', '2', None]
+        })
+        
+        # Step 1: Fill missing values
+        fill_config = FillMissingConfiguration(fill_map={'Value': '0'})
+        df_filled = fill_missing_values(mock_context, fill_config, df)
+        
+        # Step 2: Standardize
+        std_config = ColumnsSelectConfiguration(columns=['Category'])
+        df_standardized = standardize_categorical_values(mock_context, std_config, df_filled)
+        
+        assert df_standardized['Category'].iloc[0] == 'active'
+        assert df_filled['Value'].iloc[2] == '0'
+
+    def test_pipeline_all_operations(self, mock_context):
+        """Test complete pipeline with all operations."""
+        df = pd.DataFrame({
+            'Name': ['  john doe  ', 'JANE SMITH', '  john doe  ', None],
+            'Value': ['1', None, '1', '2']
+        })
+        
+        # Step 1: Remove duplicates
+        df = remove_duplicates(mock_context, df)
+        assert df.shape[0] == 3
+        
+        # Step 2: Fill missing
+        fill_config = FillMissingConfiguration(fill_map={'Value': '0'})
+        df = fill_missing_values(mock_context, fill_config, df)
+        assert df['Value'].isna().sum() == 0
+        
+        # Step 3: Standardize
+        std_config = ColumnsSelectConfiguration(columns=['Name'])
+        df = standardize_categorical_values(mock_context, std_config, df)
+        
+        assert df['Name'].iloc[0] == 'john doe'
+
+    def test_pipeline_with_large_dataset(self, mock_context):
+        """Test pipeline performance with larger dataset."""
+        # Create larger dataset
+        size = 1000
+        df = pd.DataFrame({
+            'ID': list(range(size)),
+            'Name': ['User_' + str(i % 50) for i in range(size)],
+            'Status': ['ACTIVE', 'INACTIVE', 'PENDING'] * (size // 3) + ['ACTIVE'] * (size % 3),
+            'Score': [i % 100 for i in range(size)]
+        })
+        
+        # Add some duplicates
+        df = pd.concat([df, df.head(100)], ignore_index=True)
+        
+        # Process
+        df_cleaned = remove_duplicates(mock_context, df)
+        
+        assert df_cleaned.shape[0] == 1000
+        assert df_cleaned.shape[1] == 4
+
+
+class TestErrorHandling:
+    """Tests for error handling and edge cases."""
+
+    def test_operation_with_corrupted_data(self, mock_context):
+        """Test operations with corrupted/unusual data."""
+        df = pd.DataFrame({
+            'Col': [float('nan'), float('inf'), -float('inf'), 0, 1, 2]
+        })
+        
+        # Should handle special float values
+        result = remove_duplicates(mock_context, df)
+        assert result.shape[0] > 0
+
+    def test_operation_preserves_index(self, mock_context):
+        """Test that index is handled correctly."""
+        df = pd.DataFrame(
+            {'Col': [1, 2, 1, 3]},
+            index=['a', 'b', 'c', 'd']
+        )
+        
+        result = remove_duplicates(mock_context, df)
+        # Index may be reset, so just check shape
+        assert result.shape[0] == 3
+
+    def test_standardize_with_unicode_characters(self, mock_context):
+        """Test standardization with unicode characters."""
+        df = pd.DataFrame({
+            'Name': ['José', 'François', 'Müller']
+        })
+        
+        config = ColumnsSelectConfiguration(columns=['Name'])
+        result = standardize_categorical_values(mock_context, config, df)
+        
+        # Should handle unicode correctly
+        assert result.shape[0] == 3
+
+    def test_fill_with_same_key_multiple_times(self, mock_context):
+        """Test filling when fill_map has multiple entries."""
+        df = pd.DataFrame({
+            'A': ['1', None, '3'],
+            'B': [None, None, 'c'],
+            'C': [None, '2', None]
+        })
+        
+        config = FillMissingConfiguration(fill_map={
+            'A': '-1',
+            'B': 'EMPTY',
+            'C': '0'
+        })
+        
+        result = fill_missing_values(mock_context, config, df)
+        
+        assert result.loc[1, 'A'] == '-1'
+        assert result.loc[0, 'B'] == 'EMPTY'
+        assert result.loc[0, 'C'] == '0'
+
+
+class TestDataTypePreservation:
+    """Tests to ensure data types are preserved appropriately."""
+
+    def test_remove_duplicates_preserves_dtypes(self, mock_context):
+        """Test that remove_duplicates preserves column data types."""
+        df = pd.DataFrame({
+            'int32': pd.array([1, 2, 1], dtype='int32'),
+            'float64': pd.array([1.5, 2.5, 1.5], dtype='float64'),
+            'str': ['a', 'b', 'a']
+        })
+        
+        result = remove_duplicates(mock_context, df)
+        
+        assert result['int32'].dtype == df['int32'].dtype
+        assert result['float64'].dtype == df['float64'].dtype
+
+    def test_fill_missing_preserves_column_types_where_possible(self, mock_context):
+        """Test that fill_missing handles type preservation."""
+        df = pd.DataFrame({
+            'A': pd.array(['1', None, '3'], dtype='string'),
+            'B': ['x', 'y', 'z']
+        })
+        
+        config = FillMissingConfiguration(fill_map={'A': '0'})
+        result = fill_missing_values(mock_context, config, df)
+        
+        assert result['A'].loc[1] == '0'
+        assert result['B'].dtype == df['B'].dtype
--- a/tests/data_processing/test_jobs.py
+++ b/tests/data_processing/test_jobs.py
@@ -0,0 +1,56 @@
+from template_code_location.data_processing.jobs import (
+    remove_duplicates_job_s3,
+    fill_missing_values_job_s3,
+    standardize_categorical_values_job_s3,
+    correct_typos_job_s3,
+    normalize_numeric_min_max_job_s3,
+    normalize_datetime_job_s3,
+    normalize_coordinates_job_s3,
+    add_global_aggregations_job_s3
+)
+
+
+def test_remove_duplicates_job_s3_is_callable():
+    """Test remove_duplicates_job_s3 is a valid Dagster job"""
+    assert callable(remove_duplicates_job_s3)
+    assert hasattr(remove_duplicates_job_s3, 'execute_in_process')
+
+
+def test_fill_missing_values_job_s3_is_callable():
+    """Test fill_missing_values_job_s3 is a valid Dagster job"""
+    assert callable(fill_missing_values_job_s3)
+    assert hasattr(fill_missing_values_job_s3, 'execute_in_process')
+
+
+def test_standardize_categorical_values_job_s3_is_callable():
+    """Test standardize_categorical_values_job_s3 is a valid Dagster job"""
+    assert callable(standardize_categorical_values_job_s3)
+    assert hasattr(standardize_categorical_values_job_s3, 'execute_in_process')
+
+
+def test_correct_typos_job_s3_is_callable():
+    """Test correct_typos_job_s3 is a valid Dagster job"""
+    assert callable(correct_typos_job_s3)
+    assert hasattr(correct_typos_job_s3, 'execute_in_process')
+
+
+def test_normalize_numeric_min_max_job_s3_is_callable():
+    """Test normalize_numeric_min_max_job_s3 is a valid Dagster job"""
+    assert callable(normalize_numeric_min_max_job_s3)
+    assert hasattr(normalize_numeric_min_max_job_s3, 'execute_in_process')
+
+
+def test_normalize_datetime_job_s3_is_callable():
+    """Test normalize_datetime_job_s3 is a valid Dagster job"""
+    assert callable(normalize_datetime_job_s3)
+    assert hasattr(normalize_datetime_job_s3, 'execute_in_process')
+
+def test_normalize_coordinates_job_s3_is_callable():
+    """Test normalize_coordinates_job_s3 is a valid Dagster job"""
+    assert callable(normalize_coordinates_job_s3)
+    assert hasattr(normalize_coordinates_job_s3, 'execute_in_process')
+
+def test_add_global_aggregations_job_s3_is_callable():
+    """Test add_global_aggregations_job_s3 is a valid Dagster job"""
+    assert callable(add_global_aggregations_job_s3)
+    assert hasattr(add_global_aggregations_job_s3, 'execute_in_process')
--- a/tests/data_processing/test_ops.py
+++ b/tests/data_processing/test_ops.py
@@ -0,0 +1,700 @@
+"""Unit tests for data processing operations."""
+
+import pytest
+import pandas as pd
+from template_code_location.data_processing.ops import (
+    remove_duplicates,
+    fill_missing_values,
+    standardize_categorical_values,
+    correct_typos,
+    normalize_datetime,
+    normalize_numeric_min_max,
+    normalize_coordinates,
+    add_global_aggregations
+)
+from template_code_location.data_processing.config_models import (
+    FillMissingConfiguration,
+    ColumnsSelectConfiguration,
+    SpellCheckConfiguration,
+    AggregationConfiguration,
+    CoordinatesNormalizationConfiguration
+)
+
+
+class TestRemoveDuplicates:
+    """Tests for the remove_duplicates operation."""
+
+    def test_remove_duplicates_basic(self, mock_context, sample_dataframe):
+        """Test basic duplicate removal."""
+        result = remove_duplicates(mock_context, sample_dataframe)
+        
+        # Should have 3 unique rows (john doe appears 3x, jane smith 1x, bob johnson 1x)
+        assert result.shape[0] == 3
+        assert len(result) < len(sample_dataframe)
+
+    def test_remove_duplicates_no_duplicates(self, mock_context):
+        """Test remove_duplicates when there are no duplicates."""
+        df = pd.DataFrame({
+            'A': [1, 2, 3],
+            'B': ['x', 'y', 'z']
+        })
+        result = remove_duplicates(mock_context, df)
+        
+        assert result.shape[0] == 3
+        pd.testing.assert_frame_equal(result, df)
+
+    def test_remove_duplicates_all_duplicates(self, mock_context):
+        """Test remove_duplicates when all rows are identical."""
+        df = pd.DataFrame({
+            'A': [1, 1, 1],
+            'B': ['x', 'x', 'x']
+        })
+        result = remove_duplicates(mock_context, df)
+        
+        assert result.shape[0] == 1
+
+    def test_remove_duplicates_empty_dataframe(self, mock_context, empty_dataframe):
+        """Test remove_duplicates with empty DataFrame."""
+        result = remove_duplicates(mock_context, empty_dataframe)
+        
+        assert result.shape[0] == 0
+        assert result.shape[1] == 0
+
+    def test_remove_duplicates_preserves_data_types(self, mock_context):
+        """Test that remove_duplicates preserves data types."""
+        df = pd.DataFrame({
+            'int_col': [1, 2, 1],
+            'str_col': ['a', 'b', 'a'],
+            'float_col': [1.5, 2.5, 1.5]
+        })
+        result = remove_duplicates(mock_context, df)
+        
+        assert result['int_col'].dtype == df['int_col'].dtype
+        assert result['str_col'].dtype == df['str_col'].dtype
+        assert result['float_col'].dtype == df['float_col'].dtype
+
+
+class TestFillMissingValues:
+    """Tests for the fill_missing_values operation."""
+
+    def test_fill_missing_values_basic(self, mock_context, dataframe_with_missing_values):
+        """Test basic missing value filling."""
+        config = FillMissingConfiguration(fill_map={'Column1': '0', 'Column2': 'N/A'})
+        result = fill_missing_values(mock_context, config, dataframe_with_missing_values)
+        
+        # Check that no NaN values remain
+        assert result['Column1'].isna().sum() == 0
+        assert result['Column2'].isna().sum() == 0
+
+    def test_fill_missing_values_with_different_values(self, mock_context):
+        """Test filling with different replacement values."""
+        df = pd.DataFrame({
+            'A': [1, None, 3],
+            'B': [None, 'b', 'c']
+        })
+        config = FillMissingConfiguration(fill_map={'A': '-1', 'B': 'UNKNOWN'})
+        result = fill_missing_values(mock_context, config, df)
+        
+        assert result.loc[1, 'A'] == '-1'
+        assert result.loc[0, 'B'] == 'UNKNOWN'
+
+    def test_fill_missing_values_partial_columns(self, mock_context):
+        """Test filling only specified columns."""
+        df = pd.DataFrame({
+            'A': [1, None, 3],
+            'B': [None, 'b', 'c']
+        })
+        config = FillMissingConfiguration(fill_map={'A': '999'})
+        result = fill_missing_values(mock_context, config, df)
+        
+        assert result.loc[1, 'A'] == '999'
+        assert pd.isna(result.loc[0, 'B'])  # B should still have NaN
+
+    def test_fill_missing_values_no_missing(self, mock_context):
+        """Test when there are no missing values."""
+        df = pd.DataFrame({
+            'A': ['1', '2', '3'],
+            'B': ['a', 'b', 'c']
+        })
+        config = FillMissingConfiguration(fill_map={'A': '0'})
+        result = fill_missing_values(mock_context, config, df)
+        
+        pd.testing.assert_frame_equal(result, df)
+
+    def test_fill_missing_values_empty_dataframe(self, mock_context, empty_dataframe):
+        """Test with empty DataFrame."""
+        config = FillMissingConfiguration(fill_map={})
+        result = fill_missing_values(mock_context, config, empty_dataframe)
+        
+        assert result.shape[0] == 0
+
+
+class TestStandardizeCategoricalValues:
+    """Tests for the standardize_categorical_values operation."""
+
+    def test_standardize_categorical_basic(self, mock_context, sample_dataframe):
+        """Test basic categorical standardization."""
+        config = ColumnsSelectConfiguration(columns=['Name', 'City', 'Status'])
+        result = standardize_categorical_values(mock_context, config, sample_dataframe)
+        
+        # Check that values are lowercase and stripped
+        assert result['Name'].iloc[0] == 'john doe'
+        assert result['City'].iloc[1] == 'los angeles'
+        assert result['Status'].iloc[1] == 'inactive'
+
+    def test_standardize_categorical_single_column(self, mock_context):
+        """Test standardization on a single column."""
+        df = pd.DataFrame({
+            'City': ['  NEW YORK  ', 'LOS ANGELES', '  chicago  ']
+        })
+        config = ColumnsSelectConfiguration(columns=['City'])
+        result = standardize_categorical_values(mock_context, config, df)
+        
+        assert result['City'].iloc[0] == 'new york'
+        assert result['City'].iloc[1] == 'los angeles'
+        assert result['City'].iloc[2] == 'chicago'
+
+    def test_standardize_categorical_missing_column(self, mock_context, sample_dataframe):
+        """Test with non-existent column (should skip)."""
+        config = ColumnsSelectConfiguration(columns=['NonExistent', 'Name'])
+        result = standardize_categorical_values(mock_context, config, sample_dataframe)
+        
+        # Should process 'Name' column without error
+        assert result['Name'].iloc[0] == 'john doe'
+
+    def test_standardize_categorical_with_missing_values(self, mock_context):
+        """Test standardization with missing values."""
+        df = pd.DataFrame({
+            'Category': ['  ACTIVE  ', None, '  pending  ']
+        })
+        config = ColumnsSelectConfiguration(columns=['Category'])
+        result = standardize_categorical_values(mock_context, config, df)
+        
+        assert result['Category'].iloc[0] == 'active'
+        assert result['Category'].iloc[1] == ''
+        assert result['Category'].iloc[2] == 'pending'
+
+    def test_standardize_categorical_empty_dataframe(self, mock_context, empty_dataframe):
+        """Test with empty DataFrame."""
+        config = ColumnsSelectConfiguration(columns=['A', 'B'])
+        result = standardize_categorical_values(mock_context, config, empty_dataframe)
+        
+        assert result.shape[0] == 0
+
+    def test_standardize_categorical_numeric_columns(self, mock_context):
+        """Test that numeric columns are converted to strings."""
+        df = pd.DataFrame({
+            'NumCol': [1, 2, 3]
+        })
+        config = ColumnsSelectConfiguration(columns=['NumCol'])
+        result = standardize_categorical_values(mock_context, config, df)
+        
+        assert result['NumCol'].iloc[0] == '1'
+        assert isinstance(result['NumCol'].iloc[0], str)
+
+
+class TestCorrectTypos:
+    """Tests for the correct_typos operation."""
+
+    def test_correct_typos_basic(self, mock_context):
+        """Test basic typo correction."""
+        df = pd.DataFrame({
+            'Name': ['jon', 'jayne', 'bob']
+        })
+        config = SpellCheckConfiguration(columns=['Name'], language='en')
+        result = correct_typos(mock_context, config, df)
+        
+        # Result should have corrections applied
+        assert result.shape[0] == 3
+
+    def test_correct_typos_missing_column(self, mock_context):
+        """Test with non-existent column (should skip)."""
+        df = pd.DataFrame({
+            'Name': ['jon', 'jayne']
+        })
+        config = SpellCheckConfiguration(columns=['NonExistent'], language='en')
+        result = correct_typos(mock_context, config, df)
+        
+        # Should not raise error, just skip
+        pd.testing.assert_frame_equal(result, df)
+
+    def test_correct_typos_with_missing_values(self, mock_context):
+        """Test typo correction with missing values."""
+        df = pd.DataFrame({
+            'Text': ['helo', '', 'wrld']
+        })
+        config = SpellCheckConfiguration(columns=['Text'], language='en')
+        result = correct_typos(mock_context, config, df)
+        
+        # Empty strings should be preserved
+        assert result.loc[1, 'Text'] == ''
+
+    def test_correct_typos_empty_dataframe(self, mock_context, empty_dataframe):
+        """Test with empty DataFrame."""
+        config = SpellCheckConfiguration(columns=['A'], language='en')
+        result = correct_typos(mock_context, config, empty_dataframe)
+        
+        assert result.shape[0] == 0
+
+    def test_correct_typos_different_languages(self, mock_context):
+        """Test typo correction with different languages."""
+        df = pd.DataFrame({
+            'Text': ['ciao', 'mondo']
+        })
+        
+        for lang in ['en', 'es', 'it']:
+            config = SpellCheckConfiguration(columns=['Text'], language=lang)
+            result = correct_typos(mock_context, config, df)
+            
+            # Should process without error
+            assert result.shape[0] == 2
+
+    def test_correct_typos_numeric_values(self, mock_context):
+        """Test typo correction on numeric values converted to strings."""
+        df = pd.DataFrame({
+            'Values': [123, 456, 789]
+        })
+        config = SpellCheckConfiguration(columns=['Values'], language='en')
+        result = correct_typos(mock_context, config, df)
+        
+        # Numeric values should be converted to string and processed
+        assert result.shape[0] == 3
+
+class TestNormalizeDatetime:
+    """Tests for the normalize_datetime operation."""
+
+    def test_normalize_datetime_basic(self, mock_context):
+        """Test basic datetime normalization to ISO format."""
+        df = pd.DataFrame({
+            'date_col': ['2023-01-01 10:00:00', '2023-12-31T23:59:59']
+        })
+        
+        config = ColumnsSelectConfiguration(columns=['date_col'])
+        
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'date_col_iso' in result.columns
+        assert result['date_col_iso'].iloc[0] == '2023-01-01T10:00:00Z'
+        assert result['date_col_iso'].iloc[1] == '2023-12-31T23:59:59Z'
+
+    def test_normalize_datetime_missing_column(self, mock_context, sample_dataframe):
+        """Test behavior when a configured column is missing in the DataFrame."""
+        config = ColumnsSelectConfiguration(columns=['non_existent_column'])
+        
+        result = normalize_datetime(mock_context, config, sample_dataframe.copy())
+
+        pd.testing.assert_frame_equal(result, sample_dataframe)
+
+    def test_normalize_datetime_unparseable_values(self, mock_context):
+        """Test column with values that cannot be parsed as dates."""
+        df = pd.DataFrame({
+            'invalid_col': ['not-a-date', 'completely-random-text']
+        })
+        config = ColumnsSelectConfiguration(columns=['invalid_col'])
+        
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'invalid_col_iso' not in result.columns
+
+    def test_normalize_datetime_mixed_and_nulls(self, mock_context):
+        """Test column with mixed valid dates, invalid dates, and NaNs."""
+        df = pd.DataFrame({
+            'mixed_col': ['2023-05-01', None, 'invalid-date']
+        })
+        config = ColumnsSelectConfiguration(columns=['mixed_col'])
+        
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'mixed_col_iso' in result.columns
+        assert result['mixed_col_iso'].iloc[0] == '2023-05-01T00:00:00Z'
+        
+        assert result['mixed_col_iso'].iloc[1] == ""
+        assert result['mixed_col_iso'].iloc[2] == ""
+
+    def test_normalize_datetime_empty_dataframe(self, mock_context, empty_dataframe):
+        """Test with an empty DataFrame."""
+        config = ColumnsSelectConfiguration(columns=['some_col'])
+        
+        result = normalize_datetime(mock_context, config, empty_dataframe)
+        
+        assert result.empty
+
+    def test_normalize_datetime_epoch_only(self, mock_context, capsys):
+        """If parsing a column yields only the Unix epoch date, it should be skipped."""
+        df = pd.DataFrame({
+            'weird_col': ['0', 0, '0000', '']
+        })
+
+        config = ColumnsSelectConfiguration(columns=['weird_col'])
+
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'weird_col_iso' not in result.columns
+
+        captured = capsys.readouterr()
+        assert "all normalized values are '1970-01-01'" in captured.err
+
+    def test_normalize_datetime_all_1970_skipped(self, mock_context, capsys):
+        """If all formatted values are '1970-01-01', the column should be skipped with a warning."""
+        df = pd.DataFrame({
+            'ts_col': ['1970-01-01 05:30:00', '1970-01-01 12:00:00']
+        })
+
+        config = ColumnsSelectConfiguration(columns=['ts_col'])
+
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'ts_col_iso' not in result.columns
+
+        captured = capsys.readouterr()
+        assert "all normalized values are '1970-01-01'" in captured.err
+
+    def test_normalize_datetime_integer_age_column_skipped(self, mock_context, capsys):
+        """If an integer column like 'age' is passed, all values become 1970-01-01 and should be skipped."""
+        df = pd.DataFrame({
+            'age': [66, 45, 40, 43, 20, 26, 69, 21, 46]
+        })
+
+        config = ColumnsSelectConfiguration(columns=['age'])
+
+        result = normalize_datetime(mock_context, config, df.copy())
+
+        assert 'age_iso' not in result.columns
+
+        captured = capsys.readouterr()
+        assert "all normalized values are '1970-01-01'" in captured.err
+
+class TestNormalizeNumericMinMax:
+    """Tests for the normalize_numeric_min_max operation."""
+
+    def test_normalize_numeric_basic(self, mock_context):
+        """Test standard min-max normalization between 0 and 1."""
+        df = pd.DataFrame({
+            'score': [10, 20, 30, 40, 50]
+        })
+        config = ColumnsSelectConfiguration(columns=['score'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'score_norm' in result.columns
+        assert result['score_norm'].min() == 0.0
+        assert result['score_norm'].max() == 1.0
+        
+        assert result['score_norm'].iloc[2] == 0.5
+
+    def test_normalize_numeric_missing_column(self, mock_context):
+        """Test skipping of non-existent columns."""
+        df = pd.DataFrame({'existing': [1, 2, 3]})
+        config = ColumnsSelectConfiguration(columns=['missing_col'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'missing_col_norm' not in result.columns
+
+    def test_normalize_numeric_constant_values(self, mock_context):
+        """Test skipping when min == max to avoid division by zero."""
+        df = pd.DataFrame({
+            'constant': [10, 10, 10]
+        })
+        config = ColumnsSelectConfiguration(columns=['constant'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'constant_norm' not in result.columns
+
+    def test_normalize_numeric_with_nans(self, mock_context):
+        """Test normalization with NaN values (pandas min/max ignore NaNs by default)."""
+        df = pd.DataFrame({
+            'with_nans': [10, None, 50]
+        })
+        config = ColumnsSelectConfiguration(columns=['with_nans'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'with_nans_norm' in result.columns
+        assert result['with_nans_norm'].iloc[0] == 0.0
+        assert result['with_nans_norm'].iloc[2] == 1.0
+        assert pd.isna(result['with_nans_norm'].iloc[1])
+
+    def test_normalize_numeric_multiple_columns(self, mock_context):
+        """Test processing multiple columns in one call."""
+        df = pd.DataFrame({
+            'A': [1, 2],
+            'B': [10, 20]
+        })
+        config = ColumnsSelectConfiguration(columns=['A', 'B'])
+        
+        result = normalize_numeric_min_max(mock_context, config, df.copy())
+
+        assert 'A_norm' in result.columns
+        assert 'B_norm' in result.columns
+
+class TestNormalizeCoordinates:
+    """Tests for the normalize_coordinates operation."""
+
+    def test_normalize_coordinates_basic(self, mock_context):
+        """Test rounding and basic coordinate normalization."""
+        df = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert result['lat'].iloc[0] == 45.1235
+        assert result['lon'].iloc[0] == 9.1235
+        
+        assert len(result) == 2
+
+    def test_normalize_coordinates_filtering(self, mock_context):
+        """Test filtering of out-of-range coordinates."""
+        df = pd.DataFrame({
+            'lat': [45.0, 100.0, -91.0, 0.0],  # 100 e -91 sono out of range
+            'lon': [9.0, 0.0, 0.0, 200.0]      # 200 è out of range
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        assert result['lat'].iloc[0] == 45.0
+
+    def test_normalize_coordinates_invalid_types(self, mock_context):
+        """Test conversion of strings to numeric and handling of NaNs."""
+        df = pd.DataFrame({
+            'lat': ["45.5", "invalid", None],
+            'lon': ["9.5", "10.0", "11.0"]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        assert isinstance(result['lat'].iloc[0], float)
+
+    def test_normalize_coordinates_empty_df(self, mock_context, empty_dataframe):
+        """Test with an empty DataFrame."""
+        
+        df = pd.DataFrame(columns=['lat', 'lon'])
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        
+        result = normalize_coordinates(mock_context, config, df)
+        
+        assert len(result) == 0
+        assert result.empty
+
+    def test_normalize_coordinates_default_config(self, mock_context):
+        """Test that normalize_coordinates uses default 'lat'/'lon' columns when no config is provided."""
+        df = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration()
+
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert result['lat'].iloc[0] == 45.1235
+        assert result['lon'].iloc[0] == 9.1235
+        assert len(result) == 2
+
+    def test_normalize_coordinates_null_config_values(self, mock_context):
+        """Test that null lat/lon column names fall back to defaults ('lat'/'lon')."""
+        df = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn=None, lonColumn=None)
+
+        assert config.latColumn == "lat"
+        assert config.lonColumn == "lon"
+
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert result['lat'].iloc[0] == 45.1235
+        assert result['lon'].iloc[0] == 9.1235
+        assert len(result) == 2
+
+    def test_normalize_coordinates_dms_degree_symbol(self, mock_context):
+        """Test DMS parsing with degree/minute/second symbols like 40°26'46\"N."""
+        df = pd.DataFrame({
+            'lat': ["40°26'46\"N", "51°30'26\"N"],
+            'lon': ["79°58'56\"W", "0°7'39\"W"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 2
+        # 40°26'46"N ≈ 40.4461
+        assert abs(result['lat'].iloc[0] - 40.4461) < 0.001
+        # 79°58'56"W ≈ -79.9822
+        assert abs(result['lon'].iloc[0] - (-79.9822)) < 0.001
+
+    def test_normalize_coordinates_dms_spaced_format(self, mock_context):
+        """Test DMS parsing with space-separated format like '40 26 46 N'."""
+        df = pd.DataFrame({
+            'lat': ["40 26 46 N"],
+            'lon': ["79 58 56 W"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        assert abs(result['lat'].iloc[0] - 40.4461) < 0.001
+        assert abs(result['lon'].iloc[0] - (-79.9822)) < 0.001
+
+    def test_normalize_coordinates_dms_already_decimal(self, mock_context):
+        """Test that string columns with decimal values are auto-parsed correctly."""
+        df = pd.DataFrame({
+            'lat': ["45.5", "46.0"],
+            'lon': ["9.5", "10.0"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 2
+        assert result['lat'].iloc[0] == 45.5
+        assert result['lon'].iloc[0] == 9.5
+
+    def test_normalize_coordinates_dms_mixed_valid_invalid(self, mock_context):
+        """Test auto-detection with a mix of valid DMS, valid decimal, and unparseable values."""
+        df = pd.DataFrame({
+            'lat': ["40°26'46\"N", "not_a_coord", "51.5"],
+            'lon': ["79°58'56\"W", "10.0", "0.1"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        # Row with "not_a_coord" for lat should be dropped (NaN lat)
+        assert len(result) == 2
+
+    def test_normalize_coordinates_dms_out_of_range(self, mock_context):
+        """Test that DMS-parsed coordinates outside valid range are filtered out."""
+        df = pd.DataFrame({
+            'lat': ["91°0'0\"N", "45°0'0\"N"],
+            'lon': ["0°0'0\"E", "9°0'0\"E"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        # First row has lat=91° which is out of [-90, 90]
+        assert len(result) == 1
+        assert abs(result['lat'].iloc[0] - 45.0) < 0.001
+
+    def test_normalize_coordinates_dms_south_and_east(self, mock_context):
+        """Test DMS parsing with south latitude and east longitude."""
+        df = pd.DataFrame({
+            'lat': ["33°51'54\"S"],
+            'lon': ["151°12'36\"E"]
+        })
+        config = CoordinatesNormalizationConfiguration(
+            latColumn='lat', lonColumn='lon'
+        )
+        result = normalize_coordinates(mock_context, config, df.copy())
+
+        assert len(result) == 1
+        # 33°51'54"S ≈ -33.865
+        assert result['lat'].iloc[0] < 0
+        assert abs(result['lat'].iloc[0] - (-33.865)) < 0.001
+        # 151°12'36"E ≈ 151.21
+        assert result['lon'].iloc[0] > 0
+        assert abs(result['lon'].iloc[0] - 151.21) < 0.01
+
+    def test_normalize_coordinates_autodetect_numeric_vs_dms(self, mock_context):
+        """Test that numeric columns are coerced directly while string columns are parsed as DMS."""
+        # Numeric columns — should go through pd.to_numeric path
+        df_numeric = pd.DataFrame({
+            'lat': [45.123456, 46.0],
+            'lon': [9.123456, 10.0]
+        })
+        config = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')
+        result_numeric = normalize_coordinates(mock_context, config, df_numeric.copy())
+
+        assert result_numeric['lat'].iloc[0] == 45.1235
+        assert len(result_numeric) == 2
+
+        # String DMS columns — should go through _parse_dms_to_decimal path
+        df_dms = pd.DataFrame({
+            'lat': ["40°26'46\"N"],
+            'lon': ["79°58'56\"W"]
+        })
+        result_dms = normalize_coordinates(mock_context, config, df_dms.copy())
+
+        assert len(result_dms) == 1
+        assert abs(result_dms['lat'].iloc[0] - 40.4461) < 0.001
+
+class TestAddGlobalAggregations:
+    """Tests for the add_global_aggregations operation."""
+
+    def test_add_global_aggregations_success(self, mock_context):
+        """Test a successful group by and aggregation."""
+        df = pd.DataFrame({
+            'category': ['A', 'A', 'B'],
+            'value': [10, 20, 100],
+            'ignored_str': ['x', 'y', 'z']
+        })
+        
+        config = AggregationConfiguration(
+            columns=['category'], 
+            operation='sum'
+        )
+        
+        result = add_global_aggregations(mock_context, config, df.copy())
+
+        assert len(result) == 2 
+        assert result.loc[result['category'] == 'A', 'value'].values[0] == 30
+        assert result.loc[result['category'] == 'B', 'value'].values[0] == 100
+        assert 'ignored_str' not in result.columns
+        mock_context.log.info.assert_called()
+
+    def test_add_global_aggregations_missing_column(self, mock_context):
+        """Test skipping a column that does not exist in the dataframe."""
+        df = pd.DataFrame({'value': [1, 2, 3]})
+        config = AggregationConfiguration(
+            columns=['missing_col'], 
+            operation='count'
+        )
+
+        result = add_global_aggregations(mock_context, config, df.copy())
+
+        mock_context.log.warning.assert_any_call("Column 'missing_col' not found, skipping aggregation.")
+        assert len(result) == 1
+
+    def test_add_global_aggregations_unsupported_op(self, mock_context):
+        """Test the warning when an unsupported operation is provided."""
+        df = pd.DataFrame({'category': ['A'], 'value': [1]})
+        
+        config = AggregationConfiguration(
+            columns=['category'], 
+            operation='unsupported' 
+        )
+        
+        with pytest.raises(Exception):
+            add_global_aggregations(mock_context, config, df.copy())
+            
+        mock_context.log.warning.assert_any_call("Unsupported aggregation 'unsupported'")
+
+    def test_add_global_aggregations_only_numeric_kept(self, mock_context):
+        """Verify that non-numeric and non-grouping columns are dropped."""
+        df = pd.DataFrame({
+            'group': ['A', 'A'],
+            'num': [1, 2],
+            'text': ['hello', 'world']
+        })
+        config = AggregationConfiguration(columns=['group'], operation='mean')
+
+        result = add_global_aggregations(mock_context, config, df.copy())
+
+        assert 'text' not in result.columns
+        assert 'num' in result.columns
+        assert 'group' in result.columns
--- a/tests/dataframe_level_anonymisation/init.py
+++ b/tests/dataframe_level_anonymisation/init.py
@@ -0,0 +1 @@
+
--- a/tests/dataframe_level_anonymisation/config_models/init.py
+++ b/tests/dataframe_level_anonymisation/config_models/init.py
@@ -0,0 +1 @@
+
--- a/tests/dataframe_level_anonymisation/config_models/test_base_config.py
+++ b/tests/dataframe_level_anonymisation/config_models/test_base_config.py
@@ -0,0 +1,54 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.base_config import BaseConfiguration
+
+
+def test_valid_configuration_with_overrides():
+    cfg = BaseConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=10.0,
+        generalisation_hierarchies={"age": "age_hierarchy"},
+    )
+    assert cfg.ident == ["id"]
+    assert cfg.quasi_identifiers == ["age"]
+    assert cfg.supp_level == 10.0
+    assert cfg.generalisation_hierarchies == {"age": "age_hierarchy"}
+
+
+def test_default_values_are_loaded():
+    cfg = BaseConfiguration()
+    assert cfg.ident == ["Name"]
+    assert cfg.quasi_identifiers == ["Age"]
+    assert cfg.supp_level == 50.0
+    assert cfg.generalisation_hierarchies == {"Age": "simpl_age"}
+
+
+def test_missing_ident_raises_error():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            ident=[]
+        )
+
+
+def test_missing_quasi_ident_raises_error():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            quasi_identifiers=[]
+        )
+
+
+def test_overlap_between_ident_and_quasi_identifiers():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            ident=["age"],
+            quasi_identifiers=["age"]
+        )
+
+
+def test_supp_level_bounds():
+    with pytest.raises(ValidationError):
+        BaseConfiguration(
+            supp_level=150.0  # fuori range
+        )
--- a/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
+++ b/tests/dataframe_level_anonymisation/config_models/test_hierarchies.py
@@ -0,0 +1,48 @@
+from template_code_location.dataframe_level_anonymisation.config_models.hierarchies import (
+    simpl_age,
+    simpl_age2,
+    simpl_gender,
+    get_all_hierarchies,
+)
+
+
+def test_simpl_age_structure():
+    assert isinstance(simpl_age, dict)
+    assert 0 in simpl_age
+    assert isinstance(simpl_age[0], list)
+    # verify first level contains 100 ages
+    assert len(simpl_age[0]) == 100
+    assert simpl_age[0][0] == 0
+    assert simpl_age[0][-1] == 99
+
+
+def test_simpl_age2_structure():
+    assert isinstance(simpl_age2, dict)
+    assert 0 in simpl_age2
+    assert 1 in simpl_age2
+    assert isinstance(simpl_age2[0], list)
+    assert isinstance(simpl_age2[1], list)
+
+
+def test_simpl_gender_structure():
+    assert isinstance(simpl_gender, dict)
+    assert 0 in simpl_gender
+    assert 1 in simpl_gender
+    assert simpl_gender[0] == ["M", "F", "O"]
+    assert simpl_gender[1] == ["*", "*", "*"]
+
+
+def test_get_all_hierarchies():
+    hier = get_all_hierarchies()
+
+    # the function should return dicts only
+    assert isinstance(hier, dict)
+
+    # ensure expected dicts are included
+    assert "simpl_age" in hier
+    assert "simpl_age2" in hier
+    assert "simpl_gender" in hier
+
+    # ensure the values returned are references to the actual dicts
+    assert hier["simpl_age"] is simpl_age
+    assert hier["simpl_gender"] is simpl_gender
--- a/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
+++ b/tests/dataframe_level_anonymisation/config_models/test_k_anonymity_config.py
@@ -0,0 +1,41 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.k_anonymity_configuration import (
+    KAnonymityConfiguration,
+)
+
+
+def test_valid_k_anonymity_config_with_overrides():
+    cfg = KAnonymityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=5.0,
+        generalisation_hierarchies={"age": "age_hier"},
+        k=3,
+        sensitive_attributes=["disease"],
+    )
+    assert cfg.k == 3
+    assert cfg.sensitive_attributes == ["disease"]
+    assert cfg.generalisation_hierarchies == {"age": "age_hier"}
+
+
+def test_default_values_are_loaded():
+    cfg = KAnonymityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        generalisation_hierarchies={"age": "age_hier"}
+    )
+    assert cfg.k == 3
+    assert cfg.sensitive_attributes == ["Disease"]
+
+
+def test_invalid_k_value_raises_error():
+    with pytest.raises(ValidationError):
+        KAnonymityConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=1,  # invalid, must be >= 2
+            sensitive_attributes=["disease"],
+        )
--- a/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
+++ b/tests/dataframe_level_anonymisation/config_models/test_l_diversity_config.py
@@ -0,0 +1,44 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.l_diversity_configuration import (
+    LDiversityConfiguration,
+)
+
+
+def test_valid_l_diversity_config_with_overrides():
+    cfg = LDiversityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=5.0,
+        generalisation_hierarchies={"age": "age_hier"},
+        k=3,
+        l=2,
+        sensitive_attribute="disease",
+    )
+    assert cfg.k == 3
+    assert cfg.l == 2
+    assert cfg.sensitive_attribute == "disease"
+
+
+def test_default_values_are_loaded():
+    cfg = LDiversityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        generalisation_hierarchies={"age": "age_hier"}
+    )
+    assert cfg.k == 2
+    assert cfg.l == 3
+    assert cfg.sensitive_attribute == "Disease"
+
+
+def test_invalid_l_value_raises_error():
+    with pytest.raises(ValidationError):
+        LDiversityConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=3,
+            l=0,  # invalid, must be >= 1
+            sensitive_attribute="disease",
+        )
--- a/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
+++ b/tests/dataframe_level_anonymisation/config_models/test_t_closeness_config.py
@@ -0,0 +1,56 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.dataframe_level_anonymisation.config_models.t_closeness_configuration import (
+    TClosenessConfiguration,
+)
+
+
+def test_valid_t_closeness_config_with_overrides():
+    cfg = TClosenessConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        supp_level=5.0,
+        generalisation_hierarchies={"age": "age_hier"},
+        k=3,
+        t=0.4,
+        sensitive_attribute="disease",
+    )
+    assert cfg.k == 3
+    assert cfg.t == 0.4
+    assert cfg.sensitive_attribute == "disease"
+
+
+def test_default_values_are_loaded():
+    cfg = TClosenessConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        generalisation_hierarchies={"age": "age_hier"}
+    )
+    assert cfg.k == 2
+    assert cfg.t == 0.5
+    assert cfg.sensitive_attribute == "Disease"
+
+
+def test_invalid_t_value_low():
+    with pytest.raises(ValidationError):
+        TClosenessConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=3,
+            t=-0.1,  # invalid
+            sensitive_attribute="disease",
+        )
+
+
+def test_invalid_t_value_high():
+    with pytest.raises(ValidationError):
+        TClosenessConfiguration(
+            ident=["id"],
+            quasi_identifiers=["age"],
+            generalisation_hierarchies={"age": "age_hier"},
+            k=3,
+            t=2.0,  # invalid > 1
+            sensitive_attribute="disease",
+        )
--- a/tests/dataframe_level_anonymisation/test_jobs.py
+++ b/tests/dataframe_level_anonymisation/test_jobs.py
@@ -0,0 +1,44 @@
+from template_code_location.dataframe_level_anonymisation.jobs import (
+    k_anonymity_job,
+    l_diversity_job,
+    t_closeness_job,
+    k_anonymity_job_s3,
+    l_diversity_job_s3,
+    t_closeness_job_s3
+)
+
+
+def test_k_anonymity_job_is_callable():
+    """Test k_anonymity_job is a valid Dagster job"""
+    assert callable(k_anonymity_job)
+    assert hasattr(k_anonymity_job, 'execute_in_process')
+
+
+def test_l_diversity_job_is_callable():
+    """Test l_diversity_job is a valid Dagster job"""
+    assert callable(l_diversity_job)
+    assert hasattr(l_diversity_job, 'execute_in_process')
+
+
+def test_t_closeness_job_is_callable():
+    """Test t_closeness_job is a valid Dagster job"""
+    assert callable(t_closeness_job)
+    assert hasattr(t_closeness_job, 'execute_in_process')
+
+
+def test_k_anonymity_job_s3_is_callable():
+    """Test k_anonymity_job_s3 is a valid Dagster job"""
+    assert callable(k_anonymity_job_s3)
+    assert hasattr(k_anonymity_job_s3, 'execute_in_process')
+
+
+def test_l_diversity_job_s3_is_callable():
+    """Test l_diversity_job_s3 is a valid Dagster job"""
+    assert callable(l_diversity_job_s3)
+    assert hasattr(l_diversity_job_s3, 'execute_in_process')
+
+
+def test_t_closeness_job_s3_is_callable():
+    """Test t_closeness_job_s3 is a valid Dagster job"""
+    assert callable(t_closeness_job_s3)
+    assert hasattr(t_closeness_job_s3, 'execute_in_process')
--- a/tests/dataframe_level_anonymisation/test_ops.py
+++ b/tests/dataframe_level_anonymisation/test_ops.py
@@ -0,0 +1,230 @@
+import pytest
+import pandas as pd
+from unittest.mock import patch
+from dagster import DagsterInvalidInvocationError, build_op_context
+
+from template_code_location.dataframe_level_anonymisation.ops import (
+    apply_k_anonymity,
+    apply_l_diversity,
+    apply_t_closeness,
+)
+from template_code_location.dataframe_level_anonymisation.config_models import (
+    KAnonymityConfiguration,
+    LDiversityConfiguration,
+    TClosenessConfiguration,
+)
+
+
+# ---------------------------
+# Fixtures
+# ---------------------------
+@pytest.fixture
+def fake_df():
+    return pd.DataFrame({"id": [1, 2], "age": [30, 40]})
+
+
+@pytest.fixture
+def k_config():
+    return KAnonymityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        sensitive_attributes=["age"],
+        k=2,
+        supp_level=0.0,
+        generalisation_hierarchies={"age": "simpl_age"},
+    )
+
+
+@pytest.fixture
+def l_config():
+    return LDiversityConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        sensitive_attribute="age",
+        k=2,
+        l=1,
+        supp_level=0.0,
+        generalisation_hierarchies={"age": "simpl_age"},
+    )
+
+
+@pytest.fixture
+def t_config():
+    return TClosenessConfiguration(
+        ident=["id"],
+        quasi_identifiers=["age"],
+        sensitive_attribute="age",
+        k=2,
+        t=0.5,
+        supp_level=0.0,
+        generalisation_hierarchies={"age": "simpl_age"},
+    )
+
+
+@pytest.fixture
+def op_context():
+    return build_op_context()
+
+
+# ---------------------------
+# Helper for patching external functions
+# ---------------------------
+@pytest.fixture(autouse=True)
+def patch_external_ops():
+    with (
+        patch(
+            "dataframe_level_anonymisation.ops.get_all_hierarchies",
+            return_value={"simpl_age": {0: [30, 40]}},
+        ),
+        patch(
+            "dataframe_level_anonymisation.ops.k_anonymity",
+            return_value=pd.DataFrame({"id": [1, 2], "age": [30, 40]}),
+        ),
+        patch(
+            "dataframe_level_anonymisation.ops.l_diversity",
+            return_value=pd.DataFrame({"id": [1, 2], "age": [30, 40]}),
+        ),
+        patch(
+            "dataframe_level_anonymisation.ops.t_closeness",
+            return_value=pd.DataFrame({"id": [1, 2], "age": [30, 40]}),
+        ),
+    ):
+        yield
+
+
+# ---------------------------
+# Tests for apply_k_anonymity
+# ---------------------------
+def test_apply_k_anonymity_outputs(op_context, k_config, fake_df):
+    results = list(apply_k_anonymity(op_context, k_config, fake_df))
+    assert len(results) == 2
+
+    data_output = results[0].value
+    metrics_output = results[1].value
+
+    # Check types
+    assert isinstance(data_output, pd.DataFrame)
+    assert isinstance(metrics_output, dict)
+    assert "k_anon" in metrics_output
+    assert "l_div" in metrics_output
+    assert "t_clos" in metrics_output
+
+
+# ---------------------------
+# Tests for apply_l_diversity
+# ---------------------------
+def test_apply_l_diversity_outputs(op_context, l_config, fake_df):
+    results = list(apply_l_diversity(op_context, l_config, fake_df))
+    assert len(results) == 2
+
+    data_output = results[0].value
+    metrics_output = results[1].value
+
+    assert isinstance(data_output, pd.DataFrame)
+    assert isinstance(metrics_output, dict)
+    assert "k_anon" in metrics_output
+    assert "l_div" in metrics_output
+    assert "t_clos" in metrics_output
+
+
+def test_apply_l_diversity_empty_raises(op_context, l_config):
+    with patch("dataframe_level_anonymisation.ops.l_diversity", return_value=pd.DataFrame()):
+
+        with pytest.raises(DagsterInvalidInvocationError):
+            list(apply_l_diversity(op_context, l_config, pd.DataFrame({"id": [1], "age": [30]})))
+
+
+# ---------------------------
+# Tests for apply_t_closeness
+# ---------------------------
+def test_apply_t_closeness_outputs(op_context, t_config, fake_df):
+    results = list(apply_t_closeness(op_context, t_config, fake_df))
+    assert len(results) == 2
+
+    data_output = results[0].value
+    metrics_output = results[1].value
+
+    assert isinstance(data_output, pd.DataFrame)
+    assert isinstance(metrics_output, dict)
+    assert "k_anon" in metrics_output
+    assert "l_div" in metrics_output
+    assert "t_clos" in metrics_output
+
+
+def test_apply_t_closeness_empty_raises(op_context, t_config):
+    with patch("dataframe_level_anonymisation.ops.t_closeness", return_value=pd.DataFrame()):
+        with pytest.raises(DagsterInvalidInvocationError):
+            list(apply_t_closeness(op_context, t_config, pd.DataFrame({"id": [1], "age": [30]})))
+
+
+# ---------------------------
+# Additional tests for _validate_and_get_hierarchies
+# ---------------------------
+def test_validate_hierarchies_dataset_too_small(k_config):
+    small_df = pd.DataFrame({"id": [1], "age": [30]})
+    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
+
+    with pytest.raises(DagsterInvalidInvocationError):
+        _validate_and_get_hierarchies(k_config, small_df)
+
+
+def test_validate_hierarchies_missing_hierarchy(k_config, fake_df):
+    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
+
+    bad_config = k_config.model_copy(update={"generalisation_hierarchies": {}})
+
+    with pytest.raises(DagsterInvalidInvocationError):
+        _validate_and_get_hierarchies(bad_config, fake_df)
+
+
+def test_validate_hierarchies_hierarchy_not_in_code(k_config, fake_df):
+    from template_code_location.dataframe_level_anonymisation.ops import _validate_and_get_hierarchies
+
+    with patch("dataframe_level_anonymisation.ops.get_all_hierarchies", return_value={}):
+        with pytest.raises(DagsterInvalidInvocationError):
+            _validate_and_get_hierarchies(k_config, fake_df)
+
+
+# ---------------------------
+# Additional tests for _calc_dataframe_metrics
+# ---------------------------
+def test_calc_dataframe_metrics_basic():
+    from template_code_location.dataframe_level_anonymisation.ops import _calc_dataframe_metrics
+
+    df_org = pd.DataFrame({"age": [30, 40], "id": [1, 2]})
+    df_anon = df_org.copy()
+
+    with (
+        patch("dataframe_level_anonymisation.ops.anonymity.k_anonymity", return_value=2),
+        patch("dataframe_level_anonymisation.ops.anonymity.l_diversity", return_value=1),
+        patch("dataframe_level_anonymisation.ops.anonymity.t_closeness", return_value=0.1),
+    ):
+
+        report, metrics = _calc_dataframe_metrics(df_anon, df_org, ["age"], ["age"])
+
+        assert "k-anonymity" in report
+        assert metrics["k_anon"] == 2
+        assert metrics["l_div"] == 1
+        assert metrics["t_clos"] == 0.1
+
+
+# ---------------------------
+# Tests for apply_t_closeness exception branches
+# ---------------------------
+def test_apply_t_closeness_value_error_quasi_identifiers(op_context, t_config, fake_df):
+    """Covers the branch where ValueError contains 'Cannot be quasi-identifiers'."""
+    with patch(
+        "dataframe_level_anonymisation.ops.t_closeness",
+        side_effect=ValueError("Cannot be quasi-identifiers invalid"),
+    ):
+        with pytest.raises(DagsterInvalidInvocationError):
+            list(apply_t_closeness(op_context, t_config, fake_df))
+
+
+def test_apply_t_closeness_value_error_other_message(op_context, t_config, fake_df):
+    """Covers the branch where ValueError is raised but message does NOT contain that substring."""
+    with patch(
+        "dataframe_level_anonymisation.ops.t_closeness", side_effect=ValueError("Some other error")
+    ):
+        with pytest.raises(DagsterInvalidInvocationError):
+            list(apply_t_closeness(op_context, t_config, fake_df))
--- a/tests/dataframe_level_anonymisation/test_utils.py
+++ b/tests/dataframe_level_anonymisation/test_utils.py
@@ -0,0 +1,70 @@
+import numpy as np
+
+from template_code_location.dataframe_level_anonymisation.utils import (
+    parse_value_list,
+    normalize_hierarchy_levels,
+)
+
+
+# ------------------------------------
+# Tests for parse_value_list
+# ------------------------------------
+def test_parse_value_list_all_strings_digits():
+    values = ["1", "2", "3"]
+    assert parse_value_list(values) == [1, 2, 3]
+
+
+def test_parse_value_list_mixed_values():
+    values = ["1", 2, "abc", "5"]
+    assert parse_value_list(values) == [1, 2, "abc", 5]
+
+
+def test_parse_value_list_no_digits():
+    values = ["a", "b", "c"]
+    assert parse_value_list(values) == ["a", "b", "c"]
+
+
+# ------------------------------------
+# Tests for normalize_hierarchy_levels
+# ------------------------------------
+def test_normalize_hierarchy_levels_level_0_converted_to_numpy_array():
+    hierarchy = {"age": {"0": ["1", "2", "3"], "1": ["0-10", "11-20"]}}
+
+    normalized = normalize_hierarchy_levels(hierarchy)
+
+    assert "age" in normalized
+    assert 0 in normalized["age"]
+    assert isinstance(normalized["age"][0], np.ndarray)
+    assert normalized["age"][0].tolist() == [1, 2, 3]  # converted via parse_value_list
+    assert normalized["age"][1] == ["0-10", "11-20"]  # untouched
+
+
+def test_normalize_hierarchy_levels_multiple_columns():
+    hierarchy = {"age": {"0": ["10", "20"]}, "gender": {"0": ["M", "F"], "1": ["*"]}}
+
+    normalized = normalize_hierarchy_levels(hierarchy)
+
+    # First column
+    assert isinstance(normalized["age"][0], np.ndarray)
+    assert normalized["age"][0].tolist() == [10, 20]
+
+    # Second column
+    assert isinstance(normalized["gender"][0], np.ndarray)
+    assert normalized["gender"][0].tolist() == ["M", "F"]
+    assert normalized["gender"][1] == ["*"]
+
+
+def test_normalize_hierarchy_levels_mixed_digit_non_digit_at_level_0():
+    hierarchy = {"test": {"0": ["1", "x", "3"]}}
+
+    normalized = normalize_hierarchy_levels(hierarchy)
+
+    assert isinstance(normalized["test"][0], np.ndarray)
+    assert normalized["test"][0].tolist() == ["1", "x", "3"]
+
+
+def test_normalize_hierarchy_levels_empty_mapping():
+    hierarchy = {"col": {}}
+    normalized = normalize_hierarchy_levels(hierarchy)
+
+    assert normalized == {"col": {}}
--- a/tests/field_level_pseudo_anonymisation/init.py
+++ b/tests/field_level_pseudo_anonymisation/init.py
@@ -0,0 +1 @@
+
--- a/tests/field_level_pseudo_anonymisation/conftest.py
+++ b/tests/field_level_pseudo_anonymisation/conftest.py
@@ -0,0 +1,444 @@
+"""
+Shared pytest fixtures and helpers for field-level pseudonymisation tests.
+
+This module provides:
+- Mock Vault client for testing without real Vault connections
+- Sample data fixtures
+- Configuration fixtures for encryption/decryption operations
+- Helper functions for running ops and managing test Vault storage
+"""
+
+import pandas as pd
+import pytest
+from dagster import build_op_context
+from cryptography.fernet import Fernet
+from hvac.exceptions import InvalidPath, Forbidden
+from unittest.mock import patch, MagicMock
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+    AnonymisePseudonymizeStructuredConfig,
+    DepseudonymizeStructuredConfig,
+    EncryptConfig,
+    DecryptConfig,
+    PseudoTechniqueConfig,
+    DepseudoTechniqueConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.ops import (
+    anonymize_pseudonymize_structured,
+    depseudonymize_structured,
+)
+
+
+# -------------------------------- Mock Vault Storage ----------------------------------------
+
+# In-memory Vault simulation for tests
+_test_vault_storage = {}
+_test_vault_access_control = {}  # For simulating access control
+
+
+@pytest.fixture(autouse=True)
+def mock_vault_client():
+    """
+    Auto-use fixture that mocks the hvac.Client to avoid real Vault connections.
+    Uses an in-memory dict to simulate Vault storage for tests.
+    Includes access control simulation for AC3.
+    """
+    global _test_vault_storage, _test_vault_access_control
+    _test_vault_storage = {}  # Reset storage before each test
+    _test_vault_access_control = {}  # Reset access control
+
+    def mock_read_secret(path, mount_point):
+        """Mock reading secret from Vault with access control"""
+        full_path = f"{mount_point}/{path}"
+
+        # Check access control first
+        if full_path in _test_vault_access_control:
+            if not _test_vault_access_control[full_path]:
+                raise Forbidden(f"Access denied to secret: {full_path}")
+
+        if full_path not in _test_vault_storage:
+            raise InvalidPath(f"Secret not found: {full_path}")
+        return {"data": {"data": {"value": _test_vault_storage[full_path]}}}
+
+    def mock_create_or_update_secret(path, mount_point, secret):
+        """Mock creating/updating secret in Vault"""
+        full_path = f"{mount_point}/{path}"
+        _test_vault_storage[full_path] = secret["value"]
+
+    def mock_delete_metadata(path, mount_point):
+        """Mock deleting secret from Vault"""
+        full_path = f"{mount_point}/{path}"
+        if full_path in _test_vault_storage:
+            del _test_vault_storage[full_path]
+        if full_path in _test_vault_access_control:
+            del _test_vault_access_control[full_path]
+
+    with patch("hvac.Client") as mock_client_class:
+        mock_instance = MagicMock()
+        mock_instance.secrets.kv.v2.read_secret_version.side_effect = mock_read_secret
+        mock_instance.secrets.kv.v2.create_or_update_secret.side_effect = (
+            mock_create_or_update_secret
+        )
+        mock_instance.secrets.kv.v2.delete_metadata_and_all_versions.side_effect = (
+            mock_delete_metadata
+        )
+        mock_client_class.return_value = mock_instance
+        yield mock_instance
+
+
+# -------------------------------- Sample Data Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def sample_df():
+    """
+    Fixture providing a sample structured dataset with PII data.
+    Represents typical data that requires pseudonymisation and restoration.
+    """
+    return pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4, 5],
+            "name": [
+                "Alice Smith",
+                "Bob Jones",
+                "Charlie Brown",
+                "David Wilson",
+                "Eva Garcia",
+            ],
+            "email": [
+                "alice@example.com",
+                "bob@example.com",
+                "charlie@example.com",
+                "david@example.com",
+                "eva@example.com",
+            ],
+            "ssn": [
+                "123-45-6789",
+                "234-56-7890",
+                "345-67-8901",
+                "456-78-9012",
+                "567-89-0123",
+            ],
+            "age": [25, 30, 35, 40, 45],
+            "salary": [50000.0, 60000.0, 70000.0, 80000.0, 90000.0],
+            "department": ["HR", "IT", "Finance", "IT", "HR"],
+        }
+    )
+
+
+# -------------------------------- Configuration Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def encrypt_config_single_field():
+    """
+    Configuration for encrypting a single field (email).
+    Used to create pseudonymised data for restoration tests.
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    columns=["email"],
+                    key_name="test_restoration_key_single",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def decrypt_config_single_field():
+    """
+    Configuration for decrypting a single field (email).
+    Used to restore original values.
+    """
+    return DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt",
+                    columns=["email"],
+                    key_name="test_restoration_key_single",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def encrypt_config_multiple_fields():
+    """
+    Configuration for encrypting multiple fields (name, email, ssn).
+    Tests restoration of multiple sensitive fields.
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    columns=["name", "email", "ssn"],
+                    key_name="test_restoration_key_multi",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def decrypt_config_multiple_fields():
+    """
+    Configuration for decrypting multiple fields (name, email, ssn).
+    """
+    return DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt",
+                    columns=["name", "email", "ssn"],
+                    key_name="test_restoration_key_multi",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def encrypt_config_partial_fields():
+    """
+    Configuration for encrypting only some fields (email, ssn).
+    Tests partial restoration scenarios.
+    """
+    return AnonymisePseudonymizeStructuredConfig(
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    columns=["email", "ssn"],
+                    key_name="test_restoration_key_partial",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def decrypt_config_partial_fields():
+    """
+    Configuration for decrypting only some fields (email, ssn).
+    """
+    return DepseudonymizeStructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(
+                    type="decrypt",
+                    columns=["email", "ssn"],
+                    key_name="test_restoration_key_partial",
+                )
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def authorized_multi_key_scenario():
+    """
+    Fixture for testing multi-key authorization scenarios.
+    Sets up two keys: one authorized, one denied.
+    """
+    clear_vault_key("authorized_key")
+    clear_vault_key("unauthorized_key")
+
+    # Create authorized key by generating it
+    authorized_key = Fernet.generate_key().decode()
+    set_vault_key("authorized_key", authorized_key)
+
+    # Create unauthorized key and deny access
+    unauthorized_key = Fernet.generate_key().decode()
+    set_vault_key("unauthorized_key", unauthorized_key)
+    deny_vault_access("unauthorized_key")
+
+    yield {"authorized": "authorized_key", "unauthorized": "unauthorized_key"}
+
+    # Cleanup
+    clear_vault_key("authorized_key")
+    clear_vault_key("unauthorized_key")
+
+
+@pytest.fixture
+def large_dataset():
+    """
+    Fixture providing a large dataset (10,000 rows) for performance testing.
+    Reusable across multiple performance tests.
+    """
+    return pd.DataFrame(
+        {
+            "id": range(1, 10001),
+            "email": [f"user{i}@example.com" for i in range(1, 10001)],
+            "name": [f"User {i}" for i in range(1, 10001)],
+            "ssn": [f"{i:03d}-{i:02d}-{i:04d}" for i in range(1, 10001)],
+            "age": [20 + (i % 50) for i in range(1, 10001)],
+            "salary": [30000.0 + (i * 10) for i in range(1, 10001)],
+            "department": [["HR", "IT", "Finance", "Sales"][i % 4] for i in range(1, 10001)],
+        }
+    )
+
+
+@pytest.fixture(scope="session")
+def vault_test_keys():
+    """
+    Session-scoped fixture to pre-generate test keys for faster test execution.
+    Avoids repeated key generation in each test.
+    """
+    keys = {f"test_key_{i}": Fernet.generate_key().decode() for i in range(10)}
+
+    return keys
+
+
+@pytest.fixture
+def cleanup_test_keys(request):
+    """
+    Fixture to automatically cleanup test keys after each test.
+    Use with: @pytest.mark.usefixtures("cleanup_test_keys")
+    """
+    yield
+
+    # Cleanup all test keys from mock Vault
+    test_keys = [k for k in _test_vault_storage.keys() if "test_" in k]
+    for key in test_keys:
+        _test_vault_storage.pop(key, None)
+
+
+# -------------------------------- Helper Functions ----------------------------------------
+
+
+def config_to_dagster_dict(config):
+    """
+    Convert Pydantic config to Dagster-compatible dictionary.
+
+    For AnonymisePseudonymizeStructuredConfig (uses discriminated Union):
+        Pydantic v2 outputs: {'technique': {'type': 'encrypt', 'columns': [...], 'key_name': '...'}}
+        Dagster expects: {'technique': {'encrypt': {'columns': [...], 'key_name': '...'}}}
+
+    For DepseudonymizeStructuredConfig (direct DecryptConfig, no Union):
+        Pydantic v2 outputs:
+        {'technique': {'type': 'decrypt', 'columns': [...], 'key_name': '...'}}
+        Dagster expects: Same flat structure with 'type' field
+
+    Args:
+        config: Pydantic config instance
+            (AnonymisePseudonymizeStructuredConfig or
+            DepseudonymizeStructuredConfig)
+
+    Returns:
+        dict: Dagster-compatible configuration dictionary
+    """
+    from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+        AnonymisePseudonymizeStructuredConfig,
+    )
+
+    config_dict = config.model_dump()
+
+    # Only convert discriminated unions for AnonymisePseudonymizeStructuredConfig
+    # DepseudonymizeStructuredConfig uses direct DecryptConfig (no discriminated union)
+    if isinstance(config, AnonymisePseudonymizeStructuredConfig):
+        if "used_function" in config_dict:
+            for func_config in config_dict["used_function"]:
+                if "technique" in func_config:
+                    technique = func_config["technique"]
+                    # Pydantic outputs flat dict with 'type' field for discriminated unions
+                    if isinstance(technique, dict) and "type" in technique:
+                        # Extract the type discriminator
+                        technique_type = technique["type"]
+                        # Create nested structure without the 'type' field
+                        technique_data = {k: v for k, v in technique.items() if k != "type"}
+                        # Nest under the discriminator key for Dagster
+                        func_config["technique"] = {technique_type: technique_data}
+
+    return config_dict
+
+
+def run_encrypt_op(config, df):
+    """
+    Helper function to execute the anonymize_pseudonymize_structured op.
+
+    Args:
+        config: AnonymisePseudonymizeStructuredConfig instance
+        df: Input pandas DataFrame
+
+    Returns:
+        tuple: (result_df, metrics) - Output DataFrame and metrics dict
+    """
+    context = build_op_context(op_config=config_to_dagster_dict(config))
+    result_df, metrics = anonymize_pseudonymize_structured(context, df=df)
+    return result_df.value, metrics.value
+
+
+def run_decrypt_op(config, df):
+    """
+    Helper function to execute the depseudonymize_structured op.
+
+    Args:
+        config: DepseudonymizeStructuredConfig instance
+        df: Input pandas DataFrame
+
+    Returns:
+        tuple: (result_df, metrics) - Output DataFrame and metrics dict
+    """
+    context = build_op_context(op_config=config_to_dagster_dict(config))
+    result_df, metrics = depseudonymize_structured(context, df=df)
+    return result_df.value, metrics.value
+
+
+def clear_vault_key(key_name: str):
+    """
+    Helper function to clear a key from the simulated Vault storage for test isolation.
+
+    Args:
+        key_name: Name of the key to delete from Vault
+    """
+    full_path = f"secret/PseudonymKeys/{key_name}"
+    if full_path in _test_vault_storage:
+        del _test_vault_storage[full_path]
+    if full_path in _test_vault_access_control:
+        del _test_vault_access_control[full_path]
+
+
+def set_vault_key(key_name: str, key_value: str):
+    """
+    Helper function to set a key in the simulated Vault storage.
+
+    Args:
+        key_name: Name of the key
+        key_value: Value of the key (Fernet key as string)
+    """
+    full_path = f"secret/PseudonymKeys/{key_name}"
+    _test_vault_storage[full_path] = key_value
+
+
+def deny_vault_access(key_name: str):
+    """
+    Helper function to deny access to a key for authorization testing (AC3).
+
+    Args:
+        key_name: Name of the key to deny access to
+    """
+    full_path = f"secret/PseudonymKeys/{key_name}"
+    _test_vault_access_control[full_path] = False
+
+
+def get_vault_key(key_name: str) -> bytes:
+    """
+    Helper function to retrieve a key from the simulated Vault storage.
+
+    Args:
+        key_name: Name of the key to retrieve
+
+    Returns:
+        bytes: The encryption key
+    """
+    full_path = f"secret/PseudonymKeys/{key_name}"
+    if full_path not in _test_vault_storage:
+        raise InvalidPath(f"Key not found: {key_name}")
+    return _test_vault_storage[full_path].encode()
--- a/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
+++ b/tests/field_level_pseudo_anonymisation/test_config_models_coverage.py
@@ -0,0 +1,633 @@
+import pytest
+from pydantic import ValidationError
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
+    AnonymisePseudonymizeStructuredConfig,
+    DepseudonymizeStructuredConfig,
+    PseudoTechniqueConfig,
+    DepseudoTechniqueConfig,
+    HashConfig,
+    EncryptConfig,
+    RedactConfig,
+    ReplaceConfig,
+    DecryptConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
+    AnonymisePseudonymizeUnstructuredConfig,
+    DepseudonymizeUnstructuredConfig,
+    PseudoTechniqueConfig as UnstructuredPseudoTechniqueConfig,
+    DepseudoTechniqueConfig as UnstructuredDepseudoTechniqueConfig,
+    HashConfig as UnstructuredHashConfig,
+    EncryptConfig as UnstructuredEncryptConfig,
+    RedactConfig as UnstructuredRedactConfig,
+    ReplaceConfig as UnstructuredReplaceConfig,
+    RetainConfig,
+    DecryptConfig as UnstructuredDecryptConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.config_models.languages import LanguageEnum
+from template_code_location.field_level_pseudo_anonymisation.config_models.pii_entities import PIIEntityEnum
+
+
+# ==================== Structured Config Tests ====================
+
+class TestStructuredConfigValidators:
+    """Tests for structured_config.py validators and validators."""
+    
+    def test_ensure_unique_columns_valid_single_technique(self):
+        """Test that single technique with single column passes validation."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        columns=["email"],
+                        key_name="key1"
+                    )
+                )
+            ]
+        )
+        assert config is not None
+        assert len(config.used_function) == 1
+    
+    def test_ensure_unique_columns_valid_multiple_techniques_different_columns(self):
+        """Test that multiple techniques with different columns passes validation."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        columns=["email"],
+                        key_name="key1"
+                    )
+                ),
+                PseudoTechniqueConfig(
+                    technique=HashConfig(
+                        columns=["ssn"],
+                        algorithm="sha256"
+                    )
+                )
+            ]
+        )
+        assert config is not None
+        assert len(config.used_function) == 2
+    
+    def test_ensure_unique_columns_duplicate_columns_same_technique(self):
+        """Test that duplicate columns in different techniques raises error."""
+        with pytest.raises(ValueError) as exc_info:
+            AnonymisePseudonymizeStructuredConfig(
+                used_function=[
+                    PseudoTechniqueConfig(
+                        technique=EncryptConfig(
+                            columns=["email"],
+                            key_name="key1"
+                        )
+                    ),
+                    PseudoTechniqueConfig(
+                        technique=HashConfig(
+                            columns=["email"],
+                            algorithm="sha256"
+                        )
+                    )
+                ]
+            )
+        assert "Duplicate column" in str(exc_info.value)
+        assert "email" in str(exc_info.value)
+    
+    def test_ensure_unique_columns_multiple_duplicates(self):
+        """Test error message with multiple duplicate columns."""
+        with pytest.raises(ValueError) as exc_info:
+            AnonymisePseudonymizeStructuredConfig(
+                used_function=[
+                    PseudoTechniqueConfig(
+                        technique=EncryptConfig(
+                            columns=["email", "phone"],
+                            key_name="key1"
+                        )
+                    ),
+                    PseudoTechniqueConfig(
+                        technique=HashConfig(
+                            columns=["email", "phone"],
+                            algorithm="sha256"
+                        )
+                    )
+                ]
+            )
+        error_msg = str(exc_info.value)
+        assert "Duplicate column" in error_msg
+        assert "email" in error_msg
+        assert "phone" in error_msg
+    
+    def test_collect_column_to_techniques_single_technique(self):
+        """Test _collect_column_to_techniques with single technique."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        columns=["email", "phone"],
+                        key_name="key1"
+                    )
+                )
+            ]
+        )
+        mapping = config._collect_column_to_techniques()
+        assert mapping == {
+            "email": ["encrypt"],
+            "phone": ["encrypt"]
+        }
+    
+    def test_extract_technique_and_columns_dict_with_type_field(self):
+        """Test _extract_technique_and_columns with dict containing 'type' field."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "columns": ["email", "ssn"],
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert columns == ["email", "ssn"]
+    
+    def test_extract_technique_and_columns_dict_with_variant_mapping(self):
+        """Test _extract_technique_and_columns with variant-key mapping {'hash': {...}}."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {
+                "technique": {
+                    "encrypt": {
+                        "columns": ["ssn"],
+                        "key_name": "test_key"
+                    }
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert columns == ["ssn"]
+    
+    def test_extract_technique_and_columns_model_instance(self):
+        """Test _extract_technique_and_columns with PseudoTechniqueConfig model instance."""
+        pseudo_config = PseudoTechniqueConfig(
+            technique=RedactConfig(columns=["address"])
+        )
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(pseudo_config)
+        assert technique_type == "redact"
+        assert columns == ["address"]
+    
+    def test_extract_technique_and_columns_empty_dict(self):
+        """Test _extract_technique_and_columns with empty dict."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {"technique": {}}
+        )
+        assert technique_type is None
+        assert columns == []
+    
+    def test_extract_technique_and_columns_none_technique(self):
+        """Test _extract_technique_and_columns with None technique."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {"technique": None}
+        )
+        assert technique_type is None
+        assert columns == []
+    
+    def test_extract_technique_and_columns_missing_columns_key(self):
+        """Test _extract_technique_and_columns when 'columns' key is missing."""
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert columns == []
+    
+    def test_extract_technique_and_columns_model_without_columns_attr(self):
+        """Test _extract_technique_and_columns with model instance missing columns attribute."""
+        pseudo_config = PseudoTechniqueConfig(
+            technique=ReplaceConfig(columns=["old_value"], new_value="NEW")
+        )
+        config = AnonymisePseudonymizeStructuredConfig()
+        technique_type, columns = config._extract_technique_and_columns(pseudo_config)
+        assert technique_type == "replace"
+        assert columns == ["old_value"]
+
+
+class TestStructuredDepseudonymizeConfig:
+    """Tests for DepseudonymizeStructuredConfig."""
+    
+    def test_depseudonymize_config_normalize_used_function_with_dict(self):
+        """Test _normalize_depseudo_used_function with dict input."""
+        config = DepseudonymizeStructuredConfig(
+            used_function=[
+                {
+                    "technique": {
+                        "type": "decrypt",
+                        "columns": ["email"],
+                        "key_name": "key1"
+                    }
+                }
+            ]
+        )
+        assert len(config.used_function) == 1
+        assert isinstance(config.used_function[0], DepseudoTechniqueConfig)
+        assert config.used_function[0].technique.type == "decrypt"
+    
+    def test_depseudonymize_config_normalize_used_function_with_model(self):
+        """Test _normalize_depseudo_used_function with model instance."""
+        depseudo_tech = DepseudoTechniqueConfig(
+            technique=DecryptConfig(
+                columns=["email"],
+                key_name="key1"
+            )
+        )
+        config = DepseudonymizeStructuredConfig(
+            used_function=[depseudo_tech]
+        )
+        assert len(config.used_function) == 1
+        assert config.used_function[0] is depseudo_tech
+    
+    def test_depseudonymize_config_ensure_unique_columns_no_op(self):
+        """Test that ensure_unique_columns is a no-op for depseudonymize."""
+        # For depseudonymize, there's no per-column uniqueness constraint
+        config = DepseudonymizeStructuredConfig(
+            used_function=[
+                DepseudoTechniqueConfig(
+                    technique=DecryptConfig(
+                        columns=["email"],
+                        key_name="key1"
+                    )
+                ),
+                DepseudoTechniqueConfig(
+                    technique=DecryptConfig(
+                        columns=["email"],
+                        key_name="key2"
+                    )
+                )
+            ]
+        )
+        # Should not raise - no-op validator
+        assert config is not None
+
+
+# ==================== Unstructured Config Tests ====================
+
+class TestUnstructuredConfigValidators:
+    """Tests for unstructured_config.py validators."""
+    
+    def test_normalize_used_function_with_dict(self):
+        """Test _normalize_used_function with dict input."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                {
+                    "technique": {
+                        "encrypt": {
+                            "pii": [PIIEntityEnum.EMAIL.value],
+                            "key_name": "key1"
+                        }
+                    }
+                }
+            ]
+        )
+        assert len(config.used_function) == 1
+    
+    def test_normalize_used_function_with_model(self):
+        """Test _normalize_used_function with model instance."""
+        pseudo_tech = UnstructuredPseudoTechniqueConfig(
+            technique=UnstructuredEncryptConfig(
+                pii=[PIIEntityEnum.EMAIL.value],
+                key_name="key1"
+            )
+        )
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[pseudo_tech]
+        )
+        assert len(config.used_function) == 1
+    
+    def test_ensure_unique_pii_valid_different_pii_types(self):
+        """Test that different PII types pass validation."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredEncryptConfig(
+                        pii=[PIIEntityEnum.EMAIL.value],
+                        key_name="key1"
+                    )
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredHashConfig(
+                        pii=[PIIEntityEnum.PERSON.value],
+                        algorithm="sha256"
+                    )
+                )
+            ]
+        )
+        assert config is not None
+        assert len(config.used_function) == 2
+    
+    def test_ensure_unique_pii_duplicate_pii_types(self):
+        """Test that duplicate PII types raise error."""
+        with pytest.raises(ValueError) as exc_info:
+            AnonymisePseudonymizeUnstructuredConfig(
+                language=LanguageEnum.en,
+                used_function=[
+                    UnstructuredPseudoTechniqueConfig(
+                        technique=UnstructuredEncryptConfig(
+                            pii=[PIIEntityEnum.EMAIL.value],
+                            key_name="key1"
+                        )
+                    ),
+                    UnstructuredPseudoTechniqueConfig(
+                        technique=UnstructuredHashConfig(
+                            pii=[PIIEntityEnum.EMAIL.value],
+                            algorithm="sha256"
+                        )
+                    )
+                ]
+            )
+        assert "Duplicate PII" in str(exc_info.value)
+        # Error message shows PIIEntityEnum.EMAIL (the enum repr) rather than the value
+        assert "EMAIL" in str(exc_info.value)
+    
+    def test_collect_pii_to_techniques_single_technique(self):
+        """Test _collect_pii_to_techniques with single technique."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredEncryptConfig(
+                        pii=[PIIEntityEnum.EMAIL.value, PIIEntityEnum.PERSON.value],
+                        key_name="key1"
+                    )
+                )
+            ]
+        )
+        mapping = config._collect_pii_to_techniques()
+        assert mapping == {
+            PIIEntityEnum.EMAIL.value: ["encrypt"],
+            PIIEntityEnum.PERSON.value: ["encrypt"]
+        }
+    
+    def test_extract_technique_and_pii_dict_with_type_field(self):
+        """Test _extract_technique_and_pii with dict containing 'type' field."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "pii": [PIIEntityEnum.EMAIL.value],
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert piis == [PIIEntityEnum.EMAIL.value]
+    
+    def test_extract_technique_and_pii_dict_with_variant_mapping(self):
+        """Test _extract_technique_and_pii with variant-key mapping."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "hash": {
+                        "pii": [PIIEntityEnum.PERSON.value],
+                        "algorithm": "sha256"
+                    }
+                }
+            }
+        )
+        assert technique_type == "hash"
+        assert piis == [PIIEntityEnum.PERSON.value]
+    
+    def test_extract_technique_and_pii_dict_fallback_to_columns(self):
+        """Test _extract_technique_and_pii fallback to 'columns' key when 'pii' is missing."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "type": "redact",
+                    "columns": ["fallback_col"]
+                }
+            }
+        )
+        assert technique_type == "redact"
+        assert piis == ["fallback_col"]
+    
+    def test_extract_technique_and_pii_model_instance(self):
+        """Test _extract_technique_and_pii with model instance."""
+        pseudo_tech = UnstructuredPseudoTechniqueConfig(
+            technique=UnstructuredRedactConfig(
+                pii=[PIIEntityEnum.EMAIL.value]
+            )
+        )
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(pseudo_tech)
+        assert technique_type == "redact"
+        assert piis == [PIIEntityEnum.EMAIL.value]
+    
+    def test_extract_technique_and_pii_model_with_getattr_fallback(self):
+        """Test _extract_technique_and_pii model with getattr fallback to columns."""
+        # Create a mock-like scenario where pii attribute doesn't exist
+        pseudo_tech = UnstructuredPseudoTechniqueConfig(
+            technique=RetainConfig(pii=[PIIEntityEnum.PERSON.value])
+        )
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(pseudo_tech)
+        assert technique_type == "retain"
+        assert piis == [PIIEntityEnum.PERSON.value]
+    
+    def test_extract_technique_and_pii_empty_dict(self):
+        """Test _extract_technique_and_pii with empty dict."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {"technique": {}}
+        )
+        assert technique_type is None
+        assert piis == []
+    
+    def test_extract_technique_and_pii_missing_pii_key(self):
+        """Test _extract_technique_and_pii when 'pii' key is missing."""
+        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
+        technique_type, piis = config._extract_technique_and_pii(
+            {
+                "technique": {
+                    "type": "encrypt",
+                    "key_name": "test_key"
+                }
+            }
+        )
+        assert technique_type == "encrypt"
+        assert piis == []
+
+
+class TestUnstructuredDepseudonymizeConfig:
+    """Tests for DepseudonymizeUnstructuredConfig."""
+    
+    def test_depseudonymize_unstructured_config_default(self):
+        """Test default DepseudonymizeUnstructuredConfig."""
+        config = DepseudonymizeUnstructuredConfig()
+        assert config is not None
+        assert len(config.used_function) >= 1
+    
+    def test_depseudonymize_unstructured_config_with_custom_function(self):
+        """Test DepseudonymizeUnstructuredConfig with custom function."""
+        config = DepseudonymizeUnstructuredConfig(
+            used_function=[
+                UnstructuredDepseudoTechniqueConfig(
+                    technique=UnstructuredDecryptConfig(
+                        key_name="custom_key"
+                    )
+                )
+            ]
+        )
+        assert len(config.used_function) == 1
+        assert config.used_function[0].technique.key_name == "custom_key"
+
+
+class TestLanguageSupport:
+    """Tests for language configuration support."""
+    
+    def test_all_supported_languages(self):
+        """Test that all supported languages can be set."""
+        supported_languages = [
+            LanguageEnum.hr, LanguageEnum.da, LanguageEnum.nl, LanguageEnum.en,
+            LanguageEnum.fi, LanguageEnum.fr, LanguageEnum.de, LanguageEnum.el,
+            LanguageEnum.it, LanguageEnum.lt, LanguageEnum.pl, LanguageEnum.pt,
+            LanguageEnum.ro, LanguageEnum.sl, LanguageEnum.es, LanguageEnum.sv
+        ]
+        
+        for lang in supported_languages:
+            config = AnonymisePseudonymizeUnstructuredConfig(language=lang)
+            assert config.language == lang
+    
+    def test_default_language_is_english(self):
+        """Test that default language is English."""
+        config = AnonymisePseudonymizeUnstructuredConfig()
+        assert config.language == LanguageEnum.en
+
+
+class TestTechniqueConfigDefaults:
+    """Tests for technique config defaults."""
+    
+    def test_hash_config_default_algorithm(self):
+        """Test HashConfig default algorithm."""
+        config = HashConfig()
+        assert config.algorithm == "sha256"
+        assert config.type == "hash"
+    
+    def test_encrypt_config_defaults(self):
+        """Test EncryptConfig defaults."""
+        config = EncryptConfig()
+        assert config.type == "encrypt"
+        assert config.key_name == "my_key"
+    
+    def test_redact_config_defaults(self):
+        """Test RedactConfig defaults."""
+        config = RedactConfig()
+        assert config.type == "redact"
+    
+    def test_replace_config_defaults(self):
+        """Test ReplaceConfig defaults."""
+        config = ReplaceConfig()
+        assert config.type == "replace"
+        assert config.new_value == "REPLACED"
+    
+    def test_decrypt_config_defaults(self):
+        """Test DecryptConfig defaults."""
+        config = DecryptConfig()
+        assert config.type == "decrypt"
+        assert config.key_name == "my_key"
+    
+    def test_unstructured_retain_config_defaults(self):
+        """Test RetainConfig defaults."""
+        config = RetainConfig()
+        assert config.type == "retain"
+
+
+class TestPseudoTechniqueConfigDefaults:
+    """Tests for PseudoTechniqueConfig defaults."""
+    
+    def test_pseudo_technique_default_to_hash(self):
+        """Test PseudoTechniqueConfig defaults to hash technique."""
+        config = PseudoTechniqueConfig()
+        # For Dagster Config, technique may be a dict with the discriminator structure
+        if isinstance(config.technique, dict):
+            # Check if it has hash configuration
+            assert "hash" in config.technique or config.technique.get("type") == "hash"
+        else:
+            assert config.technique.type == "hash"
+    
+    def test_unstructured_pseudo_technique_default_to_hash(self):
+        """Test UnstructuredPseudoTechniqueConfig defaults to hash technique."""
+        config = UnstructuredPseudoTechniqueConfig()
+        # For Dagster Config, technique may be a dict with the discriminator structure
+        if isinstance(config.technique, dict):
+            # Check if it has hash configuration
+            assert "hash" in config.technique or config.technique.get("type") == "hash"
+        else:
+            assert config.technique.type == "hash"
+
+
+class TestConfigModelIntegration:
+    """Integration tests for config models."""
+    
+    def test_structured_config_with_all_technique_types(self):
+        """Test structured config with all technique types."""
+        config = AnonymisePseudonymizeStructuredConfig(
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=HashConfig(columns=["col1"])
+                ),
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(columns=["col2"], key_name="k1")
+                ),
+                PseudoTechniqueConfig(
+                    technique=RedactConfig(columns=["col3"])
+                ),
+                PseudoTechniqueConfig(
+                    technique=ReplaceConfig(columns=["col4"], new_value="X")
+                )
+            ]
+        )
+        assert len(config.used_function) == 4
+        techniques = {f.technique.type for f in config.used_function}
+        assert techniques == {"hash", "encrypt", "redact", "replace"}
+    
+    def test_unstructured_config_with_all_technique_types(self):
+        """Test unstructured config with all technique types."""
+        config = AnonymisePseudonymizeUnstructuredConfig(
+            language=LanguageEnum.en,
+            used_function=[
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredHashConfig(pii=[PIIEntityEnum.EMAIL.value])
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredEncryptConfig(
+                        pii=[PIIEntityEnum.PERSON.value],
+                        key_name="k1"
+                    )
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredRedactConfig(pii=[PIIEntityEnum.PHONE_NUMBERS.value])
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=UnstructuredReplaceConfig(
+                        pii=[PIIEntityEnum.CREDIT_CARD.value],
+                        new_value="X"
+                    )
+                ),
+                UnstructuredPseudoTechniqueConfig(
+                    technique=RetainConfig(pii=[PIIEntityEnum.DATE_OF_BIRTH.value])
+                )
+            ]
+        )
+        assert len(config.used_function) == 5
+        techniques = {f.technique.type for f in config.used_function}
+        assert techniques == {"hash", "encrypt", "redact", "replace", "retain"}
--- a/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
+++ b/tests/field_level_pseudo_anonymisation/test_decrypt_structured.py
--- a/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
+++ b/tests/field_level_pseudo_anonymisation/test_decrypt_unstructured.py
@@ -0,0 +1,288 @@
+"""
+Test suite for data restoration (depseudonymisation) of unstructured text.
+
+## Test Coverage Summary
+
+### Acceptance Criteria Coverage:
+- AC1 (Data Restoration with Valid Key): 2 tests
+- AC2 (Restoration Denial - Missing Key): 1 test
+- AC3 (Restoration Denial - Unauthorized Access): 1 test
+- AC4 (Restoration Denial - Invalid Key): 1 test
+- Additional Coverage: 2 tests (edge cases)
+
+### Test Pattern:
+- Each test uses build_op_context with .model_dump() for configuration
+- Tests validate dual outputs (data, metrics)
+- Tests verify complete restoration of original text
+- Tests validate security controls and error handling
+- Tests use descriptive names mapping to AC scenarios
+
+"""
+
+import pytest
+from unittest.mock import patch
+from cryptography.fernet import Fernet
+from dagster import build_op_context
+
+from src.field_level_pseudo_anonymisation.unstructured_ops import (
+    depseudonymize_unstructured,
+)
+from src.field_level_pseudo_anonymisation.config_models.unstructured_config import (
+    DepseudonymizeUnstructuredConfig,
+    DecryptConfig,
+    DepseudoTechniqueConfig,
+)
+
+
+@pytest.fixture
+def fernet_key() -> bytes:
+    """Generate a valid Fernet key for encryption in tests."""
+    return Fernet.generate_key()
+
+
+@pytest.fixture
+def encrypted_text_data(fernet_key: bytes) -> dict:
+    """
+    Create encrypted data for testing decryption.
+
+    Returns a dict with:
+    - original_text: The unencrypted text
+    - encrypted_text: Text with PII values encrypted in {encrypt:...} format
+    """
+    original_text = "My name is John Doe and my email is john.doe@example.com."
+    fernet = Fernet(fernet_key)
+    encrypted_name = fernet.encrypt(b"John Doe").decode()
+    encrypted_email = fernet.encrypt(b"john.doe@example.com").decode()
+    encrypted_text = (
+        f"My name is {{encrypt:{encrypted_name}}} and my email is {{encrypt:{encrypted_email}}}."
+    )
+    return {
+        "original_text": original_text,
+        "encrypted_text": encrypted_text,
+    }
+
+
+# ---------------------- AC1: Data Restoration with Valid Key --------------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac1_restore_encrypted_pii_entities_with_valid_key(
+    mock_create_get_key, fernet_key: bytes, encrypted_text_data: dict
+):
+    """AC1: Restore encrypted PII entities with a valid key from secret management tool."""
+    # Arrange - Mock the Vault key retrieval to return the valid key
+    mock_create_get_key.return_value = fernet_key
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="test_key"))
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act - Request data restoration
+    result_gen = depseudonymize_unstructured(
+        context, input_text=encrypted_text_data["encrypted_text"]
+    )
+    data_output = next(result_gen)
+    metrics_output = next(result_gen)
+
+    # Assert - Verify successful restoration
+    # 1. All original values are restored exactly
+    assert (
+        data_output.value == encrypted_text_data["original_text"]
+    ), "Original text should be fully restored"
+
+    # 2. Correct output structure
+    assert data_output.output_name == "data", "Output should be named 'data'"
+
+    # 3. Metrics show correct number of restored entities
+    assert (
+        metrics_output.value["total_depseudo_count"] == 2
+    ), "Should restore 2 encrypted entities (name and email)"
+
+    # 4. System retrieved key from secret management tool
+    mock_create_get_key.assert_called_once_with("decrypt", "test_key")
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac1_restore_multiple_pii_types_with_valid_key(mock_create_get_key, fernet_key: bytes):
+    """AC1: Restore multiple encrypted PII entity types (name, email, phone) with a valid key."""
+    # Arrange - Create text with multiple PII types encrypted
+    original_text = "Contact John Doe at john.doe@example.com or call 555-1234."
+    fernet = Fernet(fernet_key)
+    encrypted_name = fernet.encrypt(b"John Doe").decode()
+    encrypted_email = fernet.encrypt(b"john.doe@example.com").decode()
+    encrypted_phone = fernet.encrypt(b"555-1234").decode()
+    encrypted_text = (
+        f"Contact {{encrypt:{encrypted_name}}} at "
+        f"{{encrypt:{encrypted_email}}} or call {{encrypt:{encrypted_phone}}}."
+    )
+
+    mock_create_get_key.return_value = fernet_key
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(type="decrypt", key_name="multi_pii_key")
+            )
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act
+    result_gen = depseudonymize_unstructured(context, input_text=encrypted_text)
+    data_output = next(result_gen)
+    metrics_output = next(result_gen)
+
+    # Assert
+    assert data_output.value == original_text, "All PII types should be restored"
+    assert (
+        metrics_output.value["total_depseudo_count"] == 3
+    ), "Should restore 3 encrypted entities (name, email, phone)"
+    mock_create_get_key.assert_called_once_with("decrypt", "multi_pii_key")
+
+
+# ------------------- AC2: Restoration Denial when Key is Missing ----------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac2_restoration_denial_when_key_missing(mock_create_get_key, encrypted_text_data: dict):
+    """AC2: Deny restoration when decryption key is missing from secret management tool."""
+    # Arrange - Mock Vault to indicate key is missing
+    mock_create_get_key.side_effect = ValueError(
+        "Fernet key 'non_existent_key' not found in Vault for decrypt."
+    )
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(type="decrypt", key_name="non_existent_key")
+            )
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act & Assert - Verify system fails the restoration request
+    with pytest.raises(
+        ValueError,
+        match="Fernet key 'non_existent_key' not found in Vault for decrypt.",
+    ) as exc_info:
+        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
+
+    # Verify error message is clear and actionable
+    assert "not found in Vault" in str(
+        exc_info.value
+    ), "Error message should indicate key is missing from Vault"
+
+    # Verify system attempted to retrieve the key (logged attempt)
+    mock_create_get_key.assert_called_once_with("decrypt", "non_existent_key")
+
+
+# ------------- AC3: Restoration Denial when Access is Unauthorized --------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac3_restoration_denial_when_unauthorized_access(
+    mock_create_get_key, encrypted_text_data: dict
+):
+    """AC3: Deny restoration when participant is not authorized to access the decryption key."""
+    # Arrange - Mock Vault to deny access
+    mock_create_get_key.side_effect = ValueError("Access denied to secret: unauthorized_key")
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(
+                technique=DecryptConfig(type="decrypt", key_name="unauthorized_key")
+            )
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act & Assert - Verify system denies access
+    with pytest.raises(ValueError, match="Access denied to secret: unauthorized_key") as exc_info:
+        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
+
+    # Verify error message clearly indicates access denial
+    assert "Access denied" in str(
+        exc_info.value
+    ), "Error message should clearly indicate access was denied"
+
+    # Verify the unauthorized access attempt was logged (function was called)
+    mock_create_get_key.assert_called_once_with("decrypt", "unauthorized_key")
+
+
+# ------------------- AC4: Restoration Denial when Key is Invalid ----------------------------
+
+
+@patch("src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key")
+def test_ac4_restoration_denial_when_key_invalid(mock_create_get_key, encrypted_text_data: dict):
+    """AC4: Deny restoration when decryption key does not correspond to the encrypted fields."""
+    # Arrange - Mock Vault to return a different (wrong) key
+    invalid_key = Fernet.generate_key()  # A different, incorrect key
+    mock_create_get_key.return_value = invalid_key
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="wrong_key"))
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act & Assert - Verify system fails the restoration
+    with pytest.raises(ValueError, match="Invalid Fernet token") as exc_info:
+        list(depseudonymize_unstructured(context, input_text=encrypted_text_data["encrypted_text"]))
+
+    # Verify error message indicates decryption failure
+    assert "Invalid Fernet token" in str(
+        exc_info.value
+    ), "Error message should indicate the key is invalid for this data"
+
+    # Verify key was retrieved (system attempted decryption)
+    mock_create_get_key.assert_called_once_with("decrypt", "wrong_key")
+
+
+# -------------------------------- Additional Edge Cases ----------------------------------------
+
+
+def test_depseudonymize_unstructured_no_decrypt_config():
+    """Edge case: Text is returned unchanged when no decryption techniques are configured."""
+    # Arrange
+    original_text = "This text has no {encrypt:values} to decrypt."
+    config = DepseudonymizeUnstructuredConfig(used_function=[])  # No techniques
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act
+    result_gen = depseudonymize_unstructured(context, input_text=original_text)
+    result_output = next(result_gen)
+    metrics_output = next(result_gen)
+
+    # Assert
+    assert (
+        result_output.value == original_text
+    ), "Text should remain unchanged when no decryption is configured"
+    assert (
+        metrics_output.value["total_depseudo_count"] == 0
+    ), "Should report zero decryptions performed"
+
+
+def test_depseudonymize_unstructured_empty_text():
+    """Edge case: Empty input text is returned unchanged with zero decryptions performed."""
+    # Arrange
+    empty_text = ""
+    config = DepseudonymizeUnstructuredConfig(
+        used_function=[
+            DepseudoTechniqueConfig(technique=DecryptConfig(type="decrypt", key_name="test_key"))
+        ]
+    )
+    context = build_op_context(op_config=config.model_dump())
+
+    # Act
+    with patch(
+        "src.field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key"
+    ) as mock_key:
+        mock_key.return_value = Fernet.generate_key()
+        result_gen = depseudonymize_unstructured(context, input_text=empty_text)
+        result_output = next(result_gen)
+        metrics_output = next(result_gen)
+
+    # Assert
+    assert result_output.value == "", "Empty text should remain empty"
+    assert (
+        metrics_output.value["total_depseudo_count"] == 0
+    ), "Should report zero decryptions for empty text"
--- a/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
+++ b/tests/field_level_pseudo_anonymisation/test_encrypt_structured.py
--- a/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
+++ b/tests/field_level_pseudo_anonymisation/test_encrypt_unstructured.py
@@ -0,0 +1,853 @@
+"""
+Test suite for field-level pseudonymisation operations on unstructured data.
+
+This test suite validates the pseudonymisation of unstructured text with PII detection,
+covering the following Acceptance Criteria:
+
+## Test Coverage Summary
+
+### Acceptance Criteria Coverage:
+- AC1 (Pseudonymisation and Retention Applied Correctly): 8 tests
+- AC2 (Invalid Execution Handling): 5 tests
+- AC3 (Execution Audit & Logging - Positive Scenario): 3 tests
+- AC4 (Execution Audit & Logging - Negative Scenario): 4 tests
+- Additional Coverage: 3 tests
+
+### Test Pattern:
+- Each test uses build_op_context with config_to_dagster_dict for configuration
+- Tests validate dual outputs (data, metrics)
+- Vault access is mocked for isolation
+- Tests validate Scrubadub automatic PII detection
+- Tests ensure placeholder replacement for unconfigured PII
+"""
+
+import pytest
+import re
+from dagster import build_op_context
+from unittest.mock import patch, MagicMock
+
+from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
+    AnonymisePseudonymizeUnstructuredConfig,
+    EncryptConfig,
+    RetainConfig,
+    PseudoTechniqueConfig,
+)
+from template_code_location.field_level_pseudo_anonymisation.config_models import PIIEntityEnum, LanguageEnum
+from template_code_location.field_level_pseudo_anonymisation.unstructured_ops import (
+    anonymize_pseudonymize_unstructured,
+)
+
+from .conftest import clear_vault_key
+
+
+def config_to_dagster_dict_unstructured(config):
+    """Convert unstructured config to Dagster format."""
+    config_dict = {"language": config.language.value, "used_function": []}
+
+    for func_config in config.used_function:
+        technique = func_config.technique
+        technique_type = technique.type
+        technique_dict = technique.model_dump()
+
+        if "pii" in technique_dict:
+            technique_dict["pii"] = [pii_enum.name for pii_enum in technique.pii]
+
+        technique_dict_without_type = {k: v for k, v in technique_dict.items() if k != "type"}
+
+        config_dict["used_function"].append(
+            {"technique": {technique_type: technique_dict_without_type}}
+        )
+
+    return config_dict
+
+
+def run_unstructured_op(config, text):
+    """
+    Helper to run unstructured pseudonymisation op.
+
+    Returns:
+        tuple: (result_text: str, metrics_markdown: str)
+    """
+    context = build_op_context(op_config=config_to_dagster_dict_unstructured(config))
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=text)
+
+    # Extract actual values from Output objects
+    return result_text.value, metrics.value
+
+
+def parse_metrics_markdown(metrics_md: str) -> dict:
+    """
+    Parse markdown metrics into structured dict for easier testing.
+
+    Args:
+        metrics_md: Markdown metrics string from op output
+
+    Returns:
+        dict with keys: total_pii_detected, pii_by_type, techniques_applied, language
+    """
+    result = {
+        "total_pii_detected": 0,
+        "pii_by_type": {},
+        "techniques_applied": {},
+        "language": "",
+    }
+
+    # Extract total PII detected
+    total_match = re.search(r"\*\*Total PII Detected\*\*:\s*(\d+)", metrics_md)
+    if total_match:
+        result["total_pii_detected"] = int(total_match.group(1))
+
+    # Extract language
+    lang_match = re.search(r"\*\*Language\*\*:\s*(\w+)", metrics_md)
+    if lang_match:
+        result["language"] = lang_match.group(1)
+
+    # Extract PII by type from table
+    pii_table_section = re.search(
+        r"### PII by Type\n\| Entity Type \| Count \|\n\|[^\n]+\n((?:\|[^\n]+\n)+)",
+        metrics_md,
+    )
+    if pii_table_section:
+        for line in pii_table_section.group(1).strip().split("\n"):
+            parts = [p.strip() for p in line.split("|") if p.strip()]
+            if len(parts) == 2:
+                entity_type, count = parts
+                result["pii_by_type"][entity_type] = int(count)
+
+    # Extract techniques applied
+    techniques_section = re.search(r"### Techniques Applied\n((?:- \*\*[^\n]+\n)+)", metrics_md)
+    if techniques_section:
+        for line in techniques_section.group(1).strip().split("\n"):
+            tech_match = re.match(r"-\s*\*\*(.+?)\*\*:\s*(.+)", line)
+            if tech_match:
+                pii_type, technique = tech_match.groups()
+                result["techniques_applied"][pii_type] = technique
+
+    return result
+
+
+# -------------------------------- Fixtures ----------------------------------------
+
+
+@pytest.fixture
+def sample_text_en():
+    """English text with various PII types."""
+    return """
+    John Smith works at Acme Corporation. His email is john.smith@example.com
+    and his phone number is +1-555-123-4567. He lives in New York City at
+    123 Main Street, Apartment 4B. His SSN is 123-45-6789.
+    """
+
+
+@pytest.fixture
+def sample_text_multi_person():
+    """Text with multiple person names."""
+    return """
+    The meeting included Alice Johnson, Bob Williams, and Charlie Brown.
+    They discussed the project with Maria Garcia and David Wilson.
+    """
+
+
+@pytest.fixture
+def sample_text_mixed_pii():
+    """Text with multiple PII types for AC1 comprehensive testing."""
+    return """
+    Contact Information:
+    Name: Dr. Emily Watson
+    Email: emily.watson@hospital.com
+    Phone: +44-20-7946-0958
+    Website: https://patient-portal.hospital.com/records
+    """
+
+
+@pytest.fixture
+def encrypt_person_config():
+    """Configuration to encrypt PERSON entities."""
+    return AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_person_key",
+                )
+            )
+        ],
+    )
+
+
+@pytest.fixture
+def retain_person_config():
+    """Configuration to retain PERSON entities unchanged."""
+    return AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON]))
+        ],
+    )
+
+
+@pytest.fixture
+def mixed_technique_config():
+    """Configuration with encryption and retention for AC1 testing."""
+    return AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_mixed_key",
+                )
+            ),
+            PseudoTechniqueConfig(
+                technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PHONE_NUMBERS])
+            ),
+        ],
+    )
+
+
+# ================================================================================================
+# AC1: Pseudonymisation and Retention Are Applied Correctly
+# ================================================================================================
+
+
+def test_ac1_encrypt_configured_pii_types(sample_text_mixed_pii, encrypt_person_config):
+    """AC1: Test that configured PII types are encrypted correctly."""
+    clear_vault_key("test_person_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_config, sample_text_mixed_pii)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify person name is encrypted (not in plaintext)
+    assert "Emily Watson" not in result_text, "Configured PERSON PII should be encrypted"
+
+    # Verify encryption token is present
+    assert "{encrypt:" in result_text, "Encrypted token should be present in result"
+
+    # Verify PII was detected and processed
+    assert metrics["total_pii_detected"] > 0, "System should detect PII entities"
+    assert "PERSON" in metrics["pii_by_type"], "PERSON type should be in detected PII"
+
+    # Verify text structure is preserved (surrounding text intact)
+    assert "Contact Information:" in result_text, "Non-PII text structure should be preserved"
+
+
+def test_ac1_retain_configured_pii_unchanged(sample_text_multi_person):
+    """AC1: Test that PII types marked for retention remain unchanged."""
+    retain_config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(technique=RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON]))
+        ],
+    )
+
+    result_text, metrics_md = run_unstructured_op(retain_config, sample_text_multi_person)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify retained PII types remain in plaintext
+    assert "Alice Johnson" in result_text, "Retained PERSON PII should remain unchanged"
+    assert "Bob Williams" in result_text, "Retained PERSON PII should remain unchanged"
+
+    # Verify technique applied is 'retain'
+    assert (
+        "retain" in metrics["techniques_applied"].get("PERSON", "").lower()
+    ), "Retain technique should be recorded for PERSON type"
+
+
+def test_ac1_unconfigured_pii_replaced_with_placeholders(sample_text_mixed_pii):
+    """AC1: Test that unconfigured PII types are replaced with placeholders."""
+    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_person_only_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_person_only_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
+
+    # Verify person is encrypted (configured)
+    assert "Emily Watson" not in result_text, "Configured PERSON should be encrypted"
+
+    # Verify unconfigured PII types have placeholders
+    assert (
+        "{{" in result_text and "}}" in result_text
+    ), "Unconfigured PII should be replaced with placeholders"
+
+    # Verify original unconfigured PII values are not in result
+    assert (
+        "emily.watson@hospital.com" not in result_text
+    ), "Unconfigured EMAIL should be replaced with placeholder"
+
+    # Verify placeholder format
+    assert (
+        "{{EMAIL}}" in result_text or "{{URL}}" in result_text
+    ), "Placeholders should indicate entity type"
+
+
+def test_ac1_mixed_techniques_applied_correctly(sample_text_mixed_pii, mixed_technique_config):
+    """AC1: Test that multiple techniques (encrypt, retain) are applied correctly."""
+    clear_vault_key("test_mixed_key")
+
+    result_text, metrics_md = run_unstructured_op(mixed_technique_config, sample_text_mixed_pii)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify encrypted PII types (PERSON, EMAIL)
+    assert "Emily Watson" not in result_text, "Configured PERSON should be encrypted"
+    assert "emily.watson@hospital.com" not in result_text, "Configured EMAIL should be encrypted"
+
+    # Verify retained PII type (PHONE_NUMBERS)
+    assert "+44-20-7946-0958" in result_text, "Configured PHONE_NUMBERS should be retained"
+
+    # Verify metrics reflect different techniques
+    assert (
+        "encrypt" in metrics["techniques_applied"].get("PERSON", "").lower()
+    ), "Encrypt technique should be applied to PERSON"
+    assert (
+        "encrypt" in metrics["techniques_applied"].get("EMAIL", "").lower()
+    ), "Encrypt technique should be applied to EMAIL"
+    assert (
+        "retain" in metrics["techniques_applied"].get("PHONE_NUMBERS", "").lower()
+    ), "Retain technique should be applied to PHONE_NUMBERS"
+
+
+def test_ac1_multiple_instances_same_pii_type(sample_text_multi_person, encrypt_person_config):
+    """AC1: Test that all instances of a configured PII type are processed."""
+    clear_vault_key("test_person_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_config, sample_text_multi_person)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify all person names are encrypted
+    person_names = [
+        "Alice Johnson",
+        "Bob Williams",
+        "Charlie Brown",
+        "Maria Garcia",
+        "David Wilson",
+    ]
+    for name in person_names:
+        assert name not in result_text, f"All PERSON instances should be encrypted: {name}"
+
+    # Verify metrics count multiple instances
+    assert metrics["pii_by_type"].get("PERSON", 0) >= len(
+        person_names
+    ), f"Should detect at least {len(person_names)} PERSON entities"
+
+
+def test_ac1_empty_text_returns_empty(encrypt_person_config):
+    """AC1: Test that empty or null text input raises a ValueError."""
+    clear_vault_key("test_person_key")
+
+    with pytest.raises(ValueError) as exc_info:
+        run_unstructured_op(encrypt_person_config, "")
+
+    assert "empty" in str(exc_info.value).lower(), "Error should indicate empty input"
+
+
+def test_ac1_text_without_pii_remains_unchanged():
+    """AC1: Test that text without any PII remains unchanged after processing."""
+    no_pii_text = """
+    The weather today is sunny with a high of 25 degrees Celsius.
+    The conference starts at 9:00 AM in Room 301.
+    """
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_no_pii_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_no_pii_key")
+
+    result_text, metrics_md = run_unstructured_op(config, no_pii_text)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    assert result_text.strip() == no_pii_text.strip(), "Text without PII should remain unchanged"
+    assert metrics["total_pii_detected"] == 0, "No PII should be detected"
+
+
+def test_ac1_placeholder_format_indicates_entity_type(sample_text_mixed_pii):
+    """AC1: Test that placeholders for unconfigured PII indicate the entity type."""
+    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_placeholder_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_placeholder_key")
+
+    result_text, metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify placeholder format (scrubadub uses {{TYPE}} format)
+    placeholder_pattern = r"\{\{[A-Z_]+\}\}"
+    placeholders = re.findall(placeholder_pattern, result_text)
+
+    assert (
+        len(placeholders) > 0
+    ), "Result should contain entity-type placeholders for unconfigured PII"
+
+    # Verify metrics track which PII types were detected
+    assert len(metrics["pii_by_type"]) > 0, "Metrics should list detected PII types"
+
+
+# ================================================================================================
+# AC2: Invalid Execution Handling
+# ================================================================================================
+
+
+def test_ac2_graceful_abort_on_scrubadub_failure():
+    """AC2: Test graceful abort when the PII detection engine (Scrubadub) fails."""
+    text = "Test user John Smith with email john@example.com"
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_abort_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_abort_key")
+
+    # Mock Scrubadub to fail at the right import path
+    with patch(
+        "field_level_pseudo_anonymisation.unstructured_ops.scrubadub.Scrubber"
+    ) as mock_scrubber_class:
+        mock_scrubber = MagicMock()
+        mock_scrubber.iter_filth.side_effect = RuntimeError("Scrubadub internal error")
+        mock_scrubber_class.return_value = mock_scrubber
+
+        with pytest.raises(RuntimeError) as exc_info:
+            run_unstructured_op(config, text)
+
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "pii" in error_msg
+            or "detection" in error_msg
+            or "scrubadub" in error_msg
+            or "failed" in error_msg
+        ), "Error message should indicate PII detection failure"
+
+
+def test_ac2_graceful_abort_on_encryption_failure(sample_text_en):
+    """AC2: Test graceful abort when an encryption technique fails during execution."""
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_encrypt_fail_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_encrypt_fail_key")
+
+    # Mock encrypt function at correct path - it's imported from techniques module
+    encrypt_path = (
+        "field_level_pseudo_anonymisation"
+        ".techniques.anonymisation_pseudonymisation_techniques.encrypt"
+    )
+    with patch(encrypt_path) as mock_encrypt:
+        mock_encrypt.side_effect = Exception("Encryption algorithm failure")
+
+        with pytest.raises(RuntimeError) as exc_info:
+            run_unstructured_op(config, sample_text_en)
+
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "encrypt" in error_msg or "failed" in error_msg or "technique" in error_msg
+        ), "Error message should indicate encryption failure"
+
+
+def test_ac2_null_text_input_raises_error(encrypt_person_config):
+    """AC2: Test that a null (None) text input is rejected with an error."""
+    clear_vault_key("test_person_key")
+
+    # Dagster will raise DagsterTypeCheckDidNotPass before op executes
+    from dagster import DagsterTypeCheckDidNotPass
+
+    with pytest.raises((ValueError, DagsterTypeCheckDidNotPass, TypeError)):
+        run_unstructured_op(encrypt_person_config, None)
+
+
+def test_ac2_invalid_language_configuration():
+    """AC2: Test that an unsupported language in the config raises a validation error."""
+    # This should fail at config creation due to Pydantic validation
+    with pytest.raises((ValueError, TypeError)):
+        AnonymisePseudonymizeUnstructuredConfig(
+            language="invalid_lang",  # Should fail Pydantic validation
+            used_function=[
+                PseudoTechniqueConfig(
+                    technique=EncryptConfig(
+                        type="encrypt", pii=[PIIEntityEnum.PERSON], key_name="test_key"
+                    )
+                )
+            ],
+        )
+
+
+def test_ac2_very_large_text_processing():
+    """AC2: Test that very large text inputs are processed successfully without memory errors."""
+    # Create large text with repeated PII patterns
+    large_text = (
+        """
+    John Smith works at company. Email: john.smith@example.com.
+    """
+        * 1000
+    )  # ~60KB of text with repeated PII
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_large_text_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_large_text_key")
+
+    result_text, metrics_md = run_unstructured_op(config, large_text)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify processing completed
+    assert result_text is not None, "Large text should be processed successfully"
+    assert len(result_text) > 0, "Result should not be empty"
+    assert metrics["total_pii_detected"] > 0, "PII should be detected in large text"
+
+
+# ================================================================================================
+# AC3: Execution Audit & Logging - Positive Scenario
+# ================================================================================================
+
+
+def test_ac3_successful_execution_logs_timestamp_and_run_id(sample_text_en, encrypt_person_config):
+    """AC3: Test that successful execution context contains a run ID for logging."""
+    clear_vault_key("test_person_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    # Capture run context
+    run_id = context.run_id
+
+    # Execute operation
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_en)
+
+    # Verify run identifier is available for logging
+    assert run_id is not None, "Run ID must be available for audit logging"
+
+    # Verify outputs are returned (for Dagster to log)
+    assert result_text is not None, "Result text should be available for logging"
+    assert metrics is not None, "Metrics should be available for logging"
+
+
+def test_ac3_successful_execution_logs_configuration_parameters(
+    sample_text_en, mixed_technique_config
+):
+    """AC3: Test that the used configuration is accessible for logging on success."""
+    clear_vault_key("test_mixed_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(mixed_technique_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_en)
+
+    # Verify configuration is captured and accessible
+    assert "used_function" in op_config_dict, "Configuration must be accessible for logging"
+    assert len(op_config_dict["used_function"]) == 2, "Multiple techniques should be captured"
+
+    # Verify techniques are logged
+    techniques = [func["technique"] for func in op_config_dict["used_function"]]
+    assert any(
+        "encrypt" in str(tech) for tech in techniques
+    ), "Encrypt technique should be in configuration"
+    assert any(
+        "retain" in str(tech) for tech in techniques
+    ), "Retain technique should be in configuration"
+
+    # Verify metrics contain technique information (in markdown string)
+    metrics_str = metrics.value
+    assert (
+        "Techniques Applied" in metrics_str
+    ), "Applied techniques should be in metrics for logging"
+
+
+def test_ac3_successful_execution_logs_no_raw_pii(sample_text_mixed_pii, encrypt_person_config):
+    """AC3: Test that logs and metrics from a successful run do not contain raw PII."""
+    clear_vault_key("test_person_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
+    context = build_op_context(op_config=op_config_dict)
+
+    result_text, metrics = anonymize_pseudonymize_unstructured(context, text=sample_text_mixed_pii)
+
+    # Verify raw PII values are not in metrics
+    metrics_str = metrics.value
+
+    sensitive_values = ["Emily Watson", "emily.watson@hospital.com", "+44-20-7946-0958"]
+
+    for pii_value in sensitive_values:
+        assert (
+            pii_value not in metrics_str
+        ), f"Raw PII value should not appear in metrics: {pii_value}"
+
+    # Verify configuration logs do not contain raw PII
+    config_str = str(op_config_dict)
+    for pii_value in sensitive_values:
+        assert (
+            pii_value not in config_str
+        ), f"Raw PII value should not appear in configuration logs: {pii_value}"
+
+
+# ================================================================================================
+# AC4: Execution Audit & Logging - Negative Scenario
+# ================================================================================================
+
+
+def test_ac4_failed_execution_logs_error_details():
+    """AC4: Negative execution should surface clear error details (encryption key failure)."""
+    text = "Test user John Smith"
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_fail_log_key",
+                )
+            )
+        ],
+    )
+    clear_vault_key("test_fail_log_key")
+    ctx = build_op_context(op_config=config_to_dagster_dict_unstructured(config))
+
+    # Patch the key retrieval used inside unstructured_ops to force failure
+    with patch(
+        "field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key",
+        side_effect=RuntimeError("Encryption key retrieval failed"),
+    ):
+        with pytest.raises(RuntimeError) as exc_info:
+            # Consume the generator to trigger execution and raise the exception
+            list(anonymize_pseudonymize_unstructured(ctx, text=text))
+
+        msg = str(exc_info.value).lower()
+        assert "key" in msg and "failed" in msg, "Error message should mention key failure"
+
+
+def test_ac4_failed_execution_logs_configuration_used():
+    """AC4: Test that the attempted configuration is available for logging on failure."""
+    text = "Test data with person John Doe"
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_config_fail_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_config_fail_key")
+
+    op_config_dict = config_to_dagster_dict_unstructured(config)
+    context = build_op_context(op_config=op_config_dict)
+
+    # Mock _initialize_scrubber to fail
+    with patch(
+        "field_level_pseudo_anonymisation.unstructured_ops._initialize_scrubber"
+    ) as mock_init_scrubber:
+        mock_init_scrubber.side_effect = Exception("Scrubber module not available")
+
+        with pytest.raises((RuntimeError, Exception)) as exc_info:
+            list(anonymize_pseudonymize_unstructured(context, text=text))
+
+        # Verify configuration is still accessible despite failure
+        assert op_config_dict is not None, "Configuration must be accessible for failure audit"
+        assert (
+            "used_function" in op_config_dict
+        ), "Technique configuration should be available for diagnosis"
+
+        # Verify error was raised with proper message
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "pii" in error_msg
+            or "detection" in error_msg
+            or "failed" in error_msg
+            or "scrubber" in error_msg
+            or "module" in error_msg
+        ), "Error should indicate detection/processing failed"
+
+
+def test_ac4_failed_execution_logs_failure_reason():
+    """AC4: Test that the reason for a failure is clearly indicated in the error message."""
+    text = "User: Alice Smith, Email: alice@example.com"
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.en,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_failure_reason_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_failure_reason_key")
+
+    # Mock key retrieval function to fail
+    with patch(
+        "field_level_pseudo_anonymisation.unstructured_ops.create_get_encryption_key"
+    ) as mock_get_key:
+        mock_get_key.side_effect = RuntimeError("Vault connection timeout")
+
+        with pytest.raises(RuntimeError) as exc_info:
+            run_unstructured_op(config, text)
+
+        # Verify failure reason is in error message
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "encrypt" in error_msg
+            or "key" in error_msg
+            or "timeout" in error_msg
+            or "failed" in error_msg
+        ), "Error should indicate key retrieval/encryption failure"
+
+
+# ================================================================================================
+# Additional Tests - Edge Cases and Integration
+# ================================================================================================
+
+
+def test_multi_language_support_italian():
+    """Additional test: Verify that Italian text is processed correctly."""
+    italian_text = """
+    Il dottor Marco Rossi lavora presso l'ospedale.
+    Email: marco.rossi@ospedale.it
+    Telefono: +39-06-12345678
+    """
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.it,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON],
+                    key_name="test_italian_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_italian_key")
+
+    result_text, metrics_md = run_unstructured_op(config, italian_text)
+    metrics = parse_metrics_markdown(metrics_md)
+
+    # Verify processing occurred
+    assert result_text != italian_text, "Italian text should be processed"
+    assert metrics["total_pii_detected"] > 0, "PII should be detected in Italian text"
+
+
+def test_special_characters_in_text():
+    """Additional test: Verify handling of text with special Unicode characters."""
+    special_text = """
+    User: João da Silva 🇧🇷
+    Email: joão@empresa.com.br
+    Message: "Hello, World!" — Testing special chars: €, £, ¥, ©, ®
+    """
+
+    config = AnonymisePseudonymizeUnstructuredConfig(
+        language=LanguageEnum.pt,
+        used_function=[
+            PseudoTechniqueConfig(
+                technique=EncryptConfig(
+                    type="encrypt",
+                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
+                    key_name="test_special_chars_key",
+                )
+            )
+        ],
+    )
+
+    clear_vault_key("test_special_chars_key")
+
+    result_text, metrics_md = run_unstructured_op(config, special_text)
+
+    # Verify processing completed without encoding errors
+    assert result_text is not None, "Special characters should not cause processing failure"
+    assert len(result_text) > 0, "Result should not be empty"
+
+
+def test_deterministic_encryption_within_session(sample_text_en, encrypt_person_config):
+    """Additional test: Verify encryption format consistency across runs."""
+    clear_vault_key("test_person_key")
+
+    result1, metrics_md1 = run_unstructured_op(encrypt_person_config, sample_text_en)
+    result2, metrics_md2 = run_unstructured_op(encrypt_person_config, sample_text_en)
+
+    # Both should have encryption tokens
+    assert "{encrypt:" in result1, "First run should produce encrypted tokens"
+    assert "{encrypt:" in result2, "Second run should produce encrypted tokens"
+
+    # Verify consistent PII detection
+    metrics1 = parse_metrics_markdown(metrics_md1)
+    metrics2 = parse_metrics_markdown(metrics_md2)
+
+    assert (
+        metrics1["total_pii_detected"] == metrics2["total_pii_detected"]
+    ), "PII detection should be consistent across runs"
+
+    # Verify token format is consistent (Fernet base64 pattern)
+    token_pattern = r"\{encrypt:gAAAAAB[A-Za-z0-9+/=_-]+\}"
+    tokens1 = re.findall(token_pattern, result1)
+    tokens2 = re.findall(token_pattern, result2)
+
+    assert len(tokens1) == len(tokens2), "Same number of encryption tokens should be generated"
--- a/tests/field_level_pseudo_anonymisation/test_jobs.py
+++ b/tests/field_level_pseudo_anonymisation/test_jobs.py
@@ -0,0 +1,58 @@
+from template_code_location.field_level_pseudo_anonymisation.jobs import (
+    anonymize_pseudonymize_structured_job,
+    anonymize_pseudonymize_structured_job_s3,
+    depseudonymize_structured_job,
+    depseudonymize_structured_job_s3,
+    anonymize_pseudonymize_unstructured_job_s3,
+    anonymize_pseudonymize_unstructured_job,
+    depseudonymize_unstructured_job_s3,
+    depseudonymize_unstructured_job
+)
+
+
+def test_anonymize_pseudonymize_structured_job_is_callable():
+    """Test anonymize_pseudonymize_structured_job is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_structured_job)
+    assert hasattr(anonymize_pseudonymize_structured_job, 'execute_in_process')
+
+
+def test_anonymize_pseudonymize_structured_job_s3_is_callable():
+    """Test anonymize_pseudonymize_structured_job_s3 is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_structured_job_s3)
+    assert hasattr(anonymize_pseudonymize_structured_job_s3, 'execute_in_process')
+
+
+def test_depseudonymize_structured_job_is_callable():
+    """Test depseudonymize_structured_job is a valid Dagster job"""
+    assert callable(depseudonymize_structured_job)
+    assert hasattr(depseudonymize_structured_job, 'execute_in_process')
+
+
+def test_depseudonymize_structured_job_s3_is_callable():
+    """Test depseudonymize_structured_job_s3 is a valid Dagster job"""
+    assert callable(depseudonymize_structured_job_s3)
+    assert hasattr(depseudonymize_structured_job_s3, 'execute_in_process')
+
+
+def test_anonymize_pseudonymize_unstructured_job_is_callable():
+    """Test anonymize_pseudonymize_unstructured_job is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_unstructured_job)
+    assert hasattr(anonymize_pseudonymize_unstructured_job, 'execute_in_process')
+
+
+def test_anonymize_pseudonymize_unstructured_job_s3_is_callable():
+    """Test anonymize_pseudonymize_unstructured_job_s3 is a valid Dagster job"""
+    assert callable(anonymize_pseudonymize_unstructured_job_s3)
+    assert hasattr(anonymize_pseudonymize_unstructured_job_s3, 'execute_in_process')
+
+
+def test_depseudonymize_unstructured_job_is_callable():
+    """Test depseudonymize_unstructured_job is a valid Dagster job"""
+    assert callable(depseudonymize_unstructured_job)
+    assert hasattr(depseudonymize_unstructured_job, 'execute_in_process')
+
+
+def test_depseudonymize_unstructured_job_s3_is_callable():
+    """Test depseudonymize_unstructured_job_s3 is a valid Dagster job"""
+    assert callable(depseudonymize_unstructured_job_s3)
+    assert hasattr(depseudonymize_unstructured_job_s3, 'execute_in_process')