"""
Test suite for field-level pseudonymisation operations on unstructured data.

This test suite validates the pseudonymisation of unstructured text with PII detection,
covering the following Acceptance Criteria:

## Test Coverage Summary

### Acceptance Criteria Coverage:
- AC1 (Pseudonymisation and Retention Applied Correctly): 8 tests
- AC2 (Invalid Execution Handling): 5 tests
- AC3 (Execution Audit & Logging - Positive Scenario): 3 tests
- AC4 (Execution Audit & Logging - Negative Scenario): 3 tests
- Additional Coverage: 3 tests

### Test Pattern:
- Tests build their op context with build_op_context and config_to_dagster_dict_unstructured
- Tests validate dual outputs (data, metrics)
- Vault access is mocked for isolation
- Tests validate Scrubadub automatic PII detection
- Tests ensure placeholder replacement for unconfigured PII
"""

import pytest
import re
from dagster import build_op_context
from unittest.mock import patch, MagicMock

from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
    AnonymisePseudonymizeUnstructuredConfig,
    EncryptConfig,
    RetainConfig,
    PseudoTechniqueConfig,
)
from template_code_location.field_level_pseudo_anonymisation.config_models import PIIEntityEnum, LanguageEnum
from template_code_location.field_level_pseudo_anonymisation.unstructured_ops import (
    anonymize_pseudonymize_unstructured,
)

from .conftest import clear_vault_key


def config_to_dagster_dict_unstructured(config):
    """Translate an unstructured config model into a Dagster op-config dict.

    Each entry of ``config.used_function`` becomes
    ``{"technique": {<technique type>: <remaining technique fields>}}``.
    When the dumped technique has a ``pii`` field, its members are written
    as their ``name`` attributes; the ``type`` field is dropped from the
    inner dict because it is already used as the selector key.
    """

    def _technique_entry(technique):
        selector = technique.type
        dumped = technique.model_dump()

        # Replace the dumped pii values with the member names.
        if "pii" in dumped:
            dumped["pii"] = [member.name for member in technique.pii]

        fields = {key: value for key, value in dumped.items() if key != "type"}
        return {"technique": {selector: fields}}

    return {
        "language": config.language.value,
        "used_function": [_technique_entry(item.technique) for item in config.used_function],
    }


def run_unstructured_op(config, text):
    """
    Invoke the unstructured pseudonymisation op with a freshly built context.

    Args:
        config: Config model accepted by config_to_dagster_dict_unstructured.
        text: Input text handed to the op.

    Returns:
        tuple: the ``.value`` of the op's first and second outputs
        (result text, metrics markdown).
    """
    op_config = config_to_dagster_dict_unstructured(config)
    op_context = build_op_context(op_config=op_config)

    text_output, metrics_output = anonymize_pseudonymize_unstructured(op_context, text=text)

    # Unwrap the payloads from the op's output objects.
    return text_output.value, metrics_output.value


def parse_metrics_markdown(metrics_md: str) -> dict:
    """
    Parse markdown metrics into structured dict for easier testing.

    Fields that cannot be found keep their defaults (0, empty dict, empty
    string). The "PII by Type" table and the "Techniques Applied" list are
    parsed in full even when their last line is the end of the string, i.e.
    when the markdown has no trailing newline.

    Args:
        metrics_md: Markdown metrics string from op output

    Returns:
        dict with keys: total_pii_detected, pii_by_type, techniques_applied, language

    Raises:
        ValueError: If a two-cell table row has a count that is not an integer.
    """
    result = {
        "total_pii_detected": 0,
        "pii_by_type": {},
        "techniques_applied": {},
        "language": "",
    }

    # Extract total PII detected
    total_match = re.search(r"\*\*Total PII Detected\*\*:\s*(\d+)", metrics_md)
    if total_match:
        result["total_pii_detected"] = int(total_match.group(1))

    # Extract language
    lang_match = re.search(r"\*\*Language\*\*:\s*(\w+)", metrics_md)
    if lang_match:
        result["language"] = lang_match.group(1)

    # Extract PII by type from table. Each data row may end with a newline or
    # with the end of the string, so a final row without "\n" is not dropped.
    pii_table_section = re.search(
        r"### PII by Type\n\| Entity Type \| Count \|\n\|[^\n]+\n((?:\|[^\n]+(?:\n|$))+)",
        metrics_md,
    )
    if pii_table_section:
        for line in pii_table_section.group(1).strip().split("\n"):
            cells = [cell.strip() for cell in line.split("|") if cell.strip()]
            if len(cells) == 2:
                entity_type, count = cells
                result["pii_by_type"][entity_type] = int(count)

    # Extract techniques applied; same end-of-string tolerance as the table.
    techniques_section = re.search(
        r"### Techniques Applied\n((?:- \*\*[^\n]+(?:\n|$))+)", metrics_md
    )
    if techniques_section:
        for line in techniques_section.group(1).strip().split("\n"):
            tech_match = re.match(r"-\s*\*\*(.+?)\*\*:\s*(.+)", line)
            if tech_match:
                pii_type, technique = tech_match.groups()
                result["techniques_applied"][pii_type] = technique

    return result


# -------------------------------- Fixtures ----------------------------------------


@pytest.fixture
def sample_text_en():
    """Provide an English paragraph mixing several kinds of PII.

    The text contains a person name, an organisation, an email address, a
    phone number, a street address and an SSN-formatted number.
    """
    paragraph = """
    John Smith works at Acme Corporation. His email is john.smith@example.com
    and his phone number is +1-555-123-4567. He lives in New York City at
    123 Main Street, Apartment 4B. His SSN is 123-45-6789.
    """
    return paragraph


@pytest.fixture
def sample_text_multi_person():
    """Provide a short passage that mentions five different person names."""
    passage = """
    The meeting included Alice Johnson, Bob Williams, and Charlie Brown.
    They discussed the project with Maria Garcia and David Wilson.
    """
    return passage


@pytest.fixture
def sample_text_mixed_pii():
    """Provide a contact card with a name, email, phone number and URL (used by AC1 tests)."""
    contact_card = """
    Contact Information:
    Name: Dr. Emily Watson
    Email: emily.watson@hospital.com
    Phone: +44-20-7946-0958
    Website: https://patient-portal.hospital.com/records
    """
    return contact_card


@pytest.fixture
def encrypt_person_config():
    """Build an English config with one encrypt technique for PERSON (key ``test_person_key``)."""
    person_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON],
        key_name="test_person_key",
    )
    return AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_encryption)],
    )


@pytest.fixture
def retain_person_config():
    """Build an English config with a single retain technique for PERSON entities."""
    person_retention = RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON])
    return AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_retention)],
    )


@pytest.fixture
def mixed_technique_config():
    """Build an English config that encrypts PERSON and EMAIL and retains PHONE_NUMBERS.

    The encrypt technique uses the key name ``test_mixed_key``.
    """
    encrypt_person_and_email = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
        key_name="test_mixed_key",
    )
    retain_phone = RetainConfig(type="retain", pii=[PIIEntityEnum.PHONE_NUMBERS])

    return AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[
            PseudoTechniqueConfig(technique=encrypt_person_and_email),
            PseudoTechniqueConfig(technique=retain_phone),
        ],
    )


# ================================================================================================
# AC1: Pseudonymisation and Retention Are Applied Correctly
# ================================================================================================


def test_ac1_encrypt_configured_pii_types(sample_text_mixed_pii, encrypt_person_config):
    """AC1: A PERSON entity configured for encryption is replaced by an encrypt token."""
    clear_vault_key("test_person_key")

    output_text, raw_metrics = run_unstructured_op(encrypt_person_config, sample_text_mixed_pii)
    parsed = parse_metrics_markdown(raw_metrics)

    # The plaintext name must be gone and an encrypt token must be present.
    assert "Emily Watson" not in output_text, "Configured PERSON PII should be encrypted"
    assert "{encrypt:" in output_text, "Encrypted token should be present in result"

    # The metrics must report the detection.
    assert parsed["total_pii_detected"] > 0, "System should detect PII entities"
    assert "PERSON" in parsed["pii_by_type"], "PERSON type should be in detected PII"

    # Text around the PII must survive untouched.
    assert "Contact Information:" in output_text, "Non-PII text structure should be preserved"


def test_ac1_retain_configured_pii_unchanged(sample_text_multi_person):
    """AC1: PERSON entities configured with the retain technique stay in plaintext."""
    person_retention = RetainConfig(type="retain", pii=[PIIEntityEnum.PERSON])
    retain_config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_retention)],
    )

    output_text, raw_metrics = run_unstructured_op(retain_config, sample_text_multi_person)
    parsed = parse_metrics_markdown(raw_metrics)

    # Spot-check two of the names from the input text.
    for kept_name in ("Alice Johnson", "Bob Williams"):
        assert kept_name in output_text, "Retained PERSON PII should remain unchanged"

    # The metrics must record 'retain' as the technique for PERSON.
    recorded_technique = parsed["techniques_applied"].get("PERSON", "")
    assert (
        "retain" in recorded_technique.lower()
    ), "Retain technique should be recorded for PERSON type"


def test_ac1_unconfigured_pii_replaced_with_placeholders(sample_text_mixed_pii):
    """AC1: PII types absent from the configuration are swapped for ``{{TYPE}}`` placeholders."""
    person_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON],
        key_name="test_person_only_key",
    )
    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_encryption)],
    )

    clear_vault_key("test_person_only_key")

    output_text, _metrics_md = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)

    # PERSON is configured, so the name itself must not survive.
    assert "Emily Watson" not in output_text, "Configured PERSON should be encrypted"

    # Placeholder delimiters must appear for the unconfigured types.
    assert all(
        delimiter in output_text for delimiter in ("{{", "}}")
    ), "Unconfigured PII should be replaced with placeholders"

    # The raw email (an unconfigured type) must be gone.
    assert (
        "emily.watson@hospital.com" not in output_text
    ), "Unconfigured EMAIL should be replaced with placeholder"

    # At least one placeholder must name its entity type.
    assert any(
        placeholder in output_text for placeholder in ("{{EMAIL}}", "{{URL}}")
    ), "Placeholders should indicate entity type"


def test_ac1_mixed_techniques_applied_correctly(sample_text_mixed_pii, mixed_technique_config):
    """AC1: Encrypt and retain techniques configured together are each applied to their types."""
    clear_vault_key("test_mixed_key")

    output_text, raw_metrics = run_unstructured_op(mixed_technique_config, sample_text_mixed_pii)
    applied = parse_metrics_markdown(raw_metrics)["techniques_applied"]

    # PERSON and EMAIL are encrypted, so their raw values must disappear.
    assert "Emily Watson" not in output_text, "Configured PERSON should be encrypted"
    assert "emily.watson@hospital.com" not in output_text, "Configured EMAIL should be encrypted"

    # PHONE_NUMBERS is retained, so the raw value must still be there.
    assert "+44-20-7946-0958" in output_text, "Configured PHONE_NUMBERS should be retained"

    # The metrics must attribute the right technique to each entity type.
    expectations = (
        ("PERSON", "encrypt", "Encrypt technique should be applied to PERSON"),
        ("EMAIL", "encrypt", "Encrypt technique should be applied to EMAIL"),
        ("PHONE_NUMBERS", "retain", "Retain technique should be applied to PHONE_NUMBERS"),
    )
    for entity_type, technique_name, failure_message in expectations:
        assert technique_name in applied.get(entity_type, "").lower(), failure_message


def test_ac1_multiple_instances_same_pii_type(sample_text_multi_person, encrypt_person_config):
    """AC1: Every occurrence of a configured PII type is processed, not just the first."""
    clear_vault_key("test_person_key")

    output_text, raw_metrics = run_unstructured_op(encrypt_person_config, sample_text_multi_person)
    parsed = parse_metrics_markdown(raw_metrics)

    # The five names present in the multi-person sample text.
    person_names = (
        "Alice Johnson",
        "Bob Williams",
        "Charlie Brown",
        "Maria Garcia",
        "David Wilson",
    )
    for name in person_names:
        assert name not in output_text, f"All PERSON instances should be encrypted: {name}"

    # The PERSON count in the metrics must cover at least those names.
    expected_minimum = len(person_names)
    detected_persons = parsed["pii_by_type"].get("PERSON", 0)
    assert (
        detected_persons >= expected_minimum
    ), f"Should detect at least {expected_minimum} PERSON entities"


def test_ac1_empty_text_returns_empty(encrypt_person_config):
    """AC1: An empty string input is rejected with a ValueError that mentions 'empty'."""
    clear_vault_key("test_person_key")

    with pytest.raises(ValueError) as raised:
        run_unstructured_op(encrypt_person_config, "")

    error_text = str(raised.value).lower()
    assert "empty" in error_text, "Error should indicate empty input"


def test_ac1_text_without_pii_remains_unchanged():
    """AC1: Input that contains no PII comes back unchanged and reports zero detections."""
    no_pii_text = """
    The weather today is sunny with a high of 25 degrees Celsius.
    The conference starts at 9:00 AM in Room 301.
    """

    person_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON],
        key_name="test_no_pii_key",
    )
    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_encryption)],
    )

    clear_vault_key("test_no_pii_key")

    output_text, raw_metrics = run_unstructured_op(config, no_pii_text)
    parsed = parse_metrics_markdown(raw_metrics)

    # Compare modulo leading/trailing whitespace only.
    assert output_text.strip() == no_pii_text.strip(), "Text without PII should remain unchanged"
    assert parsed["total_pii_detected"] == 0, "No PII should be detected"


def test_ac1_placeholder_format_indicates_entity_type(sample_text_mixed_pii):
    """AC1: Placeholders for unconfigured PII follow the ``{{ENTITY_TYPE}}`` format."""
    person_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON],
        key_name="test_placeholder_key",
    )
    encrypt_person_only = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_encryption)],
    )

    clear_vault_key("test_placeholder_key")

    output_text, raw_metrics = run_unstructured_op(encrypt_person_only, sample_text_mixed_pii)
    parsed = parse_metrics_markdown(raw_metrics)

    # Collect every {{UPPER_CASE}} token found in the output.
    found_placeholders = re.findall(r"\{\{[A-Z_]+\}\}", output_text)
    assert (
        len(found_placeholders) > 0
    ), "Result should contain entity-type placeholders for unconfigured PII"

    # The metrics must list at least one detected PII type.
    detected_types = parsed["pii_by_type"]
    assert len(detected_types) > 0, "Metrics should list detected PII types"


# ================================================================================================
# AC2: Invalid Execution Handling
# ================================================================================================


def test_ac2_graceful_abort_on_scrubadub_failure():
    """AC2: The op raises a RuntimeError when the PII detection engine (Scrubadub) fails.

    ``Scrubber`` is replaced by a mock whose ``iter_filth`` raises, and the
    resulting error message is expected to point at the detection failure.
    """
    text = "Test user John Smith with email john@example.com"

    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[
            PseudoTechniqueConfig(
                technique=EncryptConfig(
                    type="encrypt",
                    pii=[PIIEntityEnum.PERSON],
                    key_name="test_abort_key",
                )
            )
        ],
    )

    clear_vault_key("test_abort_key")

    # Patch through the same package root the op under test is imported from
    # (template_code_location...), so the target resolves via the module
    # object that is actually executed rather than a differently-rooted path.
    scrubber_target = (
        "template_code_location.field_level_pseudo_anonymisation"
        ".unstructured_ops.scrubadub.Scrubber"
    )
    with patch(scrubber_target) as mock_scrubber_class:
        mock_scrubber = MagicMock()
        mock_scrubber.iter_filth.side_effect = RuntimeError("Scrubadub internal error")
        mock_scrubber_class.return_value = mock_scrubber

        with pytest.raises(RuntimeError) as exc_info:
            run_unstructured_op(config, text)

        error_msg = str(exc_info.value).lower()
        assert (
            "pii" in error_msg
            or "detection" in error_msg
            or "scrubadub" in error_msg
            or "failed" in error_msg
        ), "Error message should indicate PII detection failure"


def test_ac2_graceful_abort_on_encryption_failure(sample_text_en):
    """AC2: A failure inside the encrypt technique surfaces as a RuntimeError from the op."""
    person_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON],
        key_name="test_encrypt_fail_key",
    )
    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_encryption)],
    )

    clear_vault_key("test_encrypt_fail_key")

    # Replace the encrypt function in the techniques module with one that raises.
    # NOTE(review): this target is rooted at `field_level_pseudo_anonymisation`,
    # while the op is imported via `template_code_location...` -- confirm this is
    # the module object the op actually looks `encrypt` up on.
    encrypt_target = ".".join(
        (
            "field_level_pseudo_anonymisation",
            "techniques",
            "anonymisation_pseudonymisation_techniques",
            "encrypt",
        )
    )
    with patch(encrypt_target, side_effect=Exception("Encryption algorithm failure")):
        with pytest.raises(RuntimeError) as raised:
            run_unstructured_op(config, sample_text_en)

        message = str(raised.value).lower()
        assert any(
            keyword in message for keyword in ("encrypt", "failed", "technique")
        ), "Error message should indicate encryption failure"


def test_ac2_null_text_input_raises_error(encrypt_person_config):
    """AC2: Passing ``None`` as the text input is rejected with an error."""
    clear_vault_key("test_person_key")

    from dagster import DagsterTypeCheckDidNotPass

    # Accept either Dagster's type-check rejection or an error raised by the op.
    acceptable_errors = (ValueError, DagsterTypeCheckDidNotPass, TypeError)
    with pytest.raises(acceptable_errors):
        run_unstructured_op(encrypt_person_config, None)


def test_ac2_invalid_language_configuration():
    """AC2: An unsupported language value is rejected when the config model is created.

    The valid nested technique config is built outside the ``pytest.raises``
    block so that only the top-level construction with the bad language can
    satisfy the expectation; an unrelated validation error in the nested
    models would otherwise make the test pass for the wrong reason.
    """
    valid_technique = PseudoTechniqueConfig(
        technique=EncryptConfig(type="encrypt", pii=[PIIEntityEnum.PERSON], key_name="test_key")
    )

    # This should fail at config creation due to Pydantic validation
    with pytest.raises((ValueError, TypeError)):
        AnonymisePseudonymizeUnstructuredConfig(
            language="invalid_lang",  # Should fail Pydantic validation
            used_function=[valid_technique],
        )


def test_ac2_very_large_text_processing():
    """AC2: A large input made of a repeated PII-bearing snippet is processed successfully."""
    # One snippet with a name and an email, repeated 1000 times.
    snippet = """
    John Smith works at company. Email: john.smith@example.com.
    """
    large_text = snippet * 1000

    person_and_email_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
        key_name="test_large_text_key",
    )
    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[PseudoTechniqueConfig(technique=person_and_email_encryption)],
    )

    clear_vault_key("test_large_text_key")

    output_text, raw_metrics = run_unstructured_op(config, large_text)
    parsed = parse_metrics_markdown(raw_metrics)

    # The op must finish, return non-empty text and report detections.
    assert output_text is not None, "Large text should be processed successfully"
    assert len(output_text) > 0, "Result should not be empty"
    assert parsed["total_pii_detected"] > 0, "PII should be detected in large text"


# ================================================================================================
# AC3: Execution Audit & Logging - Positive Scenario
# ================================================================================================


def test_ac3_successful_execution_logs_timestamp_and_run_id(sample_text_en, encrypt_person_config):
    """AC3: A successful run exposes a run ID on the context and returns both outputs."""
    clear_vault_key("test_person_key")

    op_context = build_op_context(
        op_config=config_to_dagster_dict_unstructured(encrypt_person_config)
    )

    # Read the run identifier from the context before the op executes.
    captured_run_id = op_context.run_id

    text_output, metrics_output = anonymize_pseudonymize_unstructured(
        op_context, text=sample_text_en
    )

    assert captured_run_id is not None, "Run ID must be available for audit logging"

    # Both outputs must exist so they can be logged.
    assert text_output is not None, "Result text should be available for logging"
    assert metrics_output is not None, "Metrics should be available for logging"


def test_ac3_successful_execution_logs_configuration_parameters(
    sample_text_en, mixed_technique_config
):
    """AC3: After a successful run, the op config and the metrics expose the techniques used."""
    clear_vault_key("test_mixed_key")

    op_config_dict = config_to_dagster_dict_unstructured(mixed_technique_config)
    op_context = build_op_context(op_config=op_config_dict)

    _text_output, metrics_output = anonymize_pseudonymize_unstructured(
        op_context, text=sample_text_en
    )

    # The config dict handed to the op must carry both technique entries.
    assert "used_function" in op_config_dict, "Configuration must be accessible for logging"
    assert len(op_config_dict["used_function"]) == 2, "Multiple techniques should be captured"

    # Each technique name must appear in the rendered technique entries.
    rendered_techniques = [str(entry["technique"]) for entry in op_config_dict["used_function"]]
    assert any(
        "encrypt" in rendered for rendered in rendered_techniques
    ), "Encrypt technique should be in configuration"
    assert any(
        "retain" in rendered for rendered in rendered_techniques
    ), "Retain technique should be in configuration"

    # The metrics markdown must contain the techniques section heading.
    metrics_markdown = metrics_output.value
    assert (
        "Techniques Applied" in metrics_markdown
    ), "Applied techniques should be in metrics for logging"


def test_ac3_successful_execution_logs_no_raw_pii(sample_text_mixed_pii, encrypt_person_config):
    """AC3: Neither the metrics output nor the op config contains raw PII from the input."""
    clear_vault_key("test_person_key")

    op_config_dict = config_to_dagster_dict_unstructured(encrypt_person_config)
    op_context = build_op_context(op_config=op_config_dict)

    _text_output, metrics_output = anonymize_pseudonymize_unstructured(
        op_context, text=sample_text_mixed_pii
    )

    # Raw values taken from the mixed-PII sample text.
    sensitive_values = ("Emily Watson", "emily.watson@hospital.com", "+44-20-7946-0958")

    # None of them may leak into the metrics markdown ...
    metrics_markdown = metrics_output.value
    for raw_value in sensitive_values:
        assert (
            raw_value not in metrics_markdown
        ), f"Raw PII value should not appear in metrics: {raw_value}"

    # ... nor into the string form of the configuration.
    rendered_config = str(op_config_dict)
    for raw_value in sensitive_values:
        assert (
            raw_value not in rendered_config
        ), f"Raw PII value should not appear in configuration logs: {raw_value}"


# ================================================================================================
# AC4: Execution Audit & Logging - Negative Scenario
# ================================================================================================


def test_ac4_failed_execution_logs_error_details():
    """AC4: Negative execution should surface clear error details (encryption key failure).

    Key retrieval is forced to fail, and the RuntimeError raised by the op is
    expected to mention both "key" and "failed".
    """
    text = "Test user John Smith"
    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[
            PseudoTechniqueConfig(
                technique=EncryptConfig(
                    type="encrypt",
                    pii=[PIIEntityEnum.PERSON],
                    key_name="test_fail_log_key",
                )
            )
        ],
    )
    clear_vault_key("test_fail_log_key")
    ctx = build_op_context(op_config=config_to_dagster_dict_unstructured(config))

    # Patch the name in the module the op under test is imported from
    # (template_code_location...); a differently-rooted module path may not be
    # the module object the op resolves this name in.
    key_factory_target = (
        "template_code_location.field_level_pseudo_anonymisation"
        ".unstructured_ops.create_get_encryption_key"
    )
    with patch(
        key_factory_target,
        side_effect=RuntimeError("Encryption key retrieval failed"),
    ):
        with pytest.raises(RuntimeError) as exc_info:
            # Iterate the op's result so execution runs and the exception is raised
            list(anonymize_pseudonymize_unstructured(ctx, text=text))

        msg = str(exc_info.value).lower()
        assert "key" in msg and "failed" in msg, "Error message should mention key failure"


def test_ac4_failed_execution_logs_configuration_used():
    """AC4: The attempted configuration is still available for logging after a failure.

    Scrubber initialisation is forced to fail; the test then checks that the
    op-config dict is intact and that the raised error describes the failure.
    """
    text = "Test data with person John Doe"

    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[
            PseudoTechniqueConfig(
                technique=EncryptConfig(
                    type="encrypt",
                    pii=[PIIEntityEnum.PERSON],
                    key_name="test_config_fail_key",
                )
            )
        ],
    )

    clear_vault_key("test_config_fail_key")

    op_config_dict = config_to_dagster_dict_unstructured(config)
    context = build_op_context(op_config=op_config_dict)

    # Patch the name in the module the op under test is imported from
    # (template_code_location...); a differently-rooted module path may not be
    # the module object the op resolves this name in.
    init_scrubber_target = (
        "template_code_location.field_level_pseudo_anonymisation"
        ".unstructured_ops._initialize_scrubber"
    )
    with patch(init_scrubber_target) as mock_init_scrubber:
        mock_init_scrubber.side_effect = Exception("Scrubber module not available")

        # Exception already covers RuntimeError, so one type is enough here.
        with pytest.raises(Exception) as exc_info:
            list(anonymize_pseudonymize_unstructured(context, text=text))

        # Verify configuration is still accessible despite failure
        assert op_config_dict is not None, "Configuration must be accessible for failure audit"
        assert (
            "used_function" in op_config_dict
        ), "Technique configuration should be available for diagnosis"

        # Verify error was raised with proper message
        error_msg = str(exc_info.value).lower()
        assert (
            "pii" in error_msg
            or "detection" in error_msg
            or "failed" in error_msg
            or "scrubber" in error_msg
            or "module" in error_msg
        ), "Error should indicate detection/processing failed"


def test_ac4_failed_execution_logs_failure_reason():
    """AC4: The reason for a failure is clearly indicated in the error message.

    Key retrieval is forced to fail with a timeout error, and the RuntimeError
    raised by the op is expected to describe a key/encryption failure.
    """
    text = "User: Alice Smith, Email: alice@example.com"

    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.en,
        used_function=[
            PseudoTechniqueConfig(
                technique=EncryptConfig(
                    type="encrypt",
                    pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
                    key_name="test_failure_reason_key",
                )
            )
        ],
    )

    clear_vault_key("test_failure_reason_key")

    # Patch the name in the module the op under test is imported from
    # (template_code_location...); a differently-rooted module path may not be
    # the module object the op resolves this name in.
    key_factory_target = (
        "template_code_location.field_level_pseudo_anonymisation"
        ".unstructured_ops.create_get_encryption_key"
    )
    with patch(key_factory_target) as mock_get_key:
        mock_get_key.side_effect = RuntimeError("Vault connection timeout")

        with pytest.raises(RuntimeError) as exc_info:
            run_unstructured_op(config, text)

        # Verify failure reason is in error message
        error_msg = str(exc_info.value).lower()
        assert (
            "encrypt" in error_msg
            or "key" in error_msg
            or "timeout" in error_msg
            or "failed" in error_msg
        ), "Error should indicate key retrieval/encryption failure"


# ================================================================================================
# Additional Tests - Edge Cases and Integration
# ================================================================================================


def test_multi_language_support_italian():
    """Additional test: Italian text is altered by the op and yields PII detections."""
    italian_text = """
    Il dottor Marco Rossi lavora presso l'ospedale.
    Email: marco.rossi@ospedale.it
    Telefono: +39-06-12345678
    """

    person_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON],
        key_name="test_italian_key",
    )
    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.it,
        used_function=[PseudoTechniqueConfig(technique=person_encryption)],
    )

    clear_vault_key("test_italian_key")

    output_text, raw_metrics = run_unstructured_op(config, italian_text)
    parsed = parse_metrics_markdown(raw_metrics)

    # The output must differ from the input and the metrics must show detections.
    assert output_text != italian_text, "Italian text should be processed"
    assert parsed["total_pii_detected"] > 0, "PII should be detected in Italian text"


def test_special_characters_in_text():
    """Additional test: Text with accents, emoji and symbols is processed without failing."""
    special_text = """
    User: João da Silva 🇧🇷
    Email: joão@empresa.com.br
    Message: "Hello, World!" — Testing special chars: €, £, ¥, ©, ®
    """

    person_and_email_encryption = EncryptConfig(
        type="encrypt",
        pii=[PIIEntityEnum.PERSON, PIIEntityEnum.EMAIL],
        key_name="test_special_chars_key",
    )
    config = AnonymisePseudonymizeUnstructuredConfig(
        language=LanguageEnum.pt,
        used_function=[PseudoTechniqueConfig(technique=person_and_email_encryption)],
    )

    clear_vault_key("test_special_chars_key")

    output_text, _metrics_md = run_unstructured_op(config, special_text)

    # Only completion is checked: the op returned some non-empty text.
    assert output_text is not None, "Special characters should not cause processing failure"
    assert len(output_text) > 0, "Result should not be empty"


def test_deterministic_encryption_within_session(sample_text_en, encrypt_person_config):
    """Additional test: Two runs on the same input agree on detections and token counts."""
    clear_vault_key("test_person_key")

    first_text, first_metrics_md = run_unstructured_op(encrypt_person_config, sample_text_en)
    second_text, second_metrics_md = run_unstructured_op(encrypt_person_config, sample_text_en)

    # Each run must emit at least one encrypt token.
    assert "{encrypt:" in first_text, "First run should produce encrypted tokens"
    assert "{encrypt:" in second_text, "Second run should produce encrypted tokens"

    # The total number of detections must match between runs.
    first_metrics = parse_metrics_markdown(first_metrics_md)
    second_metrics = parse_metrics_markdown(second_metrics_md)
    assert (
        first_metrics["total_pii_detected"] == second_metrics["total_pii_detected"]
    ), "PII detection should be consistent across runs"

    # Count tokens matching the expected shape (Fernet base64 pattern) in each run.
    # NOTE(review): this comparison also holds when neither run matches the pattern.
    token_shape = re.compile(r"\{encrypt:gAAAAAB[A-Za-z0-9+/=_-]+\}")
    first_token_count = len(token_shape.findall(first_text))
    second_token_count = len(token_shape.findall(second_text))

    assert (
        first_token_count == second_token_count
    ), "Same number of encryption tokens should be generated"