# Listing metadata from the code viewer: 634 lines, 25 KiB, Python.
import pytest
|
|
from pydantic import ValidationError
|
|
|
|
from template_code_location.field_level_pseudo_anonymisation.config_models.structured_config import (
|
|
AnonymisePseudonymizeStructuredConfig,
|
|
DepseudonymizeStructuredConfig,
|
|
PseudoTechniqueConfig,
|
|
DepseudoTechniqueConfig,
|
|
HashConfig,
|
|
EncryptConfig,
|
|
RedactConfig,
|
|
ReplaceConfig,
|
|
DecryptConfig,
|
|
)
|
|
from template_code_location.field_level_pseudo_anonymisation.config_models.unstructured_config import (
|
|
AnonymisePseudonymizeUnstructuredConfig,
|
|
DepseudonymizeUnstructuredConfig,
|
|
PseudoTechniqueConfig as UnstructuredPseudoTechniqueConfig,
|
|
DepseudoTechniqueConfig as UnstructuredDepseudoTechniqueConfig,
|
|
HashConfig as UnstructuredHashConfig,
|
|
EncryptConfig as UnstructuredEncryptConfig,
|
|
RedactConfig as UnstructuredRedactConfig,
|
|
ReplaceConfig as UnstructuredReplaceConfig,
|
|
RetainConfig,
|
|
DecryptConfig as UnstructuredDecryptConfig,
|
|
)
|
|
from template_code_location.field_level_pseudo_anonymisation.config_models.languages import LanguageEnum
|
|
from template_code_location.field_level_pseudo_anonymisation.config_models.pii_entities import PIIEntityEnum
|
|
|
|
|
|
# ==================== Structured Config Tests ====================
|
|
|
|
class TestStructuredConfigValidators:
    """Tests for the validators and helper methods of the structured config."""

    def test_ensure_unique_columns_valid_single_technique(self):
        """One technique targeting one column is accepted."""
        encrypt_email = PseudoTechniqueConfig(
            technique=EncryptConfig(columns=["email"], key_name="key1")
        )
        config = AnonymisePseudonymizeStructuredConfig(
            used_function=[encrypt_email]
        )
        assert config is not None
        assert len(config.used_function) == 1

    def test_ensure_unique_columns_valid_multiple_techniques_different_columns(self):
        """Two techniques that target disjoint columns are accepted."""
        encrypt_email = PseudoTechniqueConfig(
            technique=EncryptConfig(columns=["email"], key_name="key1")
        )
        hash_ssn = PseudoTechniqueConfig(
            technique=HashConfig(columns=["ssn"], algorithm="sha256")
        )
        config = AnonymisePseudonymizeStructuredConfig(
            used_function=[encrypt_email, hash_ssn]
        )
        assert config is not None
        assert len(config.used_function) == 2

    def test_ensure_unique_columns_duplicate_columns_same_technique(self):
        """The same column listed under two techniques raises ValueError."""
        # Everything is built inside the ``raises`` block so that an error from
        # any of the constructors is captured, exactly as a single nested
        # expression would be.
        with pytest.raises(ValueError) as raised:
            AnonymisePseudonymizeStructuredConfig(
                used_function=[
                    PseudoTechniqueConfig(
                        technique=EncryptConfig(columns=["email"], key_name="key1")
                    ),
                    PseudoTechniqueConfig(
                        technique=HashConfig(columns=["email"], algorithm="sha256")
                    ),
                ]
            )
        message = str(raised.value)
        assert "Duplicate column" in message
        assert "email" in message

    def test_ensure_unique_columns_multiple_duplicates(self):
        """Every duplicated column is named in the error message."""
        with pytest.raises(ValueError) as raised:
            AnonymisePseudonymizeStructuredConfig(
                used_function=[
                    PseudoTechniqueConfig(
                        technique=EncryptConfig(
                            columns=["email", "phone"], key_name="key1"
                        )
                    ),
                    PseudoTechniqueConfig(
                        technique=HashConfig(
                            columns=["email", "phone"], algorithm="sha256"
                        )
                    ),
                ]
            )
        message = str(raised.value)
        for fragment in ("Duplicate column", "email", "phone"):
            assert fragment in message

    def test_collect_column_to_techniques_single_technique(self):
        """_collect_column_to_techniques maps each column to its technique list."""
        encrypt_both = PseudoTechniqueConfig(
            technique=EncryptConfig(columns=["email", "phone"], key_name="key1")
        )
        config = AnonymisePseudonymizeStructuredConfig(used_function=[encrypt_both])
        expected = {"email": ["encrypt"], "phone": ["encrypt"]}
        assert config._collect_column_to_techniques() == expected

    def test_extract_technique_and_columns_dict_with_type_field(self):
        """A dict technique carrying an explicit 'type' key is decoded."""
        payload = {
            "technique": {
                "type": "encrypt",
                "columns": ["email", "ssn"],
                "key_name": "test_key",
            }
        }
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns(payload)
        assert kind == "encrypt"
        assert cols == ["email", "ssn"]

    def test_extract_technique_and_columns_dict_with_variant_mapping(self):
        """A variant-key mapping such as {'encrypt': {...}} is decoded."""
        payload = {
            "technique": {
                "encrypt": {"columns": ["ssn"], "key_name": "test_key"},
            }
        }
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns(payload)
        assert kind == "encrypt"
        assert cols == ["ssn"]

    def test_extract_technique_and_columns_model_instance(self):
        """A PseudoTechniqueConfig instance yields its technique type and columns."""
        redact_address = PseudoTechniqueConfig(
            technique=RedactConfig(columns=["address"])
        )
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns(redact_address)
        assert kind == "redact"
        assert cols == ["address"]

    def test_extract_technique_and_columns_empty_dict(self):
        """An empty technique dict yields (None, [])."""
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns({"technique": {}})
        assert kind is None
        assert cols == []

    def test_extract_technique_and_columns_none_technique(self):
        """A None technique yields (None, [])."""
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns({"technique": None})
        assert kind is None
        assert cols == []

    def test_extract_technique_and_columns_missing_columns_key(self):
        """A dict technique without 'columns' yields an empty column list."""
        payload = {"technique": {"type": "encrypt", "key_name": "test_key"}}
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns(payload)
        assert kind == "encrypt"
        assert cols == []

    def test_extract_technique_and_columns_model_without_columns_attr(self):
        """A ReplaceConfig-backed instance yields 'replace' and its columns.

        NOTE(review): the test name suggests a model lacking a ``columns``
        attribute, but the ReplaceConfig built here is given columns, so that
        path is presumably not exercised -- confirm the intent.
        """
        replace_old = PseudoTechniqueConfig(
            technique=ReplaceConfig(columns=["old_value"], new_value="NEW")
        )
        config = AnonymisePseudonymizeStructuredConfig()
        kind, cols = config._extract_technique_and_columns(replace_old)
        assert kind == "replace"
        assert cols == ["old_value"]
|
|
|
|
|
|
class TestStructuredDepseudonymizeConfig:
    """Tests for DepseudonymizeStructuredConfig."""

    def test_depseudonymize_config_normalize_used_function_with_dict(self):
        """A dict entry in used_function becomes a DepseudoTechniqueConfig."""
        raw_entry = {
            "technique": {
                "type": "decrypt",
                "columns": ["email"],
                "key_name": "key1",
            }
        }
        config = DepseudonymizeStructuredConfig(used_function=[raw_entry])
        assert len(config.used_function) == 1
        first = config.used_function[0]
        assert isinstance(first, DepseudoTechniqueConfig)
        assert first.technique.type == "decrypt"

    def test_depseudonymize_config_normalize_used_function_with_model(self):
        """A model instance in used_function is kept as the very same object."""
        decrypt_email = DepseudoTechniqueConfig(
            technique=DecryptConfig(columns=["email"], key_name="key1")
        )
        config = DepseudonymizeStructuredConfig(used_function=[decrypt_email])
        assert len(config.used_function) == 1
        assert config.used_function[0] is decrypt_email

    def test_depseudonymize_config_ensure_unique_columns_no_op(self):
        """Repeating a column across decrypt entries does not raise."""
        # For depseudonymize, there's no per-column uniqueness constraint,
        # so the same column may appear under two different keys.
        entries = [
            DepseudoTechniqueConfig(
                technique=DecryptConfig(columns=["email"], key_name="key1")
            ),
            DepseudoTechniqueConfig(
                technique=DecryptConfig(columns=["email"], key_name="key2")
            ),
        ]
        config = DepseudonymizeStructuredConfig(used_function=entries)
        # Reaching this line means construction did not raise.
        assert config is not None
|
|
|
|
|
|
# ==================== Unstructured Config Tests ====================
|
|
|
|
class TestUnstructuredConfigValidators:
    """Tests for the validators and helper methods of the unstructured config."""

    def test_normalize_used_function_with_dict(self):
        """A dict entry in used_function is accepted."""
        raw_entry = {
            "technique": {
                "encrypt": {
                    "pii": [PIIEntityEnum.EMAIL.value],
                    "key_name": "key1",
                }
            }
        }
        config = AnonymisePseudonymizeUnstructuredConfig(
            language=LanguageEnum.en,
            used_function=[raw_entry],
        )
        assert len(config.used_function) == 1

    def test_normalize_used_function_with_model(self):
        """A model instance in used_function is accepted."""
        encrypt_email = UnstructuredPseudoTechniqueConfig(
            technique=UnstructuredEncryptConfig(
                pii=[PIIEntityEnum.EMAIL.value], key_name="key1"
            )
        )
        config = AnonymisePseudonymizeUnstructuredConfig(
            language=LanguageEnum.en,
            used_function=[encrypt_email],
        )
        assert len(config.used_function) == 1

    def test_ensure_unique_pii_valid_different_pii_types(self):
        """Techniques that target distinct PII types pass validation."""
        encrypt_email = UnstructuredPseudoTechniqueConfig(
            technique=UnstructuredEncryptConfig(
                pii=[PIIEntityEnum.EMAIL.value], key_name="key1"
            )
        )
        hash_person = UnstructuredPseudoTechniqueConfig(
            technique=UnstructuredHashConfig(
                pii=[PIIEntityEnum.PERSON.value], algorithm="sha256"
            )
        )
        config = AnonymisePseudonymizeUnstructuredConfig(
            language=LanguageEnum.en,
            used_function=[encrypt_email, hash_person],
        )
        assert config is not None
        assert len(config.used_function) == 2

    def test_ensure_unique_pii_duplicate_pii_types(self):
        """The same PII type listed under two techniques raises ValueError."""
        with pytest.raises(ValueError) as raised:
            AnonymisePseudonymizeUnstructuredConfig(
                language=LanguageEnum.en,
                used_function=[
                    UnstructuredPseudoTechniqueConfig(
                        technique=UnstructuredEncryptConfig(
                            pii=[PIIEntityEnum.EMAIL.value], key_name="key1"
                        )
                    ),
                    UnstructuredPseudoTechniqueConfig(
                        technique=UnstructuredHashConfig(
                            pii=[PIIEntityEnum.EMAIL.value], algorithm="sha256"
                        )
                    ),
                ],
            )
        message = str(raised.value)
        assert "Duplicate PII" in message
        # Error message shows PIIEntityEnum.EMAIL (the enum repr) rather than
        # the value, hence the check on the member name.
        assert "EMAIL" in message

    def test_collect_pii_to_techniques_single_technique(self):
        """_collect_pii_to_techniques maps each PII type to its technique list."""
        encrypt_two = UnstructuredPseudoTechniqueConfig(
            technique=UnstructuredEncryptConfig(
                pii=[PIIEntityEnum.EMAIL.value, PIIEntityEnum.PERSON.value],
                key_name="key1",
            )
        )
        config = AnonymisePseudonymizeUnstructuredConfig(
            language=LanguageEnum.en,
            used_function=[encrypt_two],
        )
        expected = {
            PIIEntityEnum.EMAIL.value: ["encrypt"],
            PIIEntityEnum.PERSON.value: ["encrypt"],
        }
        assert config._collect_pii_to_techniques() == expected

    def test_extract_technique_and_pii_dict_with_type_field(self):
        """A dict technique carrying an explicit 'type' key is decoded."""
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        payload = {
            "technique": {
                "type": "encrypt",
                "pii": [PIIEntityEnum.EMAIL.value],
                "key_name": "test_key",
            }
        }
        kind, entities = config._extract_technique_and_pii(payload)
        assert kind == "encrypt"
        assert entities == [PIIEntityEnum.EMAIL.value]

    def test_extract_technique_and_pii_dict_with_variant_mapping(self):
        """A variant-key mapping such as {'hash': {...}} is decoded."""
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        payload = {
            "technique": {
                "hash": {
                    "pii": [PIIEntityEnum.PERSON.value],
                    "algorithm": "sha256",
                }
            }
        }
        kind, entities = config._extract_technique_and_pii(payload)
        assert kind == "hash"
        assert entities == [PIIEntityEnum.PERSON.value]

    def test_extract_technique_and_pii_dict_fallback_to_columns(self):
        """Without a 'pii' key, the 'columns' entry is returned instead."""
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        payload = {"technique": {"type": "redact", "columns": ["fallback_col"]}}
        kind, entities = config._extract_technique_and_pii(payload)
        assert kind == "redact"
        assert entities == ["fallback_col"]

    def test_extract_technique_and_pii_model_instance(self):
        """A model instance yields its technique type and PII list."""
        redact_email = UnstructuredPseudoTechniqueConfig(
            technique=UnstructuredRedactConfig(pii=[PIIEntityEnum.EMAIL.value])
        )
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        kind, entities = config._extract_technique_and_pii(redact_email)
        assert kind == "redact"
        assert entities == [PIIEntityEnum.EMAIL.value]

    def test_extract_technique_and_pii_model_with_getattr_fallback(self):
        """A RetainConfig-backed instance yields 'retain' and its PII list."""
        # NOTE(review): the test name refers to a getattr fallback to
        # ``columns``, but the RetainConfig built here is given ``pii``, so the
        # fallback is presumably not exercised -- confirm the intent.
        retain_person = UnstructuredPseudoTechniqueConfig(
            technique=RetainConfig(pii=[PIIEntityEnum.PERSON.value])
        )
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        kind, entities = config._extract_technique_and_pii(retain_person)
        assert kind == "retain"
        assert entities == [PIIEntityEnum.PERSON.value]

    def test_extract_technique_and_pii_empty_dict(self):
        """An empty technique dict yields (None, [])."""
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        kind, entities = config._extract_technique_and_pii({"technique": {}})
        assert kind is None
        assert entities == []

    def test_extract_technique_and_pii_missing_pii_key(self):
        """A dict technique with neither 'pii' nor 'columns' yields an empty list."""
        config = AnonymisePseudonymizeUnstructuredConfig(language=LanguageEnum.en)
        payload = {"technique": {"type": "encrypt", "key_name": "test_key"}}
        kind, entities = config._extract_technique_and_pii(payload)
        assert kind == "encrypt"
        assert entities == []
|
|
|
|
|
|
class TestUnstructuredDepseudonymizeConfig:
    """Tests for DepseudonymizeUnstructuredConfig."""

    def test_depseudonymize_unstructured_config_default(self):
        """A config built with no arguments has at least one used_function entry."""
        default_config = DepseudonymizeUnstructuredConfig()
        assert default_config is not None
        assert len(default_config.used_function) >= 1

    def test_depseudonymize_unstructured_config_with_custom_function(self):
        """An explicitly supplied decrypt entry is kept with its key name."""
        custom_decrypt = UnstructuredDepseudoTechniqueConfig(
            technique=UnstructuredDecryptConfig(key_name="custom_key")
        )
        config = DepseudonymizeUnstructuredConfig(used_function=[custom_decrypt])
        assert len(config.used_function) == 1
        assert config.used_function[0].technique.key_name == "custom_key"
|
|
|
|
|
|
class TestLanguageSupport:
    """Tests for language configuration support."""

    # Parametrized rather than looped inside one test: each language is then
    # reported as its own test case, and one failing language no longer hides
    # the result for the languages that come after it.
    @pytest.mark.parametrize(
        "lang",
        [
            LanguageEnum.hr, LanguageEnum.da, LanguageEnum.nl, LanguageEnum.en,
            LanguageEnum.fi, LanguageEnum.fr, LanguageEnum.de, LanguageEnum.el,
            LanguageEnum.it, LanguageEnum.lt, LanguageEnum.pl, LanguageEnum.pt,
            LanguageEnum.ro, LanguageEnum.sl, LanguageEnum.es, LanguageEnum.sv,
        ],
    )
    def test_all_supported_languages(self, lang):
        """Each listed language can be set and is stored unchanged.

        Args:
            lang: The LanguageEnum member supplied by the parametrization.
        """
        config = AnonymisePseudonymizeUnstructuredConfig(language=lang)
        assert config.language == lang

    def test_default_language_is_english(self):
        """A config built without a language uses LanguageEnum.en."""
        config = AnonymisePseudonymizeUnstructuredConfig()
        assert config.language == LanguageEnum.en
|
|
|
|
|
|
class TestTechniqueConfigDefaults:
    """Tests for the default field values of each technique config."""

    def test_hash_config_default_algorithm(self):
        """HashConfig() has algorithm 'sha256' and type 'hash'."""
        hash_cfg = HashConfig()
        assert hash_cfg.algorithm == "sha256"
        assert hash_cfg.type == "hash"

    def test_encrypt_config_defaults(self):
        """EncryptConfig() has type 'encrypt' and key_name 'my_key'."""
        encrypt_cfg = EncryptConfig()
        assert encrypt_cfg.type == "encrypt"
        assert encrypt_cfg.key_name == "my_key"

    def test_redact_config_defaults(self):
        """RedactConfig() has type 'redact'."""
        assert RedactConfig().type == "redact"

    def test_replace_config_defaults(self):
        """ReplaceConfig() has type 'replace' and new_value 'REPLACED'."""
        replace_cfg = ReplaceConfig()
        assert replace_cfg.type == "replace"
        assert replace_cfg.new_value == "REPLACED"

    def test_decrypt_config_defaults(self):
        """DecryptConfig() has type 'decrypt' and key_name 'my_key'."""
        decrypt_cfg = DecryptConfig()
        assert decrypt_cfg.type == "decrypt"
        assert decrypt_cfg.key_name == "my_key"

    def test_unstructured_retain_config_defaults(self):
        """RetainConfig() has type 'retain'."""
        assert RetainConfig().type == "retain"
|
|
|
|
|
|
class TestPseudoTechniqueConfigDefaults:
    """Tests for the default technique of the PseudoTechniqueConfig models."""

    @staticmethod
    def _assert_is_hash(technique):
        """Assert that ``technique`` denotes the hash technique.

        For Dagster Config, the technique may be a dict with the discriminator
        structure, so both the dict form and the model form are handled.
        """
        if not isinstance(technique, dict):
            assert technique.type == "hash"
            return
        # Dict form: either keyed by variant name or carrying a 'type' entry.
        assert "hash" in technique or technique.get("type") == "hash"

    def test_pseudo_technique_default_to_hash(self):
        """The structured PseudoTechniqueConfig defaults to the hash technique."""
        self._assert_is_hash(PseudoTechniqueConfig().technique)

    def test_unstructured_pseudo_technique_default_to_hash(self):
        """The unstructured PseudoTechniqueConfig defaults to the hash technique."""
        self._assert_is_hash(UnstructuredPseudoTechniqueConfig().technique)
|
|
|
|
|
|
class TestConfigModelIntegration:
    """Integration tests that combine several technique types in one config."""

    def test_structured_config_with_all_technique_types(self):
        """A structured config accepts hash, encrypt, redact and replace together."""
        entries = [
            PseudoTechniqueConfig(technique=HashConfig(columns=["col1"])),
            PseudoTechniqueConfig(
                technique=EncryptConfig(columns=["col2"], key_name="k1")
            ),
            PseudoTechniqueConfig(technique=RedactConfig(columns=["col3"])),
            PseudoTechniqueConfig(
                technique=ReplaceConfig(columns=["col4"], new_value="X")
            ),
        ]
        config = AnonymisePseudonymizeStructuredConfig(used_function=entries)
        assert len(config.used_function) == 4
        seen_types = {entry.technique.type for entry in config.used_function}
        assert seen_types == {"hash", "encrypt", "redact", "replace"}

    def test_unstructured_config_with_all_technique_types(self):
        """An unstructured config accepts all five technique types together."""
        entries = [
            UnstructuredPseudoTechniqueConfig(
                technique=UnstructuredHashConfig(pii=[PIIEntityEnum.EMAIL.value])
            ),
            UnstructuredPseudoTechniqueConfig(
                technique=UnstructuredEncryptConfig(
                    pii=[PIIEntityEnum.PERSON.value], key_name="k1"
                )
            ),
            UnstructuredPseudoTechniqueConfig(
                technique=UnstructuredRedactConfig(
                    pii=[PIIEntityEnum.PHONE_NUMBERS.value]
                )
            ),
            UnstructuredPseudoTechniqueConfig(
                technique=UnstructuredReplaceConfig(
                    pii=[PIIEntityEnum.CREDIT_CARD.value], new_value="X"
                )
            ),
            UnstructuredPseudoTechniqueConfig(
                technique=RetainConfig(pii=[PIIEntityEnum.DATE_OF_BIRTH.value])
            ),
        ]
        config = AnonymisePseudonymizeUnstructuredConfig(
            language=LanguageEnum.en,
            used_function=entries,
        )
        assert len(config.used_function) == 5
        seen_types = {entry.technique.type for entry in config.used_function}
        assert seen_types == {"hash", "encrypt", "redact", "replace", "retain"}
|