Files
template-code-location/tests/data_processing/test_config_models.py

203 lines
7.6 KiB
Python

"""Unit tests for configuration models."""
import pytest
from pydantic import ValidationError
from template_code_location.data_processing.config_models import (
FillMissingConfiguration,
ColumnsSelectConfiguration,
SpellCheckConfiguration,
AggregationConfiguration
)
class TestColumnsSelectConfiguration:
    """Checks on the ``columns`` field of ColumnsSelectConfiguration."""

    def test_default_columns(self):
        """A configuration built with no arguments exposes ``['Name']``."""
        default_config = ColumnsSelectConfiguration()
        assert default_config.columns == ['Name']

    def test_custom_columns(self):
        """An explicit list of several column names is kept as given."""
        custom_config = ColumnsSelectConfiguration(
            columns=['Col1', 'Col2', 'Col3'],
        )
        assert custom_config.columns == ['Col1', 'Col2', 'Col3']

    def test_empty_columns_list(self):
        """An empty list is accepted and returned unchanged."""
        empty_config = ColumnsSelectConfiguration(columns=[])
        assert empty_config.columns == []

    def test_single_column(self):
        """A one-element list is accepted and returned unchanged."""
        single_config = ColumnsSelectConfiguration(columns=['SingleCol'])
        assert single_config.columns == ['SingleCol']

    def test_columns_with_special_characters(self):
        """Names containing '-', '_' and '.' are kept as given."""
        special_config = ColumnsSelectConfiguration(
            columns=['Col-1', 'Col_2', 'Col.3'],
        )
        assert special_config.columns == ['Col-1', 'Col_2', 'Col.3']

    def test_duplicate_columns_are_removed(self):
        """Duplicates are removed while first-occurrence order is preserved.

        The original author's note attributes this to ``dict.fromkeys`` in
        the model; that implementation is not visible from this test file.
        """
        deduped_config = ColumnsSelectConfiguration(
            columns=['A', 'B', 'A', 'C', 'B'],
        )
        assert deduped_config.columns == ['A', 'B', 'C']

    def test_duplicate_default_behavior(self):
        """An extreme input (the same name repeated) collapses to one entry."""
        repeated_config = ColumnsSelectConfiguration(
            columns=['Name', 'Name', 'Name'],
        )
        assert repeated_config.columns == ['Name']
class TestFillMissingConfiguration:
    """Checks on the ``fill_map`` field of FillMissingConfiguration."""

    def test_default_fill_map(self):
        """A configuration built with no arguments maps 'Age' to 'UNKNOWN_AGE'."""
        default_config = FillMissingConfiguration()
        assert default_config.fill_map == {'Age': 'UNKNOWN_AGE'}

    def test_custom_fill_map(self):
        """A mapping covering several columns is stored as given."""
        replacements = {'Age': '0', 'Name': 'UNKNOWN', 'City': 'N/A'}
        custom_config = FillMissingConfiguration(fill_map=replacements)
        assert custom_config.fill_map == replacements

    def test_empty_fill_map(self):
        """An empty mapping is accepted and returned unchanged."""
        empty_config = FillMissingConfiguration(fill_map={})
        assert empty_config.fill_map == {}

    def test_fill_map_with_numeric_values(self):
        """Replacement values that are numeric-looking strings are kept."""
        replacements = {'Age': '0', 'Score': '-1', 'Count': '999'}
        numeric_config = FillMissingConfiguration(fill_map=replacements)
        assert numeric_config.fill_map == replacements

    def test_fill_map_with_string_values(self):
        """Replacement values that are plain text strings are kept."""
        replacements = {'Name': 'Unknown', 'Email': 'no-email'}
        text_config = FillMissingConfiguration(fill_map=replacements)
        assert text_config.fill_map == replacements

    def test_fill_map_mixed_types(self):
        """Int-like, text and float-like values (all given as str) are kept."""
        replacements = {'IntCol': '0', 'StrCol': 'Unknown', 'FloatCol': '0.0'}
        mixed_config = FillMissingConfiguration(fill_map=replacements)
        assert mixed_config.fill_map == replacements
class TestSpellCheckConfiguration:
    """Checks on the ``columns`` and ``language`` fields of SpellCheckConfiguration."""

    # Language codes these tests expect the model to accept; shared by the
    # loop-based test and the parametrized test below.
    SUPPORTED_LANGUAGES = ['en', 'es', 'it', 'fr', 'pt', 'de', 'nl']

    def test_default_spell_check_config(self):
        """Defaults are ``['Name']`` for columns and ``'en'`` for language."""
        default_config = SpellCheckConfiguration()
        assert default_config.columns == ['Name']
        assert default_config.language == 'en'

    def test_custom_spell_check_config(self):
        """Explicit columns and language are both stored as given."""
        custom_config = SpellCheckConfiguration(
            columns=['Description', 'Notes'],
            language='es',
        )
        assert custom_config.columns == ['Description', 'Notes']
        assert custom_config.language == 'es'

    def test_spell_check_all_languages(self):
        """Every supported language code is accepted, checked in one test."""
        for code in self.SUPPORTED_LANGUAGES:
            lang_config = SpellCheckConfiguration(language=code)
            assert lang_config.language == code

    def test_spell_check_invalid_language(self):
        """An unsupported language code raises ValidationError."""
        with pytest.raises(ValidationError):
            SpellCheckConfiguration(language='invalid')

    def test_spell_check_multiple_columns(self):
        """A list of four columns is stored as given."""
        requested = ['Col1', 'Col2', 'Col3', 'Col4']
        multi_config = SpellCheckConfiguration(columns=requested)
        assert multi_config.columns == requested

    def test_spell_check_empty_columns(self):
        """An empty column list is accepted; language keeps its default."""
        empty_config = SpellCheckConfiguration(columns=[])
        assert empty_config.columns == []
        assert empty_config.language == 'en'

    def test_spell_check_inheritance(self):
        """Instances are ColumnsSelectConfiguration and expose both fields."""
        instance = SpellCheckConfiguration()
        assert isinstance(instance, ColumnsSelectConfiguration)
        assert hasattr(instance, 'columns')
        assert hasattr(instance, 'language')

    @pytest.mark.parametrize("language", SUPPORTED_LANGUAGES)
    def test_spell_check_languages_parametrized(self, language):
        """Each supported language code is accepted, one test case per code."""
        lang_config = SpellCheckConfiguration(language=language)
        assert lang_config.language == language
class TestAggregationConfiguration:
    """Checks on the ``columns`` and ``operation`` fields of AggregationConfiguration."""

    def test_aggregation_default_config(self):
        """Defaults are ``['Name']`` for columns and ``'sum'`` for operation."""
        default_config = AggregationConfiguration()
        assert default_config.columns == ['Name']
        assert default_config.operation == 'sum'

    @pytest.mark.parametrize("op", ["sum", "mean", "min", "max", "count"])
    def test_aggregation_valid_operations(self, op):
        """Each allowed operation name is accepted and stored as given."""
        op_config = AggregationConfiguration(operation=op)
        assert op_config.operation == op

    def test_aggregation_invalid_operation(self):
        """An unknown operation raises ValidationError naming the bad value."""
        with pytest.raises(ValidationError) as raised:
            AggregationConfiguration(operation="invalid_op")
        # Inspect the message only after the context manager has captured
        # the exception; nothing after the raising call runs inside the block.
        message = str(raised.value)
        assert "Invalid aggregation operation 'invalid_op'" in message

    def test_aggregation_custom_columns(self):
        """Explicit columns and operation are both stored as given."""
        custom_config = AggregationConfiguration(
            columns=['Price', 'Quantity'],
            operation='mean',
        )
        assert custom_config.columns == ['Price', 'Quantity']
        assert custom_config.operation == 'mean'

    def test_aggregation_inheritance(self):
        """Instances are ColumnsSelectConfiguration and expose both fields."""
        instance = AggregationConfiguration()
        assert isinstance(instance, ColumnsSelectConfiguration)
        assert hasattr(instance, 'columns')
        assert hasattr(instance, 'operation')

    def test_aggregation_model_dump(self):
        """``model_dump()`` includes both fields (useful for the Dagster op)."""
        dumped = AggregationConfiguration(
            columns=['Value'],
            operation='max',
        ).model_dump()
        assert dumped['columns'] == ['Value']
        assert dumped['operation'] == 'max'