# template-code-location/tests/data_processing/test_ops.py

"""Unit tests for data processing operations."""

import pytest
import pandas as pd
from template_code_location.data_processing.ops import (
    remove_duplicates,
    fill_missing_values,
    standardize_categorical_values,
    correct_typos,
    normalize_datetime,
    normalize_numeric_min_max,
    normalize_coordinates,
    add_global_aggregations
)
from template_code_location.data_processing.config_models import (
    FillMissingConfiguration,
    ColumnsSelectConfiguration,
    SpellCheckConfiguration,
    AggregationConfiguration,
    CoordinatesNormalizationConfiguration
)


class TestRemoveDuplicates:
    """Tests for the remove_duplicates operation.

    Whenever the result is compared against the input frame, the operation
    receives a copy of that frame. Otherwise an implementation that mutates
    its argument in place and returns it would make the comparison trivially
    true (the frame would be compared with itself).
    """

    def test_remove_duplicates_basic(self, mock_context, sample_dataframe):
        """Test basic duplicate removal on the shared sample frame."""
        result = remove_duplicates(mock_context, sample_dataframe)

        # Should have 3 unique rows (john doe appears 3x, jane smith 1x, bob johnson 1x)
        assert result.shape[0] == 3
        assert len(result) < len(sample_dataframe)

    def test_remove_duplicates_no_duplicates(self, mock_context):
        """Test that a frame without duplicates is returned unchanged."""
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': ['x', 'y', 'z']
        })
        # Pass a copy so `df` remains a pristine reference for the comparison.
        result = remove_duplicates(mock_context, df.copy())

        assert result.shape[0] == 3
        pd.testing.assert_frame_equal(result, df)

    def test_remove_duplicates_all_duplicates(self, mock_context):
        """Test that identical rows collapse to a single row."""
        df = pd.DataFrame({
            'A': [1, 1, 1],
            'B': ['x', 'x', 'x']
        })
        result = remove_duplicates(mock_context, df)

        assert result.shape[0] == 1

    def test_remove_duplicates_empty_dataframe(self, mock_context, empty_dataframe):
        """Test that an empty DataFrame yields a result with no rows and no columns."""
        result = remove_duplicates(mock_context, empty_dataframe)

        assert result.shape[0] == 0
        assert result.shape[1] == 0

    def test_remove_duplicates_preserves_data_types(self, mock_context):
        """Test that column dtypes of the result match those of the input."""
        df = pd.DataFrame({
            'int_col': [1, 2, 1],
            'str_col': ['a', 'b', 'a'],
            'float_col': [1.5, 2.5, 1.5]
        })
        # Pass a copy so the expected dtypes are read from an untouched frame.
        result = remove_duplicates(mock_context, df.copy())

        assert result['int_col'].dtype == df['int_col'].dtype
        assert result['str_col'].dtype == df['str_col'].dtype
        assert result['float_col'].dtype == df['float_col'].dtype


class TestFillMissingValues:
    """Tests for the fill_missing_values operation.

    Each test builds a FillMissingConfiguration whose ``fill_map`` maps a
    column name to the replacement value for its missing entries.
    """

    def test_fill_missing_values_basic(self, mock_context, dataframe_with_missing_values):
        """Test that mapped columns contain no NaN after filling."""
        config = FillMissingConfiguration(fill_map={'Column1': '0', 'Column2': 'N/A'})
        result = fill_missing_values(mock_context, config, dataframe_with_missing_values)

        # Check that no NaN values remain
        assert result['Column1'].isna().sum() == 0
        assert result['Column2'].isna().sum() == 0

    def test_fill_missing_values_with_different_values(self, mock_context):
        """Test that each column receives its own replacement value."""
        df = pd.DataFrame({
            'A': [1, None, 3],
            'B': [None, 'b', 'c']
        })
        config = FillMissingConfiguration(fill_map={'A': '-1', 'B': 'UNKNOWN'})
        result = fill_missing_values(mock_context, config, df)

        assert result.loc[1, 'A'] == '-1'
        assert result.loc[0, 'B'] == 'UNKNOWN'

    def test_fill_missing_values_partial_columns(self, mock_context):
        """Test that only columns listed in the fill map are filled."""
        df = pd.DataFrame({
            'A': [1, None, 3],
            'B': [None, 'b', 'c']
        })
        config = FillMissingConfiguration(fill_map={'A': '999'})
        result = fill_missing_values(mock_context, config, df)

        assert result.loc[1, 'A'] == '999'
        assert pd.isna(result.loc[0, 'B'])  # B should still have NaN

    def test_fill_missing_values_no_missing(self, mock_context):
        """Test that a frame without missing values is returned unchanged."""
        df = pd.DataFrame({
            'A': ['1', '2', '3'],
            'B': ['a', 'b', 'c']
        })
        config = FillMissingConfiguration(fill_map={'A': '0'})
        # Pass a copy: comparing the result with the very object handed to the
        # operation could not detect an in-place modification.
        result = fill_missing_values(mock_context, config, df.copy())

        pd.testing.assert_frame_equal(result, df)

    def test_fill_missing_values_empty_dataframe(self, mock_context, empty_dataframe):
        """Test that an empty DataFrame with an empty fill map yields no rows."""
        config = FillMissingConfiguration(fill_map={})
        result = fill_missing_values(mock_context, config, empty_dataframe)

        assert result.shape[0] == 0


class TestStandardizeCategoricalValues:
    """Exercise standardize_categorical_values on text, null and numeric columns."""

    def test_standardize_categorical_basic(self, mock_context, sample_dataframe):
        """Selected columns of the sample frame come back lower-cased and trimmed."""
        selection = ColumnsSelectConfiguration(columns=['Name', 'City', 'Status'])
        cleaned = standardize_categorical_values(mock_context, selection, sample_dataframe)

        # (column, row position, expected standardized value)
        expectations = (
            ('Name', 0, 'john doe'),
            ('City', 1, 'los angeles'),
            ('Status', 1, 'inactive'),
        )
        for column, position, expected in expectations:
            assert cleaned[column].iloc[position] == expected

    def test_standardize_categorical_single_column(self, mock_context):
        """A single column with mixed case and padding is normalised value by value."""
        frame = pd.DataFrame({'City': ['  NEW YORK  ', 'LOS ANGELES', '  chicago  ']})
        selection = ColumnsSelectConfiguration(columns=['City'])

        cleaned = standardize_categorical_values(mock_context, selection, frame)

        cities = cleaned['City']
        assert cities.iloc[0] == 'new york'
        assert cities.iloc[1] == 'los angeles'
        assert cities.iloc[2] == 'chicago'

    def test_standardize_categorical_missing_column(self, mock_context, sample_dataframe):
        """An unknown column name in the selection does not prevent processing the others."""
        selection = ColumnsSelectConfiguration(columns=['NonExistent', 'Name'])

        cleaned = standardize_categorical_values(mock_context, selection, sample_dataframe)

        # 'Name' must still be standardized even though 'NonExistent' is absent.
        assert cleaned['Name'].iloc[0] == 'john doe'

    def test_standardize_categorical_with_missing_values(self, mock_context):
        """A None entry is expected to come back as an empty string."""
        frame = pd.DataFrame({'Category': ['  ACTIVE  ', None, '  pending  ']})
        selection = ColumnsSelectConfiguration(columns=['Category'])

        cleaned = standardize_categorical_values(mock_context, selection, frame)

        categories = cleaned['Category']
        assert categories.iloc[0] == 'active'
        assert categories.iloc[1] == ''
        assert categories.iloc[2] == 'pending'

    def test_standardize_categorical_empty_dataframe(self, mock_context, empty_dataframe):
        """An empty frame stays empty even when columns are requested."""
        selection = ColumnsSelectConfiguration(columns=['A', 'B'])

        cleaned = standardize_categorical_values(mock_context, selection, empty_dataframe)

        assert cleaned.shape[0] == 0

    def test_standardize_categorical_numeric_columns(self, mock_context):
        """Numeric cells are expected to be returned as their string representation."""
        frame = pd.DataFrame({'NumCol': [1, 2, 3]})
        selection = ColumnsSelectConfiguration(columns=['NumCol'])

        cleaned = standardize_categorical_values(mock_context, selection, frame)

        first_value = cleaned['NumCol'].iloc[0]
        assert first_value == '1'
        assert isinstance(first_value, str)


class TestCorrectTypos:
    """Tests for the correct_typos operation.

    Most tests only check that the operation runs and keeps the row count;
    the exact corrections depend on the spell checker and are not asserted.
    """

    def test_correct_typos_basic(self, mock_context):
        """Test that typo correction runs and keeps every row."""
        df = pd.DataFrame({
            'Name': ['jon', 'jayne', 'bob']
        })
        config = SpellCheckConfiguration(columns=['Name'], language='en')
        result = correct_typos(mock_context, config, df)

        # Only the row count is checked; corrected spellings are not asserted.
        assert result.shape[0] == 3

    def test_correct_typos_missing_column(self, mock_context):
        """Test that a non-existent column is skipped and the data is unchanged."""
        df = pd.DataFrame({
            'Name': ['jon', 'jayne']
        })
        config = SpellCheckConfiguration(columns=['NonExistent'], language='en')
        # Pass a copy so the comparison below is made against an untouched
        # frame rather than against the object the operation may have mutated.
        result = correct_typos(mock_context, config, df.copy())

        # Should not raise error, just skip
        pd.testing.assert_frame_equal(result, df)

    def test_correct_typos_with_missing_values(self, mock_context):
        """Test that empty-string cells survive typo correction unchanged."""
        df = pd.DataFrame({
            'Text': ['helo', '', 'wrld']
        })
        config = SpellCheckConfiguration(columns=['Text'], language='en')
        result = correct_typos(mock_context, config, df)

        # Empty strings should be preserved
        assert result.loc[1, 'Text'] == ''

    def test_correct_typos_empty_dataframe(self, mock_context, empty_dataframe):
        """Test that an empty DataFrame yields a result with no rows."""
        config = SpellCheckConfiguration(columns=['A'], language='en')
        result = correct_typos(mock_context, config, empty_dataframe)

        assert result.shape[0] == 0

    def test_correct_typos_different_languages(self, mock_context):
        """Test that typo correction runs for several language codes."""
        df = pd.DataFrame({
            'Text': ['ciao', 'mondo']
        })

        for lang in ['en', 'es', 'it']:
            config = SpellCheckConfiguration(columns=['Text'], language=lang)
            # Each language gets a fresh copy, so every iteration starts from
            # the same words instead of the previous language's output.
            result = correct_typos(mock_context, config, df.copy())

            # Should process without error
            assert result.shape[0] == 2

    def test_correct_typos_numeric_values(self, mock_context):
        """Test that a numeric column can be processed without losing rows."""
        df = pd.DataFrame({
            'Values': [123, 456, 789]
        })
        config = SpellCheckConfiguration(columns=['Values'], language='en')
        result = correct_typos(mock_context, config, df)

        # Only the row count is checked for numeric input.
        assert result.shape[0] == 3

class TestNormalizeDatetime:
    """Checks for normalize_datetime: the '<column>_iso' output and the skip cases."""

    def test_normalize_datetime_basic(self, mock_context):
        """Parseable timestamps yield a '<column>_iso' column of 'Z'-suffixed ISO strings."""
        frame = pd.DataFrame({
            'date_col': ['2023-01-01 10:00:00', '2023-12-31T23:59:59'],
        })
        selection = ColumnsSelectConfiguration(columns=['date_col'])

        output = normalize_datetime(mock_context, selection, frame.copy())

        assert 'date_col_iso' in output.columns
        iso_values = output['date_col_iso']
        assert iso_values.iloc[0] == '2023-01-01T10:00:00Z'
        assert iso_values.iloc[1] == '2023-12-31T23:59:59Z'

    def test_normalize_datetime_missing_column(self, mock_context, sample_dataframe):
        """A configured column absent from the frame leaves the frame untouched."""
        selection = ColumnsSelectConfiguration(columns=['non_existent_column'])

        output = normalize_datetime(mock_context, selection, sample_dataframe.copy())

        pd.testing.assert_frame_equal(output, sample_dataframe)

    def test_normalize_datetime_unparseable_values(self, mock_context):
        """A column made only of non-date text gets no '_iso' companion column."""
        frame = pd.DataFrame({
            'invalid_col': ['not-a-date', 'completely-random-text'],
        })
        selection = ColumnsSelectConfiguration(columns=['invalid_col'])

        output = normalize_datetime(mock_context, selection, frame.copy())

        assert 'invalid_col_iso' not in output.columns

    def test_normalize_datetime_mixed_and_nulls(self, mock_context):
        """Valid dates are converted; None and invalid entries become empty strings."""
        frame = pd.DataFrame({
            'mixed_col': ['2023-05-01', None, 'invalid-date'],
        })
        selection = ColumnsSelectConfiguration(columns=['mixed_col'])

        output = normalize_datetime(mock_context, selection, frame.copy())

        assert 'mixed_col_iso' in output.columns
        iso_values = output['mixed_col_iso']
        assert iso_values.iloc[0] == '2023-05-01T00:00:00Z'

        # Row 1 was None and row 2 was unparseable text.
        assert iso_values.iloc[1] == ""
        assert iso_values.iloc[2] == ""

    def test_normalize_datetime_empty_dataframe(self, mock_context, empty_dataframe):
        """An empty input frame produces an empty result."""
        selection = ColumnsSelectConfiguration(columns=['some_col'])

        output = normalize_datetime(mock_context, selection, empty_dataframe)

        assert output.empty

    def test_normalize_datetime_epoch_only(self, mock_context, capsys):
        """Zero-like values must not produce an '_iso' column; a message goes to stderr."""
        frame = pd.DataFrame({
            'weird_col': ['0', 0, '0000', ''],
        })
        selection = ColumnsSelectConfiguration(columns=['weird_col'])

        output = normalize_datetime(mock_context, selection, frame.copy())

        assert 'weird_col_iso' not in output.columns

        # The skip is expected to be reported on stderr.
        stderr_text = capsys.readouterr().err
        assert "all normalized values are '1970-01-01'" in stderr_text

    def test_normalize_datetime_all_1970_skipped(self, mock_context, capsys):
        """Timestamps that all fall on 1970-01-01 are skipped with a stderr message."""
        frame = pd.DataFrame({
            'ts_col': ['1970-01-01 05:30:00', '1970-01-01 12:00:00'],
        })
        selection = ColumnsSelectConfiguration(columns=['ts_col'])

        output = normalize_datetime(mock_context, selection, frame.copy())

        assert 'ts_col_iso' not in output.columns

        stderr_text = capsys.readouterr().err
        assert "all normalized values are '1970-01-01'" in stderr_text

    def test_normalize_datetime_integer_age_column_skipped(self, mock_context, capsys):
        """A small-integer column such as 'age' is skipped with the same stderr message."""
        frame = pd.DataFrame({
            'age': [66, 45, 40, 43, 20, 26, 69, 21, 46],
        })
        selection = ColumnsSelectConfiguration(columns=['age'])

        output = normalize_datetime(mock_context, selection, frame.copy())

        assert 'age_iso' not in output.columns

        stderr_text = capsys.readouterr().err
        assert "all normalized values are '1970-01-01'" in stderr_text

class TestNormalizeNumericMinMax:
    """Checks for normalize_numeric_min_max and its '<column>_norm' output columns."""

    def test_normalize_numeric_basic(self, mock_context):
        """Evenly spaced values are scaled so that min is 0, max is 1 and the middle is 0.5."""
        frame = pd.DataFrame({'score': [10, 20, 30, 40, 50]})
        selection = ColumnsSelectConfiguration(columns=['score'])

        output = normalize_numeric_min_max(mock_context, selection, frame.copy())

        assert 'score_norm' in output.columns
        scaled = output['score_norm']
        assert scaled.min() == 0.0
        assert scaled.max() == 1.0

        # 30 sits exactly halfway between 10 and 50.
        assert scaled.iloc[2] == 0.5

    def test_normalize_numeric_missing_column(self, mock_context):
        """A configured column that is not in the frame produces no '_norm' column."""
        frame = pd.DataFrame({'existing': [1, 2, 3]})
        selection = ColumnsSelectConfiguration(columns=['missing_col'])

        output = normalize_numeric_min_max(mock_context, selection, frame.copy())

        assert 'missing_col_norm' not in output.columns

    def test_normalize_numeric_constant_values(self, mock_context):
        """A constant column (min == max) is skipped rather than normalised."""
        frame = pd.DataFrame({'constant': [10, 10, 10]})
        selection = ColumnsSelectConfiguration(columns=['constant'])

        output = normalize_numeric_min_max(mock_context, selection, frame.copy())

        assert 'constant_norm' not in output.columns

    def test_normalize_numeric_with_nans(self, mock_context):
        """Missing entries stay missing while the remaining values span 0 to 1."""
        frame = pd.DataFrame({'with_nans': [10, None, 50]})
        selection = ColumnsSelectConfiguration(columns=['with_nans'])

        output = normalize_numeric_min_max(mock_context, selection, frame.copy())

        assert 'with_nans_norm' in output.columns
        scaled = output['with_nans_norm']
        assert scaled.iloc[0] == 0.0
        assert scaled.iloc[2] == 1.0
        assert pd.isna(scaled.iloc[1])

    def test_normalize_numeric_multiple_columns(self, mock_context):
        """Every selected column receives its own '_norm' column in a single call."""
        frame = pd.DataFrame({'A': [1, 2], 'B': [10, 20]})
        selection = ColumnsSelectConfiguration(columns=['A', 'B'])

        output = normalize_numeric_min_max(mock_context, selection, frame.copy())

        assert 'A_norm' in output.columns
        assert 'B_norm' in output.columns

class TestNormalizeCoordinates:
    """Checks for normalize_coordinates: rounding, range filtering and DMS parsing."""

    def test_normalize_coordinates_basic(self, mock_context):
        """Numeric coordinates are rounded to four decimals and every row is kept."""
        frame = pd.DataFrame({
            'lat': [45.123456, 46.0],
            'lon': [9.123456, 10.0],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert output['lat'].iloc[0] == 45.1235
        assert output['lon'].iloc[0] == 9.1235

        assert len(output) == 2

    def test_normalize_coordinates_filtering(self, mock_context):
        """Rows whose latitude or longitude is out of range are removed."""
        frame = pd.DataFrame({
            'lat': [45.0, 100.0, -91.0, 0.0],  # 100 and -91 are out of range
            'lon': [9.0, 0.0, 0.0, 200.0],     # 200 is out of range
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        # Only the first row has both coordinates within range.
        assert len(output) == 1
        assert output['lat'].iloc[0] == 45.0

    def test_normalize_coordinates_invalid_types(self, mock_context):
        """Numeric strings are converted to floats; unparseable or null rows are dropped."""
        frame = pd.DataFrame({
            'lat': ["45.5", "invalid", None],
            'lon': ["9.5", "10.0", "11.0"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert len(output) == 1
        assert isinstance(output['lat'].iloc[0], float)

    def test_normalize_coordinates_empty_df(self, mock_context, empty_dataframe):
        """A frame with lat/lon columns but no rows yields an empty result."""
        # NOTE: the `empty_dataframe` fixture is requested but not used here;
        # a frame with explicit lat/lon columns is built instead.
        frame = pd.DataFrame(columns=['lat', 'lon'])
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame)

        assert len(output) == 0
        assert output.empty

    def test_normalize_coordinates_default_config(self, mock_context):
        """A configuration built without arguments works on 'lat'/'lon' columns."""
        frame = pd.DataFrame({
            'lat': [45.123456, 46.0],
            'lon': [9.123456, 10.0],
        })
        settings = CoordinatesNormalizationConfiguration()

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert output['lat'].iloc[0] == 45.1235
        assert output['lon'].iloc[0] == 9.1235
        assert len(output) == 2

    def test_normalize_coordinates_null_config_values(self, mock_context):
        """Passing None for both column names falls back to 'lat' and 'lon'."""
        frame = pd.DataFrame({
            'lat': [45.123456, 46.0],
            'lon': [9.123456, 10.0],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn=None, lonColumn=None)

        # The configuration itself is expected to replace None with the defaults.
        assert settings.latColumn == "lat"
        assert settings.lonColumn == "lon"

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert output['lat'].iloc[0] == 45.1235
        assert output['lon'].iloc[0] == 9.1235
        assert len(output) == 2

    def test_normalize_coordinates_dms_degree_symbol(self, mock_context):
        """DMS strings written with degree, minute and second symbols are converted."""
        frame = pd.DataFrame({
            'lat': ["40°26'46\"N", "51°30'26\"N"],
            'lon': ["79°58'56\"W", "0°7'39\"W"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert len(output) == 2
        # 40 deg 26 min 46 sec north is roughly 40.4461
        assert abs(output['lat'].iloc[0] - 40.4461) < 0.001
        # 79 deg 58 min 56 sec west is roughly -79.9822
        assert abs(output['lon'].iloc[0] - (-79.9822)) < 0.001

    def test_normalize_coordinates_dms_spaced_format(self, mock_context):
        """Space-separated DMS strings such as '40 26 46 N' are converted."""
        frame = pd.DataFrame({
            'lat': ["40 26 46 N"],
            'lon': ["79 58 56 W"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert len(output) == 1
        assert abs(output['lat'].iloc[0] - 40.4461) < 0.001
        assert abs(output['lon'].iloc[0] - (-79.9822)) < 0.001

    def test_normalize_coordinates_dms_already_decimal(self, mock_context):
        """String columns that already hold decimal degrees keep their numeric value."""
        frame = pd.DataFrame({
            'lat': ["45.5", "46.0"],
            'lon': ["9.5", "10.0"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert len(output) == 2
        assert output['lat'].iloc[0] == 45.5
        assert output['lon'].iloc[0] == 9.5

    def test_normalize_coordinates_dms_mixed_valid_invalid(self, mock_context):
        """A mix of DMS, decimal and unparseable values keeps only the parseable rows."""
        frame = pd.DataFrame({
            'lat': ["40°26'46\"N", "not_a_coord", "51.5"],
            'lon': ["79°58'56\"W", "10.0", "0.1"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        # The row whose latitude is "not_a_coord" is expected to be dropped.
        assert len(output) == 2

    def test_normalize_coordinates_dms_out_of_range(self, mock_context):
        """DMS values that convert to an out-of-range latitude are filtered out."""
        frame = pd.DataFrame({
            'lat': ["91°0'0\"N", "45°0'0\"N"],
            'lon': ["0°0'0\"E", "9°0'0\"E"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        # The first row has a latitude of 91 degrees, outside [-90, 90].
        assert len(output) == 1
        assert abs(output['lat'].iloc[0] - 45.0) < 0.001

    def test_normalize_coordinates_dms_south_and_east(self, mock_context):
        """South latitudes come out negative and east longitudes positive."""
        frame = pd.DataFrame({
            'lat': ["33°51'54\"S"],
            'lon': ["151°12'36\"E"],
        })
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        output = normalize_coordinates(mock_context, settings, frame.copy())

        assert len(output) == 1
        # 33 deg 51 min 54 sec south is roughly -33.865
        assert output['lat'].iloc[0] < 0
        assert abs(output['lat'].iloc[0] - (-33.865)) < 0.001
        # 151 deg 12 min 36 sec east is roughly 151.21
        assert output['lon'].iloc[0] > 0
        assert abs(output['lon'].iloc[0] - 151.21) < 0.01

    def test_normalize_coordinates_autodetect_numeric_vs_dms(self, mock_context):
        """The same configuration handles a numeric frame and a DMS string frame."""
        settings = CoordinatesNormalizationConfiguration(latColumn='lat', lonColumn='lon')

        # Numeric input: values are only rounded.
        numeric_frame = pd.DataFrame({
            'lat': [45.123456, 46.0],
            'lon': [9.123456, 10.0],
        })
        numeric_output = normalize_coordinates(mock_context, settings, numeric_frame.copy())

        assert numeric_output['lat'].iloc[0] == 45.1235
        assert len(numeric_output) == 2

        # String DMS input: values must be parsed into decimal degrees.
        dms_frame = pd.DataFrame({
            'lat': ["40°26'46\"N"],
            'lon': ["79°58'56\"W"],
        })
        dms_output = normalize_coordinates(mock_context, settings, dms_frame.copy())

        assert len(dms_output) == 1
        assert abs(dms_output['lat'].iloc[0] - 40.4461) < 0.001

class TestAddGlobalAggregations:
    """Checks for add_global_aggregations: grouping, warnings and column selection."""

    def test_add_global_aggregations_success(self, mock_context):
        """Summing by 'category' yields one row per group and drops the text column."""
        frame = pd.DataFrame({
            'category': ['A', 'A', 'B'],
            'value': [10, 20, 100],
            'ignored_str': ['x', 'y', 'z'],
        })
        settings = AggregationConfiguration(columns=['category'], operation='sum')

        output = add_global_aggregations(mock_context, settings, frame.copy())

        assert len(output) == 2
        total_a = output.loc[output['category'] == 'A', 'value'].values[0]
        assert total_a == 30
        total_b = output.loc[output['category'] == 'B', 'value'].values[0]
        assert total_b == 100
        assert 'ignored_str' not in output.columns
        # The operation is expected to log at info level at least once.
        mock_context.log.info.assert_called()

    def test_add_global_aggregations_missing_column(self, mock_context):
        """A grouping column absent from the frame triggers a warning and is skipped."""
        frame = pd.DataFrame({'value': [1, 2, 3]})
        settings = AggregationConfiguration(columns=['missing_col'], operation='count')

        output = add_global_aggregations(mock_context, settings, frame.copy())

        mock_context.log.warning.assert_any_call("Column 'missing_col' not found, skipping aggregation.")
        assert len(output) == 1

    def test_add_global_aggregations_unsupported_op(self, mock_context):
        """An unsupported operation raises and is reported through a warning log call."""
        frame = pd.DataFrame({'category': ['A'], 'value': [1]})
        settings = AggregationConfiguration(columns=['category'], operation='unsupported')

        # Any exception type is accepted here.
        with pytest.raises(Exception):
            add_global_aggregations(mock_context, settings, frame.copy())

        mock_context.log.warning.assert_any_call("Unsupported aggregation 'unsupported'")

    def test_add_global_aggregations_only_numeric_kept(self, mock_context):
        """Only the grouping column and numeric columns remain after aggregation."""
        frame = pd.DataFrame({
            'group': ['A', 'A'],
            'num': [1, 2],
            'text': ['hello', 'world'],
        })
        settings = AggregationConfiguration(columns=['group'], operation='mean')

        output = add_global_aggregations(mock_context, settings, frame.copy())

        remaining = output.columns
        assert 'text' not in remaining
        assert 'num' in remaining
        assert 'group' in remaining