feat(SIMPL-24642): consolidate all code locations into template-code-location

- Rename src/template-code-location to src/template_code_location
- Copy data-processing jobs/ops/config_models
- Copy dataframe-level-anonymisation jobs/ops/utils/config_models
- Copy field-level-pseudo-anonymisation jobs/ops/techniques/config_models
- Update all imports to template_code_location namespace
- Merge all jobs into unified repository.py with sensors/resources/loggers
- Update pyproject.toml with all dependencies
- Update Dockerfile for consolidated image
This commit is contained in:
ILay
2026-04-24 18:38:12 +02:00
parent 0d2802e6f5
commit 4e0b216410
42 changed files with 2071 additions and 14 deletions

View File

@@ -0,0 +1,65 @@
from dagster import Definitions
from util_services.resources import s3_resource
from util_services.sensors import (
notify_success,
notify_failure,
notify_canceled
)
from util_services.custom_json_logger import simpl_json_logger
# Data processing jobs
from template_code_location.data_processing.jobs import (
remove_duplicates_job_s3,
fill_missing_values_job_s3,
standardize_categorical_values_job_s3,
correct_typos_job_s3,
normalize_numeric_min_max_job_s3,
normalize_datetime_job_s3,
normalize_coordinates_job_s3,
add_global_aggregations_job_s3,
filter_dataset_job_s3,
)
# Dataframe-level anonymisation jobs
from template_code_location.dataframe_level_anonymisation.jobs import (
k_anonymity_job_s3,
l_diversity_job_s3,
t_closeness_job_s3,
read_write_semistructured_job_s3,
)
# Field-level pseudo-anonymisation jobs
from template_code_location.field_level_pseudo_anonymisation.jobs import (
anonymize_pseudonymize_structured_job_s3,
depseudonymize_structured_job_s3,
anonymize_pseudonymize_unstructured_job_s3,
depseudonymize_unstructured_job_s3,
)
# Job groups, listed in the same order as the import sections above so the
# combined job list keeps the original registration order.
_data_processing_jobs = (
    remove_duplicates_job_s3,
    fill_missing_values_job_s3,
    standardize_categorical_values_job_s3,
    correct_typos_job_s3,
    normalize_numeric_min_max_job_s3,
    normalize_datetime_job_s3,
    normalize_coordinates_job_s3,
    add_global_aggregations_job_s3,
    filter_dataset_job_s3,
)
_dataframe_level_anonymisation_jobs = (
    k_anonymity_job_s3,
    l_diversity_job_s3,
    t_closeness_job_s3,
    read_write_semistructured_job_s3,
)
_field_level_pseudo_anonymisation_jobs = (
    anonymize_pseudonymize_structured_job_s3,
    depseudonymize_structured_job_s3,
    anonymize_pseudonymize_unstructured_job_s3,
    depseudonymize_unstructured_job_s3,
)

# The shared S3 resource, bound to the "selfS3" resource name.
_self_s3_resource = s3_resource.configured({"resource_name": "selfS3"})

# Single Definitions object for this module: every job imported above, the
# three notify_* sensors, the "s3" resource and the "simpl" logger.
defs = Definitions(
    jobs=[
        *_data_processing_jobs,
        *_dataframe_level_anonymisation_jobs,
        *_field_level_pseudo_anonymisation_jobs,
    ],
    sensors=[notify_success, notify_failure, notify_canceled],
    resources={"s3": _self_s3_resource},
    loggers={"simpl": simpl_json_logger},
)