# File listing metadata: 71 lines, 2.1 KiB, Python
from dagster import Definitions
|
|
from util_services.resources import s3_resource
|
|
from util_services.sensors import (
|
|
notify_success,
|
|
notify_failure,
|
|
notify_canceled
|
|
)
|
|
from util_services.custom_json_logger import simpl_json_logger
|
|
|
|
# Data processing jobs
|
|
from data_processing.jobs import (
|
|
remove_duplicates_job_s3,
|
|
fill_missing_values_job_s3,
|
|
standardize_categorical_values_job_s3,
|
|
correct_typos_job_s3,
|
|
normalize_numeric_min_max_job_s3,
|
|
normalize_datetime_job_s3,
|
|
normalize_coordinates_job_s3,
|
|
add_global_aggregations_job_s3,
|
|
filter_dataset_job_s3,
|
|
quality_job_s3
|
|
)
|
|
|
|
# Dataframe-level anonymisation jobs
|
|
from dataframe_level_anonymisation.jobs import (
|
|
k_anonymity_job_s3,
|
|
l_diversity_job_s3,
|
|
t_closeness_job_s3,
|
|
read_write_semistructured_job_s3,
|
|
)
|
|
|
|
# Field-level pseudo-anonymisation jobs
|
|
from field_level_pseudo_anonymisation.jobs import (
|
|
anonymise_pseudonymise_structured_job_s3,
|
|
depseudonymise_structured_job_s3,
|
|
anonymise_pseudonymise_unstructured_job_s3,
|
|
depseudonymise_unstructured_job_s3,
|
|
)
|
|
|
|
from template_code_location.jobs.jobs import data_processing_job
|
|
|
|
# Jobs are grouped by the package they are imported from. The groups are
# concatenated below in the same order as the original flat list.

# From template_code_location.jobs.jobs
_template_jobs = [data_processing_job]

# From data_processing.jobs
_data_processing_jobs = [
    remove_duplicates_job_s3,
    fill_missing_values_job_s3,
    standardize_categorical_values_job_s3,
    correct_typos_job_s3,
    normalize_numeric_min_max_job_s3,
    normalize_datetime_job_s3,
    normalize_coordinates_job_s3,
    add_global_aggregations_job_s3,
    filter_dataset_job_s3,
    quality_job_s3,
]

# From dataframe_level_anonymisation.jobs
_dataframe_anonymisation_jobs = [
    k_anonymity_job_s3,
    l_diversity_job_s3,
    t_closeness_job_s3,
    read_write_semistructured_job_s3,
]

# From field_level_pseudo_anonymisation.jobs
_field_pseudo_anonymisation_jobs = [
    anonymise_pseudonymise_structured_job_s3,
    depseudonymise_structured_job_s3,
    anonymise_pseudonymise_unstructured_job_s3,
    depseudonymise_unstructured_job_s3,
]

# Run-status sensors imported from util_services.sensors.
_notification_sensors = [notify_success, notify_failure, notify_canceled]

# The S3 resource is pre-configured with resource_name "selfS3" and exposed
# under the "s3" resource key.
_s3 = s3_resource.configured({"resource_name": "selfS3"})

# Single Definitions object for this module: all jobs, the notification
# sensors, the S3 resource, and the JSON logger under the "simpl" key.
defs = Definitions(
    jobs=[
        *_template_jobs,
        *_data_processing_jobs,
        *_dataframe_anonymisation_jobs,
        *_field_pseudo_anonymisation_jobs,
    ],
    sensors=_notification_sensors,
    resources={"s3": _s3},
    loggers={"simpl": simpl_json_logger},
)
|