diff --git a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py index 56baf11..0f39cfb 100644 --- a/src/template_code_location/field_level_pseudo_anonymisation/jobs.py +++ b/src/template_code_location/field_level_pseudo_anonymisation/jobs.py @@ -3,13 +3,13 @@ from util_services.util_ops import ( preview_dataframe, read_structured_to_df, write_df_to_local, - write_string_to_txt, - read_txt_to_string, - preview_txt, + write_string_to_unstructured, + read_unstructured_to_string, + preview_unstructured, read_structured_from_s3, write_df_to_s3, - read_txt_from_s3, - write_text_to_s3, + read_unstructured_from_s3, + write_unstructured_to_s3, ) from .ops import ( anonymize_pseudonymize_structured, @@ -23,7 +23,7 @@ from .unstructured_ops import ( @job(tags={ "business_operation": "ANONYMISATION_PSEUDONYMISATION" }) -def anonymize_pseudonymize_structured_job(): +def anonymise_pseudonymise_structured_job(): df = read_structured_to_df() preview_dataframe(df) df_anon, metrics = anonymize_pseudonymize_structured(df) @@ -35,7 +35,7 @@ def anonymize_pseudonymize_structured_job(): "business_operation": "ANONYMISATION_PSEUDONYMISATION", "resource_type": "RD_DATA" }) -def anonymize_pseudonymize_structured_job_s3(): +def anonymise_pseudonymise_structured_job_s3(): df = read_structured_from_s3() preview_dataframe(df) df_anon, metrics = anonymize_pseudonymize_structured(df) @@ -46,7 +46,7 @@ def anonymize_pseudonymize_structured_job_s3(): @job(tags={ "business_operation": "DEPSEUDONYMISATION" }) -def depseudonymize_structured_job(): +def depseudonymise_structured_job(): df = read_structured_to_df() preview_dataframe(df) df_anon, metrics = depseudonymize_structured(df) @@ -58,7 +58,7 @@ def depseudonymize_structured_job(): "business_operation": "DEPSEUDONYMISATION", "resource_type": "RD_DATA" }) -def depseudonymize_structured_job_s3(): +def depseudonymise_structured_job_s3(): df = read_structured_from_s3() preview_dataframe(df) df_anon, metrics = depseudonymize_structured(df) @@ -69,7 +69,7 @@ def depseudonymize_structured_job_s3(): @job(tags={ "business_operation": "ANONYMISATION_PSEUDONYMISATION" }) -def anonymize_pseudonymize_depseudonymize_structured_job(): +def anonymise_pseudonymise_depseudonymise_structured_job(): df = read_structured_to_df() preview_dataframe(df) df_pseduo, metrics = anonymize_pseudonymize_structured(df) @@ -81,46 +81,46 @@ def anonymize_pseudonymize_depseudonymize_structured_job(): @job(tags={ "business_operation": "ANONYMISATION_PSEUDONYMISATION" }) -def anonymize_pseudonymize_unstructured_job(): - text = read_txt_to_string() - preview_txt(text) +def anonymise_pseudonymise_unstructured_job(): + text = read_unstructured_to_string() + preview_unstructured(text) text_anon, metrics = anonymize_pseudonymize_unstructured(text) - preview_txt(text_anon) - preview_txt(metrics) - write_string_to_txt(text_anon) + preview_unstructured(text_anon) + preview_unstructured(metrics) + write_string_to_unstructured(text_anon) @job(tags={ "business_operation": "ANONYMISATION_PSEUDONYMISATION", "resource_type": "RD_DATA" }) -def anonymize_pseudonymize_unstructured_job_s3(): - text = read_txt_from_s3() - preview_txt(text) +def anonymise_pseudonymise_unstructured_job_s3(): + text = read_unstructured_from_s3() + preview_unstructured(text) text_anon, metrics = anonymize_pseudonymize_unstructured(text) - preview_txt(text_anon) - preview_txt(metrics) - write_text_to_s3(text_anon) + preview_unstructured(text_anon) + preview_unstructured(metrics) + write_unstructured_to_s3(text_anon) @job(tags={ "business_operation": "DEPSEUDONYMISATION" }) -def depseudonymize_unstructured_job(): - text = read_txt_to_string() - preview_txt(text) +def depseudonymise_unstructured_job(): + text = read_unstructured_to_string() + preview_unstructured(text) text_anon, metrics = depseudonymize_unstructured(text) - preview_txt(text_anon) - write_string_to_txt(text_anon) + preview_unstructured(text_anon) + write_string_to_unstructured(text_anon) @job(tags={ "business_operation": "DEPSEUDONYMISATION", "resource_type": "RD_DATA" }) -def depseudonymize_unstructured_job_s3(): - text = read_txt_from_s3() - preview_txt(text) +def depseudonymise_unstructured_job_s3(): + text = read_unstructured_from_s3() + preview_unstructured(text) text_anon, metrics = depseudonymize_unstructured(text) - preview_txt(text_anon) - write_text_to_s3(text_anon) + preview_unstructured(text_anon) + write_unstructured_to_s3(text_anon) diff --git a/src/template_code_location/repository.py b/src/template_code_location/repository.py index f825e85..d19d6fd 100644 --- a/src/template_code_location/repository.py +++ b/src/template_code_location/repository.py @@ -30,10 +30,10 @@ from template_code_location.dataframe_level_anonymisation.jobs import ( # Field-level pseudo-anonymisation jobs from template_code_location.field_level_pseudo_anonymisation.jobs import ( - anonymize_pseudonymize_structured_job_s3, - depseudonymize_structured_job_s3, - anonymize_pseudonymize_unstructured_job_s3, - depseudonymize_unstructured_job_s3, + anonymise_pseudonymise_structured_job_s3, + depseudonymise_structured_job_s3, + anonymise_pseudonymise_unstructured_job_s3, + depseudonymise_unstructured_job_s3, ) from template_code_location.jobs import data_processing_job @@ -57,10 +57,10 @@ defs = Definitions( t_closeness_job_s3, read_write_semistructured_job_s3, # Field-level pseudo-anonymisation - anonymize_pseudonymize_structured_job_s3, - depseudonymize_structured_job_s3, - anonymize_pseudonymize_unstructured_job_s3, - depseudonymize_unstructured_job_s3, + anonymise_pseudonymise_structured_job_s3, + depseudonymise_structured_job_s3, + anonymise_pseudonymise_unstructured_job_s3, + depseudonymise_unstructured_job_s3, ], sensors=[notify_success, notify_failure, notify_canceled], resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},