rename field-level ops and jobs

This commit is contained in:
ILay
2026-05-05 17:07:07 +02:00
parent f0cac061b8
commit 2e6e788552
2 changed files with 40 additions and 40 deletions

View File

@@ -3,13 +3,13 @@ from util_services.util_ops import (
preview_dataframe, preview_dataframe,
read_structured_to_df, read_structured_to_df,
write_df_to_local, write_df_to_local,
write_string_to_txt, write_string_to_unstructured,
read_txt_to_string, read_unstructured_to_string,
preview_txt, preview_unstructured,
read_structured_from_s3, read_structured_from_s3,
write_df_to_s3, write_df_to_s3,
read_txt_from_s3, read_unstructured_from_s3,
write_text_to_s3, write_unstructured_to_s3,
) )
from .ops import ( from .ops import (
anonymize_pseudonymize_structured, anonymize_pseudonymize_structured,
@@ -23,7 +23,7 @@ from .unstructured_ops import (
@job(tags={ @job(tags={
"business_operation": "ANONYMISATION_PSEUDONYMISATION" "business_operation": "ANONYMISATION_PSEUDONYMISATION"
}) })
def anonymize_pseudonymize_structured_job(): def anonymise_pseudonymise_structured_job():
df = read_structured_to_df() df = read_structured_to_df()
preview_dataframe(df) preview_dataframe(df)
df_anon, metrics = anonymize_pseudonymize_structured(df) df_anon, metrics = anonymize_pseudonymize_structured(df)
@@ -35,7 +35,7 @@ def anonymize_pseudonymize_structured_job():
"business_operation": "ANONYMISATION_PSEUDONYMISATION", "business_operation": "ANONYMISATION_PSEUDONYMISATION",
"resource_type": "RD_DATA" "resource_type": "RD_DATA"
}) })
def anonymize_pseudonymize_structured_job_s3(): def anonymise_pseudonymise_structured_job_s3():
df = read_structured_from_s3() df = read_structured_from_s3()
preview_dataframe(df) preview_dataframe(df)
df_anon, metrics = anonymize_pseudonymize_structured(df) df_anon, metrics = anonymize_pseudonymize_structured(df)
@@ -46,7 +46,7 @@ def anonymize_pseudonymize_structured_job_s3():
@job(tags={ @job(tags={
"business_operation": "DEPSEUDONYMISATION" "business_operation": "DEPSEUDONYMISATION"
}) })
def depseudonymize_structured_job(): def depseudonymise_structured_job():
df = read_structured_to_df() df = read_structured_to_df()
preview_dataframe(df) preview_dataframe(df)
df_anon, metrics = depseudonymize_structured(df) df_anon, metrics = depseudonymize_structured(df)
@@ -58,7 +58,7 @@ def depseudonymize_structured_job():
"business_operation": "DEPSEUDONYMISATION", "business_operation": "DEPSEUDONYMISATION",
"resource_type": "RD_DATA" "resource_type": "RD_DATA"
}) })
def depseudonymize_structured_job_s3(): def depseudonymise_structured_job_s3():
df = read_structured_from_s3() df = read_structured_from_s3()
preview_dataframe(df) preview_dataframe(df)
df_anon, metrics = depseudonymize_structured(df) df_anon, metrics = depseudonymize_structured(df)
@@ -69,7 +69,7 @@ def depseudonymize_structured_job_s3():
@job(tags={ @job(tags={
"business_operation": "ANONYMISATION_PSEUDONYMISATION" "business_operation": "ANONYMISATION_PSEUDONYMISATION"
}) })
def anonymize_pseudonymize_depseudonymize_structured_job(): def anonymise_pseudonymise_depseudonymise_structured_job():
df = read_structured_to_df() df = read_structured_to_df()
preview_dataframe(df) preview_dataframe(df)
df_pseduo, metrics = anonymize_pseudonymize_structured(df) df_pseduo, metrics = anonymize_pseudonymize_structured(df)
@@ -81,46 +81,46 @@ def anonymize_pseudonymize_depseudonymize_structured_job():
@job(tags={ @job(tags={
"business_operation": "ANONYMISATION_PSEUDONYMISATION" "business_operation": "ANONYMISATION_PSEUDONYMISATION"
}) })
def anonymize_pseudonymize_unstructured_job(): def anonymise_pseudonymise_unstructured_job():
text = read_txt_to_string() text = read_unstructured_to_string()
preview_txt(text) preview_unstructured(text)
text_anon, metrics = anonymize_pseudonymize_unstructured(text) text_anon, metrics = anonymize_pseudonymize_unstructured(text)
preview_txt(text_anon) preview_unstructured(text_anon)
preview_txt(metrics) preview_unstructured(metrics)
write_string_to_txt(text_anon) write_string_to_unstructured(text_anon)
@job(tags={ @job(tags={
"business_operation": "ANONYMISATION_PSEUDONYMISATION", "business_operation": "ANONYMISATION_PSEUDONYMISATION",
"resource_type": "RD_DATA" "resource_type": "RD_DATA"
}) })
def anonymize_pseudonymize_unstructured_job_s3(): def anonymise_pseudonymise_unstructured_job_s3():
text = read_txt_from_s3() text = read_unstructured_from_s3()
preview_txt(text) preview_unstructured(text)
text_anon, metrics = anonymize_pseudonymize_unstructured(text) text_anon, metrics = anonymize_pseudonymize_unstructured(text)
preview_txt(text_anon) preview_unstructured(text_anon)
preview_txt(metrics) preview_unstructured(metrics)
write_text_to_s3(text_anon) write_unstructured_to_s3(text_anon)
@job(tags={ @job(tags={
"business_operation": "DEPSEUDONYMISATION" "business_operation": "DEPSEUDONYMISATION"
}) })
def depseudonymize_unstructured_job(): def depseudonymise_unstructured_job():
text = read_txt_to_string() text = read_unstructured_to_string()
preview_txt(text) preview_unstructured(text)
text_anon, metrics = depseudonymize_unstructured(text) text_anon, metrics = depseudonymize_unstructured(text)
preview_txt(text_anon) preview_unstructured(text_anon)
write_string_to_txt(text_anon) write_string_to_unstructured(text_anon)
@job(tags={ @job(tags={
"business_operation": "DEPSEUDONYMISATION", "business_operation": "DEPSEUDONYMISATION",
"resource_type": "RD_DATA" "resource_type": "RD_DATA"
}) })
def depseudonymize_unstructured_job_s3(): def depseudonymise_unstructured_job_s3():
text = read_txt_from_s3() text = read_unstructured_from_s3()
preview_txt(text) preview_unstructured(text)
text_anon, metrics = depseudonymize_unstructured(text) text_anon, metrics = depseudonymize_unstructured(text)
preview_txt(text_anon) preview_unstructured(text_anon)
write_text_to_s3(text_anon) write_unstructured_to_s3(text_anon)

View File

@@ -30,10 +30,10 @@ from template_code_location.dataframe_level_anonymisation.jobs import (
# Field-level pseudo-anonymisation jobs # Field-level pseudo-anonymisation jobs
from template_code_location.field_level_pseudo_anonymisation.jobs import ( from template_code_location.field_level_pseudo_anonymisation.jobs import (
anonymize_pseudonymize_structured_job_s3, anonymise_pseudonymise_structured_job_s3,
depseudonymize_structured_job_s3, depseudonymise_structured_job_s3,
anonymize_pseudonymize_unstructured_job_s3, anonymise_pseudonymise_unstructured_job_s3,
depseudonymize_unstructured_job_s3, depseudonymise_unstructured_job_s3,
) )
from template_code_location.jobs import data_processing_job from template_code_location.jobs import data_processing_job
@@ -57,10 +57,10 @@ defs = Definitions(
t_closeness_job_s3, t_closeness_job_s3,
read_write_semistructured_job_s3, read_write_semistructured_job_s3,
# Field-level pseudo-anonymisation # Field-level pseudo-anonymisation
anonymize_pseudonymize_structured_job_s3, anonymise_pseudonymise_structured_job_s3,
depseudonymize_structured_job_s3, depseudonymise_structured_job_s3,
anonymize_pseudonymize_unstructured_job_s3, anonymise_pseudonymise_unstructured_job_s3,
depseudonymize_unstructured_job_s3, depseudonymise_unstructured_job_s3,
], ],
sensors=[notify_success, notify_failure, notify_canceled], sensors=[notify_success, notify_failure, notify_canceled],
resources={"s3": s3_resource.configured({"resource_name": "selfS3"})}, resources={"s3": s3_resource.configured({"resource_name": "selfS3"})},