Add PII anonymisation step — legal requirement from data protection review

This commit is contained in:
Nina Seidel 2026-03-24 09:30:00 +00:00
parent 15a5606f45
commit 773e577dad

View file

@ -28,3 +28,14 @@ def cast_types(df: pd.DataFrame, schema: dict) -> pd.DataFrame:
if col in df.columns:
df[col] = df[col].astype(dtype)
return df
def strip_pii(
    df: pd.DataFrame, pii_cols: list[str], *, key: bytes = b""
) -> pd.DataFrame:
    """Replace the values of PII columns with hex SHA-256 digests.

    Each non-missing value in a listed column is converted to its string
    form, UTF-8 encoded and replaced by a 64-character hex digest.
    Missing values (None / NaN / NA) are left missing instead of being
    hashed as the placeholder text "None" or "nan".

    Args:
        df: Frame to process. It is modified in place.
        pii_cols: Names of the columns to hash. Names that are not
            columns of ``df`` are ignored.
        key: Optional secret. When non-empty, HMAC-SHA256 keyed with it
            is used instead of plain SHA-256. The default (empty) keeps
            the plain SHA-256 digests.

    Returns:
        The same ``df`` object, with the listed columns replaced.

    Note:
        Plain SHA-256 is deterministic and unkeyed, so low-entropy values
        (e-mail addresses, phone numbers) can be recovered by hashing
        candidate values and comparing. Pass ``key`` when the output must
        resist that kind of lookup.
    """
    import hashlib
    import hmac

    def _digest(text: str) -> str:
        # Hash one already-stringified value, keyed only if a key was given.
        payload = text.encode()
        if key:
            return hmac.new(key, payload, hashlib.sha256).hexdigest()
        return hashlib.sha256(payload).hexdigest()

    for col in pii_cols:
        if col not in df.columns:
            continue
        column = df[col]
        # Record missing entries before astype(str), which may turn them
        # into ordinary strings such as "nan".
        missing = column.isna()
        as_text = column.astype(str)
        hashed = [
            None if is_missing else _digest(text)
            for text, is_missing in zip(as_text, missing)
        ]
        # Reuse the original index so the assignment needs no realignment;
        # object dtype stores the None placeholders unchanged.
        df[col] = pd.Series(hashed, index=column.index, dtype=object)
    return df