Test PII anonymisation

This commit is contained in:
Nina Seidel 2026-03-24 10:15:00 +00:00
parent 773e577dad
commit d657c8375a

View file

@@ -21,3 +21,12 @@ def test_cast_types():
df = pd.DataFrame({"count": ["1", "2", "3"]})
result = cast_types(df, {"count": int})
assert result["count"].dtype == int
def test_strip_pii_hashes_values():
    """Check that ``strip_pii`` swaps an email for its SHA-256 hex digest.

    Builds a one-row frame with an ``email`` column, runs ``strip_pii`` on
    that column, and asserts the resulting cell equals the SHA-256 hex
    digest of the original address bytes.
    """
    import hashlib
    from src.transform.clean import strip_pii

    # Digest the anonymised cell is expected to hold.
    hasher = hashlib.sha256()
    hasher.update(b"alice@example.com")
    want = hasher.hexdigest()

    frame = pd.DataFrame({"email": ["alice@example.com"]})
    anonymised = strip_pii(frame, ["email"])

    got = anonymised["email"].values[0]
    assert got == want