Test PII anonymisation
This commit is contained in:
parent
773e577dad
commit
d657c8375a
1 changed file with 9 additions and 0 deletions
|
|
@@ -21,3 +21,12 @@ def test_cast_types():
|
||||||
df = pd.DataFrame({"count": ["1", "2", "3"]})
|
df = pd.DataFrame({"count": ["1", "2", "3"]})
|
||||||
result = cast_types(df, {"count": int})
|
result = cast_types(df, {"count": int})
|
||||||
assert result["count"].dtype == int
|
assert result["count"].dtype == int
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_pii_hashes_values():
    """Check that ``strip_pii`` replaces a listed column's value with its SHA-256 hex digest.

    A one-row frame holding a single e-mail address is passed through
    ``strip_pii`` with ``["email"]`` as the columns to anonymise; the first
    value of the resulting ``email`` column must equal the hex digest of the
    original address bytes.
    """
    # Imports are kept function-local, as in the original test.
    import hashlib
    from src.transform.clean import strip_pii

    frame = pd.DataFrame({"email": ["alice@example.com"]})
    digest = hashlib.sha256(b"alice@example.com").hexdigest()

    anonymised = strip_pii(frame, ["email"])

    first_value = anonymised["email"].values[0]
    assert first_value == digest
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue