"""Unit tests for the cleaning helpers in ``src.transform.clean``.

Target path of this module: ``tests/test_clean.py``.
"""

import pandas as pd
import pytest
from src.transform.clean import drop_nulls, deduplicate, cast_types


def test_drop_nulls_removes_rows():
    """Rows with a null in the listed column are expected to be dropped."""
    df = pd.DataFrame({"id": [1, 2, None], "val": ["a", "b", "c"]})
    result = drop_nulls(df, ["id"])
    assert len(result) == 2
    # Check the actual contract: no null ids survive, and the survivors are
    # exactly the rows that had an id (order-independent comparison).
    assert result["id"].notna().all()
    assert sorted(result["val"]) == ["a", "b"]


def test_deduplicate_keeps_last():
    """Among rows sharing a key, the last occurrence is expected to win."""
    df = pd.DataFrame({"id": [1, 1, 2], "val": ["old", "new", "only"]})
    result = deduplicate(df, ["id"])
    assert len(result) == 2
    # A single .loc lookup avoids chained indexing and also proves that
    # exactly one row with id == 1 is left.
    assert result.loc[result["id"] == 1, "val"].tolist() == ["new"]


def test_cast_types():
    """String digits are expected to be converted to an integer column."""
    df = pd.DataFrame({"count": ["1", "2", "3"]})
    result = cast_types(df, {"count": int})
    # The width of builtin ``int`` as a dtype varies by platform, so check
    # the dtype family rather than one exact dtype, then check the values.
    assert pd.api.types.is_integer_dtype(result["count"])
    assert result["count"].tolist() == [1, 2, 3]