Add unit tests for transform layer
This commit is contained in:
parent
f9d124ab6f
commit
ee3a056449
1 changed file with 23 additions and 0 deletions
23
tests/test_clean.py
Normal file
23
tests/test_clean.py
Normal file
|
|
@@ -0,0 +1,23 @@
|
||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
from src.transform.clean import drop_nulls, deduplicate, cast_types
|
||||||
|
|
||||||
|
|
||||||
|
def test_drop_nulls_removes_rows():
    """Check that drop_nulls discards rows whose ``id`` is null and keeps the others."""
    df = pd.DataFrame({"id": [1, 2, None], "val": ["a", "b", "c"]})
    result = drop_nulls(df, ["id"])
    assert len(result) == 2
    # The previous check (`3 not in ids`) could never fail: 3 is not in the
    # input at all. Assert directly that no null ids survive instead.
    assert result["id"].notna().all()
    # The row carrying the null id ("c") must be the one that was removed.
    assert sorted(result["val"]) == ["a", "b"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_deduplicate_keeps_last():
    """Check that deduplicate keeps the last row among rows sharing an ``id``."""
    frame = pd.DataFrame({"id": [1, 1, 2], "val": ["old", "new", "only"]})
    deduped = deduplicate(frame, ["id"])
    assert len(deduped) == 2
    # Of the two rows with id 1, the later one ("new") is expected to remain.
    rows_for_one = deduped[deduped["id"] == 1]
    assert rows_for_one["val"].values[0] == "new"
|
||||||
|
|
||||||
|
|
||||||
|
def test_cast_types():
    """Check that cast_types converts a string column to integers per the mapping."""
    df = pd.DataFrame({"count": ["1", "2", "3"]})
    result = cast_types(df, {"count": int})
    # `dtype == int` compares against the platform's default integer width
    # and can differ between platforms; accept any integer dtype instead.
    assert pd.api.types.is_integer_dtype(result["count"])
    # Also confirm the values themselves were converted, not just the dtype.
    assert result["count"].tolist() == [1, 2, 3]
|
||||||
Loading…
Reference in a new issue