Add nightly dry-run workflow
This commit is contained in:
parent
0804d26792
commit
057c14fa09
1 changed file with 33 additions and 0 deletions
33
.forgejo/workflows/nightly.yml
Normal file
33
.forgejo/workflows/nightly.yml
Normal file
|
|
@@ -0,0 +1,33 @@
|
||||||
|
# Scheduled smoke check: runs the drop_nulls / deduplicate helpers imported
# from transform.clean (found via the src/ directory) on a tiny in-memory
# DataFrame and fails the job if the resulting row count is not 2.
name: Nightly dry-run

on:
  schedule:
    # Five-field cron: minute 0, hour 2, every day.
    # NOTE(review): time zone depends on the scheduler — presumably UTC; confirm.
    - cron: "0 2 * * *"

jobs:
  dry-run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string rather than a float.
          python-version: "3.11"

      - name: Install dependencies
        run: pip install -r requirements.txt

      # The inline script prepends src/ to sys.path so transform.clean can be
      # imported without installing the project. The sample frame has four
      # rows; the assertion expects two to remain after both helpers run
      # (presumably one null-id row and one duplicate-id row are removed —
      # confirm against transform.clean). A failed assertion exits non-zero,
      # which fails the step.
      - name: Dry-run pipeline
        run: |
          python3 -c "
          import sys
          sys.path.insert(0, 'src')
          from transform.clean import drop_nulls, deduplicate
          import pandas as pd
          raw = pd.DataFrame({'id': [1, 1, 2, None], 'val': ['a', 'a', 'b', 'c']})
          df = deduplicate(drop_nulls(raw, ['id']), ['id'])
          assert len(df) == 2, f'Expected 2 rows, got {len(df)}'
          print('Dry-run OK')
          "
|
||||||
Loading…
Reference in a new issue