Skip to content

Instantly share code, notes, and snippets.

View eyaltrabelsi's full-sized avatar
:octocat:
:)

Eyal Trabelsi eyaltrabelsi

:octocat:
:)
View GitHub Profile
def test_model_cpu_gpu_same():
    """Check that the CPU export of a model predicts like the GPU original.

    Both models predict on the same feature-engineered production sample,
    and every prediction pair must differ by less than 0.1 in absolute value.
    """
    # NOTE(review): `model` is not defined in this snippet -- presumably a
    # module-level name or fixture; confirm against the full test module.
    model_gpu = get_trained_model(model)
    model_cpu = export_to_cpu(model_gpu)

    base = get_production_sample(pct=0.1)
    # Feed the sample itself through feature engineering; the original passed
    # the builtin `input` function here and discarded the sample.
    base = feature_engineering(base)

    gpu_preds = model_gpu.predict(base)
    cpu_preds = model_cpu.predict(base)

    # Compare each difference to the tolerance *before* reducing with all().
    # The original `all(abs(...)) < 0.1` compared the boolean result of all()
    # to 0.1, which only passes when at least one difference is exactly zero.
    assert all(abs(gpu_preds - cpu_preds) < 0.1)
| Testing Type | Confidence | Test Churn | Cost | Variation | Pinpointing Issues |
| --- | --- | --- | --- | --- | --- |
| Unit Testing on Data | High | Low | Low | Narrow | High |
| Syntax Finding | High | Low | Low | Very Narrow | High |
| Component Testing for Data | Medium | Medium | Medium | Wide | Medium |
| Property Testing for Data | Medium | Medium | High | Wide | High |
| Integration Tests in All Pipeline | Very High | High | High | Medium | Medium |
| Integration Tests in Data Engineering | High | Medium | High | Medium | Medium |
| Artifact Testing in Model Creation | Medium | High | High | Narrow | Medium |
| Tests in Model Creation | Medium | Medium | Low | Narrow | Medium |
@given(
df=data_frames(
columns=[
column("Pclass", dtype=int, elements=integers(min_value=1, max_value=3)),
column("Fare", dtype=int, elements=integers(min_value=0, max_value=1000)),
column("SibSp", dtype=int, elements=integers(min_value=0, max_value=10)),
column("Parch", dtype=int, elements=integers(min_value=0, max_value=10)),
column("Embarked", dtype=int, elements=integers(min_value=0, max_value=3))
],
index=indexes(min_size=1, max_size=10)
import pandera as pa
from pandera import Column, DataFrameSchema
def test_all_feature_engineering():
    """Build the schema this test works with: four integer-typed columns.

    NOTE(review): the visible snippet stops after the schema definition; the
    validation step presumably follows -- confirm against the full gist.
    """
    expected_columns = ("Pclass", "Fare", "FamilySize", "Embarked")
    output_schema = DataFrameSchema(
        {column_name: Column(int) for column_name in expected_columns}
    )
@eyaltrabelsi
eyaltrabelsi / .github_workflows_pull-request.yml
Last active June 8, 2024 20:55
example for pull request github file
# .github/workflows/pull-request.yml
# Runs the `tests` job for pull requests targeting the `main` branch.
name: <name>
on:
  pull_request:
    branches:
      - main
jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
# .pre-commit-config.yaml
# Syntax checks for YAML and JSON files via the stock pre-commit hooks.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1  # Update to the latest version as necessary
    hooks:
      # ...
      - id: check-yaml
        name: Check YAML syntax
      - id: check-json
        name: Check JSON syntax
# .pre-commit-config.yaml
# ...
# SQL linting via the sqlfluff pre-commit hook.
repos:
  - repo: https://github.com/sqlfluff/sqlfluff
    # NOTE(review): `stable_version` looks like a placeholder -- pin this to
    # an actual sqlfluff release tag before use.
    rev: stable_version
    hooks:
      - id: sqlfluff-lint
# .pre-commit-config.yaml
# Python checks: the file must parse (check-ast) and pass static typing (mypy).
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: check-ast  # Testing valid python code
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v0.910  # Use the latest version available
    hooks:
      - id: mypy  # Testing valid python static types code
def test_get_family_size():
    """Check that every row of a production sample has a family size >= 1.

    The family size is computed row by row from the `SibSp` and `Parch`
    columns.
    """
    sample = get_production_sample(pct=0.1)
    family_sizes = sample.apply(
        lambda row: get_family_size(row['SibSp'], row['Parch']),
        axis=1,
    )
    for family_size in family_sizes:
        assert family_size >= 1
@given(st.integers(), st.integers())
@example(-1, 0)
def test_get_family_size(sib, parch):
    """Property test: the family size is at least 1 for any integer pair.

    Runs on generated integers plus the explicit example (-1, 0).
    """
    assert get_family_size(sib, parch) >= 1