import pandas as pd
from evidently.model_profile import Profile
from evidently.pipeline.column_mapping import ColumnMapping
from zenml.integrations.evidently.data_validators import EvidentlyDataValidator
from zenml.steps import Output, step
@step
def data_drift_detection(
    reference_dataset: pd.DataFrame,
    comparison_dataset: pd.DataFrame,
) -> Output(profile=Profile, dashboard=str):
    """Custom data drift detection step with Evidently.

    Delegates to the active Evidently data validator, requesting the
    ``"categoricaltargetdrift"`` profile for the two datasets.

    Args:
        reference_dataset: a Pandas DataFrame
        comparison_dataset: a Pandas DataFrame of new data you wish to
            compare against the reference data

    Returns:
        profile: Evidently Profile generated for the data drift
        dashboard: HTML report extracted from an Evidently Dashboard
            generated for the data drift
    """
    # validation pre-processing (e.g. dataset preparation) can take place here

    data_validator = EvidentlyDataValidator.get_active_data_validator()
    profile, dashboard = data_validator.data_profiling(
        dataset=reference_dataset,
        comparison_dataset=comparison_dataset,
        profile_list=[
            "categoricaltargetdrift",
        ],
        # Only the prediction column is mapped here; extend the mapping
        # (target, feature lists, ...) to match your own dataset.
        column_mapping=ColumnMapping(
            prediction="class_prediction",
        ),
    )

    # validation post-processing (e.g. interpret results, take actions) can happen here

    # The dashboard object is converted to its HTML string so that the second
    # output matches the declared `dashboard=str` output type.
    return [profile, dashboard.html()]