Model deployers are stack components responsible for online model serving. They enable you to deploy machine learning models as managed web services and provide access through API endpoints.
Online serving is the process of hosting and loading machine learning models as part of a managed web service and providing access to the models through an API endpoint, such as an HTTP/REST endpoint. Once a model is deployed, you can send inference requests to it through the web service’s API and receive fast, low-latency responses.
from zenml import pipeline, step
from zenml.client import Client


@step
def train_model() -> Model:
    """Train a model and return it.

    NOTE: `Model` and `train(...)` look like placeholders for your own
    model type and training routine -- substitute real ones before running.
    """
    # Train and return your model
    model = train(...)
    return model


@step
def deploy_model(model: Model) -> None:
    """Deploy `model` through the BentoML deployer step and print its URL."""
    # Deploy using the active stack's model deployer
    from zenml.integrations.bentoml.steps import bentoml_deployer_step

    service = bentoml_deployer_step(
        model=model,
        model_name="my_classifier",
        port=3000,
    )
    print(f"Model deployed at: {service.prediction_url}")


@pipeline
def deployment_pipeline():
    """Train a model, then hand it to the deployment step."""
    model = train_model()
    deploy_model(model)
from zenml.client import Client

# Look up the model deployer registered in the currently active stack.
client = Client()
model_deployer = client.active_stack.model_deployer

# List all deployed models
services = model_deployer.find_model_server()
for service in services:
    print(f"Model: {service.config.model_name}")
    print(f"Status: {service.status.state}")
    print(f"URL: {service.prediction_url}")
# Get a specific deployment
# NOTE: indexing with [0] assumes at least one matching server was found;
# an empty result would raise IndexError here.
service = model_deployer.find_model_server(
    pipeline_name="deployment_pipeline",
    pipeline_step_name="deploy_model",
    running=True,
)[0]

if service.is_running:
    print(f"Service is running at {service.prediction_url}")
else:
    print(f"Service status: {service.status.state}")
from zenml.integrations.bentoml.services import BentoMLDeploymentService


@step
def make_predictions(service: BentoMLDeploymentService) -> list:
    """Send one sample request to the deployed service and return its result."""
    # Use the service directly in a pipeline step
    predictions = service.predict({"data": [[1, 2, 3, 4]]})
    return predictions
Implement continuous deployment with scheduled pipelines:
from zenml import pipeline
from zenml.config import Schedule


@pipeline(
    enable_cache=False,
    schedule=Schedule(cron_expression="0 0 * * 0"),  # Weekly
)
def continuous_deployment_pipeline():
    """Load data, train, evaluate, and pass the model and metrics to the deploy step."""
    # Load latest data
    data = load_data()

    # Train model
    model = train_model(data)

    # Evaluate model
    metrics = evaluate_model(model, data)

    # Deploy if metrics are good
    deploy_if_metrics_good(model, metrics)
from typing import Any

from zenml import step, Model


@step(model=Model(name="sentiment_classifier"))
def deploy_model(model: Any) -> None:
    """Deploy `model` under the "sentiment_classifier" name with an explicit version.

    NOTE: `deploy(...)` is not defined in this snippet; it presumably stands
    in for your deployment call -- TODO confirm.
    """
    # Deploy with version tracking
    service = deploy(
        model=model,
        model_name="sentiment_classifier",
        version="1.2.0",
    )
    # ZenML automatically tracks the deployment
    # as part of the model version
# Check service status
# NOTE: `...` is a placeholder for your own search filters, and [0] assumes
# at least one server matches them.
service = model_deployer.find_model_server(...)[0]
print(service.status)
print(service.status.last_error)

# Get detailed logs
logs = service.get_logs()
for log in logs:
    print(log)