Cloud ML Platforms
Managed ML platforms handle infrastructure, scaling, and tooling so you can focus on modeling. Compare the three major providers and learn cost optimization strategies.
Platform Comparison
| Feature | AWS SageMaker | GCP Vertex AI | Azure ML |
|---|---|---|---|
| Training | Training jobs, Processing jobs, Managed notebooks | Pipelines, Training jobs | Compute clusters, Pipelines |
| Deployment | Endpoints, Serverless | Endpoints, Predictions | Managed endpoints |
| Feature Store | SageMaker Feature Store | Vertex AI Feature Store | Managed Feature Store |
| MLOps | Pipelines, Model Registry | Pipelines, Model Registry | Pipelines, Model Registry |
| AutoML | SageMaker Autopilot | Vertex AI AutoML | Azure AutoML |
| GPU | Extensive selection | TPUs + GPUs | NC/ND/NV-series GPU VMs |
AWS SageMaker
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn import SKLearn
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
# --- Session setup ---------------------------------------------------------
# One session and one execution role are reused by every call below.
sess = sagemaker.Session()
role = get_execution_role()

# --- Training --------------------------------------------------------------
training_hyperparameters = {
    "n-estimators": 100,
    "max-depth": 5,
}
model_artifact_uri = f"s3://{sess.default_bucket()}/models/"

sklearn_estimator = SKLearn(
    entry_point="train.py",
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    framework_version="1.2-1",
    py_version="py3",
    hyperparameters=training_hyperparameters,
    output_path=model_artifact_uri,
)

# Channel name -> S3 prefix holding that channel's data.
input_channels = {"train": "s3://bucket/train/", "test": "s3://bucket/test/"}
sklearn_estimator.fit(input_channels)

# --- Real-time endpoint ----------------------------------------------------
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.t2.medium",
    endpoint_name="my-model-endpoint",
)

# --- Serverless inference --------------------------------------------------
from sagemaker.serverless import ServerlessInferenceConfig

serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=2048,
    max_concurrency=10,
)
# NOTE: this rebinds `predictor`; the handle to the endpoint created above
# is no longer reachable through that name afterwards.
predictor = sklearn_estimator.deploy(
    serverless_inference_config=serverless_config,
)

# --- Batch transform -------------------------------------------------------
batch_output_uri = f"s3://{sess.default_bucket()}/output/"
transformer = sklearn_estimator.transformer(
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=batch_output_uri,
)
transformer.transform(data="s3://bucket/test/")
transformer.wait()
GCP Vertex AI
from google.cloud import aiplatform
from google.cloud import aiplatform_v1
import sklearn
# Initialize the SDK once; project, region and staging bucket become the
# defaults for every aiplatform call below.
aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-bucket",
)

# Custom training job.
# `model_serving_container_image_uri` takes the image URI as a plain string;
# passing it lets `job.run()` return a deployable Model.
# NOTE(review): scikit-learn prebuilt images are published as CPU images
# (`sklearn-cpu.<version>`); confirm the exact tags against the current
# prebuilt-container lists before pinning them.
job = aiplatform.CustomTrainingJob(
    display_name="sklearn-training",
    script_path="train.py",
    container_uri="us-docker.pkg.dev/vertex-ai/training/sklearn-cpu.1-0:latest",
    requirements=["pandas==1.5.0"],
    model_serving_container_image_uri=(
        "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest"
    ),
)

# scikit-learn does not use GPUs, so no accelerator is attached here.
# For a GPU-capable framework, add e.g.
# accelerator_type="NVIDIA_TESLA_T4", accelerator_count=1.
model = job.run(
    replica_count=1,
    machine_type="n1-standard-4",
)

# Deploy endpoint (autoscaling between 1 and 5 replicas).
endpoint = model.deploy(
    deployed_model_display_name="sklearn-endpoint",
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=5,
)

# Online prediction: one instance with four feature values.
response = endpoint.predict(instances=[[1.0, 2.0, 3.0, 4.0]])

# Batch prediction. The SDK keyword names are `gcs_source` and
# `gcs_destination_prefix`.
# NOTE(review): custom-trained models are expected to need an explicit
# `machine_type` for batch prediction; confirm for your model type.
batch_prediction_job = model.batch_predict(
    job_display_name="sklearn-batch",
    gcs_source="gs://bucket/test.jsonl",
    gcs_destination_prefix="gs://bucket/output/",
    machine_type="n1-standard-4",
)
Azure ML
from azureml.core import Workspace, Experiment, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
# Initialize workspace from the local config file.
ws = Workspace.from_config()

# Create compute cluster. min_nodes=0 lets the cluster scale down to zero
# nodes after 1800 idle seconds, so no VMs are billed between runs.
compute_config = AmlCompute.provisioning_configuration(
    vm_size="STANDARD_DS3_V2",
    min_nodes=0,
    max_nodes=4,
    idle_seconds_before_scaledown=1800,
)
compute_target = ComputeTarget.create(ws, "cpu-cluster", compute_config)
compute_target.wait_for_completion()

# Define environment from a conda specification file.
env = Environment.from_conda_specification(
    name="sklearn-env",
    file_path="conda_env.yml",
)

# PythonScriptStep expects a RunConfiguration; Environment has no
# `get_run_config()` method, so build one and attach the environment.
from azureml.core.runconfig import RunConfiguration

run_config = RunConfiguration()
run_config.environment = env

# Create pipeline.
# NOTE(review): `PipelineData("training_data")` is used as an input without
# a visible producing step; confirm where this data comes from (an upstream
# step or a registered dataset).
training_step = PythonScriptStep(
    name="train",
    script_name="train.py",
    arguments=["--n-estimators", 100, "--max-depth", 5],
    compute_target=compute_target,
    inputs=[PipelineData("training_data")],
    runconfig=run_config,
)
pipeline = Pipeline(workspace=ws, steps=[training_step])
experiment = Experiment(ws, "sklearn-experiment")
run = experiment.submit(pipeline)

# Block until training finishes before looking up the model.
# Presumably train.py registers "sklearn-model"; verify against the script.
run.wait_for_completion(show_output=True)

# Deploy model.
from azureml.core.model import InferenceConfig, Model

model = Model(ws, "sklearn-model")

# The inference configuration ties the scoring script to the environment.
# NOTE(review): "score.py" is a placeholder entry script that must define
# init() and run(); replace with the project's actual scoring script.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

from azureml.core.webservice import AciWebservice, Webservice

aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
)
service = Model.deploy(
    workspace=ws,
    name="sklearn-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
)
# Model.deploy returns immediately; wait so failures surface here.
service.wait_for_deployment(show_output=True)
Cost Optimization
# Spot/Preemptible instances for training
# `...` stands for the remaining arguments shown in the earlier examples.

# AWS Spot: max_run caps the training time (seconds); max_wait caps training
# time plus time spent waiting for spot capacity, so it must be >= max_run.
sklearn_estimator = SKLearn(
    ...,
    use_spot_instances=True,
    max_run=3600,
    max_wait=7200,
)

# GCP Spot VMs: capacity type is selected through the scheduling strategy.
# NOTE(review): `scheduling_strategy` needs a recent google-cloud-aiplatform
# release; confirm the installed SDK version supports it.
job.run(
    ...,
    replica_count=1,
    enable_web_access=False,
    sync=True,
    scheduling_strategy=aiplatform.compat.types.custom_job.Scheduling.Strategy.SPOT,
)
# Right-sizing recommendations
def estimate_compute_requirements(
    dataset_size_gb: float,
    model_type: str,
    *,
    gpu_threshold_gb: float = 100,
) -> str:
    """Return a suggested SageMaker instance type for a workload.

    Args:
        dataset_size_gb: Size of the training dataset in gigabytes. Only
            consulted when ``model_type`` is ``"deep_learning"``.
        model_type: One of ``"linear"``, ``"tree_ensemble"`` or
            ``"deep_learning"``. Any other value gets the general-purpose
            default.
        gpu_threshold_gb: Deep-learning datasets strictly larger than this
            are sent to a GPU instance. Defaults to 100.

    Returns:
        The instance type name as a string, e.g. ``"ml.m5.xlarge"``.
    """
    general_purpose = "ml.m5.xlarge"

    if model_type == "linear":
        return "ml.t2.medium"
    if model_type == "tree_ensemble":
        return general_purpose
    if model_type == "deep_learning":
        # Only large deep-learning workloads are given a GPU instance.
        if dataset_size_gb > gpu_threshold_gb:
            return "ml.p3.2xlarge"  # GPU
        return "ml.m5.2xlarge"
    # Unknown model types fall back to the general-purpose instance.
    return general_purpose
# Auto-scaling configuration
# SageMaker endpoints are scaled through AWS Application Auto Scaling: the
# endpoint variant is registered as a scalable target, then a
# target-tracking policy is attached to it.
autoscaling_client = sagemaker.Session().boto_session.client(
    "application-autoscaling"
)

# "AllTraffic" is presumably the variant name of the endpoint deployed
# earlier; confirm against the endpoint's configuration.
# NOTE(review): auto scaling is not expected to work on burstable (T2)
# instance types; confirm the endpoint's instance type supports it.
scalable_resource_id = "endpoint/my-model-endpoint/variant/AllTraffic"
scalable_dimension = "sagemaker:variant:DesiredInstanceCount"

autoscaling_client.register_scalable_target(
    ServiceNamespace="sagemaker",
    ResourceId=scalable_resource_id,
    ScalableDimension=scalable_dimension,
    MinCapacity=1,
    MaxCapacity=10,
)

scaling_policy = autoscaling_client.put_scaling_policy(
    PolicyName="my-model-endpoint-target-tracking",
    ServiceNamespace="sagemaker",
    ResourceId=scalable_resource_id,
    ScalableDimension=scalable_dimension,
    PolicyType="TargetTrackingScaling",
    TargetTrackingScalingPolicyConfiguration={
        # With this predefined metric the target is invocations per
        # instance per minute, not a utilization percentage.
        "TargetValue": 70.0,
        "PredefinedMetricSpecification": {
            "PredefinedMetricType": "SageMakerVariantInvocationsPerInstance",
        },
        "ScaleInCooldown": 300,
        "ScaleOutCooldown": 60,
    },
)
Best Practices
- Start with managed notebooks for exploration, then move to training jobs
- Use spot/preemptible instances for non-critical training (up to 70% savings)
- Implement auto-scaling based on prediction latency or queue depth
- Monitor costs with provider-specific cost tools
- Use multi-model endpoints for cost-efficient serving of many small models