Cloud ML Platforms

Managed ML platforms handle infrastructure, scaling, and tooling so you can focus on modeling. This guide compares the three major providers (AWS SageMaker, GCP Vertex AI, and Azure ML) and covers cost optimization strategies.

Platform Comparison

Feature	AWS SageMaker	GCP Vertex AI	Azure ML
Training	Managed notebooks, Training jobs, Processing jobs	Pipelines, Training jobs	Compute clusters, Pipelines
Deployment	Endpoints, Serverless	Endpoints, Predictions	Managed endpoints
Feature Store	SageMaker Feature Store	Vertex AI Feature Store	Managed Feature Store
MLOps	Pipelines, Model Registry	Pipelines, Model Registry	Pipelines, Model Registry
AutoML	SageMaker Autopilot	Vertex AI AutoML	Azure AutoML
GPU	Extensive selection	TPUs + GPUs	GPU VMs (NC/ND series); availability varies by region

AWS SageMaker

import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn import SKLearn
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel

# Initialize session
sess = sagemaker.Session()
role = get_execution_role()

# Train a model
# Hyperparameters and the artifact location are named up front so the
# estimator definition reads as pure configuration.
training_hyperparameters = {"n-estimators": 100, "max-depth": 5}
model_output_uri = f"s3://{sess.default_bucket()}/models/"

sklearn_estimator = SKLearn(
    entry_point="train.py",
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    framework_version="1.2-1",
    py_version="py3",
    hyperparameters=training_hyperparameters,
    output_path=model_output_uri,
)

# One S3 prefix per named input channel.
training_channels = {
    "train": "s3://bucket/train/",
    "test": "s3://bucket/test/",
}
sklearn_estimator.fit(training_channels)

# Deploy to endpoint
realtime_endpoint_settings = {
    "initial_instance_count": 1,
    "instance_type": "ml.t2.medium",
    "endpoint_name": "my-model-endpoint",
}
predictor = sklearn_estimator.deploy(**realtime_endpoint_settings)

# Serverless inference
from sagemaker.serverless import ServerlessInferenceConfig

serverless_config = ServerlessInferenceConfig(memory_size_in_mb=2048, max_concurrency=10)

# Note: this rebinds `predictor` to the serverless deployment.
predictor = sklearn_estimator.deploy(serverless_inference_config=serverless_config)

# Batch transform
batch_output_uri = f"s3://{sess.default_bucket()}/output/"
transformer = sklearn_estimator.transformer(
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=batch_output_uri,
)
transformer.transform(data="s3://bucket/test/")
transformer.wait()  # block until the transform job finishes

GCP Vertex AI

from google.cloud import aiplatform
from google.cloud import aiplatform_v1
import sklearn

# Initialize
# Sets the project, region, and staging bucket for the SDK calls below.
aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-bucket"
)

# Custom training job
# scikit-learn trains on CPU, so both the training and serving images are the
# prebuilt CPU containers (there are no scikit-learn GPU images).
job = aiplatform.CustomTrainingJob(
    display_name="sklearn-training",
    script_path="train.py",
    container_uri="us-docker.pkg.dev/vertex-ai/training/sklearn-cpu.1-0:latest",
    requirements=["pandas==1.5.0"],
    # This parameter takes the image URI as a plain string.
    model_serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest"
)

# No accelerator is requested: scikit-learn would not use a GPU, so attaching
# one only adds cost.
# NOTE(review): a Model is returned only if train.py writes its artifact to the
# directory Vertex AI provides for model output - confirm in the training script.
model = job.run(
    replica_count=1,
    machine_type="n1-standard-4"
)

# Deploy endpoint
# Replica bounds let the endpoint scale between 1 and 5 nodes.
endpoint = model.deploy(
    deployed_model_display_name="sklearn-endpoint",
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=5
)

# Online prediction
response = endpoint.predict(instances=[[1.0, 2.0, 3.0, 4.0]])

# Batch prediction
# The SDK keywords are `gcs_source` and `gcs_destination_prefix`.
# NOTE(review): machine_type is set on the assumption that custom-trained
# models need dedicated resources for batch prediction - confirm.
batch_prediction_job = model.batch_predict(
    job_display_name="sklearn-batch",
    gcs_source="gs://bucket/test.jsonl",
    gcs_destination_prefix="gs://bucket/output/",
    machine_type="n1-standard-4"
)

Azure ML

from azureml.core import Workspace, Experiment, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Initialize workspace
ws = Workspace.from_config()

# Create compute cluster
# min_nodes=0 allows the cluster to scale down to zero nodes; the idle window
# before scale-down is 1800 seconds.
compute_config = AmlCompute.provisioning_configuration(
    vm_size="STANDARD_DS3_V2",
    min_nodes=0,
    max_nodes=4,
    idle_seconds_before_scaledown=1800
)
compute_target = ComputeTarget.create(ws, "cpu-cluster", compute_config)
compute_target.wait_for_completion()

# Define environment
env = Environment.from_conda_specification(
    name="sklearn-env",
    file_path="conda_env.yml"
)

# Environment has no get_run_config(); a pipeline step takes a
# RunConfiguration that carries the environment.
from azureml.core.runconfig import RunConfiguration
run_config = RunConfiguration()
run_config.environment = env

# Create pipeline
# NOTE(review): PipelineData is normally an intermediate output produced by an
# upstream step; with no producer here the pipeline may fail validation.
# Consider a registered dataset as the input - confirm against your data setup.
training_step = PythonScriptStep(
    name="train",
    script_name="train.py",
    arguments=["--n-estimators", 100, "--max-depth", 5],
    compute_target=compute_target,
    inputs=[PipelineData("training_data")],
    runconfig=run_config
)

pipeline = Pipeline(workspace=ws, steps=[training_step])
experiment = Experiment(ws, "sklearn-experiment")
run = experiment.submit(pipeline)
# Wait for training to finish before looking up the model it produces.
run.wait_for_completion(show_output=True)

# Deploy model
# NOTE(review): assumes the training run registers a model named
# "sklearn-model" in the workspace - confirm in train.py.
from azureml.core.model import InferenceConfig, Model
model = Model(ws, "sklearn-model")

# The inference configuration pairs the scoring script with the environment.
# "score.py" is a placeholder for your own entry script.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

from azureml.core.webservice import AciWebservice, Webservice
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True
)

service = Model.deploy(
    workspace=ws,
    name="sklearn-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config
)
# Model.deploy starts the deployment; wait until the service is ready.
service.wait_for_deployment(show_output=True)

Cost Optimization

# Spot/Preemptible instances for training
# In both calls below, `...` stands in for the arguments shown in the
# platform examples earlier in this document.

# AWS Spot
sklearn_estimator = SKLearn(
    ...,
    use_spot_instances=True,
    max_run=3600,   # cap on training time, in seconds
    max_wait=7200   # cap on spot wait plus training time; must be >= max_run
)

# GCP Spot VMs
# NOTE(review): `scheduling_strategy` is what requests Spot capacity; it is
# only available in recent google-cloud-aiplatform releases - confirm that
# your installed version accepts it.
job.run(
    ...,
    replica_count=1,
    enable_web_access=False,
    sync=True,
    scheduling_strategy=aiplatform.compat.types.custom_job.Scheduling.Strategy.SPOT
)

# Right-sizing recommendations
def estimate_compute_requirements(dataset_size_gb, model_type):
    """Suggest a SageMaker instance type for a workload.

    Args:
        dataset_size_gb: Dataset size in gigabytes. Only consulted when
            ``model_type`` is ``"deep_learning"``.
        model_type: One of ``"linear"``, ``"tree_ensemble"``, or
            ``"deep_learning"``. Any other value gets the general-purpose
            default.

    Returns:
        The instance type name as a string.
    """
    general_purpose = "ml.m5.xlarge"

    if model_type == "linear":
        return "ml.t2.medium"
    if model_type == "tree_ensemble":
        return general_purpose
    if model_type == "deep_learning":
        # Only datasets strictly larger than 100 GB get the GPU instance.
        needs_gpu = dataset_size_gb > 100
        return "ml.p3.2xlarge" if needs_gpu else "ml.m5.2xlarge"
    return general_purpose

# Auto-scaling configuration
# The SageMaker Python SDK has no `sagemaker.autoscaling` module; endpoint
# scaling is configured through the Application Auto Scaling service, reached
# here via the boto3 session that backs the SageMaker session.
import sagemaker

autoscaling_client = sagemaker.Session().boto_session.client("application-autoscaling")

# Scaling targets one production variant of an instance-backed endpoint.
# "AllTraffic" is the variant name used when none is given at deploy time.
endpoint_name = "my-model-endpoint"
variant_name = "AllTraffic"
variant_resource_id = f"endpoint/{endpoint_name}/variant/{variant_name}"
scalable_dimension = "sagemaker:variant:DesiredInstanceCount"

# Register the variant's instance count as scalable between 1 and 10.
autoscaling_client.register_scalable_target(
    ServiceNamespace="sagemaker",
    ResourceId=variant_resource_id,
    ScalableDimension=scalable_dimension,
    MinCapacity=1,
    MaxCapacity=10,
)

# Target-tracking settings: add or remove instances to hold average CPU
# utilization near the target.
# NOTE(review): SageMaker reports CPUUtilization summed across vCPUs, so the
# target may need scaling for multi-core instance types - confirm.
scaling_policy = {
    "TargetValue": 70.0,  # Target 70% utilization
    "CustomizedMetricSpecification": {
        "MetricName": "CPUUtilization",
        "Namespace": "/aws/sagemaker/Endpoints",
        "Dimensions": [
            {"Name": "EndpointName", "Value": endpoint_name},
            {"Name": "VariantName", "Value": variant_name},
        ],
        "Statistic": "Average",
        "Unit": "Percent",
    },
    "ScaleInCooldown": 300,   # seconds to wait before removing capacity again
    "ScaleOutCooldown": 60,   # seconds to wait before adding capacity again
}

autoscaling_client.put_scaling_policy(
    PolicyName=f"{endpoint_name}-cpu-target-tracking",
    ServiceNamespace="sagemaker",
    ResourceId=variant_resource_id,
    ScalableDimension=scalable_dimension,
    PolicyType="TargetTrackingScaling",
    TargetTrackingScalingPolicyConfiguration=scaling_policy,
)

Best Practices

Start with managed notebooks for exploration, then move to training jobs
Use spot/preemptible instances for training jobs that can tolerate interruption, and checkpoint regularly so an interrupted job can resume (up to 70% savings)
Implement auto-scaling based on prediction latency or queue depth
Monitor costs with provider-specific cost tools
Use multi-model endpoints for cost-efficient serving of many small models