Neural Architecture Search

Search Space Definition

from typing import List, Dict, Tuple
from dataclasses import dataclass
import random

@dataclass
class Operation:
    """Describe one candidate operation that a layer in the search space may use."""

    # Identifier of the operation, e.g. "conv3x3"; SearchSpace uses the same
    # string as the dictionary key for this entry.
    name: str
    # Maps each tunable setting to the list of values it may take; an empty
    # dict means the operation has nothing to tune.
    params: Dict
    # Cost figure attached to the operation. Presumably a relative FLOPs
    # estimate — nothing in this file reads it, so confirm the unit with the author.
    flops: float

class SearchSpace:
    """Catalogue of candidate operations, connection types and depth limits.

    The catalogue is what ``random_architecture`` draws from when it samples
    a layer stack.
    """

    def __init__(self):
        # (name, tunable settings, cost) for every operation a layer may use.
        catalogue = (
            ("conv3x3", {"channels": [16, 32, 64, 128]}, 0.1),
            ("conv5x5", {"channels": [16, 32, 64, 128]}, 0.25),
            ("pool3x3", {"type": ["max", "avg"]}, 0.01),
            ("skip", {}, 0.0),
            ("fc", {"units": [64, 128, 256]}, 0.5),
        )
        self.operations = {
            op_name: Operation(op_name, choices, cost)
            for op_name, choices, cost in catalogue
        }

        self.connections = ["sequential", "skip", "concat", "add"]
        self.max_depth = 8
        self.min_depth = 3

    def random_architecture(self) -> Dict:
        """Sample a random layer stack from the search space.

        Returns:
            A dict with ``"layers"`` (one dict per layer holding the keys
            ``"operation"``, ``"params"`` and ``"connection"``) and
            ``"depth"`` (the number of layers, drawn uniformly from
            ``min_depth`` to ``max_depth`` inclusive).
        """
        n_layers = random.randint(self.min_depth, self.max_depth)

        stack = []
        for position in range(n_layers):
            chosen = random.choice(list(self.operations.keys()))

            # Pick one concrete value for every tunable setting of the operation.
            settings = {}
            for key, options in self.operations[chosen].params.items():
                settings[key] = random.choice(options)

            # The first layer has no predecessor, so it is always "sequential".
            if position > 0:
                wiring = random.choice(self.connections)
            else:
                wiring = "sequential"

            stack.append(
                {"operation": chosen, "params": settings, "connection": wiring}
            )

        return {"layers": stack, "depth": n_layers}

# Build the shared search space and draw one sample architecture from it;
# `search_space` is reused by the search strategies defined later in the file.
search_space = SearchSpace()
random_arch = search_space.random_architecture()

DARTS (Differentiable NAS)

import torch
import torch.nn as nn
import torch.nn.functional as F

class _ChannelLinear(nn.Module):
    """Fully connected layer applied along the channel axis of a feature map."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.linear = nn.Linear(in_ch, out_ch)

    def forward(self, x):
        # nn.Linear transforms the LAST axis, which for a (..., C, H, W) tensor
        # is the width. Move channels (axis -3) to the end, apply the layer,
        # then restore the original layout.
        return self.linear(x.movedim(-3, -1)).movedim(-1, -3)


class MixedOperation(nn.Module):
    """Softmax-weighted mixture of candidate operations (DARTS relaxation).

    Every candidate maps a ``(..., in_channels, H, W)`` tensor to a
    ``(..., out_channels, H, W)`` tensor so the weighted outputs can be summed.

    Args:
        ops_dict: Mapping of operation name to its parameter dict. Only the
            names are used to pick which module to build.
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels every candidate must produce.
    """

    def __init__(self, ops_dict, in_channels, out_channels):
        super().__init__()
        self.ops = nn.ModuleList([
            self._build_op(name, params, in_channels, out_channels)
            for name, params in ops_dict.items()
        ])
        # One learnable architecture weight per candidate operation.
        self.alphas = nn.Parameter(torch.randn(len(ops_dict)))

    def _build_op(self, name, params, in_ch, out_ch):
        """Return the module implementing the candidate called ``name``.

        Names other than the convolution, pooling and skip candidates fall
        back to the fully connected candidate, as before.
        """
        if name == "conv3x3":
            return nn.Conv2d(in_ch, out_ch, 3, padding=1)
        if name == "conv5x5":
            return nn.Conv2d(in_ch, out_ch, 5, padding=2)
        if name == "pool3x3":
            pool = nn.MaxPool2d(3, stride=1, padding=1)
            if in_ch == out_ch:
                return pool
            # Pooling keeps the channel count, so project to out_ch; otherwise
            # this branch could not be summed with the other candidates.
            return nn.Sequential(pool, nn.Conv2d(in_ch, out_ch, 1))
        if name == "skip":
            return nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1)
        # "fc": a bare nn.Linear(in_ch, out_ch) would act on the width axis of
        # a feature map and fail unless W == in_ch, so apply it over channels.
        return _ChannelLinear(in_ch, out_ch)

    def forward(self, x):
        """Return the sum of all candidate outputs weighted by softmax(alphas)."""
        weights = F.softmax(self.alphas, dim=0)
        return sum(w * op(x) for w, op in zip(weights, self.ops))

class DARTSCell(nn.Module):
    """Densely connected cell whose every edge is a ``MixedOperation``.

    Node ``i`` sums one mixed operation per earlier state (the cell input plus
    the outputs of nodes ``0..i-1``). The cell output is the concatenation of
    all node outputs along the channel axis.

    Args:
        in_channels: Number of channels of the cell input.
        out_channels: Number of channels each node is built to produce.
        n_nodes: Number of intermediate nodes in the cell.
    """

    def __init__(self, in_channels, out_channels, n_nodes=4):
        super().__init__()
        self.nodes = nn.ModuleList()

        ops_dict = {
            "conv3x3": {},
            "conv5x5": {},
            "pool3x3": {},
            "skip": {},
            "fc": {}
        }

        for i in range(n_nodes):
            # Edge 0 reads the cell input (in_channels). Every later edge reads
            # the output of an earlier node, which was built for out_channels,
            # so it must not be sized for in_channels.
            node_ops = nn.ModuleList([
                MixedOperation(
                    ops_dict,
                    in_channels if source == 0 else out_channels,
                    out_channels,
                )
                for source in range(i + 1)
            ])
            self.nodes.append(node_ops)

    def forward(self, x):
        """Run the cell and return all node outputs concatenated on dim=1."""
        states = [x]

        for node_ops in self.nodes:
            # zip pairs edge k of this node with state k (input, node 0, ...).
            states.append(sum(op(s) for op, s in zip(node_ops, states)))

        # The raw input (states[0]) is not part of the output.
        return torch.cat(states[1:], dim=1)

class DARTSModel(nn.Module):
    """Image classifier made of a conv stem, a chain of DARTS cells and a linear head.

    Args:
        n_classes: Number of output classes.
        n_cells: Number of stacked cells.
        channels: Channel width produced by the stem and by every cell node.
        n_nodes: Number of nodes per cell.
    """

    def __init__(self, n_classes=10, n_cells=8, channels=16, n_nodes=4):
        super().__init__()
        self.stem = nn.Conv2d(3, channels, 3, padding=1)

        self.cells = nn.ModuleList([
            DARTSCell(channels, channels, n_nodes=n_nodes) for _ in range(n_cells)
        ])

        # DARTSCell.forward concatenates its node outputs on dim=1, so a cell
        # emits channels * n_nodes channels. Feeding that straight into the
        # next cell (built for `channels` inputs) cannot work, so a 1x1 conv
        # brings the width back down between consecutive cells.
        cell_out = channels * n_nodes
        self.reducers = nn.ModuleList([
            nn.Conv2d(cell_out, channels, 1) for _ in range(n_cells - 1)
        ])

        # The head sees the un-reduced output of the last cell.
        self.classifier = nn.Linear(cell_out, n_classes)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images."""
        x = self.stem(x)

        for index, cell in enumerate(self.cells):
            if index > 0:
                x = self.reducers[index - 1](x)
            x = cell(x)

        # Global average pooling, then drop the 1x1 spatial dims.
        x = F.adaptive_avg_pool2d(x, 1).flatten(1)
        return self.classifier(x)

# Instantiate the DARTS model with a 10-way classification head.
darts = DARTSModel(n_classes=10)

Evolutionary Search

import numpy as np
from typing import List

class EvolutionaryNAS:
    """Evolutionary architecture search: tournament selection, crossover, mutation.

    Architectures are dicts with a ``"layers"`` list and a ``"depth"`` count,
    as produced by ``search_space.random_architecture()``.

    Args:
        search_space: Object exposing ``random_architecture()`` and an
            ``operations`` mapping of name to an object with a ``params`` dict.
        population_size: Number of architectures kept in the population.
    """

    def __init__(self, search_space, population_size=20):
        self.search_space = search_space
        self.population_size = population_size
        self.population = []
        # Expected to be index-aligned with `population`; filled by the caller.
        self.fitness_scores = []

    @staticmethod
    def _clone(obj):
        """Return a deep copy so offspring never share layer dicts with parents."""
        from copy import deepcopy

        return deepcopy(obj)

    def initialize_population(self):
        """Fill the population with random architectures and drop old scores."""
        self.population = [
            self.search_space.random_architecture()
            for _ in range(self.population_size)
        ]
        # Scores of a previous population would be misaligned with the new one.
        self.fitness_scores = []

    def evaluate(self, architecture, train_fn, eval_fn):
        """Train and score one architecture.

        Args:
            architecture: Architecture dict handed to ``train_fn``.
            train_fn: Callable returning a model exposing ``parameters()``.
            eval_fn: Callable returning the accuracy of that model.

        Returns:
            Accuracy minus a penalty of 0.001 per million parameters.
        """
        model = train_fn(architecture)
        accuracy = eval_fn(model)
        params = sum(p.numel() for p in model.parameters())

        return accuracy - 0.001 * params / 1e6

    def select_parents(self, tournament_size=3):
        """Pick two parents, each the fittest of a random tournament.

        The tournament is capped at the number of scored individuals, so a
        small population no longer makes ``random.sample`` fail.

        Raises:
            ValueError: If no individual has a fitness score, or if
                ``tournament_size`` is smaller than 1.
        """
        scored = list(zip(self.population, self.fitness_scores))
        if not scored:
            raise ValueError(
                "select_parents needs a population with fitness scores"
            )
        if tournament_size < 1:
            raise ValueError("tournament_size must be at least 1")
        contest_size = min(tournament_size, len(scored))

        parents = []
        for _ in range(2):
            candidates = random.sample(scored, contest_size)
            parents.append(max(candidates, key=lambda pair: pair[1])[0])
        return parents

    def crossover(self, parent1, parent2):
        """Build a child from the head of ``parent1`` and the tail of ``parent2``.

        The split point is the middle of ``parent1``. The child's ``"depth"``
        is the real length of its layer list, which differs from
        ``parent1["depth"]`` whenever the parents have different depths.
        """
        crossover_point = len(parent1["layers"]) // 2

        layers = self._clone(
            parent1["layers"][:crossover_point]
            + parent2["layers"][crossover_point:]
        )
        return {"layers": layers, "depth": len(layers)}

    def mutate(self, architecture, mutation_rate=0.1):
        """Return a mutated deep copy; ``architecture`` itself is left untouched.

        Each layer is, with probability ``mutation_rate``, given a randomly
        chosen operation together with freshly sampled parameters for that
        operation, so the layer never keeps settings of the old operation.
        """
        mutated = self._clone(architecture)
        operations = self.search_space.operations
        op_names = list(operations)

        for layer in mutated["layers"]:
            if random.random() < mutation_rate:
                new_op = random.choice(op_names)
                layer["operation"] = new_op
                layer["params"] = {
                    key: random.choice(values)
                    for key, values in operations[new_op].params.items()
                }

        return mutated

# Create the evolutionary searcher on the shared search space and seed it
# with a random starting population (default size: 20).
evolutionary = EvolutionaryNAS(search_space)
evolutionary.initialize_population()

Weight Sharing NAS

class OneShotNAS:
    """One-shot search: every operation owns a single weight matrix that all
    sampled paths reuse."""

    def __init__(self, search_space):
        self.search_space = search_space
        # operation name -> shared weight, filled in by build_supernet().
        self.shared_weights = {}

    def build_supernet(self):
        """Create one shared weight per operation of the search space."""
        for name, operation in self.search_space.operations.items():
            self.shared_weights[name] = self._init_weights(operation)

    def _init_weights(self, op):
        """Return a fresh 128x128 parameter drawn from N(0, 1) scaled by 0.01.

        ``op`` is accepted but not used; every operation gets the same shape.
        """
        scale = 0.01
        return nn.Parameter(torch.randn(128, 128) * scale)

    def sample_path(self):
        """Return ``max_depth`` operation names drawn uniformly at random."""
        return [
            random.choice(list(self.search_space.operations.keys()))
            for _ in range(self.search_space.max_depth)
        ]

    def forward_with_path(self, x, path):
        """Apply the shared weight of each operation in ``path``, in order.

        Each step is a bias-free ``F.linear``; an empty path returns ``x``
        unchanged.
        """
        out = x
        for step in path:
            out = F.linear(out, self.shared_weights[step])
        return out

# Build the weight-sharing supernet over the shared search space, then draw
# one random path (a list of operation names) through it.
one_shot = OneShotNAS(search_space)
one_shot.build_supernet()
sampled_path = one_shot.sample_path()

Best Practices

- Start with a constrained search space.
- Use weight sharing for efficiency.
- Apply early stopping to unpromising architectures.
- Balance accuracy and efficiency objectives.
- Warm-start from previous searches.
- Validate discovered architectures thoroughly.

Neural Architecture Search

Neural Architecture Search

Search Space Definition

DARTS (Differentiable NAS)

Evolutionary Search

Weight Sharing NAS

Best Practices

Premium Content

Need Expert Generative AI Help?