# Neural Architecture Search
## Search Space Definition
from typing import List, Dict, Tuple
from dataclasses import dataclass
import random
@dataclass
class Operation:
    """One candidate primitive that a layer of an architecture may use.

    Instances are created positionally as ``Operation(name, params, flops)``,
    so the field order below is part of the contract.
    """

    # Identifier of the primitive, e.g. "conv3x3".
    name: str
    # Maps each tunable hyper-parameter name to the list of values it may take.
    params: Dict
    # Cost figure attached to the primitive; the unit is not stated in this file.
    flops: float
class SearchSpace:
    """Catalogue of candidate operations and the rules for sampling from it."""

    def __init__(self):
        """Register the built-in operations, connection kinds and depth bounds."""
        # (name, tunable hyper-parameters, cost figure) for every primitive.
        catalogue = (
            ("conv3x3", {"channels": [16, 32, 64, 128]}, 0.1),
            ("conv5x5", {"channels": [16, 32, 64, 128]}, 0.25),
            ("pool3x3", {"type": ["max", "avg"]}, 0.01),
            ("skip", {}, 0.0),
            ("fc", {"units": [64, 128, 256]}, 0.5),
        )
        self.operations = {
            op_name: Operation(op_name, choices, cost)
            for op_name, choices, cost in catalogue
        }
        self.connections = ["sequential", "skip", "concat", "add"]
        self.min_depth = 3
        self.max_depth = 8

    def random_architecture(self) -> Dict:
        """Sample an architecture uniformly at random.

        Returns:
            A dict with ``"layers"`` (a list of layer dicts holding
            ``"operation"``, ``"params"`` and ``"connection"``) and ``"depth"``
            (the number of layers, drawn from ``[min_depth, max_depth]``).
        """
        n_layers = random.randint(self.min_depth, self.max_depth)
        op_names = list(self.operations)
        sampled_layers = []
        for position in range(n_layers):
            chosen = random.choice(op_names)
            # Pick one concrete value for every tunable hyper-parameter.
            concrete = {}
            for key, candidates in self.operations[chosen].params.items():
                concrete[key] = random.choice(candidates)
            # The first layer has no predecessor to wire to, so it is always
            # sequential; later layers draw a connection kind at random.
            if position == 0:
                wiring = "sequential"
            else:
                wiring = random.choice(self.connections)
            sampled_layers.append(
                {"operation": chosen, "params": concrete, "connection": wiring}
            )
        return {"layers": sampled_layers, "depth": n_layers}
# Build the default search space and draw one random architecture from it.
search_space = SearchSpace()
random_arch = search_space.random_architecture()
## DARTS (Differentiable NAS)
import torch
import torch.nn as nn
import torch.nn.functional as F
class MixedOperation(nn.Module):
    """Softmax-weighted blend of every candidate operation on one edge.

    Each candidate maps a ``(N, in_channels, H, W)`` tensor to a
    ``(N, out_channels, H, W)`` tensor so that the weighted outputs can be
    summed. The mixing logits ``alphas`` are learnable parameters.

    Args:
        ops_dict: Mapping of operation name to its (currently unused)
            parameter dict. Supported names: ``"conv3x3"``, ``"conv5x5"``,
            ``"pool3x3"``, ``"skip"`` and ``"fc"``.
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels every candidate must produce.

    Raises:
        ValueError: If ``ops_dict`` contains an unsupported operation name.
    """

    def __init__(self, ops_dict, in_channels, out_channels):
        super().__init__()
        self.ops = nn.ModuleList([
            self._build_op(name, params, in_channels, out_channels)
            for name, params in ops_dict.items()
        ])
        # One architecture logit per candidate operation.
        self.alphas = nn.Parameter(torch.randn(len(ops_dict)))

    def _build_op(self, name, params, in_ch, out_ch):
        """Return the module implementing candidate ``name``.

        Every branch preserves the spatial size and yields ``out_ch``
        channels; otherwise ``forward`` could not add the candidates.
        """
        if name == "conv3x3":
            return nn.Conv2d(in_ch, out_ch, 3, padding=1)
        if name == "conv5x5":
            return nn.Conv2d(in_ch, out_ch, 5, padding=2)
        if name == "pool3x3":
            pool = nn.MaxPool2d(3, stride=1, padding=1)
            if in_ch == out_ch:
                return pool
            # Pooling cannot change the channel count, so project afterwards.
            return nn.Sequential(pool, nn.Conv2d(in_ch, out_ch, 1))
        if name == "skip":
            return nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1)
        if name == "fc":
            # A 1x1 convolution is a fully connected layer applied over the
            # channel axis at every spatial position. nn.Linear would act on
            # the last (width) axis of an NCHW tensor instead.
            return nn.Conv2d(in_ch, out_ch, 1)
        raise ValueError(f"Unknown operation: {name!r}")

    def forward(self, x):
        """Return the softmax(alphas)-weighted sum of all candidate outputs."""
        weights = F.softmax(self.alphas, dim=0)
        return sum(w * op(x) for w, op in zip(weights, self.ops))
class DARTSCell(nn.Module):
    """Densely connected cell whose edges are ``MixedOperation`` modules.

    Node ``i`` owns ``i + 1`` edges: one from the cell input and one from each
    earlier node. A node's state is the sum of its edge outputs, and the cell
    output is the concatenation of all node states along dimension 1, i.e.
    ``n_nodes * out_channels`` channels when every edge yields
    ``out_channels``.

    Args:
        in_channels: Channels of the tensor fed to the cell.
        out_channels: Channels each edge is asked to produce.
        n_nodes: Number of intermediate nodes; must be at least 1.

    Raises:
        ValueError: If ``n_nodes`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, n_nodes=4):
        super().__init__()
        if n_nodes < 1:
            # With no nodes forward() would have nothing to concatenate.
            raise ValueError(f"n_nodes must be >= 1, got {n_nodes}")
        self.nodes = nn.ModuleList()
        ops_dict = {
            "conv3x3": {},
            "conv5x5": {},
            "pool3x3": {},
            "skip": {},
            "fc": {},
        }
        for i in range(n_nodes):
            # Edge 0 reads the cell input (in_channels); every later edge
            # reads the state of an earlier node, which has out_channels.
            node_ops = nn.ModuleList([
                MixedOperation(
                    ops_dict,
                    in_channels if source == 0 else out_channels,
                    out_channels,
                )
                for source in range(i + 1)
            ])
            self.nodes.append(node_ops)

    def forward(self, x):
        """Run every node in order and concatenate the node states."""
        states = [x]
        for node_ops in self.nodes:
            # zip pairs edge j with state j; there is exactly one edge per
            # state available so far.
            node_outputs = [op(s) for op, s in zip(node_ops, states)]
            states.append(sum(node_outputs))
        # The cell input itself (states[0]) is not part of the output.
        return torch.cat(states[1:], dim=1)
class DARTSModel(nn.Module):
    """Image classifier made of a conv stem, stacked DARTS cells and a linear head.

    Each ``DARTSCell`` concatenates its node states along the channel axis, so
    a cell built with 16 output channels and 4 nodes emits 64 channels. A 1x1
    convolution in front of every cell after the first brings the width back
    to 16, which is what each cell is constructed to accept.

    Args:
        n_classes: Number of output classes of the classifier head.
    """

    def __init__(self, n_classes=10):
        super().__init__()
        width = 16
        n_cells = 8
        n_nodes = 4
        # Channels produced by one cell: its n_nodes states are concatenated.
        cell_out = width * n_nodes

        self.stem = nn.Conv2d(3, width, 3, padding=1)
        self.cells = nn.ModuleList([
            DARTSCell(width, width, n_nodes=n_nodes) for _ in range(n_cells)
        ])
        # reducers[i] maps the output of cells[i] to the input of cells[i + 1].
        self.reducers = nn.ModuleList([
            nn.Conv2d(cell_out, width, 1) for _ in range(n_cells - 1)
        ])
        # The head sees the pooled output of the last cell.
        self.classifier = nn.Linear(cell_out, n_classes)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images."""
        x = self.stem(x)
        for index, cell in enumerate(self.cells):
            if index > 0:
                x = self.reducers[index - 1](x)
            x = cell(x)
        # Global average pooling removes the spatial dimensions.
        x = F.adaptive_avg_pool2d(x, 1).flatten(1)
        return self.classifier(x)
# Instantiate the DARTS network with a 10-class classifier head.
darts = DARTSModel(n_classes=10)
## Evolutionary Search
import numpy as np
from typing import List
class EvolutionaryNAS:
    """Evolutionary architecture search over a ``SearchSpace``-like object.

    Architectures are dicts with a ``"layers"`` list and a ``"depth"`` count,
    as produced by ``search_space.random_architecture()``.

    Args:
        search_space: Object exposing ``random_architecture()`` and an
            ``operations`` mapping whose values carry a ``params`` dict of
            candidate value lists.
        population_size: Number of architectures kept in the population.
    """

    def __init__(self, search_space, population_size=20):
        self.search_space = search_space
        self.population_size = population_size
        self.population = []
        self.fitness_scores = []

    @staticmethod
    def _clone(obj):
        """Return a deep copy so offspring never share state with parents."""
        import copy

        return copy.deepcopy(obj)

    def initialize_population(self):
        """Fill the population with freshly sampled random architectures."""
        self.population = [
            self.search_space.random_architecture()
            for _ in range(self.population_size)
        ]
        # Old scores describe the previous population; drop them so they are
        # never paired with the new members.
        self.fitness_scores = []

    def evaluate(self, architecture, train_fn, eval_fn):
        """Train and score one architecture.

        Args:
            architecture: Architecture dict handed to ``train_fn``.
            train_fn: Callable returning a model exposing ``parameters()``.
            eval_fn: Callable returning the accuracy of that model.

        Returns:
            Accuracy minus a penalty of 0.001 per million parameters.
        """
        model = train_fn(architecture)
        accuracy = eval_fn(model)
        params = sum(p.numel() for p in model.parameters())
        fitness = accuracy - 0.001 * params / 1e6
        return fitness

    def evaluate_population(self, train_fn, eval_fn):
        """Score every member and store the result in ``fitness_scores``.

        Returns:
            The list of fitness values, aligned with ``population``.
        """
        self.fitness_scores = [
            self.evaluate(architecture, train_fn, eval_fn)
            for architecture in self.population
        ]
        return self.fitness_scores

    def select_parents(self, tournament_size=3):
        """Pick two parents by tournament selection.

        Only members that have a fitness score take part. The tournament size
        is capped at the number of scored members.

        Raises:
            ValueError: If no member has been scored or ``tournament_size``
                is smaller than 1.
        """
        scored = list(zip(self.population, self.fitness_scores))
        if not scored:
            raise ValueError(
                "select_parents needs a scored population; "
                "fill fitness_scores first"
            )
        if tournament_size < 1:
            raise ValueError(
                f"tournament_size must be >= 1, got {tournament_size}"
            )
        contenders = min(tournament_size, len(scored))
        parents = []
        for _ in range(2):
            candidates = random.sample(scored, contenders)
            parents.append(max(candidates, key=lambda pair: pair[1])[0])
        return parents

    def crossover(self, parent1, parent2):
        """Single-point crossover at half of ``parent1``'s depth.

        The child takes the leading layers of ``parent1`` and the trailing
        layers of ``parent2``. Its layers are copies, and ``"depth"`` is the
        real number of layers, which can differ from either parent.
        """
        crossover_point = parent1["depth"] // 2
        layers = self._clone(
            parent1["layers"][:crossover_point]
            + parent2["layers"][crossover_point:]
        )
        return {"layers": layers, "depth": len(layers)}

    def mutate(self, architecture, mutation_rate=0.1):
        """Return a mutated copy; ``architecture`` itself is left untouched.

        Each layer is replaced with probability ``mutation_rate`` by a random
        operation whose hyper-parameters are re-sampled, so the layer's
        ``"params"`` always belong to its ``"operation"``.
        """
        mutated = self._clone(architecture)
        operations = self.search_space.operations
        op_names = list(operations)
        for layer in mutated["layers"]:
            if random.random() < mutation_rate:
                new_op = random.choice(op_names)
                layer["operation"] = new_op
                layer["params"] = {
                    key: random.choice(values)
                    for key, values in operations[new_op].params.items()
                }
        return mutated
# Create the evolutionary searcher over the search space defined earlier and
# seed its population with random architectures.
evolutionary = EvolutionaryNAS(search_space)
evolutionary.initialize_population()
## Weight Sharing NAS
class OneShotNAS:
    """Toy weight-sharing search: one shared weight matrix per operation.

    Args:
        search_space: Object exposing an ``operations`` mapping and a
            ``max_depth`` integer.
        hidden_dim: Side length of every shared square weight matrix; inputs
            to ``forward_with_path`` need this many features. Defaults to 128.
    """

    def __init__(self, search_space, hidden_dim=128):
        self.search_space = search_space
        self.hidden_dim = hidden_dim
        self.shared_weights = {}

    def build_supernet(self):
        """Create the shared weight matrix of every operation in the space."""
        for op_name, op in self.search_space.operations.items():
            self.shared_weights[op_name] = self._init_weights(op)

    def _init_weights(self, op):
        """Return a small random ``hidden_dim x hidden_dim`` parameter.

        ``op`` is accepted for interface symmetry but is not consulted.
        """
        return nn.Parameter(
            torch.randn(self.hidden_dim, self.hidden_dim) * 0.01
        )

    def sample_path(self):
        """Return ``max_depth`` operation names drawn uniformly at random."""
        op_names = list(self.search_space.operations)
        return [
            random.choice(op_names)
            for _ in range(self.search_space.max_depth)
        ]

    def forward_with_path(self, x, path):
        """Apply the shared weights of each operation in ``path`` in order.

        Args:
            x: Tensor whose last dimension equals ``hidden_dim``.
            path: Sequence of operation names, e.g. from ``sample_path()``.

        Returns:
            ``x`` after one ``F.linear`` per path entry (``x`` itself for an
            empty path).

        Raises:
            KeyError: If a name in ``path`` has no shared weights, typically
                because ``build_supernet()`` has not been called.
        """
        # Validate up front so no work is done on a path that cannot finish.
        missing = [name for name in path if name not in self.shared_weights]
        if missing:
            raise KeyError(
                f"No shared weights for operation(s) {missing!r}; "
                "call build_supernet() first"
            )
        for op_name in path:
            x = F.linear(x, self.shared_weights[op_name])
        return x
# Build the weight-sharing supernet over the search space defined earlier and
# sample one path (a list of operation names) through it.
one_shot = OneShotNAS(search_space)
one_shot.build_supernet()
sampled_path = one_shot.sample_path()
## Best Practices
- Start with constrained search spaces
- Use weight sharing for efficiency
- Apply early stopping for unpromising architectures
- Balance accuracy and efficiency objectives
- Use warm-starting from previous searches
- Validate discovered architectures thoroughly