Code Generation
Code Generation Models
from transformers import AutoModelForCausalLM, AutoTokenizer
class CodeGenerator:
    """Wrap a Hugging Face causal language model for code completion and infilling.

    The tokenizer and model are loaded once in the constructor and reused by
    every call to :meth:`complete` and :meth:`fill_middle`.
    """

    def __init__(self, model_name="codellama/CodeLlama-7b-hf"):
        """Load the tokenizer and model weights identified by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)

    def complete(self, prefix, max_tokens=100):
        """Continue ``prefix`` and return the decoded text.

        Args:
            prefix: Source text to continue.
            max_tokens: Upper bound on the number of newly generated tokens.

        Returns:
            The decoded first output sequence with special tokens removed.
        """
        inputs = self.tokenizer(prefix, return_tensors="pt")
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            # temperature/top_p are sampling parameters: without do_sample=True
            # generation stays greedy and both settings are ignored.
            do_sample=True,
            temperature=0.2,
            top_p=0.95,
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def fill_middle(self, prefix, suffix, max_tokens=100):
        """Generate the code that belongs between ``prefix`` and ``suffix``.

        Args:
            prefix: Code that precedes the gap.
            suffix: Code that follows the gap.
            max_tokens: Upper bound on the number of newly generated tokens.

        Returns:
            Only the newly generated text, with special tokens removed. The
            infilling prompt itself is not echoed back.
        """
        prompt = f"<PRE> {prefix} <SUF> {suffix} <MID>"
        inputs = self.tokenizer(prompt, return_tensors="pt")
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_tokens,
        )
        # The output sequence starts with the prompt tokens; drop them so the
        # caller receives just the infilled middle rather than the sentinels.
        prompt_length = len(inputs["input_ids"][0])
        return self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
Code-Specific Tokenization
# Structure-aware tokenization for code: indentation changes become explicit marker tokens
class CodeTokenizer:
    """Convert source code into a flat token list with explicit indentation markers.

    ``special_tokens`` maps marker names to the literal strings placed in the
    output. ``tokenize_with_structure`` emits only the ``INDENT`` and
    ``DEDENT`` markers; ``NEWLINE`` and ``SPACE`` are defined but not emitted.
    """

    def __init__(self):
        """Set up the marker-name to marker-string table."""
        self.special_tokens = {
            "INDENT": "<INDENT>",
            "DEDENT": "<DEDENT>",
            "NEWLINE": "<NEWLINE>",
            "SPACE": "<SPACE>",
        }

    def tokenize_line(self, line):
        """Split one line (already stripped of indentation) into lexical pieces.

        Runs of word characters stay together; every other non-space character
        becomes its own token. Subclasses can override this to plug in a
        different per-line tokenizer.

        Args:
            line: A single line of code without leading/trailing whitespace.

        Returns:
            A list of token strings; empty for an empty line.
        """
        import re

        return re.findall(r"\w+|[^\w\s]", line)

    def tokenize_with_structure(self, code):
        """Tokenize ``code`` line by line, marking indentation changes.

        A line indented deeper than the enclosing block emits one ``INDENT``;
        returning to a shallower level emits one ``DEDENT`` per block closed.
        Indentation is tracked with a stack of column widths, so any consistent
        indent width works. Tabs are expanded to 4-column tab stops. Blank and
        whitespace-only lines are skipped because they carry no indentation
        information.

        Args:
            code: Source text; lines are separated by ``"\\n"``.

        Returns:
            A list of token strings with marker tokens interleaved.
        """
        indent_marker = self.special_tokens["INDENT"]
        dedent_marker = self.special_tokens["DEDENT"]

        tokens = []
        indent_stack = [0]  # column widths of the currently open blocks
        for line in code.split("\n"):
            content = line.strip()
            if not content:
                continue

            expanded = line.expandtabs(4)
            current_indent = len(expanded) - len(expanded.lstrip())

            # Close every block that is indented deeper than this line.
            while current_indent < indent_stack[-1]:
                indent_stack.pop()
                tokens.append(dedent_marker)
            # Open a block when the line is deeper than the enclosing one; this
            # also covers a dedent that lands between two known levels.
            if current_indent > indent_stack[-1]:
                indent_stack.append(current_indent)
                tokens.append(indent_marker)

            tokens.extend(self.tokenize_line(content))
        return tokens
Popular Code Models
| Model | Size | Languages | Features |
|---|---|---|---|
| Codex | 12B | Multi | GitHub Copilot |
| CodeLlama | 7-34B | Multi | Open source |
| StarCoder | 15B | 86 | BigCode |
| DeepSeek-Coder | 1.3-33B | Multi | Fill-in-middle |
Code Review Assistant
class CodeReviewAssistant:
    """Build review and bug-fix prompts and send them to a code model.

    ``code_model`` must provide a ``complete(prompt)`` method; whatever that
    method returns is passed straight back to the caller.
    """

    def __init__(self, code_model):
        """Store the model used to answer prompts."""
        self.model = code_model

    def review_code(self, code):
        """Ask the model to review ``code``.

        Args:
            code: Source code to embed in the review prompt.

        Returns:
            The result of ``self.model.complete`` for the review prompt.
        """
        # Single braces so the code is interpolated; doubled braces in an
        # f-string would emit the literal text "{code}" instead.
        prompt = (
            "Review this code for potential issues:\n"
            "```python\n"
            f"{code}\n"
            "```\n"
            "Identify:\n"
            "1. Bugs or errors\n"
            "2. Security vulnerabilities\n"
            "3. Performance issues\n"
            "4. Style improvements\n"
            "Provide specific suggestions:"
        )
        return self.model.complete(prompt)

    def suggest_fix(self, code, error_message):
        """Ask the model for a corrected version of ``code``.

        Args:
            code: Source code that produced the error.
            error_message: The error text to show the model.

        Returns:
            The result of ``self.model.complete`` for the fix prompt.
        """
        prompt = (
            "Fix this code error:\n"
            "Code:\n"
            "```python\n"
            f"{code}\n"
            "```\n"
            f"Error: {error_message}\n"
            "Fixed code:"
        )
        return self.model.complete(prompt)
Summary
Code generation models are transforming software development by automating coding tasks. Understanding their architecture and capabilities is essential for modern developers.
Next: We'll explore text-to-image generation.