Compare AI model behavior side by side. Run identical prompts through two models and diff the outputs — catch regressions before they hit production.
Start free. Scale as your team grows.
Integrate model diffs into your CI/CD pipeline. All API calls are billed based on usage.
# Submit two prompts to the diff endpoint, comparing an OpenAI model (left)
# against an Anthropic model (right). YOUR_API_KEY is a placeholder.
curl -X POST https://api.modeldiff.dev/v1/diff \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "left_model": "gpt-4o",
    "right_model": "claude-3-5-sonnet",
    "prompts": [
      {"id": "q1", "prompt": "What is quantum entanglement?"},
      {"id": "q2", "prompt": "Explain recursion in programming."}
    ],
    "providers": {
      "left": "openai",
      "right": "anthropic"
    }
  }'
// Request a diff of two prompts between gpt-4o (OpenAI) and
// claude-3-5-sonnet (Anthropic), then log the reported regression count.
const response = await fetch('https://api.modeldiff.dev/v1/diff', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    left_model: 'gpt-4o',
    right_model: 'claude-3-5-sonnet',
    prompts: [
      { id: 'q1', prompt: 'What is quantum entanglement?' },
      { id: 'q2', prompt: 'Explain recursion in programming.' },
    ],
    providers: { left: 'openai', right: 'anthropic' },
  }),
});

// fetch() resolves even for 4xx/5xx responses, so check the status explicitly
// instead of reading `summary` off an error payload.
if (!response.ok) {
  throw new Error(
    `Model diff request failed: ${response.status} ${response.statusText}`,
  );
}

const results = await response.json();
console.log(`${results.summary.regressions} regressions found`);
# Workflow that posts the prompt suite to the diff API on every push and
# prints the `summary` field of the response.
name: Model Diff
on: [push]
jobs:
  diff:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run Model Diff
        env:
          API_KEY: ${{ secrets.MODELDIFF_API_KEY }}
        run: |
          # Without pipefail the pipeline's exit status is jq's alone, so a
          # failed request would not fail the step.
          set -o pipefail
          # --fail makes curl exit non-zero on HTTP 4xx/5xx responses.
          curl --fail -X POST https://api.modeldiff.dev/v1/diff \
            -H "Authorization: Bearer $API_KEY" \
            -H "Content-Type: application/json" \
            -d @prompts/test-suite.json | jq '.summary'
import requests
# Submit one prompt for comparison between gpt-4o (OpenAI) and
# claude-3-5-sonnet (Anthropic), then print the reported regression count.
response = requests.post(
    'https://api.modeldiff.dev/v1/diff',
    headers={
        'Authorization': 'Bearer YOUR_API_KEY',
        'Content-Type': 'application/json',
    },
    json={
        'left_model': 'gpt-4o',
        'right_model': 'claude-3-5-sonnet',
        'prompts': [
            {'id': 'q1', 'prompt': 'What is quantum entanglement?'},
        ],
        'providers': {'left': 'openai', 'right': 'anthropic'},
    },
)
# Raise on 4xx/5xx here rather than failing later with a KeyError on 'summary'.
response.raise_for_status()
results = response.json()
print(f"Regressions: {results['summary']['regressions']}")