AI & Agents
9 min read
Guardrails
Protect AI agents with input validation, output checks, and tool access control.
Security Guardrails
For advanced security – enhanced PII detection, prompt injection prevention, audit trails, and GDPR/CCPA compliance – see Security & Compliance.
Built-in Guardrails
Directive ships with guardrails you can drop into any orchestrator. No external dependencies needed.
PII Detection
Detect and optionally redact personal information before it reaches the agent:
import {
createAgentOrchestrator,
createPIIGuardrail,
} from '@directive-run/ai';
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
input: [
// Block any input that contains personal information
createPIIGuardrail({}),
// Or scrub PII in-place and allow the request to continue
createPIIGuardrail({
redact: true,
redactReplacement: '[REDACTED]',
patterns: [
/\b\d{3}-\d{2}-\d{4}\b/, // SSN
/\b\d{16}\b/, // Credit card (simplified: 16 consecutive digits, no separators)
/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i, // Email
],
}),
],
},
});
Content Moderation
Block harmful content using your moderation API:
import { createModerationGuardrail } from '@directive-run/ai';
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
// Check user input before it reaches the agent
input: [
createModerationGuardrail({
checkFn: async (text) => {
const result = await openai.moderations.create({ input: text });
return result.results[0].flagged;
},
message: 'Content flagged by moderation',
}),
],
// Check agent output before it reaches the user
output: [
createModerationGuardrail({
checkFn: async (text) => {
const result = await openai.moderations.create({ input: text });
return result.results[0].flagged;
},
}),
],
},
});
Tool Access Control
Allow or deny specific tools:
import { createToolGuardrail } from '@directive-run/ai';
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
toolCall: [
// Allowlist – only these tools are permitted
createToolGuardrail({
allowlist: ['search', 'calculator', 'weather'],
}),
// Denylist – block dangerous tools by name
createToolGuardrail({
denylist: ['shell', 'filesystem', 'eval'],
caseSensitive: false, // Match regardless of casing
}),
],
},
});
Output Type Validation
Ensure agent output matches an expected type:
import { createOutputTypeGuardrail } from '@directive-run/ai';
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
output: [
// Require a non-empty string response
createOutputTypeGuardrail({ type: 'string', minStringLength: 1 }),
// Require an object with specific keys present
createOutputTypeGuardrail({
type: 'object',
requiredFields: ['answer', 'sources'],
}),
// Require an array within a size range
createOutputTypeGuardrail({
type: 'array',
minLength: 1,
maxLength: 100,
}),
],
},
});
Output Schema Validation
For complex output validation, use createOutputSchemaGuardrail with a custom validator – or plug in Zod:
import { createOutputSchemaGuardrail } from '@directive-run/ai';
// Validate output with a custom function
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
output: [
createOutputSchemaGuardrail({
validate: (output) => {
if (typeof output !== 'object' || output === null) {
return { valid: false, errors: ['Output must be an object'] };
}
if (!('answer' in output)) {
return { valid: false, errors: ['Missing required field: answer'] };
}
return { valid: true };
},
}),
],
},
});
// Or plug in Zod for schema-level validation
import { z } from 'zod';
const OutputSchema = z.object({
answer: z.string(),
confidence: z.number().min(0).max(1),
sources: z.array(z.string()),
});
const zodOrchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
output: [
createOutputSchemaGuardrail({
// Delegate validation to Zod's safeParse
validate: (output) => {
const result = OutputSchema.safeParse(output);
if (result.success) {
return { valid: true };
}
return {
valid: false,
errors: result.error.errors.map((e) => e.message),
};
},
}),
],
},
});
Output Length Limit
Limit output by character count or estimated token count:
import { createLengthGuardrail } from '@directive-run/ai';
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
output: [
// Cap output by raw character count
createLengthGuardrail({ maxCharacters: 5000 }),
// Cap output by estimated token count (default: chars / 4)
createLengthGuardrail({ maxTokens: 1000 }),
// Provide your own token estimator
createLengthGuardrail({
maxTokens: 1000,
estimateTokens: (text) => text.split(' ').length,
}),
],
},
});
Content Filter
Block output matching specific keywords or patterns:
import { createContentFilterGuardrail } from '@directive-run/ai';
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
output: [
// Block output containing sensitive keywords or patterns
createContentFilterGuardrail({
blockedPatterns: [
'internal-only', // Plain string (auto-escaped for regex safety)
/\bpassword\b/i, // RegExp for exact word match
/api[_-]key/i, // RegExp with a character class (matches api_key or api-key)
],
caseSensitive: false, // String patterns match case-insensitively
}),
],
},
});
String patterns are automatically regex-escaped, so special characters like . match literally.
Rate Limiting
Limit request frequency based on token usage and request count:
import { createRateLimitGuardrail } from '@directive-run/ai';
// Enforce both token-based and request-based rate limits
const rateLimiter = createRateLimitGuardrail({
maxTokensPerMinute: 10000,
maxRequestsPerMinute: 60,
});
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
input: [rateLimiter], // Checked before every agent run
},
});
// Clear the rate limiter's sliding window (useful in tests)
rateLimiter.reset();
Custom Guardrails
Write your own guardrail as a function that returns { passed, reason?, transformed? }. The function receives (data, context) – you can omit context if you don't need it:
import { createAgentOrchestrator } from '@directive-run/ai';
import type { GuardrailFn, InputGuardrailData, OutputGuardrailData } from '@directive-run/ai';
// Block inputs that are too long
const maxLengthGuardrail: GuardrailFn<InputGuardrailData> = (data) => {
if (data.input.length > 10000) {
return { passed: false, reason: 'Input exceeds 10,000 characters' };
}
return { passed: true };
};
// Clean up whitespace and pass the transformed input downstream
const normalizeWhitespace: GuardrailFn<InputGuardrailData> = (data) => {
const cleaned = data.input.replace(/\s+/g, ' ').trim();
return { passed: true, transformed: cleaned };
};
// Reject empty agent responses
const noEmptyResponse: GuardrailFn<OutputGuardrailData> = (data) => {
const output = typeof data.output === 'string' ? data.output : JSON.stringify(data.output);
if (!output || output.trim().length === 0) {
return { passed: false, reason: 'Agent returned empty response' };
}
return { passed: true };
};
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
input: [maxLengthGuardrail, normalizeWhitespace], // Run in order
output: [noEmptyResponse],
},
});
Named Guardrails
Give guardrails a name for better error messages, and optionally add retry support:
import type { NamedGuardrail, InputGuardrailData } from '@directive-run/ai';
const piiCheck: NamedGuardrail<InputGuardrailData> = {
name: 'pii-detector', // Shows up in error messages and hooks
fn: async (data, context) => {
const hasPII = await externalPIIService.check(data.input);
return { passed: !hasPII, reason: hasPII ? 'Contains PII' : undefined };
},
critical: true, // Block the run on failure (default: true)
// Retry transient failures with exponential backoff
retry: {
attempts: 3,
backoff: 'exponential',
baseDelayMs: 100,
maxDelayMs: 5000,
},
};
const orchestrator = createAgentOrchestrator({
runner,
autoApproveToolCalls: true,
guardrails: {
input: [piiCheck], // Named guardrails mix freely with plain functions
},
});
Error Handling
When a guardrail fails, a structured GuardrailError is thrown:
import { isGuardrailError } from '@directive-run/ai';
try {
await orchestrator.run(agent, userInput);
} catch (error) {
// Type-narrow to a structured GuardrailError
if (isGuardrailError(error)) {
console.log(error.code); // 'INPUT_GUARDRAIL_FAILED' | 'OUTPUT_GUARDRAIL_FAILED' | 'TOOL_CALL_GUARDRAIL_FAILED'
console.log(error.guardrailName); // Which guardrail fired
console.log(error.guardrailType); // 'input' | 'output' | 'toolCall'
console.log(error.userMessage); // Safe to display in your UI
console.log(error.agentName); // Which agent was running
// Sensitive fields are non-enumerable (hidden from JSON.stringify / console.log)
console.log(error.input); // The raw input that triggered the error
console.log(error.data); // Additional guardrail context
}
}
Streaming Guardrails
Evaluate guardrails on partial output as tokens stream in:
import {
createStreamingRunner,
createLengthStreamingGuardrail,
createPatternStreamingGuardrail,
createToxicityStreamingGuardrail,
combineStreamingGuardrails,
} from '@directive-run/ai';
// Halt the stream if the output grows too long
const lengthGuard = createLengthStreamingGuardrail({
maxTokens: 2000,
warnAt: 1500, // Emit a warning chunk at 75% capacity
});
// Halt the stream when sensitive data patterns appear
const patternGuard = createPatternStreamingGuardrail({
patterns: [
{ regex: /\b(SSN|social security)\b/i, name: 'PII' },
{ regex: /\b\d{3}-\d{2}-\d{4}\b/, name: 'SSN' },
],
});
// Merge both guardrails into a single checker
const combined = combineStreamingGuardrails([lengthGuard, patternGuard]);
// Attach to a standalone streaming runner
const streamRunner = createStreamingRunner(baseRunner, {
streamingGuardrails: [combined],
});
Builder Pattern
Use the fluent builder to compose guardrails:
import {
createOrchestratorBuilder,
createPIIGuardrail,
createToolGuardrail,
createOutputTypeGuardrail,
} from '@directive-run/ai';
const orchestrator = createOrchestratorBuilder()
// Input guardrails run in the order they are added
.withInputGuardrail('pii', createPIIGuardrail({ redact: true }))
.withInputGuardrail('length', (data) => ({
passed: data.input.length <= 10000,
reason: data.input.length > 10000 ? 'Input too long' : undefined,
}))
// Tool call and output guardrails
.withToolCallGuardrail('tools', createToolGuardrail({ denylist: ['shell'] }))
.withOutputGuardrail('type', createOutputTypeGuardrail({ type: 'string' }))
// Finalize with the runner
.build({
runner,
autoApproveToolCalls: true,
});
Framework Integration
Handle guardrail errors in your UI by catching GuardrailError from orchestrator.run() and displaying the userMessage.
React
import { useState, useCallback } from 'react';
import { useAgentOrchestrator, useFact } from '@directive-run/react';
import { isGuardrailError } from '@directive-run/ai';
function GuardedChat() {
const orchestrator = useAgentOrchestrator({ runner, autoApproveToolCalls: true });
const agent = useFact(orchestrator.system, '__agent');
const [error, setError] = useState<string | null>(null);
const send = useCallback(async (input: string) => {
setError(null); // Clear any previous guardrail error
try {
await orchestrator.run(myAgent, input);
} catch (err) {
// Show the user-safe message if a guardrail blocked the request
if (isGuardrailError(err)) {
setError(err.userMessage);
}
}
}, [orchestrator]);
return (
<div>
<p>Status: {agent?.status}</p>
{error && <p className="error">{error}</p>}
</div>
);
}
Vue
<script setup>
import { ref, onUnmounted } from 'vue';
import { createAgentOrchestrator, isGuardrailError } from '@directive-run/ai';
import { useFact } from '@directive-run/vue';
const orchestrator = createAgentOrchestrator({ runner, autoApproveToolCalls: true });
onUnmounted(() => orchestrator.dispose());
const agent = useFact(orchestrator.system, '__agent');
const error = ref<string | null>(null);
async function send(input: string) {
error.value = null; // Clear previous error
try {
await orchestrator.run(myAgent, input);
} catch (err) {
// Surface guardrail errors to the user
if (isGuardrailError(err)) error.value = err.userMessage;
}
}
</script>
<template>
<p>Status: {{ agent?.status }}</p>
<p v-if="error" class="error">{{ error }}</p>
</template>
Svelte
<script>
import { createAgentOrchestrator, isGuardrailError } from '@directive-run/ai';
import { useFact } from '@directive-run/svelte';
import { onDestroy } from 'svelte';
const orchestrator = createAgentOrchestrator({ runner, autoApproveToolCalls: true });
onDestroy(() => orchestrator.dispose());
const agent = useFact(orchestrator.system, '__agent');
let error = null;
async function send(input) {
error = null; // Reset before each attempt
try {
await orchestrator.run(myAgent, input);
} catch (err) {
if (isGuardrailError(err)) error = err.userMessage;
}
}
</script>
<p>Status: {$agent?.status}</p>
{#if error}<p class="error">{error}</p>{/if}
Solid
import { createSignal } from 'solid-js';
import { createAgentOrchestrator, isGuardrailError } from '@directive-run/ai';
import { useFact } from '@directive-run/solid';
import { onCleanup } from 'solid-js';
function GuardedChat() {
const orchestrator = createAgentOrchestrator({ runner, autoApproveToolCalls: true });
onCleanup(() => orchestrator.dispose());
const agent = useFact(orchestrator.system, '__agent');
const [error, setError] = createSignal<string | null>(null);
async function send(input: string) {
setError(null); // Clear previous error signal
try {
await orchestrator.run(myAgent, input);
} catch (err) {
if (isGuardrailError(err)) setError(err.userMessage);
}
}
return (
<div>
<p>Status: {agent()?.status}</p>
{error() && <p class="error">{error()}</p>}
</div>
);
}
Lit
import { LitElement, html } from 'lit';
import { createAgentOrchestrator, isGuardrailError } from '@directive-run/ai';
import { FactController } from '@directive-run/lit';
class GuardedChat extends LitElement {
private orchestrator = createAgentOrchestrator({ runner, autoApproveToolCalls: true });
private agent = new FactController(this, this.orchestrator.system, '__agent');
private error: string | null = null;
disconnectedCallback() {
super.disconnectedCallback();
this.orchestrator.dispose();
}
async send(input: string) {
this.error = null;
try {
await this.orchestrator.run(myAgent, input);
} catch (err) {
// Show the user-safe message and trigger a re-render
if (isGuardrailError(err)) {
this.error = err.userMessage;
this.requestUpdate();
}
}
}
render() {
return html`
<p>Status: ${this.agent.value?.status}</p>
${this.error ? html`<p class="error">${this.error}</p>` : ''}
`;
}
}
Next Steps
- Agent Orchestrator – Full orchestrator API
- Streaming – Real-time response processing
- PII Detection – Privacy compliance

