Guides & Examples
Learn how to configure guardrails, integrate with popular frameworks, and optimize AI safety mechanisms.
Guardrail Configuration Guides
Toxicity Detection Guardrail
Configure multi-model toxicity detection for comprehensive content safety.
from perfecxion_g_rails.guardrails import ToxicityGuardrail

# Basic toxicity guardrail
toxicity_guardrail = ToxicityGuardrail(
    threshold=0.7,
    action="block"
)

# Advanced configuration with multiple models
toxicity_guardrail = ToxicityGuardrail(
    models=[
        {
            "name": "perspective",
            "weight": 0.6,
            "categories": ["TOXICITY", "SEVERE_TOXICITY", "THREAT", "INSULT"]
        },
        {
            "name": "detoxify",
            "weight": 0.4,
            "categories": ["toxic", "obscene", "threat", "insult"]
        }
    ],
    threshold=0.7,
    action="block",
    custom_responses={
        "SEVERE_TOXICITY": "I cannot process this request due to inappropriate content.",
        "THREAT": "Threatening language is not allowed in our system."
    },
    log_violations=True,
    notify_moderators=True
)

# Category-specific thresholds
toxicity_guardrail.set_category_thresholds({
    "hate_speech": 0.5,   # More strict for hate speech
    "profanity": 0.8,     # More lenient for mild profanity
    "threat": 0.6,        # Moderate for threats
    "insult": 0.75        # Standard for insults
})
Best Practice: Use multiple toxicity models with weighted scoring for more accurate detection and reduced false positives.
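To make the weighting concrete, here is a minimal worked sketch of how per-model scores could be combined; the aggregation shown (a weighted average) is an illustration rather than the documented G-Rails behavior, and the scores are made up.

# Hypothetical example: combining per-model toxicity scores by weight.
# Model names and weights mirror the configuration above; the scores are illustrative.
model_scores = {"perspective": 0.82, "detoxify": 0.55}
model_weights = {"perspective": 0.6, "detoxify": 0.4}

combined = sum(model_scores[m] * model_weights[m] for m in model_scores)
print(f"Combined toxicity score: {combined:.2f}")  # 0.71 with these numbers

# With threshold=0.7 this input would be blocked, even though the detoxify
# score alone (0.55) fell below the threshold.
if combined >= 0.7:
    print("Action: block")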
Bias Detection and Mitigation
Detect and mitigate various forms of bias in AI outputs.
from perfecxion_g_rails.guardrails import BiasGuardrail

# Configure bias detection
bias_guardrail = BiasGuardrail(
    protected_attributes=[
        "gender", "race", "ethnicity", "religion",
        "age", "disability", "sexual_orientation"
    ],
    bias_types={
        "stereotyping": {
            "enabled": True,
            "threshold": 0.6,
            "examples": ["women are bad at math", "men don't cry"]
        },
        "discrimination": {
            "enabled": True,
            "threshold": 0.5,
            "examples": ["only hire young people", "avoid certain neighborhoods"]
        },
        "representation": {
            "enabled": True,
            "check_diversity": True,
            "min_representation": 0.2
        }
    },
    mitigation_strategies={
        "rebalancing": True,
        "counterfactual_augmentation": True,
        "neutral_rephrasing": True
    },
    action="modify",
    explain_modifications=True
)

# Test for bias in responses
def check_response_bias(response):
    result = bias_guardrail.check(response)

    if result.bias_detected:
        print(f"Bias detected: {result.bias_types}")
        print(f"Confidence: {result.confidence}")
        print(f"Suggested modification: {result.modified_text}")

        # Apply mitigation
        if result.action == "modify":
            return result.modified_text
        elif result.action == "warn":
            return f"⚠️ Potential bias detected: {response}"
        else:  # block
            return "Response blocked due to bias concerns."

    return response
PII Detection and Redaction
Protect sensitive personal information with intelligent detection and redaction.
from perfecxion_g_rails.guardrails import PIIGuardrail

# Configure PII detection
pii_guardrail = PIIGuardrail(
    detect_types={
        "email": {
            "enabled": True,
            "redact_domain": False,  # Keep domain for context
            "replacement": "[EMAIL]"
        },
        "phone": {
            "enabled": True,
            "formats": ["US", "UK", "EU"],
            "replacement": "[PHONE]"
        },
        "ssn": {
            "enabled": True,
            "partial_redact": True,  # Show last 4 digits
            "replacement": "XXX-XX-[LAST4]"
        },
        "credit_card": {
            "enabled": True,
            "replacement": "[CARD]"
        },
        "address": {
            "enabled": True,
            "keep_city": True,  # Keep city for context
            "replacement": "[ADDRESS]"
        },
        "name": {
            "enabled": True,
            "use_initials": True,  # Replace with initials
            "common_names_only": False
        }
    },
    custom_patterns=[
        {
            "name": "employee_id",
            "pattern": r"EMP\d{6}",
            "replacement": "[EMPLOYEE_ID]"
        },
        {
            "name": "medical_record",
            "pattern": r"MRN-\d{8}",
            "replacement": "[MEDICAL_RECORD]"
        }
    ],
    context_aware=True,  # Consider context when detecting
    confidence_threshold=0.8,
    action="redact"
)

# Advanced PII handling with context preservation
def process_with_pii_protection(text, preserve_context=True):
    result = pii_guardrail.check(text)

    if preserve_context:
        # Keep some information for context
        return result.redacted_with_context
    else:
        # Full redaction
        return result.fully_redacted
Hallucination Detection
Detect and prevent AI hallucinations with fact-checking and source verification.
from perfecxion_g_rails.guardrails import HallucinationGuardrail

# Configure hallucination detection
hallucination_guardrail = HallucinationGuardrail(
    fact_sources=[
        {
            "name": "company_kb",
            "type": "vector_db",
            "endpoint": "https://kb.company.com/api",
            "weight": 0.8
        },
        {
            "name": "wikipedia",
            "type": "api",
            "endpoint": "https://api.wikipedia.org",
            "weight": 0.5
        }
    ],
    verification_methods={
        "entity_verification": True,
        "date_verification": True,
        "numerical_verification": True,
        "citation_checking": True
    },
    confidence_threshold=0.8,
    require_sources=True,
    max_unverified_claims=2,
    action="flag"
)

# Check for hallucinations with source attribution
def verify_ai_response(question, answer):
    result = hallucination_guardrail.check(
        input=question,
        output=answer
    )

    if result.hallucination_detected:
        print("Potential hallucination detected!")
        print(f"Unverified claims: {result.unverified_claims}")
        print(f"Confidence: {result.confidence}")

        if result.suggested_correction:
            return {
                "original": answer,
                "corrected": result.suggested_correction,
                "sources": result.verified_sources,
                "warning": "Some claims could not be verified"
            }

    return {
        "response": answer,
        "sources": result.verified_sources,
        "confidence": result.confidence
    }
Framework Integration Examples
LangChain Integration
Seamlessly integrate G-Rails with LangChain applications.
from langchain.chains import LLMChain
from langchain.callbacks import BaseCallbackHandler
from perfecxion_g_rails import GRailsClient

# Create guardrail callback handler
class GRailsHandler(BaseCallbackHandler):
    def __init__(self, guardrails):
        self.guardrails = guardrails

    def on_llm_start(self, serialized, prompts, **kwargs):
        # Check inputs before sending to LLM
        for prompt in prompts:
            result = self.guardrails.check_input(prompt)
            if result.action == "block":
                raise ValueError(f"Input blocked: {result.reason}")

    def on_llm_end(self, response, **kwargs):
        # Check outputs before returning
        for generation in response.generations:
            result = self.guardrails.check_output(
                output=generation[0].text
            )
            if result.action == "block":
                generation[0].text = "[Content blocked by guardrails]"
            elif result.action == "modify":
                generation[0].text = result.modified_output

# Use with LangChain
guardrails = GRailsClient(api_key="your-key").create_guardrail_set(...)
handler = GRailsHandler(guardrails)

chain = LLMChain(
    llm=llm,
    prompt=prompt,
    callbacks=[handler]
)

# Alternative: Use the wrapper
from perfecxion_g_rails.integrations import LangChainWrapper

safe_chain = LangChainWrapper(
    chain=chain,
    guardrails=guardrails,
    stream_safe=True  # Enable for streaming
)
OpenAI API Integration
Add guardrails to OpenAI API calls.
import openai
from perfecxion_g_rails import OpenAIGuardrail

# Wrap OpenAI client
safe_openai = OpenAIGuardrail(
    client=openai,
    guardrails=guardrails,
    log_all_calls=True
)

# Use as normal - guardrails applied automatically
response = safe_openai.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "Tell me about AI safety"}
    ]
)

# Advanced: Custom handling for violations
@safe_openai.on_violation
def handle_violation(violation):
    if violation.severity == "high":
        # Log to security system
        security_logger.error(f"High severity violation: {violation}")
        # Return safe response
        return "I cannot process this request due to safety concerns."
    else:
        # Return modified response
        return violation.safe_alternative

# Streaming with guardrails
stream = safe_openai.chat.completions.create(
    model="gpt-4",
    messages=messages,
    stream=True
)

for chunk in stream:
    # Each chunk is checked before yielding
    print(chunk.choices[0].delta.content, end="")
Hugging Face Transformers Integration
Protect Hugging Face model outputs with guardrails.
from transformers import pipeline
from perfecxion_g_rails import HuggingFaceGuardrail

# Create protected pipeline
generator = pipeline("text-generation", model="gpt2")
safe_generator = HuggingFaceGuardrail(
    pipeline=generator,
    guardrails=guardrails
)

# Generate with protection
result = safe_generator(
    "Write a story about AI",
    max_length=100,
    num_return_sequences=3
)

# Batch processing with guardrails
texts = ["Question 1", "Question 2", "Question 3"]
safe_results = safe_generator(texts, batch_size=2)

# Custom model with guardrails
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("model-name")
tokenizer = AutoTokenizer.from_pretrained("model-name")

@guardrails.protect
def generate_text(prompt, **kwargs):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, **kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
Performance Optimization
1. Intelligent Caching
Reduce latency with smart caching strategies.
# Configure caching for guardrails
from perfecxion_g_rails import CacheConfig

cache_config = CacheConfig(
    enabled=True,
    backend="redis",  # or "memory", "dynamodb"
    ttl=3600,         # 1 hour
    max_size=10000,

    # Smart caching rules
    cache_rules={
        "toxicity": {
            "cache_similar": True,  # Cache similar inputs
            "similarity_threshold": 0.95,
            "ttl": 7200  # Longer TTL for toxicity
        },
        "pii": {
            "cache_exact_only": True,  # Only exact matches
            "ttl": 1800  # Shorter TTL for PII
        }
    }
)

grails = GRailsClient(
    api_key="your-key",
    cache_config=cache_config
)

# Preload common violations
grails.preload_cache([
    "common profanity list",
    "known spam patterns",
    "frequent policy violations"
])
2. Batch Processing
Process multiple requests efficiently.
# Batch processing for high throughput
batch_processor = grails.create_batch_processor(
    max_batch_size=100,
    max_wait_ms=50,  # Max time to wait for batch
    parallel_processing=True,
    num_workers=4
)

# Process many inputs efficiently
inputs = ["text1", "text2", ..., "text1000"]
results = batch_processor.check_batch(inputs)

# Async batch processing
async def process_stream(stream):
    async for batch in batch_processor.process_stream(stream):
        for result in batch:
            if result.passed:
                await forward_to_model(result.input)
            else:
                await handle_violation(result)
3. GPU Acceleration
Leverage GPU for faster processing.
# Enable GPU acceleration
grails = GRailsClient(
    api_key="your-key",
    device="cuda:0",   # Use first GPU
    precision="fp16",  # Mixed precision for speed
    batch_size=64      # Larger batches for GPU
)

# Monitor GPU usage
gpu_stats = grails.get_gpu_stats()
print(f"GPU Memory: {gpu_stats.memory_used}/{gpu_stats.memory_total}")
print(f"GPU Utilization: {gpu_stats.utilization}%")
Monitoring and Analytics
Real-Time Monitoring Dashboard
# Set up monitoring dashboard
from perfecxion_g_rails import Dashboard

dashboard = Dashboard(grails)

# Configure metrics
dashboard.add_metrics([
    {
        "name": "violation_rate",
        "type": "percentage",
        "window": "5m",
        "alert_threshold": 0.1
    },
    {
        "name": "latency_p95",
        "type": "histogram",
        "window": "1m",
        "alert_threshold": 100  # ms
    },
    {
        "name": "guardrail_effectiveness",
        "type": "custom",
        "calculation": lambda m: m.blocks / (m.blocks + m.passes)
    }
])

# Export to monitoring systems
dashboard.export_prometheus(port=9090)
dashboard.export_datadog(api_key="dd-key")
dashboard.export_cloudwatch(region="us-east-1")

# Custom alerts
@dashboard.alert(
    condition="violation_rate > 0.15",
    channels=["email", "slack"]
)
def high_violation_alert(metrics):
    return f"High violation rate: {metrics.violation_rate:.2%}"
A/B Testing Guardrails
# A/B test different guardrail configurations
ab_test = grails.create_ab_test(
    name="toxicity_threshold_optimization",
    hypothesis="Lower threshold reduces harmful content without impacting UX",
    variants={
        "control": {
            "toxicity_threshold": 0.7,
            "action": "block"
        },
        "treatment": {
            "toxicity_threshold": 0.6,
            "action": "warn"
        }
    },
    metrics=[
        "user_satisfaction",
        "violation_rate",
        "false_positive_rate",
        "response_time"
    ],
    sample_size=10000,
    duration_days=7
)

# Monitor test progress
status = ab_test.get_status()
print(f"Progress: {status.samples_collected}/{status.sample_size}")
print(f"Current winner: {status.leading_variant}")
print(f"Statistical significance: {status.p_value}")

# Auto-deploy winner
ab_test.auto_deploy_winner(
    confidence_level=0.95,
    min_improvement=0.05
)
Common Implementation Scenarios
Customer Support Chatbot
Comprehensive guardrails for customer-facing AI.
# Customer support guardrail configuration
support_guardrails = grails.create_guardrail_set(
    name="customer_support",
    guardrails=[
        ToxicityGuardrail(
            threshold=0.3,  # Very strict for customer service
            action="block",
            custom_response="I apologize, but I cannot process that request."
        ),
        PIIGuardrail(
            action="redact",
            store_safely=True,  # Store PII securely for support tickets
            compliance="GDPR"
        ),
        BiasGuardrail(
            action="modify",
            ensure_inclusivity=True
        ),
        CustomGuardrail(
            name="company_policy",
            rules={
                "no_legal_advice": {
                    "pattern": r"legal|lawsuit|sue",
                    "response": "For legal matters, please consult our legal department."
                },
                "no_medical_advice": {
                    "pattern": r"medical|health|diagnosis",
                    "response": "For health concerns, please consult a medical professional."
                }
            }
        )
    ],
    fallback_response="I'm sorry, I cannot help with that request. Please contact human support."
)
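One way this set might be applied to a support conversation is sketched below. It assumes a check() method that returns action and modified_text fields, following the patterns in the earlier examples; answer_support_message and generate_reply are hypothetical helpers, not part of the documented API.

# Sketch: screen both the inbound message and the generated reply with the set above.
def answer_support_message(message, generate_reply):
    # Screen the customer's message before it reaches the model
    inbound = support_guardrails.check(message)
    if inbound.action == "block":
        return "I'm sorry, I cannot help with that request. Please contact human support."

    # Generate a reply, then screen it before it reaches the customer
    reply = generate_reply(message)
    outbound = support_guardrails.check(reply)
    if outbound.action == "modify":
        return outbound.modified_text
    if outbound.action == "block":
        return "I'm sorry, I cannot help with that request. Please contact human support."
    return reply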
Content Generation Platform
Ensure safe and appropriate content generation.
# Content generation guardrails
content_guardrails = grails.create_guardrail_set(
    name="content_generation",
    guardrails=[
        CopyrightGuardrail(
            check_plagiarism=True,
            similarity_threshold=0.8,
            sources=["web", "books", "academic"]
        ),
        BrandSafetyGuardrail(
            inappropriate_topics=["violence", "adult", "gambling"],
            competitor_mentions="block",
            brand_guidelines_url="https://company.com/brand"
        ),
        FactualityGuardrail(
            fact_check=True,
            require_citations=True,
            confidence_threshold=0.85
        ),
        SEOGuardrail(
            check_keyword_stuffing=True,
            optimal_keyword_density=0.02,
            readability_score_min=60
        )
    ]
)

# Use with content generation
def generate_article(topic, keywords):
    prompt = f"Write an article about {topic} including {keywords}"

    # Generate with guardrails
    result = safe_generator(prompt)

    # Additional post-processing
    if result.seo_score < 0.7:
        result = optimize_for_seo(result)

    return {
        "content": result.text,
        "safety_score": result.safety_score,
        "seo_score": result.seo_score,
        "citations": result.citations
    }
Common Issues and Solutions
High False Positive Rate
Adjust detection thresholds, combine multiple models with weighted scoring, and enable context-aware detection.
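A possible way to pick a threshold is to measure false positives on a small labeled sample before deploying. The sketch below is illustrative: false_positive_rate and labeled_samples (a list of (text, is_toxic) pairs you supply) are hypothetical, while the ToxicityGuardrail fields mirror the configuration shown earlier.

# Hypothetical tuning loop: compare false positive rates across candidate thresholds.
def false_positive_rate(guardrail, labeled_samples):
    benign = [text for text, is_toxic in labeled_samples if not is_toxic]
    flocked = [text for text in benign if guardrail.check(text).action == "block"]
    return len(flocked) / max(len(benign), 1)

for threshold in (0.6, 0.7, 0.8):
    candidate = ToxicityGuardrail(threshold=threshold, action="block")
    rate = false_positive_rate(candidate, labeled_samples)
    print(f"threshold={threshold}: false positive rate {rate:.1%}")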
Performance Degradation
Enable caching, use batch processing, and consider GPU acceleration (see Performance Optimization above).
Inconsistent Results
Pin guardrail model versions and use deterministic inference settings so repeated checks on the same input return the same result.
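A sketch of what pinning could look like follows; the model_versions, deterministic, and seed parameters are assumptions for illustration, not documented G-Rails options. The point is to pin anything that can drift between calls.

# Hypothetical: pin detection model versions and seed so repeated checks agree.
# Parameter names are illustrative rather than a documented API.
grails = GRailsClient(
    api_key="your-key",
    model_versions={
        "perspective": "v1.2.0",  # pin instead of "latest"
        "detoxify": "0.5.1"
    },
    deterministic=True,  # avoid sampling/non-deterministic behavior where possible
    seed=42
)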
Integration Conflicts
Check version compatibility between G-Rails and your framework, and use the provided integration wrappers rather than wrapping clients manually.