Guides & Examples
Learn how to configure guardrails, integrate with popular frameworks, and optimize AI safety mechanisms.
Guardrail Configuration Guides
Toxicity Detection Guardrail
Configure multi-model toxicity detection for comprehensive content safety.
from perfecxion_g_rails.guardrails import ToxicityGuardrail

# Basic toxicity guardrail
toxicity_guardrail = ToxicityGuardrail(
    threshold=0.7,
    action="block"
)

# Advanced configuration with multiple models
toxicity_guardrail = ToxicityGuardrail(
    models=[
        {
            "name": "perspective",
            "weight": 0.6,
            "categories": ["TOXICITY", "SEVERE_TOXICITY", "THREAT", "INSULT"]
        },
        {
            "name": "detoxify",
            "weight": 0.4,
            "categories": ["toxic", "obscene", "threat", "insult"]
        }
    ],
    threshold=0.7,
    action="block",
    custom_responses={
        "SEVERE_TOXICITY": "I cannot process this request due to inappropriate content.",
        "THREAT": "Threatening language is not allowed in our system."
    },
    log_violations=True,
    notify_moderators=True
)

# Category-specific thresholds
toxicity_guardrail.set_category_thresholds({
    "hate_speech": 0.5,   # More strict for hate speech
    "profanity": 0.8,     # More lenient for mild profanity
    "threat": 0.6,        # Moderate for threats
    "insult": 0.75        # Standard for insults
})
Best Practice: Use multiple toxicity models with weighted scoring for more accurate detection and reduced false positives.
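To make the weighting concrete, here is a minimal worked sketch of how per-model scores could be combined; the aggregation shown (a weighted average) is an illustration rather than the documented G-Rails behavior, and the scores are made up.

# Hypothetical example: combining per-model toxicity scores by weight.
# Model names and weights mirror the configuration above; the scores are illustrative.
model_scores = {"perspective": 0.82, "detoxify": 0.55}
model_weights = {"perspective": 0.6, "detoxify": 0.4}

combined = sum(model_scores[m] * model_weights[m] for m in model_scores)
print(f"Combined toxicity score: {combined:.2f}")  # 0.71 with these numbers

# With threshold=0.7 this input would be blocked, even though the detoxify
# score alone (0.55) fell below the threshold.
if combined >= 0.7:
    print("Action: block")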
Bias Detection and Mitigation
Detect and mitigate various forms of bias in AI outputs.
from perfecxion_g_rails.guardrails import BiasGuardrail

# Configure bias detection
bias_guardrail = BiasGuardrail(
    protected_attributes=[
        "gender", "race", "ethnicity", "religion",
        "age", "disability", "sexual_orientation"
    ],
    bias_types={
        "stereotyping": {
            "enabled": True,
            "threshold": 0.6,
            "examples": ["women are bad at math", "men don't cry"]
        },
        "discrimination": {
            "enabled": True,
            "threshold": 0.5,
            "examples": ["only hire young people", "avoid certain neighborhoods"]
        },
        "representation": {
            "enabled": True,
            "check_diversity": True,
            "min_representation": 0.2
        }
    },
    mitigation_strategies={
        "rebalancing": True,
        "counterfactual_augmentation": True,
        "neutral_rephrasing": True
    },
    action="modify",
    explain_modifications=True
)

# Test for bias in responses
def check_response_bias(response):
    result = bias_guardrail.check(response)

    if result.bias_detected:
        print(f"Bias detected: {result.bias_types}")
        print(f"Confidence: {result.confidence}")
        print(f"Suggested modification: {result.modified_text}")

        # Apply mitigation
        if result.action == "modify":
            return result.modified_text
        elif result.action == "warn":
            return f"⚠️ Potential bias detected: {response}"
        else:  # block
            return "Response blocked due to bias concerns."

    return response
PII Detection and Redaction
Protect sensitive personal information with intelligent detection and redaction.
from perfecxion_g_rails.guardrails import PIIGuardrail

# Configure PII detection
pii_guardrail = PIIGuardrail(
    detect_types={
        "email": {
            "enabled": True,
            "redact_domain": False,  # Keep domain for context
            "replacement": "[EMAIL]"
        },
        "phone": {
            "enabled": True,
            "formats": ["US", "UK", "EU"],
            "replacement": "[PHONE]"
        },
        "ssn": {
            "enabled": True,
            "partial_redact": True,  # Show last 4 digits
            "replacement": "XXX-XX-[LAST4]"
        },
        "credit_card": {
            "enabled": True,
            "replacement": "[CARD]"
        },
        "address": {
            "enabled": True,
            "keep_city": True,  # Keep city for context
            "replacement": "[ADDRESS]"
        },
        "name": {
            "enabled": True,
            "use_initials": True,  # Replace with initials
            "common_names_only": False
        }
    },
    custom_patterns=[
        {
            "name": "employee_id",
            "pattern": r"EMP\d{6}",
            "replacement": "[EMPLOYEE_ID]"
        },
        {
            "name": "medical_record",
            "pattern": r"MRN-\d{8}",
            "replacement": "[MEDICAL_RECORD]"
        }
    ],
    context_aware=True,  # Consider context when detecting
    confidence_threshold=0.8,
    action="redact"
)

# Advanced PII handling with context preservation
def process_with_pii_protection(text, preserve_context=True):
    result = pii_guardrail.check(text)

    if preserve_context:
        # Keep some information for context
        return result.redacted_with_context
    else:
        # Full redaction
        return result.fully_redacted
Hallucination Detection
Detect and prevent AI hallucinations with fact-checking and source verification.
from perfecxion_g_rails.guardrails import HallucinationGuardrail

# Configure hallucination detection
hallucination_guardrail = HallucinationGuardrail(
    fact_sources=[
        {
            "name": "company_kb",
            "type": "vector_db",
            "endpoint": "https://kb.company.com/api",
            "weight": 0.8
        },
        {
            "name": "wikipedia",
            "type": "api",
            "endpoint": "https://api.wikipedia.org",
            "weight": 0.5
        }
    ],
    verification_methods={
        "entity_verification": True,
        "date_verification": True,
        "numerical_verification": True,
        "citation_checking": True
    },
    confidence_threshold=0.8,
    require_sources=True,
    max_unverified_claims=2,
    action="flag"
)

# Check for hallucinations with source attribution
def verify_ai_response(question, answer):
    result = hallucination_guardrail.check(
        input=question,
        output=answer
    )

    if result.hallucination_detected:
        print("Potential hallucination detected!")
        print(f"Unverified claims: {result.unverified_claims}")
        print(f"Confidence: {result.confidence}")

        if result.suggested_correction:
            return {
                "original": answer,
                "corrected": result.suggested_correction,
                "sources": result.verified_sources,
                "warning": "Some claims could not be verified"
            }

    return {
        "response": answer,
        "sources": result.verified_sources,
        "confidence": result.confidence
    }
Framework Integration Examples
LangChain Integration
Seamlessly integrate G-Rails with LangChain applications.
from langchain.chains import LLMChain
from langchain.callbacks import BaseCallbackHandler
from perfecxion_g_rails import GRailsClient

# Create guardrail callback handler
class GRailsHandler(BaseCallbackHandler):
    def __init__(self, guardrails):
        self.guardrails = guardrails

    def on_llm_start(self, serialized, prompts, **kwargs):
        # Check inputs before sending to LLM
        for prompt in prompts:
            result = self.guardrails.check_input(prompt)
            if result.action == "block":
                raise ValueError(f"Input blocked: {result.reason}")

    def on_llm_end(self, response, **kwargs):
        # Check outputs before returning
        for generation in response.generations:
            result = self.guardrails.check_output(
                output=generation[0].text
            )
            if result.action == "block":
                generation[0].text = "[Content blocked by guardrails]"
            elif result.action == "modify":
                generation[0].text = result.modified_output

# Use with LangChain
guardrails = GRailsClient(api_key="your-key").create_guardrail_set(...)
handler = GRailsHandler(guardrails)

chain = LLMChain(
    llm=llm,
    prompt=prompt,
    callbacks=[handler]
)

# Alternative: Use the wrapper
from perfecxion_g_rails.integrations import LangChainWrapper

safe_chain = LangChainWrapper(
    chain=chain,
    guardrails=guardrails,
    stream_safe=True  # Enable for streaming
)
OpenAI API Integration
Add guardrails to OpenAI API calls.
import openai
from perfecxion_g_rails import OpenAIGuardrail

# Wrap OpenAI client
safe_openai = OpenAIGuardrail(
    client=openai,
    guardrails=guardrails,
    log_all_calls=True
)

# Use as normal - guardrails applied automatically
response = safe_openai.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "Tell me about AI safety"}
    ]
)

# Advanced: Custom handling for violations
@safe_openai.on_violation
def handle_violation(violation):
    if violation.severity == "high":
        # Log to security system
        security_logger.error(f"High severity violation: {violation}")
        # Return safe response
        return "I cannot process this request due to safety concerns."
    else:
        # Return modified response
        return violation.safe_alternative

# Streaming with guardrails
stream = safe_openai.chat.completions.create(
    model="gpt-4",
    messages=messages,
    stream=True
)

for chunk in stream:
    # Each chunk is checked before yielding
    print(chunk.choices[0].delta.content, end="")
Hugging Face Transformers Integration
Protect Hugging Face model outputs with guardrails.
from transformers import pipeline
from perfecxion_g_rails import HuggingFaceGuardrail

# Create protected pipeline
generator = pipeline("text-generation", model="gpt2")
safe_generator = HuggingFaceGuardrail(
    pipeline=generator,
    guardrails=guardrails
)

# Generate with protection
result = safe_generator(
    "Write a story about AI",
    max_length=100,
    num_return_sequences=3
)

# Batch processing with guardrails
texts = ["Question 1", "Question 2", "Question 3"]
safe_results = safe_generator(texts, batch_size=2)

# Custom model with guardrails
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("model-name")
tokenizer = AutoTokenizer.from_pretrained("model-name")

@guardrails.protect
def generate_text(prompt, **kwargs):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, **kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
Performance Optimization
1. Intelligent Caching
Reduce latency with smart caching strategies.
# Configure caching for guardrails
from perfecxion_g_rails import CacheConfig

cache_config = CacheConfig(
    enabled=True,
    backend="redis",  # or "memory", "dynamodb"
    ttl=3600,         # 1 hour
    max_size=10000,

    # Smart caching rules
    cache_rules={
        "toxicity": {
            "cache_similar": True,  # Cache similar inputs
            "similarity_threshold": 0.95,
            "ttl": 7200  # Longer TTL for toxicity
        },
        "pii": {
            "cache_exact_only": True,  # Only exact matches
            "ttl": 1800  # Shorter TTL for PII
        }
    }
)

grails = GRailsClient(
    api_key="your-key",
    cache_config=cache_config
)

# Preload common violations
grails.preload_cache([
    "common profanity list",
    "known spam patterns",
    "frequent policy violations"
])
2. Batch Processing
Process multiple requests efficiently.
# Batch processing for high throughput
batch_processor = grails.create_batch_processor(
    max_batch_size=100,
    max_wait_ms=50,  # Max time to wait for batch
    parallel_processing=True,
    num_workers=4
)

# Process many inputs efficiently
inputs = ["text1", "text2", ..., "text1000"]
results = batch_processor.check_batch(inputs)

# Async batch processing
async def process_stream(stream):
    async for batch in batch_processor.process_stream(stream):
        for result in batch:
            if result.passed:
                await forward_to_model(result.input)
            else:
                await handle_violation(result)
3. GPU Acceleration
Leverage GPU for faster processing.
# Enable GPU acceleration
grails = GRailsClient(
    api_key="your-key",
    device="cuda:0",   # Use first GPU
    precision="fp16",  # Mixed precision for speed
    batch_size=64      # Larger batches for GPU
)

# Monitor GPU usage
gpu_stats = grails.get_gpu_stats()
print(f"GPU Memory: {gpu_stats.memory_used}/{gpu_stats.memory_total}")
print(f"GPU Utilization: {gpu_stats.utilization}%")
Monitoring and Analytics
Real-Time Monitoring Dashboard
# Set up monitoring dashboard
from perfecxion_g_rails import Dashboard

dashboard = Dashboard(grails)

# Configure metrics
dashboard.add_metrics([
    {
        "name": "violation_rate",
        "type": "percentage",
        "window": "5m",
        "alert_threshold": 0.1
    },
    {
        "name": "latency_p95",
        "type": "histogram",
        "window": "1m",
        "alert_threshold": 100  # ms
    },
    {
        "name": "guardrail_effectiveness",
        "type": "custom",
        "calculation": lambda m: m.blocks / (m.blocks + m.passes)
    }
])

# Export to monitoring systems
dashboard.export_prometheus(port=9090)
dashboard.export_datadog(api_key="dd-key")
dashboard.export_cloudwatch(region="us-east-1")

# Custom alerts
@dashboard.alert(
    condition="violation_rate > 0.15",
    channels=["email", "slack"]
)
def high_violation_alert(metrics):
    return f"High violation rate: {metrics.violation_rate:.2%}"
A/B Testing Guardrails
# A/B test different guardrail configurations
ab_test = grails.create_ab_test(
    name="toxicity_threshold_optimization",
    hypothesis="Lower threshold reduces harmful content without impacting UX",
    variants={
        "control": {
            "toxicity_threshold": 0.7,
            "action": "block"
        },
        "treatment": {
            "toxicity_threshold": 0.6,
            "action": "warn"
        }
    },
    metrics=[
        "user_satisfaction",
        "violation_rate",
        "false_positive_rate",
        "response_time"
    ],
    sample_size=10000,
    duration_days=7
)

# Monitor test progress
status = ab_test.get_status()
print(f"Progress: {status.samples_collected}/{status.sample_size}")
print(f"Current winner: {status.leading_variant}")
print(f"Statistical significance: {status.p_value}")

# Auto-deploy winner
ab_test.auto_deploy_winner(
    confidence_level=0.95,
    min_improvement=0.05
)
Common Implementation Scenarios
Customer Support Chatbot
Comprehensive guardrails for customer-facing AI.
# Customer support guardrail configuration
support_guardrails = grails.create_guardrail_set(
    name="customer_support",
    guardrails=[
        ToxicityGuardrail(
            threshold=0.3,  # Very strict for customer service
            action="block",
            custom_response="I apologize, but I cannot process that request."
        ),
        PIIGuardrail(
            action="redact",
            store_safely=True,  # Store PII securely for support tickets
            compliance="GDPR"
        ),
        BiasGuardrail(
            action="modify",
            ensure_inclusivity=True
        ),
        CustomGuardrail(
            name="company_policy",
            rules={
                "no_legal_advice": {
                    "pattern": r"legal|lawsuit|sue",
                    "response": "For legal matters, please consult our legal department."
                },
                "no_medical_advice": {
                    "pattern": r"medical|health|diagnosis",
                    "response": "For health concerns, please consult a medical professional."
                }
            }
        )
    ],
    fallback_response="I'm sorry, I cannot help with that request. Please contact human support."
)
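One way this set might be applied to a support conversation is sketched below. It assumes a check() method that returns action and modified_text fields, following the patterns in the earlier examples; answer_support_message and generate_reply are hypothetical helpers, not part of the documented API.

# Sketch: screen both the inbound message and the generated reply with the set above.
def answer_support_message(message, generate_reply):
    # Screen the customer's message before it reaches the model
    inbound = support_guardrails.check(message)
    if inbound.action == "block":
        return "I'm sorry, I cannot help with that request. Please contact human support."

    # Generate a reply, then screen it before it reaches the customer
    reply = generate_reply(message)
    outbound = support_guardrails.check(reply)
    if outbound.action == "modify":
        return outbound.modified_text
    if outbound.action == "block":
        return "I'm sorry, I cannot help with that request. Please contact human support."
    return reply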
Content Generation Platform
Ensure safe and appropriate content generation.
# Content generation guardrails
content_guardrails = grails.create_guardrail_set(
    name="content_generation",
    guardrails=[
        CopyrightGuardrail(
            check_plagiarism=True,
            similarity_threshold=0.8,
            sources=["web", "books", "academic"]
        ),
        BrandSafetyGuardrail(
            inappropriate_topics=["violence", "adult", "gambling"],
            competitor_mentions="block",
            brand_guidelines_url="https://company.com/brand"
        ),
        FactualityGuardrail(
            fact_check=True,
            require_citations=True,
            confidence_threshold=0.85
        ),
        SEOGuardrail(
            check_keyword_stuffing=True,
            optimal_keyword_density=0.02,
            readability_score_min=60
        )
    ]
)

# Use with content generation
def generate_article(topic, keywords):
    prompt = f"Write an article about {topic} including {keywords}"

    # Generate with guardrails
    result = safe_generator(prompt)

    # Additional post-processing
    if result.seo_score < 0.7:
        result = optimize_for_seo(result)

    return {
        "content": result.text,
        "safety_score": result.safety_score,
        "seo_score": result.seo_score,
        "citations": result.citations
    }
Common Issues and Solutions
High False Positive Rate
Adjust detection thresholds, combine multiple models with weighted scoring, and enable context-aware detection.
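A possible way to pick a threshold is to measure false positives on a small labeled sample before deploying. The sketch below is illustrative: false_positive_rate and labeled_samples (a list of (text, is_toxic) pairs you supply) are hypothetical, while the ToxicityGuardrail fields mirror the configuration shown earlier.

# Hypothetical tuning loop: compare false positive rates across candidate thresholds.
def false_positive_rate(guardrail, labeled_samples):
    benign = [text for text, is_toxic in labeled_samples if not is_toxic]
    flocked = [text for text in benign if guardrail.check(text).action == "block"]
    return len(flocked) / max(len(benign), 1)

for threshold in (0.6, 0.7, 0.8):
    candidate = ToxicityGuardrail(threshold=threshold, action="block")
    rate = false_positive_rate(candidate, labeled_samples)
    print(f"threshold={threshold}: false positive rate {rate:.1%}")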
Performance Degradation
Enable caching, use batch processing, and consider GPU acceleration (see Performance Optimization above).
Inconsistent Results
Pin guardrail model versions and use deterministic inference settings so repeated checks on the same input return the same result.
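A sketch of what pinning could look like follows; the model_versions, deterministic, and seed parameters are assumptions for illustration, not documented G-Rails options. The point is to pin anything that can drift between calls.

# Hypothetical: pin detection model versions and seed so repeated checks agree.
# Parameter names are illustrative rather than a documented API.
grails = GRailsClient(
    api_key="your-key",
    model_versions={
        "perspective": "v1.2.0",  # pin instead of "latest"
        "detoxify": "0.5.1"
    },
    deterministic=True,  # avoid sampling/non-deterministic behavior where possible
    seed=42
)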
Integration Conflicts
Check version compatibility between G-Rails and your framework, and use the provided integration wrappers rather than wrapping clients manually.