localGPT/system_health_check.py
PromptEngineer 2421514f3e Integrate multimodal RAG codebase
- Replaced existing localGPT codebase with multimodal RAG implementation
- Includes full-stack application with backend, frontend, and RAG system
- Added Docker support and comprehensive documentation
- Enhanced with multimodal capabilities for document processing
- Preserved git history for localGPT while integrating new functionality
2025-07-11 00:17:15 -07:00

190 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
System Health Check for RAG System
Quick validation of configurations, models, and data access.
"""
import sys
import traceback
from pathlib import Path
def print_status(message, success=None):
    """Print a status line prefixed with an emoji that reflects the outcome.

    Args:
        message: Text to display.
        success: ``True`` for a passed check, ``False`` for a failed check,
            anything else (default ``None``) for an informational line.
    """
    # The identity tests are deliberate: only the literal booleans select the
    # pass/fail markers, every other value falls through to the info marker.
    if success is True:
        print(f"✅ {message}")
    elif success is False:
        print(f"❌ {message}")
    else:
        print(f"🔍 {message}")
def check_imports():
    """Verify that the entry points of ``rag_system.main`` can be imported.

    Returns:
        bool: ``True`` if the import succeeds, ``False`` if any exception is
        raised (the error text is reported through ``print_status``).
    """
    print_status("Testing basic imports...")
    imported = False
    try:
        # The names are imported purely to prove they exist; none is used here.
        from rag_system.main import get_agent, EXTERNAL_MODELS, OLLAMA_CONFIG, PIPELINE_CONFIGS  # noqa: F401
        print_status("Basic imports successful", True)
        imported = True
    except Exception as err:
        print_status(f"Import failed: {err}", False)
    return imported
def check_configurations():
    """Print the RAG configuration dictionaries and comment on the embedding model.

    Returns:
        bool: ``True`` when the configuration could be loaded and inspected,
        ``False`` when any exception is raised along the way.
    """
    print_status("Checking configurations...")
    passed = False
    try:
        from rag_system.main import EXTERNAL_MODELS, OLLAMA_CONFIG, PIPELINE_CONFIGS

        # Dump each configuration object verbatim for quick visual inspection.
        for label, config in (
            ("External Models", EXTERNAL_MODELS),
            ("Ollama Config", OLLAMA_CONFIG),
            ("Pipeline Configs", PIPELINE_CONFIGS),
        ):
            print(f"📊 {label}: {config}")

        # Check for common model dimension issues: pick the note (and status
        # flag) that matches the configured embedding model name.
        model_name = EXTERNAL_MODELS.get("embedding_model", "Unknown")
        if "bge-small" in model_name:
            note, flag = f"Embedding model: {model_name} (384 dims)", True
        elif "Qwen3-Embedding" in model_name:
            note, flag = f"Embedding model: {model_name} (1024 dims) - Check data compatibility!", None
        else:
            note, flag = f"Embedding model: {model_name} - Verify dimensions!", None
        print_status(note, flag)

        print_status("Configuration check completed", True)
        passed = True
    except Exception as err:
        print_status(f"Configuration check failed: {err}", False)
    return passed
def check_agent_initialization():
    """Build the ``'default'`` agent via ``rag_system.main.get_agent``.

    Returns:
        The agent object on success, or ``None`` if importing or constructing
        it raised; in that case the traceback is printed as well.
    """
    print_status("Testing agent initialization...")
    try:
        from rag_system.main import get_agent

        created = get_agent('default')
        print_status("Agent initialization successful", True)
    except Exception as err:
        print_status(f"Agent initialization failed: {err}", False)
        traceback.print_exc()
        created = None
    return created
def check_embedding_model(agent):
    """Embed a single test string and report the model name and vector size.

    Args:
        agent: Object exposing ``retrieval_pipeline._get_text_embedder()``.

    Returns:
        bool: ``True`` when an embedding was produced and inspected,
        ``False`` when any step raised.
    """
    print_status("Testing embedding model...")
    try:
        text_embedder = agent.retrieval_pipeline._get_text_embedder()
        vectors = text_embedder.create_embeddings(['test'])
        name = getattr(text_embedder.model, 'name_or_path', 'Unknown')
        # The second axis of the result's shape is read as the vector size.
        width = vectors.shape[1]

        print_status(f"Embedding model: {name}", True)
        print_status(f"Vector dimension: {width}", True)

        # Warn about dimension compatibility; other sizes get no extra note.
        known_sizes = (
            (384, "Using 384-dim embeddings (bge-small compatible)", True),
            (1024, "Using 1024-dim embeddings (Qwen3 compatible) - Ensure data compatibility!", None),
        )
        for size, note, flag in known_sizes:
            if width == size:
                print_status(note, flag)
                break
        return True
    except Exception as err:
        print_status(f"Embedding model test failed: {err}", False)
        return False
def check_database_access():
    """Connect to the LanceDB directory ``./lancedb`` and list its tables.

    At most the first five table names are printed, followed by a count of
    the remainder.

    Returns:
        bool: ``True`` when the connection and listing succeed (even with no
        tables), ``False`` when any exception is raised.
    """
    print_status("Testing database access...")
    try:
        import lancedb

        connection = lancedb.connect('./lancedb')
        names = connection.table_names()
        print_status(f"LanceDB connected - {len(names)} tables available", True)

        if not names:
            print_status("No tables found - may need to index documents first", None)
        else:
            print("📋 Available tables:")
            # Show first 5 tables
            for name in names[:5]:
                print(f" - {name}")
            if len(names) > 5:
                print(f" ... and {len(names) - 5} more")
        return True
    except Exception as err:
        print_status(f"Database access failed: {err}", False)
        return False
def check_sample_query(agent):
    """Run one sample question against the first LanceDB table, if any exists.

    Args:
        agent: Object exposing ``run(query, table_name=...)``.

    Returns:
        bool: ``True`` when the query ran (or there was no table to query),
        ``False`` when any exception is raised.
    """
    print_status("Testing sample query...")
    try:
        import lancedb

        connection = lancedb.connect('./lancedb')
        available = connection.table_names()
        if not available:
            print_status("No tables available for query test", None)
            return True

        # Use first available table
        target = available[0]
        print_status(f"Testing query on table: {target}")

        response = agent.run('what is this document about?', table_name=target)
        if not (response and 'answer' in response):
            # A missing answer is reported but does not fail the check.
            print_status("Query returned empty result", None)
        else:
            print_status("Sample query successful", True)
            preview = response['answer'][:100]
            print(f"📝 Answer preview: {preview}...")
            sources = response.get('source_documents', [])
            print(f"📊 Found {len(sources)} source documents")
        return True
    except Exception as err:
        print_status(f"Sample query failed: {err}", False)
        return False
def main():
    """Run the complete system health check and summarise the outcome.

    Six checks are counted: imports, configurations, database access, agent
    initialization, embedding model and sample query. The last two run only
    when an agent could be created, and count as not passed otherwise.

    Returns:
        int: ``0`` when at most one check did not pass, ``1`` otherwise
        (intended for use as the process exit status).
    """
    print("🏥 RAG System Health Check")
    print("=" * 50)

    checks_passed = 0
    total_checks = 6

    # Basic checks
    for basic_check in (check_imports, check_configurations, check_database_access):
        if basic_check():
            checks_passed += 1

    # Agent-dependent checks
    agent = check_agent_initialization()
    # check_agent_initialization() signals failure with None, so test for that
    # sentinel explicitly: a valid agent object that happens to evaluate falsy
    # (e.g. one defining __len__ or __bool__) must still count as initialized.
    if agent is not None:
        checks_passed += 1
        if check_embedding_model(agent):
            checks_passed += 1
        if check_sample_query(agent):
            checks_passed += 1

    # Summary
    print("\n" + "=" * 50)
    print(f"🏥 Health Check Complete: {checks_passed}/{total_checks} checks passed")

    if checks_passed == total_checks:
        print_status("System is healthy! 🎉", True)
        return 0
    elif checks_passed >= total_checks - 1:
        # A single failed check is tolerated and still reported as success.
        print_status("System mostly healthy with minor issues", None)
        return 0
    else:
        print_status("System has significant issues that need attention", False)
        return 1
# Script entry point: run the health check and use its result as the process
# exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)