# localGPT/rag_system/main.py
import os
import json
import sys
import argparse
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
# The sys.path manipulation has been removed to prevent import conflicts.
# This script should be run as a module from the project root, e.g.:
# python -m rag_system.main api
from rag_system.agent.loop import Agent
from rag_system.utils.ollama_client import OllamaClient
# Configuration is now defined in this file - no import needed
# Advanced RAG System Configuration
# ==================================
# This file contains the MASTER configuration for all models used in the RAG system.
# All components should reference these configurations to ensure consistency.
# ============================================================================
# 🎯 MASTER MODEL CONFIGURATION
# ============================================================================
# All model configurations are centralized here to prevent conflicts
# Ollama Models Configuration (for inference via Ollama)
OLLAMA_CONFIG = {
    "host": os.getenv("OLLAMA_HOST", "http://localhost:11434"),
    "generation_model": "qwen3:8b",    # Main text generation model
    "enrichment_model": "qwen3:0.6b",  # Lightweight model for routing/enrichment
}
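# Example (a sketch): the Ollama host can be overridden per environment via
# the OLLAMA_HOST variable read above, e.g. to target a remote instance:
#
#   OLLAMA_HOST=http://192.168.1.50:11434 python -m rag_system.main api
#
# (The address is illustrative; any reachable Ollama endpoint works.)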
# External Model Configuration (HuggingFace models used directly)
EXTERNAL_MODELS = {
    "embedding_model": "Qwen/Qwen3-Embedding-0.6B",             # HuggingFace embedding model (1024 dims - fresh start)
    "reranker_model": "answerdotai/answerai-colbert-small-v1",  # ColBERT reranker
    "vision_model": "Qwen/Qwen-VL-Chat",                        # Vision model for multimodal
    "fallback_reranker": "BAAI/bge-reranker-base",              # Backup reranker
}
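# Illustrative sketch (not how the pipeline loads it internally): a HuggingFace
# embedding model like the one above can be exercised directly, assuming the
# sentence-transformers package is installed:
#
#   from sentence_transformers import SentenceTransformer
#   embedder = SentenceTransformer(EXTERNAL_MODELS["embedding_model"])
#   vectors = embedder.encode(["What is late chunking?"])  # 1024-dim per the note above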
# ============================================================================
# 🔧 PIPELINE CONFIGURATIONS
# ============================================================================
PIPELINE_CONFIGS = {
    "default": {
        "description": "Production-ready pipeline with hybrid search, AI reranking, and verification",
        "storage": {
            "lancedb_uri": "./lancedb",
            "text_table_name": "text_pages_v3",
            "image_table_name": "image_pages_v3",
            "bm25_path": "./index_store/bm25",
            "graph_path": "./index_store/graph/knowledge_graph.gml"
        },
        "retrieval": {
            "retriever": "multivector",
            "search_type": "hybrid",
            "late_chunking": {
                "enabled": True,
                "table_suffix": "_lc_v3"
            },
            "dense": {
                "enabled": True,
                "weight": 0.7
            },
            "bm25": {
                "enabled": True,
                "index_name": "rag_bm25_index"
            },
            "graph": {
                "enabled": False,
                "graph_path": "./index_store/graph/knowledge_graph.gml"
            }
        },
        # 🎯 EMBEDDING MODEL: Uses the HuggingFace Qwen model directly
        "embedding_model_name": EXTERNAL_MODELS["embedding_model"],
        # 🎯 VISION MODEL: For multimodal capabilities
        "vision_model_name": EXTERNAL_MODELS["vision_model"],
        # 🎯 RERANKER: AI-powered reranking with ColBERT
        "reranker": {
            "enabled": True,
            "type": "ai",
            "strategy": "rerankers-lib",
            "model_name": EXTERNAL_MODELS["reranker_model"],
            "top_k": 10
        },
        "query_decomposition": {
            "enabled": True,
            "max_sub_queries": 3,
            "compose_from_sub_answers": True
        },
        "verification": {"enabled": True},
        "retrieval_k": 20,
        "context_window_size": 0,
        "semantic_cache_threshold": 0.98,
        "cache_scope": "global",
        # 🔧 Contextual enrichment configuration
        "contextual_enricher": {
            "enabled": True,
            "window_size": 1
        },
        # 🔧 Indexing configuration
        "indexing": {
            "embedding_batch_size": 50,
            "enrichment_batch_size": 10,
            "enable_progress_tracking": True
        }
    },
    "fast": {
        "description": "Speed-optimized pipeline with minimal overhead",
        "storage": {
            "lancedb_uri": "./lancedb",
            "text_table_name": "text_pages_v3",
            "image_table_name": "image_pages_v3",
            "bm25_path": "./index_store/bm25"
        },
        "retrieval": {
            "retriever": "multivector",
            "search_type": "vector_only",
            "late_chunking": {"enabled": False},
            "dense": {"enabled": True}
        },
        "embedding_model_name": EXTERNAL_MODELS["embedding_model"],
        "reranker": {"enabled": False},
        "query_decomposition": {"enabled": False},
        "verification": {"enabled": False},
        "retrieval_k": 10,
        "context_window_size": 0,
        # 🔧 Contextual enrichment (disabled for speed)
        "contextual_enricher": {
            "enabled": False,
            "window_size": 1
        },
        # 🔧 Indexing configuration
        "indexing": {
            "embedding_batch_size": 100,
            "enrichment_batch_size": 50,
            "enable_progress_tracking": False
        }
    },
"bm25": {
"enabled": True,
"index_name": "rag_bm25_index"
},
"graph_rag": {
"enabled": False, # Keep disabled for now unless specified
}
}
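# Example (a sketch): derive a custom profile from "default" without mutating
# the master config; the keys shown all exist in the dict above.
#
#   import copy
#   my_config = copy.deepcopy(PIPELINE_CONFIGS["default"])
#   my_config["retrieval_k"] = 50
#   my_config["reranker"]["top_k"] = 5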
# ============================================================================
# 🏭 FACTORY FUNCTIONS
# ============================================================================
def get_agent(mode: str = "default") -> Agent:
    """
    Factory function to get an instance of the RAG agent based on the specified mode.

    Args:
        mode: Configuration mode ("default", "fast")

    Returns:
        Configured Agent instance
    """
    load_dotenv()
    # Initialize the Ollama client with the host from config
    llm_client = OllamaClient(host=OLLAMA_CONFIG["host"])
    # Get the configuration for the specified mode (fall back to "default")
    config = PIPELINE_CONFIGS.get(mode, PIPELINE_CONFIGS["default"])
    agent = Agent(
        pipeline_configs=config,
        llm_client=llm_client,
        ollama_config=OLLAMA_CONFIG
    )
    return agent
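# Example usage (sketch, mirroring how run_chat() drives the agent below):
#
#   agent = get_agent("fast")
#   result = agent.run("What do the indexed documents say about pricing?")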
def validate_model_config():
    """
    Validates the model configuration for consistency and availability.

    Raises:
        ValueError: If configuration conflicts are detected
    """
    print("🔍 Validating model configuration...")
    # Check for embedding model consistency
    default_embedding = PIPELINE_CONFIGS["default"]["embedding_model_name"]
    external_embedding = EXTERNAL_MODELS["embedding_model"]
    if default_embedding != external_embedding:
        raise ValueError(f"Embedding model mismatch: {default_embedding} != {external_embedding}")
    # Check reranker configuration
    default_reranker = PIPELINE_CONFIGS["default"]["reranker"]["model_name"]
    external_reranker = EXTERNAL_MODELS["reranker_model"]
    if default_reranker != external_reranker:
        raise ValueError(f"Reranker model mismatch: {default_reranker} != {external_reranker}")
    print("✅ Model configuration validation passed!")
    return True
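# Example (a sketch): run_indexing()/run_chat() below invoke this at startup;
# a standalone script could fail fast the same way:
#
#   try:
#       validate_model_config()
#   except ValueError as err:
#       sys.exit(f"Model configuration error: {err}")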
# ============================================================================
# 🚀 UTILITY FUNCTIONS
# ============================================================================
def run_indexing(docs_path: str, config_mode: str = "default"):
    """Runs the indexing pipeline for the specified documents."""
    print(f"📚 Starting indexing for documents in: {docs_path}")
    validate_model_config()
    # Local import to avoid circular dependencies
    from rag_system.pipelines.indexing_pipeline import IndexingPipeline
    # Get the appropriate indexing pipeline from the factory
    indexing_pipeline = IndexingPipeline(PIPELINE_CONFIGS[config_mode])
    # Find all PDF files in the directory (case-insensitive extension match)
    pdf_files = [os.path.join(docs_path, f) for f in os.listdir(docs_path) if f.lower().endswith(".pdf")]
    if not pdf_files:
        print("No PDF files found to index.")
        return
    # Process all documents through the pipeline
    indexing_pipeline.process_documents(pdf_files)
    print("✅ Indexing complete.")
def run_chat(query: str):
    """
    Runs the agentic RAG pipeline for a given query.
    Returns the result as a JSON string.
    """
    try:
        validate_model_config()
        ollama_client = OllamaClient(host=OLLAMA_CONFIG["host"])
    except ConnectionError as e:
        print(e)
        return json.dumps({"error": str(e)}, indent=2)
    except ValueError as e:
        print(f"Configuration Error: {e}")
        return json.dumps({"error": f"Configuration Error: {e}"}, indent=2)
    agent = Agent(PIPELINE_CONFIGS["default"], ollama_client, OLLAMA_CONFIG)
    result = agent.run(query)
    return json.dumps(result, indent=2, ensure_ascii=False)
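# Example invocation (subcommand style, handled in main() below):
#
#   python -m rag_system.main chat "What does the report conclude?"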
def show_graph():
    """
    Loads and displays the knowledge graph.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    # The graph path lives in the default pipeline's storage config, not in a
    # top-level "indexing" key.
    graph_path = PIPELINE_CONFIGS["default"]["storage"]["graph_path"]
    if not os.path.exists(graph_path):
        print("Knowledge graph not found. Please run the 'index' command first.")
        return
    G = nx.read_gml(graph_path)
    print("--- Knowledge Graph ---")
    print("Nodes:", G.nodes(data=True))
    print("Edges:", G.edges(data=True))
    print("---------------------")
    # Optional: Visualize the graph
    try:
        pos = nx.spring_layout(G)
        nx.draw(G, pos, with_labels=True, node_size=2000, node_color="skyblue", font_size=10, font_weight="bold")
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
        plt.title("Knowledge Graph Visualization")
        plt.show()
    except Exception as e:
        print("\nCould not visualize the graph. Matplotlib might not be installed or configured for your environment.")
        print(f"Error: {e}")
def run_api_server():
    """Starts the advanced RAG API server."""
    from rag_system.api_server import start_server
    start_server()
def main():
    if len(sys.argv) < 2:
        print("Usage: python -m rag_system.main [index|chat|show_graph|api] [args]")
        return
    command = sys.argv[1]
    if command == "index":
        # run_indexing expects a directory of documents, not individual files
        if len(sys.argv) < 3:
            print("Usage: python -m rag_system.main index <docs_path>")
            return
        run_indexing(sys.argv[2])
    elif command == "chat":
        if len(sys.argv) < 3:
            print("Usage: python -m rag_system.main chat <query>")
            return
        query = " ".join(sys.argv[2:])
        # 🆕 Print the result for command-line usage
        print(run_chat(query))
    elif command == "show_graph":
        show_graph()
    elif command == "api":
        run_api_server()
    else:
        print(f"Unknown command: {command}")
if __name__ == "__main__":
    # Dispatch subcommands (index/chat/show_graph/api) to main(); otherwise
    # fall back to the flag-style interface below.
    if len(sys.argv) > 1 and sys.argv[1] in ("index", "chat", "show_graph", "api"):
        main()
    else:
        parser = argparse.ArgumentParser(description="Main entry point for the RAG system.")
        parser.add_argument(
            '--index',
            type=str,
            help='Path to the directory containing documents to index.'
        )
        parser.add_argument(
            '--config',
            type=str,
            default='default',
            help='The configuration profile to use (e.g., "default", "fast").'
        )
        args = parser.parse_args()
        # Environment variables are already loaded at import time
        if args.index:
            run_indexing(args.index, args.config)
        else:
            print("No action specified. Use --index to process documents, or a subcommand: index|chat|show_graph|api.")
            # Example of how to get an agent instance:
            #   agent = get_agent(args.config)
            #   print(f"Agent loaded with '{args.config}' config.")