# MinerU document-extraction settings (backend/open_webui/config.py).
# Each value is seeded from the environment and wrapped in PersistentConfig
# under the corresponding "rag.mineru_*" config path.

MINERU_API_MODE = PersistentConfig(
    "MINERU_API_MODE",
    "rag.mineru_api_mode",
    os.environ.get("MINERU_API_MODE", "local"),  # "local" or "cloud"
)

MINERU_API_URL = PersistentConfig(
    "MINERU_API_URL",
    "rag.mineru_api_url",
    os.environ.get("MINERU_API_URL", "http://localhost:8000"),
)

MINERU_API_KEY = PersistentConfig(
    "MINERU_API_KEY",
    "rag.mineru_api_key",
    os.environ.get("MINERU_API_KEY", ""),
)

# MINERU_PARAMS is expected to be a JSON object. An unset/empty variable or
# malformed JSON falls back to an empty dict.
mineru_params = os.getenv("MINERU_PARAMS", "")
try:
    mineru_params = json.loads(mineru_params)
except json.JSONDecodeError:
    mineru_params = {}

# json.loads happily returns lists, numbers, strings or None for valid JSON
# that is not an object; the MinerU loader reads this value with dict
# lookups, so anything that is not a dict is discarded here rather than
# failing later at document-processing time.
if not isinstance(mineru_params, dict):
    mineru_params = {}

MINERU_PARAMS = PersistentConfig(
    "MINERU_PARAMS",
    "rag.mineru_params",
    mineru_params,
)
import os
import time
import requests
import logging
import tempfile
import zipfile
from typing import List, Optional
from langchain_core.documents import Document
from fastapi import HTTPException, status

log = logging.getLogger(__name__)


class MinerULoader:
    """
    MinerU document parser loader supporting both Cloud API and Local API modes.

    Cloud API: Uses MinerU managed service with async task-based processing
    Local API: Uses self-hosted MinerU API with synchronous processing
    """

    def __init__(
        self,
        file_path: str,
        api_mode: str = "local",
        api_url: str = "http://localhost:8000",
        api_key: str = "",
        params: Optional[dict] = None,
    ):
        """
        Store connection settings and parsing options.

        Args:
            file_path: Path of the document to parse.
            api_mode: "local" or "cloud" (case-insensitive).
            api_url: Base URL of the MinerU API; a trailing "/" is stripped.
            api_key: Bearer token, required when api_mode is "cloud".
            params: Optional parsing options. Recognised keys: enable_ocr,
                enable_formula, enable_table, language, model_version,
                page_ranges. Missing keys fall back to defaults.

        Raises:
            ValueError: If api_mode is not "local"/"cloud", or if cloud mode
                is selected without an API key.
        """
        self.file_path = file_path
        self.api_mode = api_mode.lower()
        self.api_url = api_url.rstrip("/")
        self.api_key = api_key

        # Parse params dict with defaults. A non-dict value (e.g. a JSON list
        # coming from configuration) is ignored instead of crashing on .get().
        if params is not None and not isinstance(params, dict):
            log.warning(
                f"Ignoring MinerU params of unexpected type {type(params).__name__}"
            )
        params = params if isinstance(params, dict) else {}
        self.enable_ocr = params.get("enable_ocr", False)
        self.enable_formula = params.get("enable_formula", True)
        self.enable_table = params.get("enable_table", True)
        self.language = params.get("language", "en")
        self.model_version = params.get("model_version", "pipeline")
        self.page_ranges = params.get("page_ranges", "")

        # Validate API mode
        if self.api_mode not in ["local", "cloud"]:
            raise ValueError(
                f"Invalid API mode: {self.api_mode}. Must be 'local' or 'cloud'"
            )

        # Validate Cloud API requirements
        if self.api_mode == "cloud" and not self.api_key:
            raise ValueError("API key is required for Cloud API mode")

    @staticmethod
    def _format_http_error(
        prefix: str, error: requests.HTTPError, use_msg_field: bool = True
    ) -> str:
        """
        Build an error detail string from an HTTP error and its response body.

        The JSON body is appended when it can be decoded; otherwise the raw
        response text is appended. With use_msg_field=True and a JSON object
        body, only its "msg" entry is appended when present.
        """
        detail = f"{prefix}: {error}"
        response = error.response
        if response is None:
            return detail
        try:
            payload = response.json()
        except ValueError:
            # Body is not JSON; fall back to the raw text.
            return f"{detail} - {response.text}"
        if use_msg_field and isinstance(payload, dict):
            payload = payload.get("msg", payload)
        return f"{detail} - {payload}"

    def load(self) -> List[Document]:
        """
        Main entry point for loading and parsing the document.
        Routes to Cloud or Local API based on api_mode.

        Any exception from the selected path is logged and re-raised unchanged.
        """
        try:
            if self.api_mode == "cloud":
                return self._load_cloud_api()
            else:
                return self._load_local_api()
        except Exception as e:
            log.error(f"Error loading document with MinerU: {e}")
            raise

    def _load_local_api(self) -> List[Document]:
        """
        Load document using Local API (synchronous).
        Posts file to /file_parse endpoint and gets immediate response.

        Returns:
            A single-element list holding the markdown returned under
            "md_content" for the first entry of the response's "results".

        Raises:
            HTTPException: 404 if the file is missing, 504 on request timeout,
                400 on an HTTP error status or empty results/markdown,
                502 on a malformed response, 500 on any other request error.
        """
        log.info(f"Using MinerU Local API at {self.api_url}")

        filename = os.path.basename(self.file_path)

        # Build form data for Local API
        form_data = {
            "return_md": "true",
            "formula_enable": str(self.enable_formula).lower(),
            "table_enable": str(self.enable_table).lower(),
        }

        # Parse method based on OCR setting
        form_data["parse_method"] = "ocr" if self.enable_ocr else "auto"

        # Language configuration (Local API uses lang_list array)
        if self.language:
            form_data["lang_list"] = self.language

        # Backend/model version (Local API uses "backend" parameter)
        if self.model_version == "vlm":
            form_data["backend"] = "vlm-vllm-engine"
        else:
            form_data["backend"] = "pipeline"

        # Page ranges (Local API uses start_page_id and end_page_id)
        if self.page_ranges:
            # page_ranges is not translated for the Local API; it is only
            # reported so the operator knows it has no effect here.
            log.warning(
                f"Page ranges '{self.page_ranges}' specified but Local API uses different format. "
                "Consider using start_page_id/end_page_id parameters if needed."
            )

        try:
            with open(self.file_path, "rb") as f:
                files = {"files": (filename, f, "application/octet-stream")}

                log.info(f"Sending file to MinerU Local API: {filename}")
                log.debug(f"Local API parameters: {form_data}")

                response = requests.post(
                    f"{self.api_url}/file_parse",
                    data=form_data,
                    files=files,
                    timeout=300,  # 5 minute timeout for large documents
                )
                response.raise_for_status()

        except FileNotFoundError as e:
            raise HTTPException(
                status.HTTP_404_NOT_FOUND, detail=f"File not found: {self.file_path}"
            ) from e
        except requests.Timeout as e:
            raise HTTPException(
                status.HTTP_504_GATEWAY_TIMEOUT,
                detail="MinerU Local API request timed out",
            ) from e
        except requests.HTTPError as e:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail=self._format_http_error(
                    "MinerU Local API request failed", e, use_msg_field=False
                ),
            ) from e
        except Exception as e:
            raise HTTPException(
                status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error calling MinerU Local API: {str(e)}",
            ) from e

        # Parse response
        try:
            result = response.json()
        except ValueError as e:
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail=f"Invalid JSON response from MinerU Local API: {e}",
            ) from e

        # Extract markdown content from response
        if not isinstance(result, dict) or "results" not in result:
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail="MinerU Local API response missing 'results' field",
            )

        results = result["results"]
        if not results:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail="MinerU returned empty results",
            )

        # Get the first (and typically only) result. The code relies on
        # "results" being a mapping of per-file dicts; any other shape is
        # reported as a bad upstream response instead of an AttributeError.
        file_result = (
            next(iter(results.values())) if isinstance(results, dict) else None
        )
        if not isinstance(file_result, dict):
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail="MinerU Local API returned 'results' in an unexpected format",
            )
        markdown_content = file_result.get("md_content", "")

        if not markdown_content:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail="MinerU returned empty markdown content",
            )

        log.info(f"Successfully parsed document with MinerU Local API: {filename}")

        # Create metadata
        metadata = {
            "source": filename,
            "api_mode": "local",
            "backend": result.get("backend", "unknown"),
            "version": result.get("version", "unknown"),
        }

        return [Document(page_content=markdown_content, metadata=metadata)]

    def _load_cloud_api(self) -> List[Document]:
        """
        Load document using Cloud API (asynchronous).
        Uses batch upload endpoint to avoid need for public file URLs.

        Returns:
            A single-element list holding the markdown extracted from the
            result ZIP.

        Raises:
            HTTPException: Propagated from any of the four steps, or 502 when
                the finished result carries no "full_zip_url".
        """
        log.info(f"Using MinerU Cloud API at {self.api_url}")

        filename = os.path.basename(self.file_path)

        # Step 1: Request presigned upload URL
        batch_id, upload_url = self._request_upload_url(filename)

        # Step 2: Upload file to presigned URL
        self._upload_to_presigned_url(upload_url)

        # Step 3: Poll for results
        result = self._poll_batch_status(batch_id, filename)

        # Step 4: Download and extract markdown from ZIP
        zip_url = result.get("full_zip_url")
        if not zip_url:
            # Surface a missing URL as an upstream error, not a raw KeyError.
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail="MinerU Cloud API result missing 'full_zip_url'",
            )
        markdown_content = self._download_and_extract_zip(zip_url, filename)

        log.info(f"Successfully parsed document with MinerU Cloud API: {filename}")

        # Create metadata
        metadata = {
            "source": filename,
            "api_mode": "cloud",
            "batch_id": batch_id,
        }

        return [Document(page_content=markdown_content, metadata=metadata)]

    def _request_upload_url(self, filename: str) -> tuple:
        """
        Request presigned upload URL from Cloud API.
        Returns (batch_id, upload_url).

        Raises:
            HTTPException: 400 on an HTTP error status or a non-zero API
                "code", 502 on invalid JSON or a response lacking
                batch_id/file_urls, 500 on any other request error.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # Build request body
        request_body = {
            "enable_formula": self.enable_formula,
            "enable_table": self.enable_table,
            "language": self.language,
            "model_version": self.model_version,
            "files": [
                {
                    "name": filename,
                    "is_ocr": self.enable_ocr,
                }
            ],
        }

        # Add page ranges if specified
        if self.page_ranges:
            request_body["files"][0]["page_ranges"] = self.page_ranges

        log.info(f"Requesting upload URL for: {filename}")
        log.debug(f"Cloud API request body: {request_body}")

        try:
            response = requests.post(
                f"{self.api_url}/file-urls/batch",
                headers=headers,
                json=request_body,
                timeout=30,
            )
            response.raise_for_status()
        except requests.HTTPError as e:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail=self._format_http_error("Failed to request upload URL", e),
            ) from e
        except Exception as e:
            raise HTTPException(
                status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error requesting upload URL: {str(e)}",
            ) from e

        try:
            result = response.json()
        except ValueError as e:
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail=f"Invalid JSON response: {e}",
            ) from e

        # Check for API error response
        if result.get("code") != 0:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail=f"MinerU Cloud API error: {result.get('msg', 'Unknown error')}",
            )

        # "data" may be present but null; treat that like an absent field.
        data = result.get("data") or {}
        batch_id = data.get("batch_id")
        file_urls = data.get("file_urls", [])

        if not batch_id or not file_urls:
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail="MinerU Cloud API response missing batch_id or file_urls",
            )

        upload_url = file_urls[0]
        log.info(f"Received upload URL for batch: {batch_id}")

        return batch_id, upload_url

    def _upload_to_presigned_url(self, upload_url: str) -> None:
        """
        Upload file to presigned URL (no authentication needed).

        Raises:
            HTTPException: 404 if the file is missing, 504 on timeout,
                400 on an HTTP error status, 500 on any other error.
        """
        log.info("Uploading file to presigned URL")

        try:
            with open(self.file_path, "rb") as f:
                response = requests.put(
                    upload_url,
                    data=f,
                    timeout=300,  # 5 minute timeout for large files
                )
                response.raise_for_status()
        except FileNotFoundError as e:
            raise HTTPException(
                status.HTTP_404_NOT_FOUND, detail=f"File not found: {self.file_path}"
            ) from e
        except requests.Timeout as e:
            raise HTTPException(
                status.HTTP_504_GATEWAY_TIMEOUT,
                detail="File upload to presigned URL timed out",
            ) from e
        except requests.HTTPError as e:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail=f"Failed to upload file to presigned URL: {e}",
            ) from e
        except Exception as e:
            raise HTTPException(
                status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error uploading file: {str(e)}",
            ) from e

        log.info("File uploaded successfully")

    def _poll_batch_status(self, batch_id: str, filename: str) -> dict:
        """
        Poll batch status until completion.
        Returns the result dict for the file.

        Raises:
            HTTPException: 400 on an HTTP error status, a non-zero API "code"
                or a "failed" state; 502 on invalid JSON or when the file is
                absent from the batch results; 500 on any other request
                error; 504 when the state never reaches "done".
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }

        max_iterations = 300  # 10 minutes max (2 seconds per iteration)
        poll_interval = 2  # seconds

        log.info(f"Polling batch status: {batch_id}")

        for iteration in range(max_iterations):
            try:
                response = requests.get(
                    f"{self.api_url}/extract-results/batch/{batch_id}",
                    headers=headers,
                    timeout=30,
                )
                response.raise_for_status()
            except requests.HTTPError as e:
                raise HTTPException(
                    status.HTTP_400_BAD_REQUEST,
                    detail=self._format_http_error("Failed to poll batch status", e),
                ) from e
            except Exception as e:
                raise HTTPException(
                    status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail=f"Error polling batch status: {str(e)}",
                ) from e

            try:
                result = response.json()
            except ValueError as e:
                raise HTTPException(
                    status.HTTP_502_BAD_GATEWAY,
                    detail=f"Invalid JSON response while polling: {e}",
                ) from e

            # Check for API error response
            if result.get("code") != 0:
                raise HTTPException(
                    status.HTTP_400_BAD_REQUEST,
                    detail=f"MinerU Cloud API error: {result.get('msg', 'Unknown error')}",
                )

            # "data" / "extract_result" may be present but null.
            data = result.get("data") or {}
            extract_result = data.get("extract_result") or []

            # Find our file in the batch results
            # NOTE(review): this treats a missing entry as fatal on the very
            # first poll — confirm the API always lists the file immediately.
            file_result = next(
                (item for item in extract_result if item.get("file_name") == filename),
                None,
            )

            if not file_result:
                raise HTTPException(
                    status.HTTP_502_BAD_GATEWAY,
                    detail=f"File {filename} not found in batch results",
                )

            state = file_result.get("state")

            if state == "done":
                log.info(f"Processing complete for {filename}")
                return file_result
            elif state == "failed":
                error_msg = file_result.get("err_msg", "Unknown error")
                raise HTTPException(
                    status.HTTP_400_BAD_REQUEST,
                    detail=f"MinerU processing failed: {error_msg}",
                )
            elif state in ["waiting-file", "pending", "running", "converting"]:
                # Still processing
                if iteration % 10 == 0:  # Log every 20 seconds
                    log.info(
                        f"Processing status: {state} (iteration {iteration + 1}/{max_iterations})"
                    )
                time.sleep(poll_interval)
            else:
                # Unrecognised states are treated as "still in progress".
                log.warning(f"Unknown state: {state}")
                time.sleep(poll_interval)

        # Timeout
        raise HTTPException(
            status.HTTP_504_GATEWAY_TIMEOUT,
            detail="MinerU processing timed out after 10 minutes",
        )

    def _download_and_extract_zip(self, zip_url: str, filename: str) -> str:
        """
        Download ZIP file from CDN and extract markdown content.
        Returns the markdown content as a string.

        The archive is unpacked into a temporary directory and the first
        non-empty ".md" file found while walking it is returned. All temporary
        files are removed on every exit path.

        Args:
            zip_url: URL of the result archive.
            filename: Name of the source document (currently unused; kept for
                interface compatibility).

        Raises:
            HTTPException: 400 if the download returns an HTTP error status or
                the markdown is empty; 502 for an invalid ZIP or when no
                readable ".md" file exists; 500 for any other download or
                extraction error.
        """
        log.info(f"Downloading results from: {zip_url}")

        try:
            response = requests.get(zip_url, timeout=60)
            response.raise_for_status()
        except requests.HTTPError as e:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail=f"Failed to download results ZIP: {e}",
            ) from e
        except Exception as e:
            raise HTTPException(
                status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error downloading results: {str(e)}",
            ) from e

        markdown_content = None

        try:
            # Both the archive and its extracted contents live inside one
            # TemporaryDirectory so nothing is left behind when an error is
            # raised part-way through.
            with tempfile.TemporaryDirectory() as tmp_dir:
                zip_path = os.path.join(tmp_dir, "result.zip")
                # Extract into a sub-directory so the archive itself is not
                # picked up when listing the extracted files.
                extract_dir = os.path.join(tmp_dir, "extracted")
                os.makedirs(extract_dir)

                with open(zip_path, "wb") as zip_file:
                    zip_file.write(response.content)

                with zipfile.ZipFile(zip_path, "r") as zip_ref:
                    zip_ref.extractall(extract_dir)

                # List every extracted file (used for diagnostics below).
                all_files = [
                    os.path.join(root, name)
                    for root, _dirs, names in os.walk(extract_dir)
                    for name in names
                ]
                md_files = [path for path in all_files if path.endswith(".md")]

                # Use the first non-empty markdown file
                for md_path in md_files:
                    log.info(f"Found markdown file at: {md_path}")
                    try:
                        with open(md_path, "r", encoding="utf-8") as f:
                            markdown_content = f.read()
                    except Exception as e:
                        log.warning(f"Failed to read {md_path}: {e}")
                        continue
                    if markdown_content:
                        break

                if markdown_content is None:
                    log.error(f"Available files in ZIP: {all_files}")
                    # Try to provide more helpful error message
                    if md_files:
                        error_msg = (
                            f"Found .md files but couldn't read them: {md_files}"
                        )
                    else:
                        error_msg = (
                            f"No .md files found in ZIP. Available files: {all_files}"
                        )
                    raise HTTPException(
                        status.HTTP_502_BAD_GATEWAY,
                        detail=error_msg,
                    )

        except HTTPException:
            # Keep the specific status raised above; without this clause the
            # generic handler below would rewrap it as a 500.
            raise
        except zipfile.BadZipFile as e:
            raise HTTPException(
                status.HTTP_502_BAD_GATEWAY,
                detail=f"Invalid ZIP file received: {e}",
            ) from e
        except Exception as e:
            raise HTTPException(
                status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error extracting ZIP: {str(e)}",
            ) from e

        if not markdown_content:
            raise HTTPException(
                status.HTTP_400_BAD_REQUEST,
                detail="Extracted markdown content is empty",
            )

        log.info(
            f"Successfully extracted markdown content ({len(markdown_content)} characters)"
        )
        return markdown_content
request.app.state.config.MINERU_PARAMS, # Reranking settings "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, "RAG_RERANKING_ENGINE": request.app.state.config.RAG_RERANKING_ENGINE, @@ -647,6 +652,12 @@ class ConfigForm(BaseModel): DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None MISTRAL_OCR_API_KEY: Optional[str] = None + # MinerU settings + MINERU_API_MODE: Optional[str] = None + MINERU_API_URL: Optional[str] = None + MINERU_API_KEY: Optional[str] = None + MINERU_PARAMS: Optional[dict] = None + # Reranking settings RAG_RERANKING_MODEL: Optional[str] = None RAG_RERANKING_ENGINE: Optional[str] = None @@ -886,6 +897,28 @@ async def update_rag_config( else request.app.state.config.MISTRAL_OCR_API_KEY ) + # MinerU settings + request.app.state.config.MINERU_API_MODE = ( + form_data.MINERU_API_MODE + if form_data.MINERU_API_MODE is not None + else request.app.state.config.MINERU_API_MODE + ) + request.app.state.config.MINERU_API_URL = ( + form_data.MINERU_API_URL + if form_data.MINERU_API_URL is not None + else request.app.state.config.MINERU_API_URL + ) + request.app.state.config.MINERU_API_KEY = ( + form_data.MINERU_API_KEY + if form_data.MINERU_API_KEY is not None + else request.app.state.config.MINERU_API_KEY + ) + request.app.state.config.MINERU_PARAMS = ( + form_data.MINERU_PARAMS + if form_data.MINERU_PARAMS is not None + else request.app.state.config.MINERU_PARAMS + ) + # Reranking settings if request.app.state.config.RAG_RERANKING_ENGINE == "": # Unloading the internal reranker and clear VRAM memory @@ -1150,6 +1183,11 @@ async def update_rag_config( "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, + # MinerU settings + "MINERU_API_MODE": request.app.state.config.MINERU_API_MODE, + "MINERU_API_URL": request.app.state.config.MINERU_API_URL, + 
"MINERU_API_KEY": request.app.state.config.MINERU_API_KEY, + "MINERU_PARAMS": request.app.state.config.MINERU_PARAMS, # Reranking settings "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, "RAG_RERANKING_ENGINE": request.app.state.config.RAG_RERANKING_ENGINE, @@ -1560,6 +1598,10 @@ def process_file( DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY, + MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, + MINERU_API_URL=request.app.state.config.MINERU_API_URL, + MINERU_API_KEY=request.app.state.config.MINERU_API_KEY, + MINERU_PARAMS=request.app.state.config.MINERU_PARAMS, ) docs = loader.load( file.filename, file.meta.get("content_type"), file_path diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 77b390374..fbda44c4b 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -207,6 +207,15 @@ return; } + if ( + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mineru' && + RAGConfig.MINERU_API_MODE === 'cloud' && + RAGConfig.MINERU_API_KEY === '' + ) { + toast.error($i18n.t('MinerU API Key required for Cloud API mode.')); + return; + } + if (!RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL) { await embeddingModelUpdateHandler(); } @@ -337,6 +346,7 @@ + @@ -749,7 +759,89 @@ bind:value={RAGConfig.MISTRAL_OCR_API_KEY} /> + {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mineru'} + +
+
+
+ {$i18n.t('API Mode')} +
+ +
+
+ + +
+ +
+ + + {#if RAGConfig.MINERU_API_MODE === 'cloud'} +
+ +
{/if} + + +
+
+ + {$i18n.t('Parameters')} + +
+
+