From 051b6daa8299fd332503bd584563556e2ae6adab Mon Sep 17 00:00:00 2001
From: Timothy Jaeryang Baek
Date: Thu, 11 Sep 2025 22:29:02 +0400
Subject: [PATCH] refac/fix: large file upload

---
 backend/open_webui/routers/retrieval.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 10c0221b0..1f32791ba 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -1370,6 +1370,7 @@ def save_docs_to_vector_db(
             prefix=RAG_EMBEDDING_CONTENT_PREFIX,
             user=user,
         )
+        log.info(f"embeddings generated {len(embeddings)} for {len(texts)} items")
 
         items = [
             {
@@ -1387,6 +1388,7 @@ def save_docs_to_vector_db(
             items=items,
         )
 
+        log.info(f"added {len(items)} items to collection {collection_name}")
         return True
     except Exception as e:
         log.exception(e)
@@ -1545,13 +1547,20 @@ def process_file(
             log.debug(f"text_content: {text_content}")
             Files.update_file_data_by_id(
                 file.id,
-                {"status": "completed", "content": text_content},
+                {"content": text_content},
             )
-
             hash = calculate_sha256_string(text_content)
             Files.update_file_hash_by_id(file.id, hash)
 
-            if not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL:
+            if request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL:
+                Files.update_file_data_by_id(file.id, {"status": "completed"})
+                return {
+                    "status": True,
+                    "collection_name": None,
+                    "filename": file.filename,
+                    "content": text_content,
+                }
+            else:
                 try:
                     result = save_docs_to_vector_db(
                         request,
@@ -1565,6 +1574,7 @@ def process_file(
                         add=(True if form_data.collection_name else False),
                         user=user,
                     )
+                    log.info(f"added {len(docs)} items to collection {collection_name}")
 
                     if result:
                         Files.update_file_metadata_by_id(
@@ -1582,13 +1592,6 @@ def process_file(
                         }
                 except Exception as e:
                     raise e
-            else:
-                return {
-                    "status": True,
-                    "collection_name": None,
-                    "filename": file.filename,
-                    "content": text_content,
-                }
         except Exception as e:
             log.exception(e)