This commit is contained in:
Timothy Jaeryang Baek 2025-10-04 01:19:33 -05:00
parent f65231becb
commit d40c710354
3 changed files with 269 additions and 47 deletions

View File

@ -5,6 +5,7 @@ import os
import shutil
import asyncio
import re
import uuid
from datetime import datetime
from pathlib import Path
@ -1690,50 +1691,34 @@ def process_text(
)
@router.post("/process/youtube")
def process_youtube_video(
request: Request, form_data: ProcessUrlForm, user=Depends(get_verified_user)
):
try:
collection_name = form_data.collection_name
if not collection_name:
collection_name = calculate_sha256_string(form_data.url)[:63]
def is_youtube_url(url: str) -> bool:
youtube_regex = r"^(https?://)?(www\.)?(youtube\.com|youtu\.be)/.+$"
return re.match(youtube_regex, url) is not None
loader = YoutubeLoader(
form_data.url,
def get_loader(request, url: str):
if is_youtube_url(url):
return YoutubeLoader(
url,
language=request.app.state.config.YOUTUBE_LOADER_LANGUAGE,
proxy_url=request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
)
docs = loader.load()
content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {content}")
save_docs_to_vector_db(
request, docs, collection_name, overwrite=True, user=user
)
return {
"status": True,
"collection_name": collection_name,
"filename": form_data.url,
"file": {
"data": {
"content": content,
},
"meta": {
"name": form_data.url,
},
},
}
except Exception as e:
log.exception(e)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e),
else:
return get_web_loader(
url,
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
)
def get_content_from_url(request, url: str) -> str:
loader = get_loader(request, url)
docs = loader.load()
content = " ".join([doc.page_content for doc in docs])
return content, docs
@router.post("/process/youtube")
@router.post("/process/web")
def process_web(
request: Request, form_data: ProcessUrlForm, user=Depends(get_verified_user)
@ -1743,14 +1728,7 @@ def process_web(
if not collection_name:
collection_name = calculate_sha256_string(form_data.url)[:63]
loader = get_web_loader(
form_data.url,
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
)
docs = loader.load()
content = " ".join([doc.page_content for doc in docs])
content, docs = get_content_from_url(request, form_data.url)
log.debug(f"text_content: {content}")
if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:

View File

@ -1001,11 +1001,11 @@ async def process_chat_payload(request, form_data, user, metadata, model):
log.debug(f"form_data: {form_data}")
system_message = get_system_message(form_data.get("messages", []))
if system_message:
if system_message: # Chat Controls/User Settings
try:
form_data = apply_system_prompt_to_body(
system_message.get("content"), form_data, metadata, user, replace=True
)
) # Required to handle system prompt variables
except:
pass

244
package-lock.json generated
View File

@ -94,6 +94,7 @@
"undici": "^7.3.0",
"uuid": "^9.0.1",
"vega": "^6.2.0",
"vega-lite": "^6.4.1",
"vite-plugin-static-copy": "^2.2.0",
"y-prosemirror": "^1.3.7",
"yaml": "^2.7.1",
@ -5593,6 +5594,99 @@
"node": ">=8"
}
},
"node_modules/cliui": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-9.0.1.tgz",
"integrity": "sha512-k7ndgKhwoQveBL+/1tqGJYNz097I7WOvwbmmU2AR5+magtbjPWQTS1C5vzGkBC8Ym8UWRzfKUzUUqFLypY4Q+w==",
"license": "ISC",
"dependencies": {
"string-width": "^7.2.0",
"strip-ansi": "^7.1.0",
"wrap-ansi": "^9.0.0"
},
"engines": {
"node": ">=20"
}
},
"node_modules/cliui/node_modules/ansi-regex": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz",
"integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==",
"license": "MIT",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/ansi-regex?sponsor=1"
}
},
"node_modules/cliui/node_modules/ansi-styles": {
"version": "6.2.3",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz",
"integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==",
"license": "MIT",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/cliui/node_modules/emoji-regex": {
"version": "10.5.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.5.0.tgz",
"integrity": "sha512-lb49vf1Xzfx080OKA0o6l8DQQpV+6Vg95zyCJX9VB/BqKYlhG7N4wgROUUHRA+ZPUefLnteQOad7z1kT2bV7bg==",
"license": "MIT"
},
"node_modules/cliui/node_modules/string-width": {
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
"integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==",
"license": "MIT",
"dependencies": {
"emoji-regex": "^10.3.0",
"get-east-asian-width": "^1.0.0",
"strip-ansi": "^7.1.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/cliui/node_modules/strip-ansi": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz",
"integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==",
"license": "MIT",
"dependencies": {
"ansi-regex": "^6.0.1"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
}
},
"node_modules/cliui/node_modules/wrap-ansi": {
"version": "9.0.2",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-9.0.2.tgz",
"integrity": "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww==",
"license": "MIT",
"dependencies": {
"ansi-styles": "^6.2.1",
"string-width": "^7.0.0",
"strip-ansi": "^7.1.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
}
},
"node_modules/clone": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/clone/-/clone-2.1.2.tgz",
@ -7069,6 +7163,15 @@
"@esbuild/win32-x64": "0.25.1"
}
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/escape-string-regexp": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@ -7781,6 +7884,27 @@
"resolved": "https://registry.npmjs.org/gc-hook/-/gc-hook-0.3.1.tgz",
"integrity": "sha512-E5M+O/h2o7eZzGhzRZGex6hbB3k4NWqO0eA+OzLRLXxhdbYPajZnynPwAtphnh+cRHPwsj5Z80dqZlfI4eK55A=="
},
"node_modules/get-caller-file": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"license": "ISC",
"engines": {
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/get-east-asian-width": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz",
"integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==",
"license": "MIT",
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/get-func-name": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.2.tgz",
@ -8794,6 +8918,12 @@
"integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==",
"dev": true
},
"node_modules/json-stringify-pretty-compact": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/json-stringify-pretty-compact/-/json-stringify-pretty-compact-4.0.0.tgz",
"integrity": "sha512-3CNZ2DnrpByG9Nqj6Xo8vqbjT4F6N+tb4Gb28ESAZjYZ5yqvmc56J+/kuIwkaAMOyblTQhUW7PxMkUb8Q36N3Q==",
"license": "MIT"
},
"node_modules/json-stringify-safe": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
@ -13281,6 +13411,35 @@
"vega-util": "^2.1.0"
}
},
"node_modules/vega-lite": {
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vega-lite/-/vega-lite-6.4.1.tgz",
"integrity": "sha512-KO3ybHNouRK4A0al/+2fN9UqgTEfxrd/ntGLY933Hg5UOYotDVQdshR3zn7OfXwQ7uj0W96Vfa5R+QxO8am3IQ==",
"license": "BSD-3-Clause",
"dependencies": {
"json-stringify-pretty-compact": "~4.0.0",
"tslib": "~2.8.1",
"vega-event-selector": "~4.0.0",
"vega-expression": "~6.1.0",
"vega-util": "~2.1.0",
"yargs": "~18.0.0"
},
"bin": {
"vl2pdf": "bin/vl2pdf",
"vl2png": "bin/vl2png",
"vl2svg": "bin/vl2svg",
"vl2vg": "bin/vl2vg"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://app.hubspot.com/payments/GyPC972GD9Rt"
},
"peerDependencies": {
"vega": "^6.0.0"
}
},
"node_modules/vega-loader": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/vega-loader/-/vega-loader-5.1.0.tgz",
@ -14642,6 +14801,15 @@
"yjs": "^13.0.0"
}
},
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
"license": "ISC",
"engines": {
"node": ">=10"
}
},
"node_modules/yallist": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
@ -14663,6 +14831,82 @@
"node": ">= 14"
}
},
"node_modules/yargs": {
"version": "18.0.0",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-18.0.0.tgz",
"integrity": "sha512-4UEqdc2RYGHZc7Doyqkrqiln3p9X2DZVxaGbwhn2pi7MrRagKaOcIKe8L3OxYcbhXLgLFUS3zAYuQjKBQgmuNg==",
"license": "MIT",
"dependencies": {
"cliui": "^9.0.1",
"escalade": "^3.1.1",
"get-caller-file": "^2.0.5",
"string-width": "^7.2.0",
"y18n": "^5.0.5",
"yargs-parser": "^22.0.0"
},
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=23"
}
},
"node_modules/yargs-parser": {
"version": "22.0.0",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-22.0.0.tgz",
"integrity": "sha512-rwu/ClNdSMpkSrUb+d6BRsSkLUq1fmfsY6TOpYzTwvwkg1/NRG85KBy3kq++A8LKQwX6lsu+aWad+2khvuXrqw==",
"license": "ISC",
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=23"
}
},
"node_modules/yargs/node_modules/ansi-regex": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz",
"integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==",
"license": "MIT",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/ansi-regex?sponsor=1"
}
},
"node_modules/yargs/node_modules/emoji-regex": {
"version": "10.5.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.5.0.tgz",
"integrity": "sha512-lb49vf1Xzfx080OKA0o6l8DQQpV+6Vg95zyCJX9VB/BqKYlhG7N4wgROUUHRA+ZPUefLnteQOad7z1kT2bV7bg==",
"license": "MIT"
},
"node_modules/yargs/node_modules/string-width": {
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
"integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==",
"license": "MIT",
"dependencies": {
"emoji-regex": "^10.3.0",
"get-east-asian-width": "^1.0.0",
"strip-ansi": "^7.1.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/yargs/node_modules/strip-ansi": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz",
"integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==",
"license": "MIT",
"dependencies": {
"ansi-regex": "^6.0.1"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
}
},
"node_modules/yauzl": {
"version": "2.10.0",
"resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",