refac/fix: tag handling

2025-12-05 12:20:26 +01:00 · 2025-07-18 11:56:40 +04:00 · 2025-07-18 11:56:40 +04:00 · 2fa8aff82f
commit 2fa8aff82f
parent 6ccf783f00
1 changed files with 19 additions and 10 deletions
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -1472,12 +1472,12 @@ async def process_chat_response(

                        if reasoning_duration is not None:
                            if raw:
-                                content = f'{content}\n<{block["start_tag"]}>{block["content"]}<{block["end_tag"]}>\n'
+                                content = f'{content}\n{block["start_tag"]}{block["content"]}{block["end_tag"]}\n'
                            else:
                                content = f'{content}\n<details type="reasoning" done="true" duration="{reasoning_duration}">\n<summary>Thought for {reasoning_duration} seconds</summary>\n{reasoning_display_content}\n</details>\n'
                        else:
                            if raw:
-                                content = f'{content}\n<{block["start_tag"]}>{block["content"]}<{block["end_tag"]}>\n'
+                                content = f'{content}\n{block["start_tag"]}{block["content"]}{block["end_tag"]}\n'
                            else:
                                content = f'{content}\n<details type="reasoning" done="false">\n<summary>Thinking…</summary>\n{reasoning_display_content}\n</details>\n'

@@ -1575,16 +1575,14 @@ async def process_chat_response(
                if content_blocks[-1]["type"] == "text":
                    for start_tag, end_tag in tags:

-                        if start_tag.startswith("<") and end_tag.endswith(">"):
+                        start_tag_pattern = rf"{re.escape(start_tag)}"
+                        if start_tag.startswith("<") and start_tag.endswith(">"):
                            # Match start tag e.g., <tag> or <tag attr="value">
                            # remove both '<' and '>' from start_tag
-                            start_tag = start_tag[1:-1]
-
                            # Match start tag with attributes
-                            start_tag_pattern = rf"<{re.escape(start_tag)}(\s.*?)?>"
-                        else:
-                            # Handle cases where start_tag is just a tag name
-                            start_tag_pattern = rf"{re.escape(start_tag)}"
+                            start_tag_pattern = (
+                                rf"<{re.escape(start_tag[1:-1])}(\s.*?)?>"
+                            )

                        match = re.search(start_tag_pattern, content)
                        if match:
@@ -1637,6 +1635,8 @@ async def process_chat_response(
                    start_tag = content_blocks[-1]["start_tag"]
                    end_tag = content_blocks[-1]["end_tag"]

+                    print(start_tag, end_tag)
+
                    if end_tag.startswith("<") and end_tag.endswith(">"):
                        # Match end tag e.g., </tag>
                        end_tag_pattern = rf"{re.escape(end_tag)}"
@@ -1714,8 +1714,17 @@ async def process_chat_response(
                                )

                        # Clean processed content
+                        start_tag_pattern = rf"{re.escape(start_tag)}"
+                        if start_tag.startswith("<") and start_tag.endswith(">"):
+                            # Match start tag e.g., <tag> or <tag attr="value">
+                            # remove both '<' and '>' from start_tag
+                            # Match start tag with attributes
+                            start_tag_pattern = (
+                                rf"<{re.escape(start_tag[1:-1])}(\s.*?)?>"
+                            )
+
                        content = re.sub(
-                            rf"<{re.escape(start_tag)}(.*?)>(.|\n)*?<{re.escape(end_tag)}>",
+                            rf"{start_tag_pattern}(.|\n)*?{re.escape(end_tag)}",
                            "",
                            content,
                            flags=re.DOTALL,