mirror of
https://github.com/zebrajr/localGPT.git
synced 2025-12-06 00:20:19 +01:00
fix: normalize excessive whitespace in streaming markdown responses
- Create comprehensive text normalization utility to clean up excessive newlines
- Apply normalization to streaming tokens in session-chat.tsx
- Apply normalization to rendered text in conversation-page.tsx
- Add test case demonstrating the fix for excessive empty lines
- Preserve proper markdown formatting while removing visual gaps

Co-Authored-By: PromptEngineer <jnfarooq@outlook.com>
This commit is contained in:
parent
6af1165894
commit
dc9722de28
|
|
@ -10,6 +10,7 @@ import { ScrollArea } from "@/components/ui/scroll-area"
|
|||
import { ChatMessage } from "@/lib/api"
|
||||
import { cn } from "@/lib/utils"
|
||||
import Markdown from "@/components/Markdown"
|
||||
import { normalizeWhitespace } from "@/utils/textNormalization"
|
||||
|
||||
interface ConversationPageProps {
|
||||
messages: ChatMessage[]
|
||||
|
|
@ -110,7 +111,7 @@ function ThinkingText({ text }: { text: string }) {
|
|||
</details>
|
||||
)}
|
||||
{visibleText.trim() && (
|
||||
<Markdown text={visibleText} className="whitespace-pre-wrap" />
|
||||
<Markdown text={normalizeWhitespace(visibleText)} className="whitespace-pre-wrap" />
|
||||
)}
|
||||
</>
|
||||
);
|
||||
|
|
@ -151,7 +152,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
|
|||
{step.key === 'final' && step.details && typeof step.details === 'object' && !Array.isArray(step.details) ? (
|
||||
<div className="space-y-3">
|
||||
<div className="whitespace-pre-wrap text-gray-100">
|
||||
<ThinkingText text={step.details.answer} />
|
||||
<ThinkingText text={normalizeWhitespace(step.details.answer)} />
|
||||
</div>
|
||||
{!hasSubAnswers && step.details.source_documents && step.details.source_documents.length > 0 && (
|
||||
<CitationsBlock docs={step.details.source_documents} />
|
||||
|
|
@ -159,7 +160,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
|
|||
</div>
|
||||
) : step.key === 'final' && step.details && typeof step.details === 'string' ? (
|
||||
<div className="whitespace-pre-wrap text-gray-100">
|
||||
<ThinkingText text={step.details} />
|
||||
<ThinkingText text={normalizeWhitespace(step.details)} />
|
||||
</div>
|
||||
) : Array.isArray(step.details) ? (
|
||||
step.key === 'decompose' && step.details.every((d: any)=> typeof d === 'string') ? (
|
||||
|
|
@ -175,7 +176,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
|
|||
{step.details.map((detail: any, idx: number) => (
|
||||
<div key={idx} className="border-l-2 border-blue-400 pl-2">
|
||||
<div className="font-semibold">{detail.question}</div>
|
||||
<div><ThinkingText text={detail.answer} /></div>
|
||||
<div><ThinkingText text={normalizeWhitespace(detail.answer)} /></div>
|
||||
{detail.source_documents && detail.source_documents.length > 0 && (
|
||||
<CitationsBlock docs={detail.source_documents} />
|
||||
)}
|
||||
|
|
@ -185,7 +186,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
|
|||
)
|
||||
) : (
|
||||
// Handle string details
|
||||
<ThinkingText text={step.details as string} />
|
||||
<ThinkingText text={normalizeWhitespace(step.details as string)} />
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
|
|
@ -327,7 +328,7 @@ export function ConversationPage({
|
|||
) : (
|
||||
<div className="whitespace-pre-wrap text-base leading-relaxed">
|
||||
{typeof message.content === 'string'
|
||||
? <ThinkingText text={message.content} />
|
||||
? <ThinkingText text={normalizeWhitespace(message.content)} />
|
||||
: <StructuredMessageBlock content={message.content} />
|
||||
}
|
||||
</div>
|
||||
|
|
@ -413,4 +414,4 @@ export function ConversationPage({
|
|||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@ import { EmptyChatState } from "./empty-chat-state"
|
|||
import { ChatMessage, ChatSession, chatAPI, generateUUID } from "@/lib/api"
|
||||
import { AttachedFile } from "@/lib/types"
|
||||
import { useEffect, useState, forwardRef, useImperativeHandle, useCallback } from "react"
|
||||
import { normalizeStreamingToken } from "@/utils/textNormalization"
|
||||
import { Button } from "./button"
|
||||
import type { Step } from '@/lib/api'
|
||||
import { ChatSettingsModal } from '@/components/ui/chat-settings-modal'
|
||||
|
|
@ -368,8 +369,7 @@ export const SessionChat = forwardRef<SessionChatRef, SessionChatProps>(({
|
|||
return m; // skip empty/whitespace-only chunks
|
||||
}
|
||||
let updated = current.endsWith(tok) ? current : current + tok;
|
||||
// Clean up excessive newlines
|
||||
updated = updated.replace(/\n{3,}/g, '\n\n');
|
||||
updated = normalizeStreamingToken('', updated);
|
||||
if (steps[finalIdx].key === 'direct') {
|
||||
steps[0].details = updated;
|
||||
} else {
|
||||
|
|
@ -396,8 +396,7 @@ export const SessionChat = forwardRef<SessionChatRef, SessionChatProps>(({
|
|||
const curAns: string = detailsArr[idx].answer || '';
|
||||
if (!curAns.endsWith(tok)) {
|
||||
let updatedAnswer = curAns + tok;
|
||||
// Clean up excessive newlines
|
||||
updatedAnswer = updatedAnswer.replace(/\n{3,}/g, '\n\n');
|
||||
updatedAnswer = normalizeStreamingToken('', updatedAnswer);
|
||||
detailsArr[idx].answer = updatedAnswer;
|
||||
}
|
||||
steps[5].details = detailsArr;
|
||||
|
|
@ -681,4 +680,4 @@ export const SessionChat = forwardRef<SessionChatRef, SessionChatProps>(({
|
|||
)
|
||||
})
|
||||
|
||||
SessionChat.displayName = "SessionChat"
|
||||
SessionChat.displayName = "SessionChat"
|
||||
63
src/utils/textNormalization.ts
Normal file
63
src/utils/textNormalization.ts
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/**
 * Normalizes whitespace in markdown text so that rendered responses do not
 * show large visual gaps.
 *
 * Steps, in order:
 *  1. Remove spaces/tabs at the end of every line. This runs first so that
 *     lines containing only whitespace become truly empty and take part in
 *     the newline collapse below.
 *  2. Collapse every run of three or more newlines to exactly two.
 *  3. Collapse every run of three or more spaces/tabs to a single space.
 *  4. Trim leading and trailing whitespace of the whole text.
 *
 * NOTE(review): step 3 also applies to leading indentation, so text indented
 * by three or more spaces/tabs is reduced to one space — confirm this is
 * acceptable for indented markdown (nested lists, indented code).
 *
 * @param text - The text to normalize.
 * @returns The normalized text, or `''` when `text` is empty or not a string.
 */
export function normalizeWhitespace(text: string): string {
  if (!text || typeof text !== 'string') {
    return '';
  }

  return text
    .replace(/[ \t]+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \t]{3,}/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Appends a streamed token to the text accumulated so far and normalizes the
 * result without destroying the separator at the end of the text.
 *
 * While streaming, the end of the accumulated text is still "open": a
 * trailing space or paragraph break belongs to the boundary with the next
 * token. Trimming it would glue consecutive tokens together when the result
 * is fed back in as the accumulated text, so the trailing whitespace run is
 * normalized separately instead of being removed:
 *  - if it contains newlines, it is reduced to at most two newlines;
 *  - otherwise runs of three or more spaces/tabs become one space.
 *
 * Everything before the trailing run has line-end spaces/tabs removed, runs
 * of three or more newlines collapsed to two, runs of three or more
 * spaces/tabs collapsed to one space, and leading whitespace trimmed.
 *
 * @param currentText - Text accumulated so far (may be `''`).
 * @param newToken - The token that just arrived.
 * @returns `currentText` unchanged when `newToken` is empty or not a string;
 *   `''` when the combined text is whitespace only; otherwise the normalized
 *   combined text with its bounded trailing separator.
 */
export function normalizeStreamingToken(currentText: string, newToken: string): string {
  if (!newToken || typeof newToken !== 'string') {
    return currentText;
  }

  const combined = currentText + newToken;

  // Split into the settled part and the still-open trailing whitespace run.
  const body = combined.trimEnd();
  if (body === '') {
    return '';
  }
  const tail = combined.slice(body.length);

  const cleanedBody = body
    .replace(/[ \t]+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \t]{3,}/g, ' ')
    .trimStart();

  // Keep the separator, but never more than one blank line's worth.
  const newlineCount = tail.split('\n').length - 1;
  const cleanedTail =
    newlineCount > 0
      ? '\n'.repeat(Math.min(newlineCount, 2))
      : tail.replace(/[ \t]{3,}/g, ' ');

  return cleanedBody + cleanedTail;
}
|
||||
|
||||
/**
 * Reports whether `text` contains whitespace that normalization would reduce.
 *
 * Returns `true` when any of these occurs in `text`:
 *  - three or more consecutive newlines,
 *  - three or more consecutive spaces/tabs,
 *  - three newlines separated only by spaces/tabs.
 *
 * @param text - The text to inspect.
 * @returns `false` for empty or non-string input, otherwise whether any of
 *   the patterns above matches.
 */
export function hasExcessiveWhitespace(text: string): boolean {
  const isNonEmptyString = typeof text === 'string' && text.length > 0;
  if (!isNonEmptyString) {
    return false;
  }

  const excessPatterns = [
    /\n{3,}/,
    /[ \t]{3,}/,
    /[ \t]*\n[ \t]*\n[ \t]*\n/,
  ];

  return excessPatterns.some((pattern) => pattern.test(text));
}
|
||||
80
test_markdown_streaming.js
Normal file
80
test_markdown_streaming.js
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
|
||||
const testMarkdownWithExcessiveNewlines = `# Test Response
|
||||
|
||||
This is a test response with excessive newlines.
|
||||
|
||||
|
||||
|
||||
Here's some content after multiple empty lines.
|
||||
|
||||
|
||||
|
||||
|
||||
## Section Header
|
||||
|
||||
More content here.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Subsection
|
||||
|
||||
Final content with lots of spacing.
|
||||
|
||||
|
||||
|
||||
|
||||
The end.`;
|
||||
|
||||
const testStreamingTokens = [
|
||||
"# Test Response\n\n",
|
||||
"This is a test response",
|
||||
" with excessive newlines.\n\n\n\n",
|
||||
"Here's some content after",
|
||||
" multiple empty lines.\n\n\n\n\n",
|
||||
"## Section Header\n\n",
|
||||
"More content here.\n\n\n\n\n\n\n",
|
||||
"### Subsection\n\n",
|
||||
"Final content with lots",
|
||||
" of spacing.\n\n\n\n\n",
|
||||
"The end."
|
||||
];
|
||||
|
||||
/**
 * Baseline cleanup used for comparison: every run of three or more
 * consecutive newlines becomes exactly two newlines. Nothing else changes.
 */
function currentCleanup(text) {
  // Splitting on each run and re-joining with a blank line gives the same
  // result as a global replace.
  const pieces = text.split(/\n{3,}/);
  return pieces.join('\n\n');
}
|
||||
|
||||
/**
 * Improved cleanup for comparison against `currentCleanup`.
 *
 * Steps, in order:
 *  1. Remove spaces/tabs at the end of every line. This runs first so that
 *     whitespace-only lines become empty and are counted by the next step.
 *  2. Collapse every run of three or more newlines to exactly two.
 *  3. Collapse every run of three or more spaces/tabs to a single space.
 *  4. Trim leading and trailing whitespace of the whole text.
 */
function improvedCleanup(text) {
  return text
    .replace(/[ \t]+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \t]{3,}/g, ' ')
    .trim();
}
|
||||
|
||||
// Print the fixture as-is and after each cleanup strategy. JSON.stringify
// makes the newlines visible as escape sequences.
const fullTextReports = [
  ["=== ORIGINAL TEXT ===", (text) => text],
  ["\n=== CURRENT CLEANUP ===", currentCleanup],
  ["\n=== IMPROVED CLEANUP ===", improvedCleanup],
];
for (const [heading, transform] of fullTextReports) {
  console.log(heading);
  console.log(JSON.stringify(transform(testMarkdownWithExcessiveNewlines)));
}

// Replay the token list, printing both cleanups of the raw accumulated text
// after each token. The cleaned text is only printed, never fed back into
// the accumulator.
console.log("\n=== STREAMING SIMULATION ===");
let streamedText = "";
for (const [i, token] of testStreamingTokens.entries()) {
  streamedText = streamedText + token;
  console.log(`Token ${i + 1}: "${token}"`);
  console.log(`Accumulated (current): "${currentCleanup(streamedText)}"`);
  console.log(`Accumulated (improved): "${improvedCleanup(streamedText)}"`);
  console.log("---");
}
|
||||
Loading…
Reference in New Issue
Block a user