fix: normalize excessive whitespace in streaming markdown responses

- Create comprehensive text normalization utility to clean up excessive newlines
- Apply normalization to streaming tokens in session-chat.tsx
- Apply normalization to rendered text in conversation-page.tsx
- Add test case demonstrating the fix for excessive empty lines
- Preserve proper markdown formatting while removing visual gaps

Co-Authored-By: PromptEngineer <jnfarooq@outlook.com>
This commit is contained in:
Devin AI 2025-07-15 07:30:00 +00:00
parent 6af1165894
commit dc9722de28
4 changed files with 155 additions and 12 deletions

View File

@ -10,6 +10,7 @@ import { ScrollArea } from "@/components/ui/scroll-area"
import { ChatMessage } from "@/lib/api"
import { cn } from "@/lib/utils"
import Markdown from "@/components/Markdown"
import { normalizeWhitespace } from "@/utils/textNormalization"
interface ConversationPageProps {
messages: ChatMessage[]
@ -110,7 +111,7 @@ function ThinkingText({ text }: { text: string }) {
</details>
)}
{visibleText.trim() && (
<Markdown text={visibleText} className="whitespace-pre-wrap" />
<Markdown text={normalizeWhitespace(visibleText)} className="whitespace-pre-wrap" />
)}
</>
);
@ -151,7 +152,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
{step.key === 'final' && step.details && typeof step.details === 'object' && !Array.isArray(step.details) ? (
<div className="space-y-3">
<div className="whitespace-pre-wrap text-gray-100">
<ThinkingText text={step.details.answer} />
<ThinkingText text={normalizeWhitespace(step.details.answer)} />
</div>
{!hasSubAnswers && step.details.source_documents && step.details.source_documents.length > 0 && (
<CitationsBlock docs={step.details.source_documents} />
@ -159,7 +160,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
</div>
) : step.key === 'final' && step.details && typeof step.details === 'string' ? (
<div className="whitespace-pre-wrap text-gray-100">
<ThinkingText text={step.details} />
<ThinkingText text={normalizeWhitespace(step.details)} />
</div>
) : Array.isArray(step.details) ? (
step.key === 'decompose' && step.details.every((d: any)=> typeof d === 'string') ? (
@ -175,7 +176,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
{step.details.map((detail: any, idx: number) => (
<div key={idx} className="border-l-2 border-blue-400 pl-2">
<div className="font-semibold">{detail.question}</div>
<div><ThinkingText text={detail.answer} /></div>
<div><ThinkingText text={normalizeWhitespace(detail.answer)} /></div>
{detail.source_documents && detail.source_documents.length > 0 && (
<CitationsBlock docs={detail.source_documents} />
)}
@ -185,7 +186,7 @@ function StructuredMessageBlock({ content }: { content: Array<Record<string, any
)
) : (
// Handle string details
<ThinkingText text={step.details as string} />
<ThinkingText text={normalizeWhitespace(step.details as string)} />
)}
</div>
);
@ -327,7 +328,7 @@ export function ConversationPage({
) : (
<div className="whitespace-pre-wrap text-base leading-relaxed">
{typeof message.content === 'string'
? <ThinkingText text={message.content} />
? <ThinkingText text={normalizeWhitespace(message.content)} />
: <StructuredMessageBlock content={message.content} />
}
</div>

View File

@ -7,6 +7,7 @@ import { EmptyChatState } from "./empty-chat-state"
import { ChatMessage, ChatSession, chatAPI, generateUUID } from "@/lib/api"
import { AttachedFile } from "@/lib/types"
import { useEffect, useState, forwardRef, useImperativeHandle, useCallback } from "react"
import { normalizeStreamingToken } from "@/utils/textNormalization"
import { Button } from "./button"
import type { Step } from '@/lib/api'
import { ChatSettingsModal } from '@/components/ui/chat-settings-modal'
@ -368,8 +369,7 @@ export const SessionChat = forwardRef<SessionChatRef, SessionChatProps>(({
return m; // skip empty/whitespace-only chunks
}
let updated = current.endsWith(tok) ? current : current + tok;
// Clean up excessive newlines
updated = updated.replace(/\n{3,}/g, '\n\n');
updated = normalizeStreamingToken('', updated);
if (steps[finalIdx].key === 'direct') {
steps[0].details = updated;
} else {
@ -396,8 +396,7 @@ export const SessionChat = forwardRef<SessionChatRef, SessionChatProps>(({
const curAns: string = detailsArr[idx].answer || '';
if (!curAns.endsWith(tok)) {
let updatedAnswer = curAns + tok;
// Clean up excessive newlines
updatedAnswer = updatedAnswer.replace(/\n{3,}/g, '\n\n');
updatedAnswer = normalizeStreamingToken('', updatedAnswer);
detailsArr[idx].answer = updatedAnswer;
}
steps[5].details = detailsArr;

View File

@ -0,0 +1,63 @@
/**
* Comprehensive text normalization utility for cleaning up excessive whitespace
* in streaming markdown responses to prevent large visual gaps in the UI.
*/
export function normalizeWhitespace(text: string): string {
if (!text || typeof text !== 'string') {
return '';
}
text = text.replace(/\n{3,}/g, '\n\n');
text = text.replace(/[ \t]+$/gm, '');
text = text.replace(/[ \t]{3,}/g, ' ');
text = text.replace(/[ \t]*\n[ \t]*\n[ \t]*\n/g, '\n\n');
text = text.replace(/[ \t]+\n/g, '\n');
text = text.trim();
return text;
}
/**
* Specialized normalization for streaming tokens to prevent accumulation
* of excessive whitespace during real-time text generation.
*/
export function normalizeStreamingToken(currentText: string, newToken: string): string {
if (!newToken || typeof newToken !== 'string') {
return currentText;
}
let combined = currentText + newToken;
combined = normalizeWhitespace(combined);
return combined;
}
/**
* Check if text contains excessive whitespace that needs normalization
*/
export function hasExcessiveWhitespace(text: string): boolean {
if (!text || typeof text !== 'string') {
return false;
}
if (/\n{3,}/.test(text)) {
return true;
}
if (/[ \t]{3,}/.test(text)) {
return true;
}
if (/[ \t]*\n[ \t]*\n[ \t]*\n/.test(text)) {
return true;
}
return false;
}

View File

@ -0,0 +1,80 @@
const testMarkdownWithExcessiveNewlines = `# Test Response
This is a test response with excessive newlines.
Here's some content after multiple empty lines.
## Section Header
More content here.
### Subsection
Final content with lots of spacing.
The end.`;
const testStreamingTokens = [
"# Test Response\n\n",
"This is a test response",
" with excessive newlines.\n\n\n\n",
"Here's some content after",
" multiple empty lines.\n\n\n\n\n",
"## Section Header\n\n",
"More content here.\n\n\n\n\n\n\n",
"### Subsection\n\n",
"Final content with lots",
" of spacing.\n\n\n\n\n",
"The end."
];
function currentCleanup(text) {
return text.replace(/\n{3,}/g, '\n\n');
}
function improvedCleanup(text) {
text = text.replace(/\n{3,}/g, '\n\n');
text = text.replace(/[ \t]+$/gm, '');
text = text.replace(/[ \t]{3,}/g, ' ');
text = text.replace(/[ \t]*\n[ \t]*\n[ \t]*\n/g, '\n\n');
text = text.trim();
return text;
}
console.log("=== ORIGINAL TEXT ===");
console.log(JSON.stringify(testMarkdownWithExcessiveNewlines));
console.log("\n=== CURRENT CLEANUP ===");
console.log(JSON.stringify(currentCleanup(testMarkdownWithExcessiveNewlines)));
console.log("\n=== IMPROVED CLEANUP ===");
console.log(JSON.stringify(improvedCleanup(testMarkdownWithExcessiveNewlines)));
console.log("\n=== STREAMING SIMULATION ===");
let streamedText = "";
testStreamingTokens.forEach((token, i) => {
streamedText += token;
console.log(`Token ${i + 1}: "${token}"`);
console.log(`Accumulated (current): "${currentCleanup(streamedText)}"`);
console.log(`Accumulated (improved): "${improvedCleanup(streamedText)}"`);
console.log("---");
});