Removing whitespace between Thinking and Content in Qwen3VL (#12838)

Eats extra whitespace at the end/beginning of content
This commit is contained in:
Grace 2025-10-29 15:14:28 -07:00 committed by GitHub
parent c88647104d
commit 0a2d92081b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 553 additions and 17 deletions

View File

@ -16,6 +16,8 @@ const (
CollectingThinkingContent qwenParserState = iota
CollectingContent
CollectingToolContent
ThinkingDoneEatingWhitespace
ToolCallDoneEatingWhitespace
)
const (
@ -111,17 +113,28 @@ func (p *Qwen3VLParser) parseEvents() []qwenEvent {
return all
}
func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qwenEvent {
func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) {
split := strings.SplitN(p.buffer.String(), tag, 2)
before := split[0]
before = strings.TrimRightFunc(before, unicode.IsSpace)
if len(before) > 0 {
events = append(events, qwenEventContent{content: before})
}
after := split[1]
if trimAfter {
after = strings.TrimLeftFunc(after, unicode.IsSpace)
}
p.buffer.Reset()
p.buffer.WriteString(after)
return events
return before, after // return events
}
// eatLeadingWhitespaceAndTransitionTo strips leading Unicode whitespace from
// the parser buffer.
//
// If nothing but whitespace (or nothing at all) is buffered, the buffer is
// left empty, the state is left untouched, and the second result is false.
// Otherwise the buffer is replaced with the trimmed text, the parser moves to
// nextState, and the second result is true. No events are ever produced here,
// so the first result is always nil.
func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) {
	rest := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
	p.buffer.Reset()

	if len(rest) > 0 {
		// Real content has arrived: keep it and hand over to the next state.
		p.buffer.WriteString(rest)
		p.state = nextState
		return nil, true
	}

	// Only whitespace so far; stay in the current state.
	return nil, false
}
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
@ -130,7 +143,11 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
switch p.state {
case CollectingContent:
if strings.Contains(p.buffer.String(), toolOpenTag) {
events = emitContentBeforeTag(p, events, toolOpenTag)
// events = emitContentBeforeTag(p, events, toolOpenTag)
before, _ := splitAtTag(p, toolOpenTag, false)
if len(before) > 0 {
events = append(events, qwenEventContent{content: before})
}
p.state = CollectingToolContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
@ -167,27 +184,26 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
slog.Warn("qwen tool call closing tag found but no content before it")
}
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
after := split[1]
events = append(events, qwenEventRawToolCall{raw: before})
p.buffer.Reset()
p.buffer.WriteString(after)
p.state = CollectingContent
p.state = ToolCallDoneEatingWhitespace
return events, true
} else {
return events, false
}
case CollectingThinkingContent:
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
// before := split[0]
before := strings.TrimRightFunc(split[0], unicode.IsSpace)
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
if len(before) > 0 {
events = append(events, qwenEventThinkingContent{content: before})
thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
if len(thinking) > 0 {
events = append(events, qwenEventThinkingContent{content: thinking})
}
if remaining == "" {
p.state = ThinkingDoneEatingWhitespace
} else {
p.state = CollectingContent
}
p.buffer.Reset()
p.buffer.WriteString(after)
p.state = CollectingContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
@ -215,6 +231,10 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
}
return events, false
}
case ThinkingDoneEatingWhitespace:
return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
case ToolCallDoneEatingWhitespace:
return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
default:
panic("unreachable")
}

View File

@ -653,3 +653,189 @@ func TestQwen3VLNonThinkingToolParser(t *testing.T) {
}
}
}
// TestQwen3VLNonThinkingToolCallWhitespaceHandling feeds inputs to a parser
// created with hasThinkingSupport set to false and checks the events returned
// by parseEvents after each chunk.
//
// Each case is a sequence of steps; a step appends its input to the parser
// buffer and lists the events expected from the following parseEvents call.
// Setting only on one or more cases restricts the run to those cases.
func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	// Every desc must be unique: it becomes the subtest name, and duplicate
	// names are only told apart by an opaque "#NN" suffix.
	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "whitespace inside tool call preserves trailing space",
			steps: []step{
				{
					input: "before<tool_call> tool content </tool_call>after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "leading content whitespace kept and whitespace around tool call trimmed",
			steps: []step{
				{
					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> tool content </tool_call> \n\n\n\n\n\n\n after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "whitespace after tool call eaten across chunks",
			steps: []step{
				{
					input: "<tool_call> tool content </tool_call> ",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: " tool content "},
					},
				},
				{
					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> anotha one </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " anotha one "},
						qwenEventContent{content: "after \n\n\n\n\n\n blep"},
					},
				},
			},
		},
		{
			desc: "whitespace between content and tool call",
			steps: []step{
				{
					input: "content \n <tool_call>tool</tool_call> \n more content",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
						qwenEventRawToolCall{raw: "tool"},
						qwenEventContent{content: "more content"},
					},
				},
			},
		},
		{
			desc: "consecutive tool calls with whitespace",
			steps: []step{
				{
					input: "<tool_call>first</tool_call> \n <tool_call>second</tool_call> \n <tool_call>third</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "first"},
						qwenEventRawToolCall{raw: "second"},
						qwenEventRawToolCall{raw: "third"},
					},
				},
			},
		},
		{
			desc: "whitespace before and after tool open tag",
			steps: []step{
				{
					input: "text \n <tool_call>content</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "text"},
						qwenEventRawToolCall{raw: "content"},
					},
				},
			},
		},
		{
			desc: "unicode whitespace around tool calls",
			steps: []step{
				{
					input: "text\u00a0\u3000<tool_call>content</tool_call>\u00a0\u3000text",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "text"},
						qwenEventRawToolCall{raw: "content"},
						qwenEventContent{content: "text"},
					},
				},
			},
		},
		{
			desc: "empty tool call with surrounding whitespace",
			steps: []step{
				{
					input: "before <tool_call></tool_call> after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: ""},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "whitespace in tool call split across chunks",
			steps: []step{
				{
					input:      "before<tool_call> ",
					wantEvents: []qwenEvent{qwenEventContent{content: "before"}},
				},
				{
					input:      "tool",
					wantEvents: []qwenEvent{},
				},
				{
					input: " </tool_call>after",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: " tool "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "mixed whitespace types between tool calls",
			steps: []step{
				{
					input: "<tool_call>first</tool_call> \t\n\r <tool_call>second</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "first"},
						qwenEventRawToolCall{raw: "second"},
					},
				},
			},
		},
	}

	// When any case is marked only, every unmarked case is skipped.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: false}
			parser.Init([]api.Tool{}, nil)

			// The loop variable is named s so it does not shadow the step type.
			for i, s := range tc.steps {
				parser.buffer.WriteString(s.input)
				gotEvents := parser.parseEvents()

				// A nil slice and an empty slice both mean "no events", but
				// reflect.DeepEqual treats them as different values.
				if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
					continue
				}

				if !reflect.DeepEqual(gotEvents, s.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
				}
			}
		})
	}
}

View File

@ -546,3 +546,333 @@ func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
}
}
}
// TestQwen3VLThinkingWhitespaceHandling feeds inputs to a parser created with
// hasThinkingSupport set to true and no prefill message, then checks the
// events returned by parseEvents after each chunk.
//
// Each case is a sequence of steps; a step appends its input to the parser
// buffer and lists the events expected from the following parseEvents call.
// Setting only on one or more cases restricts the run to those cases.
func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	// Every desc must be unique: it becomes the subtest name, and duplicate
	// names are only told apart by an opaque "#NN" suffix.
	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "whitespace after thinking tag is trimmed",
			steps: []step{
				{
					input: "thinking content</think> \n\t content starts here",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking content"},
						qwenEventContent{content: "content starts here"},
					},
				},
			},
		},
		{
			desc: "whitespace after thinking tag split across chunks",
			steps: []step{
				{
					input:      "thinking content</think> ",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
				},
				{
					input:      " \n\t",
					wantEvents: []qwenEvent{},
				},
				{
					input: "content",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "only whitespace after thinking tag",
			steps: []step{
				{
					input:      "thinking content</think> \n\t ",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
				},
			},
		},
		{
			desc: "multiple spaces and tabs after thinking",
			steps: []step{
				{
					input: "think</think> \t\t\n\n text",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "think"},
						qwenEventContent{content: "text"},
					},
				},
			},
		},
		{
			// The expected thinking content has no trailing spaces, so the
			// name states that they are trimmed.
			desc: "trailing whitespace before thinking close tag is trimmed",
			steps: []step{
				{
					input: "thinking with spaces </think>text",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking with spaces"},
						qwenEventContent{content: "text"},
					},
				},
			},
		},
		{
			desc: "whitespace between thinking and tool call",
			steps: []step{
				{
					input: "thinking</think> \n <tool_call>{\"name\":\"test\"}</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventRawToolCall{raw: "{\"name\":\"test\"}"},
					},
				},
			},
		},
		{
			desc: "no whitespace after thinking tag",
			steps: []step{
				{
					input: "thinking</think>content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "unicode whitespace after thinking tag",
			steps: []step{
				{
					input: "thinking</think>\u00a0\u3000content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "whitespace split with partial thinking tag",
			steps: []step{
				{
					input:      "thinking</th",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking"}},
				},
				{
					input:      "ink> \n",
					wantEvents: []qwenEvent{},
				},
				{
					input: " content",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "empty thinking tag with whitespace after",
			steps: []step{
				{
					input: "</think> \ncontent",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "thinking then content and tool call with surrounding whitespace",
			steps: []step{
				{
					input: "bruh</think> \n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> tool content </tool_call> \n\n\n\n\n\n\n after",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "bruh"},
						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "thinking, content and tool calls streamed across chunks",
			steps: []step{
				{
					input: "bruh</think> shdjfhksdhfj ",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "bruh"},
						qwenEventContent{content: "shdjfhksdhfj"},
					},
				},
				{
					input: "another word ",
					wantEvents: []qwenEvent{
						qwenEventContent{content: " another word"},
					},
				},
				{
					input: "<tool_call> tool content </tool_call> ",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: " tool content "},
					},
				},
				{
					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> anotha one </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " anotha one "},
						qwenEventContent{content: "after \n\n\n\n\n\n blep"},
					},
				},
			},
		},
	}

	// When any case is marked only, every unmarked case is skipped.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, nil)

			// The loop variable is named s so it does not shadow the step type.
			for i, s := range tc.steps {
				parser.buffer.WriteString(s.input)
				gotEvents := parser.parseEvents()

				// A nil slice and an empty slice both mean "no events", but
				// reflect.DeepEqual treats them as different values.
				if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
					continue
				}

				if !reflect.DeepEqual(gotEvents, s.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
				}
			}
		})
	}
}
// TestQwen3VLToolCallWhitespaceHandling drives a parser created with
// hasThinkingSupport set to true, optionally initialised with a prefill
// message, and checks the events returned by parseEvents after each chunk.
//
// A step appends its input to the parser buffer and lists the events expected
// from the following parseEvents call. Marking any case with only limits the
// run to the marked cases.
func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc       string
		steps      []step
		only       bool
		prefillMsg *api.Message // allows starting in content mode instead of thinking mode
	}{
		{
			desc: "whitespace inside tool call is fully preserved (with content prefill)",
			prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
			steps: []step{
				{
					input: "before<tool_call> tool content </tool_call> \n after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "whitespace after tool call trimmed across chunks (with content prefill)",
			prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
			steps: []step{
				{
					input: "before<tool_call>tool</tool_call> ",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: "tool"},
					},
				},
				{
					input:      "\n\t",
					wantEvents: []qwenEvent{},
				},
				{
					input: "after \n this is a song",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "after \n this is a song"},
					},
				},
			},
		},
		{
			desc: "multiple tool calls with whitespace between (with content prefill)",
			prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
			steps: []step{
				{
					input: "<tool_call>first</tool_call> \n <tool_call>second</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "first"},
						qwenEventRawToolCall{raw: "second"},
					},
				},
			},
		},
		{
			desc: "thinking with whitespace then tool call",
			steps: []step{
				{
					input: "thinking</think> \n <tool_call>tool</tool_call> \n content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventRawToolCall{raw: "tool"},
						qwenEventContent{content: "content"},
					},
				},
			},
		},
	}

	// Find out whether the table has been narrowed with only.
	focused := false
	for idx := range cases {
		if cases[idx].only {
			focused = true
			break
		}
	}

	for _, tc := range cases {
		if focused && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, tc.prefillMsg)

			for n, st := range tc.steps {
				parser.buffer.WriteString(st.input)
				got := parser.parseEvents()

				// Nothing produced and nothing expected counts as a match,
				// whether the slices are nil or merely empty.
				if len(got) == 0 && len(st.wantEvents) == 0 {
					continue
				}
				if reflect.DeepEqual(got, st.wantEvents) {
					continue
				}

				t.Errorf("step %d: input %q: got events %#v, want %#v", n, st.input, got, st.wantEvents)
			}
		})
	}
}