diff --git a/model/parsers/qwen3vl.go b/model/parsers/qwen3vl.go
index a8e7376c..87f49e89 100644
--- a/model/parsers/qwen3vl.go
+++ b/model/parsers/qwen3vl.go
@@ -16,6 +16,8 @@ const (
CollectingThinkingContent qwenParserState = iota
CollectingContent
CollectingToolContent
+ ThinkingDoneEatingWhitespace // thinking close tag consumed; discarding whitespace until content starts
+ ToolCallDoneEatingWhitespace // raw tool call emitted; discarding whitespace until content starts
)
const (
@@ -111,17 +113,28 @@ func (p *Qwen3VLParser) parseEvents() []qwenEvent {
return all
}
-func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qwenEvent {
+func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) { // split[1] below assumes tag occurs in p.buffer
split := strings.SplitN(p.buffer.String(), tag, 2)
before := split[0]
before = strings.TrimRightFunc(before, unicode.IsSpace)
- if len(before) > 0 {
- events = append(events, qwenEventContent{content: before})
- }
after := split[1]
+ if trimAfter { // optionally drop the whitespace that directly follows the tag
+ after = strings.TrimLeftFunc(after, unicode.IsSpace)
+ }
p.buffer.Reset()
p.buffer.WriteString(after)
- return events
+ return before, after // p.buffer now holds exactly `after`
+}
+
+func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) { // never emits events itself
+ trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
+ p.buffer.Reset()
+ if trimmed == "" {
+ return nil, false // buffer held only whitespace (or nothing): it is discarded and the state is left unchanged
+ }
+ p.state = nextState
+ p.buffer.WriteString(trimmed)
+ return nil, true // NOTE(review): true presumably asks the caller to keep parsing in nextState — confirm in parseEvents
}
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
@@ -130,7 +143,11 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
switch p.state {
case CollectingContent:
if strings.Contains(p.buffer.String(), toolOpenTag) {
- events = emitContentBeforeTag(p, events, toolOpenTag)
+ // Emit the right-trimmed content before the tool-open tag; the text after the tag stays in the buffer untrimmed.
+ before, _ := splitAtTag(p, toolOpenTag, false)
+ if len(before) > 0 {
+ events = append(events, qwenEventContent{content: before})
+ }
p.state = CollectingToolContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
@@ -167,27 +184,26 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
slog.Warn("qwen tool call closing tag found but no content before it")
}
- after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
+ after := split[1] // leading whitespace is no longer trimmed here; ToolCallDoneEatingWhitespace drops it instead
events = append(events, qwenEventRawToolCall{raw: before})
p.buffer.Reset()
p.buffer.WriteString(after)
- p.state = CollectingContent
+ p.state = ToolCallDoneEatingWhitespace
return events, true
} else {
return events, false
}
case CollectingThinkingContent:
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
- split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
- // before := split[0]
- before := strings.TrimRightFunc(split[0], unicode.IsSpace)
- after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
- if len(before) > 0 {
- events = append(events, qwenEventThinkingContent{content: before})
+ thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
+ if len(thinking) > 0 {
+ events = append(events, qwenEventThinkingContent{content: thinking})
+ }
+ if remaining == "" { // nothing but whitespace (or nothing at all) has followed the tag so far
+ p.state = ThinkingDoneEatingWhitespace
+ } else {
+ p.state = CollectingContent
}
- p.buffer.Reset()
- p.buffer.WriteString(after)
- p.state = CollectingContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
@@ -215,6 +231,10 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
}
return events, false
}
+ case ThinkingDoneEatingWhitespace: // drop leading whitespace, then resume normal content parsing
+ return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
+ case ToolCallDoneEatingWhitespace: // same handling after a completed tool call
+ return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
default:
panic("unreachable")
}
diff --git a/model/parsers/qwen3vl_nonthinking_test.go b/model/parsers/qwen3vl_nonthinking_test.go
index 74392946..e0b9a02b 100644
--- a/model/parsers/qwen3vl_nonthinking_test.go
+++ b/model/parsers/qwen3vl_nonthinking_test.go
@@ -653,3 +653,189 @@ func TestQwen3VLNonThinkingToolParser(t *testing.T) {
}
}
}
+
+func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) { // table-driven; each step writes one input to the parser buffer
+ type step struct {
+ input string
+ wantEvents []qwenEvent
+ }
+
+ cases := []struct {
+ desc string
+ steps []step
+ only bool // if any case sets this, every other case is skipped
+ }{
+ {
+ desc: "whitespace inside tool call preserves trailing space",
+ steps: []step{
+ {
+ input: "before tool content after",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "before"},
+ qwenEventRawToolCall{raw: " tool content "},
+ qwenEventContent{content: "after"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace inside tool call preserves trailing space (heavy surrounding whitespace)",
+ steps: []step{
+ {
+ input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t tool content \n\n\n\n\n\n\n after",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh"},
+ qwenEventRawToolCall{raw: " tool content "},
+ qwenEventContent{content: "after"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace inside tool call preserves trailing space (across two chunks)",
+ steps: []step{
+ {
+ input: " tool content ",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: " tool content "},
+ },
+ },
+ {
+ input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t anotha one \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
+ qwenEventRawToolCall{raw: " anotha one "},
+ qwenEventContent{content: "after \n\n\n\n\n\n blep"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace between content and tool call",
+ steps: []step{
+ {
+ input: "content \n tool \n more content",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "content"},
+ qwenEventRawToolCall{raw: "tool"},
+ qwenEventContent{content: "more content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "consecutive tool calls with whitespace",
+ steps: []step{
+ {
+ input: "first \n second \n third",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: "first"},
+ qwenEventRawToolCall{raw: "second"},
+ qwenEventRawToolCall{raw: "third"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace before and after tool open tag",
+ steps: []step{
+ {
+ input: "text \n content",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "text"},
+ qwenEventRawToolCall{raw: "content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "unicode whitespace around tool calls",
+ steps: []step{
+ {
+ input: "text\u00a0\u3000content\u00a0\u3000text",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "text"},
+ qwenEventRawToolCall{raw: "content"},
+ qwenEventContent{content: "text"},
+ },
+ },
+ },
+ },
+ {
+ desc: "empty tool call with surrounding whitespace",
+ steps: []step{
+ {
+ input: "before after",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "before"},
+ qwenEventRawToolCall{raw: ""},
+ qwenEventContent{content: "after"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace in tool call split across chunks",
+ steps: []step{
+ {
+ input: "before ",
+ wantEvents: []qwenEvent{qwenEventContent{content: "before"}},
+ },
+ {
+ input: "tool",
+ wantEvents: []qwenEvent{},
+ },
+ {
+ input: " after",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: " tool "},
+ qwenEventContent{content: "after"},
+ },
+ },
+ },
+ },
+ {
+ desc: "mixed whitespace types between tool calls",
+ steps: []step{
+ {
+ input: "first \t\n\r second",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: "first"},
+ qwenEventRawToolCall{raw: "second"},
+ },
+ },
+ },
+ },
+ }
+
+ anyOnlies := false
+ for _, tc := range cases {
+ if tc.only {
+ anyOnlies = true
+ }
+ }
+
+ for _, tc := range cases {
+ if anyOnlies && !tc.only {
+ continue
+ }
+
+ t.Run(tc.desc, func(t *testing.T) {
+ parser := Qwen3VLParser{hasThinkingSupport: false}
+ parser.Init([]api.Tool{}, nil)
+
+ for i, step := range tc.steps {
+ parser.buffer.WriteString(step.input)
+ gotEvents := parser.parseEvents()
+
+ if len(gotEvents) == 0 && len(step.wantEvents) == 0 { // nil and empty both mean "no events"; DeepEqual would tell them apart
+ continue
+ }
+
+ if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+ t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+ }
+ }
+ })
+ }
+}
diff --git a/model/parsers/qwen3vl_thinking_test.go b/model/parsers/qwen3vl_thinking_test.go
index d85a60fd..04b2a7db 100644
--- a/model/parsers/qwen3vl_thinking_test.go
+++ b/model/parsers/qwen3vl_thinking_test.go
@@ -546,3 +546,333 @@ func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
}
}
}
+
+func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) { // table-driven; each step writes one input to the parser buffer
+ type step struct {
+ input string
+ wantEvents []qwenEvent
+ }
+
+ cases := []struct {
+ desc string
+ steps []step
+ only bool // if any case sets this, every other case is skipped
+ }{
+ {
+ desc: "whitespace after thinking tag is trimmed",
+ steps: []step{
+ {
+ input: "thinking content \n\t content starts here",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "thinking content"},
+ qwenEventContent{content: "content starts here"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace after thinking tag split across chunks",
+ steps: []step{
+ {
+ input: "thinking content ",
+ wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
+ },
+ {
+ input: " \n\t",
+ wantEvents: []qwenEvent{},
+ },
+ {
+ input: "content",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "only whitespace after thinking tag",
+ steps: []step{
+ {
+ input: "thinking content \n\t ",
+ wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
+ },
+ },
+ },
+ {
+ desc: "multiple spaces and tabs after thinking",
+ steps: []step{
+ {
+ input: "think \t\t\n\n text",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "think"},
+ qwenEventContent{content: "text"},
+ },
+ },
+ },
+ },
+ {
+ desc: "trailing whitespace before thinking tag is preserved in content",
+ steps: []step{
+ {
+ input: "thinking with spaces text",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "thinking with spaces"},
+ qwenEventContent{content: "text"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace between thinking and tool call",
+ steps: []step{
+ {
+ input: "thinking \n {\"name\":\"test\"}",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "thinking"},
+ qwenEventRawToolCall{raw: "{\"name\":\"test\"}"},
+ },
+ },
+ },
+ },
+ {
+ desc: "no whitespace after thinking tag",
+ steps: []step{
+ {
+ input: "thinkingcontent",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "thinking"},
+ qwenEventContent{content: "content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "unicode whitespace after thinking tag",
+ steps: []step{
+ {
+ input: "thinking\u00a0\u3000content",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "thinking"},
+ qwenEventContent{content: "content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace split with partial thinking tag",
+ steps: []step{
+ {
+ input: "thinking \n",
+ wantEvents: []qwenEvent{},
+ },
+ {
+ input: " content",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "empty thinking tag with whitespace after",
+ steps: []step{
+ {
+ input: " \ncontent",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "content"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace inside tool call preserves trailing space",
+ steps: []step{
+ {
+ input: "bruh \n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t tool content \n\n\n\n\n\n\n after",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "bruh"},
+ qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
+ qwenEventRawToolCall{raw: " tool content "},
+ qwenEventContent{content: "after"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace inside tool call preserves trailing space (across multiple chunks)",
+ steps: []step{
+ {
+ input: "bruh shdjfhksdhfj ",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "bruh"},
+ qwenEventContent{content: "shdjfhksdhfj"},
+ },
+ },
+ {
+ input: "another word ",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: " another word"},
+ },
+ },
+ {
+ input: " tool content ",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: " tool content "},
+ },
+ },
+ {
+ input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t anotha one \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
+ qwenEventRawToolCall{raw: " anotha one "},
+ qwenEventContent{content: "after \n\n\n\n\n\n blep"},
+ },
+ },
+ },
+ },
+ }
+
+ anyOnlies := false
+ for _, tc := range cases {
+ if tc.only {
+ anyOnlies = true
+ }
+ }
+
+ for _, tc := range cases {
+ if anyOnlies && !tc.only {
+ continue
+ }
+
+ t.Run(tc.desc, func(t *testing.T) {
+ parser := Qwen3VLParser{hasThinkingSupport: true}
+ parser.Init([]api.Tool{}, nil)
+
+ for i, step := range tc.steps {
+ parser.buffer.WriteString(step.input)
+ gotEvents := parser.parseEvents()
+
+ if len(gotEvents) == 0 && len(step.wantEvents) == 0 { // nil and empty both mean "no events"; DeepEqual would tell them apart
+ continue
+ }
+
+ if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+ t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+ }
+ }
+ })
+ }
+}
+
+func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) { // table-driven; each step writes one input to the parser buffer
+ type step struct {
+ input string
+ wantEvents []qwenEvent
+ }
+
+ cases := []struct {
+ desc string
+ steps []step
+ only bool // if any case sets this, every other case is skipped
+ prefillMsg *api.Message // allows starting in content mode instead of thinking mode
+ }{
+ {
+ desc: "whitespace inside tool call is fully preserved (with content prefill)",
+ prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
+ steps: []step{
+ {
+ input: "before tool content \n after",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "before"},
+ qwenEventRawToolCall{raw: " tool content "},
+ qwenEventContent{content: "after"},
+ },
+ },
+ },
+ },
+ {
+ desc: "whitespace after tool call trimmed across chunks (with content prefill)",
+ prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
+ steps: []step{
+ {
+ input: "beforetool ",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "before"},
+ qwenEventRawToolCall{raw: "tool"},
+ },
+ },
+ {
+ input: "\n\t",
+ wantEvents: []qwenEvent{},
+ },
+ {
+ input: "after \n this is a song",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "after \n this is a song"},
+ },
+ },
+ },
+ },
+ {
+ desc: "multiple tool calls with whitespace between (with content prefill)",
+ prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
+ steps: []step{
+ {
+ input: "first \n second",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: "first"},
+ qwenEventRawToolCall{raw: "second"},
+ },
+ },
+ },
+ },
+ {
+ desc: "thinking with whitespace then tool call",
+ steps: []step{
+ {
+ input: "thinking \n tool \n content",
+ wantEvents: []qwenEvent{
+ qwenEventThinkingContent{content: "thinking"},
+ qwenEventRawToolCall{raw: "tool"},
+ qwenEventContent{content: "content"},
+ },
+ },
+ },
+ },
+ }
+
+ anyOnlies := false
+ for _, tc := range cases {
+ if tc.only {
+ anyOnlies = true
+ }
+ }
+
+ for _, tc := range cases {
+ if anyOnlies && !tc.only {
+ continue
+ }
+
+ t.Run(tc.desc, func(t *testing.T) {
+ parser := Qwen3VLParser{hasThinkingSupport: true}
+ parser.Init([]api.Tool{}, tc.prefillMsg)
+
+ for i, step := range tc.steps {
+ parser.buffer.WriteString(step.input)
+ gotEvents := parser.parseEvents()
+
+ if len(gotEvents) == 0 && len(step.wantEvents) == 0 { // nil and empty both mean "no events"; DeepEqual would tell them apart
+ continue
+ }
+
+ if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+ t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+ }
+ }
+ })
+ }
+}