Grace/qwen3 thinking (#12647)

* changing initial status to take into consideration prefill

* Add separate strings for content and thinking builder

* thinking tests

* remove white space from string before closing think tag
This commit is contained in:
Grace 2025-10-16 15:29:41 -07:00 committed by GitHub
parent 1813ff85a0
commit e2a0b24435
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 231 additions and 15 deletions

View File

@ -45,6 +45,9 @@ func ParserForName(name string) Parser {
case "qwen3-vl-instruct":
parser := &Qwen3VLParser{hasThinkingSupport: false}
return parser
case "qwen3-vl-thinking":
parser := &Qwen3VLParser{hasThinkingSupport: true}
return parser
case "passthrough":
return &PassthroughParser{}
case "harmony":

View File

@ -22,7 +22,6 @@ const (
thinkingCloseTag = "</think>"
)
// TODO(gguo): add a field for isThinking
type Qwen3VLParser struct {
state qwenParserState
buffer strings.Builder
@ -34,21 +33,28 @@ func (p *Qwen3VLParser) HasToolSupport() bool {
return true
}
// HasThinkingSupport reports whether this parser was configured with thinking
// support, as recorded in its hasThinkingSupport field.
// TODO(gguo): change this to reference an object's param.
func (p *Qwen3VLParser) HasThinkingSupport() bool {
return p.hasThinkingSupport
}
func (p *Qwen3VLParser) initialState() qwenParserState {
if p.HasThinkingSupport() { // has thinking, start from collecting thinking content
return CollectingThinkingContent
func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
prefill := lastMessage != nil && lastMessage.Role == "assistant"
if !p.HasThinkingSupport() {
p.state = CollectingContent
return
}
return CollectingContent
if prefill && lastMessage.Content != "" {
p.state = CollectingContent
return
}
p.state = CollectingThinkingContent
}
// Init stores the tools for this request and selects the parser's starting
// state from lastMessage via setInitialState. It returns tools unchanged.
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	p.tools = tools
	// setInitialState assigns p.state on every path, so no separate default
	// assignment is needed before calling it.
	p.setInitialState(lastMessage)
	return tools
}
@ -63,7 +69,8 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
events := p.parseEvents()
var toolCalls []api.ToolCall
var sb strings.Builder
var contentSb strings.Builder
var thinkingSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case qwenEventRawToolCall:
@ -74,15 +81,15 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
}
toolCalls = append(toolCalls, toolCall)
case qwenEventThinkingContent:
sb.WriteString(event.content)
thinkingSb.WriteString(event.content)
case qwenEventContent:
// TODO(drifkin): if the same turn contains multiple interleaved content
// events, we naively append them together here.
sb.WriteString(event.content)
contentSb.WriteString(event.content)
}
}
return sb.String(), "", toolCalls, nil
return contentSb.String(), thinkingSb.String(), toolCalls, nil
}
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
@ -155,7 +162,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
case CollectingToolContent:
if strings.Contains(p.buffer.String(), toolCloseTag) {
split := strings.SplitN(p.buffer.String(), toolCloseTag, 2)
before := split[0]
before := split[0] // do we also need to do it to tool calls?
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
@ -169,10 +176,11 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
} else {
return events, false
}
case CollectingThinkingContent: // so we want to hip the unambiguous stuff
case CollectingThinkingContent:
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
before := split[0]
// before := split[0]
before := strings.TrimRightFunc(split[0], unicode.IsSpace)
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
@ -184,7 +192,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
p.buffer.WriteString(after)
p.state = CollectingContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 { // we see part of a close thinking tag
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen

View File

@ -344,3 +344,205 @@ func TestQwen3VLThinkingToolParser(t *testing.T) {
}
}
}
// TestQwen3VLParserState verifies the state the parser is in right after Init
// for each combination of thinking support and last message. Each case runs as
// its own subtest so it can be selected with -run and is reported by name.
func TestQwen3VLParserState(t *testing.T) {
	cases := []struct {
		desc        string
		hasThinking bool
		last        *api.Message
		wantState   qwenParserState
	}{
		{
			desc:        "no thinking support => CollectingContent",
			hasThinking: false,
			last:        nil,
			wantState:   CollectingContent,
		},
		{
			desc:        "no thinking support, last assistant with content => CollectingContent",
			hasThinking: false,
			last:        &api.Message{Role: "assistant", Content: "hello"},
			wantState:   CollectingContent,
		},
		{
			desc:        "thinking support, no last message => CollectingThinkingContent",
			hasThinking: true,
			last:        nil,
			wantState:   CollectingThinkingContent,
		},
		{
			desc:        "thinking support, last assistant with empty content => CollectingThinkingContent",
			hasThinking: true,
			last:        &api.Message{Role: "assistant", Content: ""},
			wantState:   CollectingThinkingContent,
		},
		{
			desc:        "thinking support, last assistant with content => CollectingContent",
			hasThinking: true,
			last:        &api.Message{Role: "assistant", Content: "hello"},
			wantState:   CollectingContent,
		},
		{
			desc:        "thinking support, last is user => CollectingThinkingContent",
			hasThinking: true,
			last:        &api.Message{Role: "user", Content: "hi"},
			wantState:   CollectingThinkingContent,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: tc.hasThinking}
			parser.Init(nil, tc.last)
			if parser.state != tc.wantState {
				t.Errorf("%s: got state %v, want %v", tc.desc, parser.state, tc.wantState)
			}
		})
	}
}
// TestQwen3VLThinkingParserWithThinkingPrefill checks the events parsed when
// the last message is an assistant prefill that has thinking but no content,
// which leaves a thinking-capable parser in CollectingThinkingContent.
func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
	}{
		{
			desc: "thinking prefill",
			steps: []step{
				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
			},
		},
		{
			desc: "thinking prefill with content",
			steps: []step{
				{input: "abc</th", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
				{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "def"}}},
			},
		},
		{
			desc: "thinking prefill with fakeout",
			steps: []step{
				{input: "abc</think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
				{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}}},
				{input: ">", wantEvents: []qwenEvent{}},
			},
		},
		{
			desc: "thinking prefill with spaces",
			steps: []step{
				{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: "starting content"}}},
			},
		},
	}

	// Content is empty, so Init leaves the parser collecting thinking content
	// even though the last message is from the assistant.
	last := &api.Message{Role: "assistant", Thinking: "i am thinking"}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, last)

			// The same parser is fed step by step to simulate streaming.
			for i, s := range tc.steps {
				parser.buffer.WriteString(s.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, s.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
				}
			}
		})
	}
}
// TestQwen3VLThinkingParserWithNonThinkingPrefill checks the events parsed
// when the last message is an assistant prefill with non-empty content. Init
// then starts the parser in CollectingContent, and every expectation below
// has the input, including any "</think>" text, come back as plain content.
func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
	}{
		{
			desc: "thinking prefill",
			steps: []step{
				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
			},
		},
		{
			desc: "thinking prefill with content",
			steps: []step{
				{input: "abc</th", wantEvents: []qwenEvent{qwenEventContent{content: "abc</th"}}},
				{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "ink> def"}}},
			},
		},
		{
			desc: "thinking prefill with fakeout",
			steps: []step{
				{input: "abc</think", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}}},
				{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventContent{content: " fakeout </think"}}},
				{input: ">", wantEvents: []qwenEvent{qwenEventContent{content: ">"}}},
			},
		},
		{
			desc: "thinking prefill with spaces",
			steps: []step{
				{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: " </think> starting content"}}},
			},
		},
	}

	// Content is non-empty, so Init puts the parser straight into
	// CollectingContent despite the message also carrying thinking.
	last := &api.Message{Role: "assistant", Thinking: "i am thinking", Content: "i am content"}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, last)

			// The same parser is fed step by step to simulate streaming.
			for i, s := range tc.steps {
				parser.buffer.WriteString(s.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, s.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
				}
			}
		})
	}
}
func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
// last message is assistant with content ⇒ start in CollectingContent
last := &api.Message{Role: "assistant", Content: "has content"}
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, last)
type step struct {
input string
wantEvents []qwenEvent
}
steps := []step{
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
{input: "<tool_call>{\"name\": \"x\", \"arguments\": {}}</tool_call>", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "{\"name\": \"x\", \"arguments\": {}}"}}},
}
for i, s := range steps {
parser.buffer.WriteString(s.input)
gotEvents := parser.parseEvents()
if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
continue
}
if !reflect.DeepEqual(gotEvents, s.wantEvents) {
t.Fatalf("step %d: input %q: got %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
}
}
}

View File

@ -48,6 +48,9 @@ func rendererForName(name string) Renderer {
case "qwen3-vl-instruct":
renderer := &Qwen3VLRenderer{false}
return renderer
case "qwen3-vl-thinking":
renderer := &Qwen3VLRenderer{true}
return renderer
default:
return nil
}