mirror of
https://github.com/zebrajr/ollama.git
synced 2025-12-06 00:19:51 +01:00
Grace/qwen3 thinking (#12647)
* change the initial state to take prefill into consideration * add separate string builders for content and thinking * thinking tests * remove whitespace from string before closing think tag
This commit is contained in:
parent
1813ff85a0
commit
e2a0b24435
|
|
@ -45,6 +45,9 @@ func ParserForName(name string) Parser {
|
|||
case "qwen3-vl-instruct":
|
||||
parser := &Qwen3VLParser{hasThinkingSupport: false}
|
||||
return parser
|
||||
case "qwen3-vl-thinking":
|
||||
parser := &Qwen3VLParser{hasThinkingSupport: true}
|
||||
return parser
|
||||
case "passthrough":
|
||||
return &PassthroughParser{}
|
||||
case "harmony":
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ const (
|
|||
thinkingCloseTag = "</think>"
|
||||
)
|
||||
|
||||
// TODO(gguo): add a field for isThinking
|
||||
type Qwen3VLParser struct {
|
||||
state qwenParserState
|
||||
buffer strings.Builder
|
||||
|
|
@ -34,21 +33,28 @@ func (p *Qwen3VLParser) HasToolSupport() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// TODO(gguo): changes this to reference an objects param
|
||||
func (p *Qwen3VLParser) HasThinkingSupport() bool {
|
||||
return p.hasThinkingSupport
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) initialState() qwenParserState {
|
||||
if p.HasThinkingSupport() { // has thinking, start from collecting thinking content
|
||||
return CollectingThinkingContent
|
||||
func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
|
||||
prefill := lastMessage != nil && lastMessage.Role == "assistant"
|
||||
if !p.HasThinkingSupport() {
|
||||
p.state = CollectingContent
|
||||
return
|
||||
}
|
||||
return CollectingContent
|
||||
|
||||
if prefill && lastMessage.Content != "" {
|
||||
p.state = CollectingContent
|
||||
return
|
||||
}
|
||||
|
||||
p.state = CollectingThinkingContent
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
||||
p.tools = tools
|
||||
p.state = p.initialState()
|
||||
p.setInitialState(lastMessage)
|
||||
return tools
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +69,8 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
|
|||
events := p.parseEvents()
|
||||
|
||||
var toolCalls []api.ToolCall
|
||||
var sb strings.Builder
|
||||
var contentSb strings.Builder
|
||||
var thinkingSb strings.Builder
|
||||
for _, event := range events {
|
||||
switch event := event.(type) {
|
||||
case qwenEventRawToolCall:
|
||||
|
|
@ -74,15 +81,15 @@ func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking strin
|
|||
}
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
case qwenEventThinkingContent:
|
||||
sb.WriteString(event.content)
|
||||
thinkingSb.WriteString(event.content)
|
||||
case qwenEventContent:
|
||||
// TODO(drifkin): if the same turn contains multiple interleaved content
|
||||
// events, we naively append them together here.
|
||||
sb.WriteString(event.content)
|
||||
contentSb.WriteString(event.content)
|
||||
}
|
||||
}
|
||||
|
||||
return sb.String(), "", toolCalls, nil
|
||||
return contentSb.String(), thinkingSb.String(), toolCalls, nil
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
|
||||
|
|
@ -155,7 +162,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|||
case CollectingToolContent:
|
||||
if strings.Contains(p.buffer.String(), toolCloseTag) {
|
||||
split := strings.SplitN(p.buffer.String(), toolCloseTag, 2)
|
||||
before := split[0]
|
||||
before := split[0] // do we also need to do it to tool calls?
|
||||
if len(before) == 0 {
|
||||
slog.Warn("qwen tool call closing tag found but no content before it")
|
||||
}
|
||||
|
|
@ -169,10 +176,11 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|||
} else {
|
||||
return events, false
|
||||
}
|
||||
case CollectingThinkingContent: // so we want to hip the unambiguous stuff
|
||||
case CollectingThinkingContent:
|
||||
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
|
||||
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
|
||||
before := split[0]
|
||||
// before := split[0]
|
||||
before := strings.TrimRightFunc(split[0], unicode.IsSpace)
|
||||
if len(before) == 0 {
|
||||
slog.Warn("qwen tool call closing tag found but no content before it")
|
||||
}
|
||||
|
|
@ -184,7 +192,7 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|||
p.buffer.WriteString(after)
|
||||
p.state = CollectingContent
|
||||
return events, true
|
||||
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 { // we see part of a close thinking tag
|
||||
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
|
||||
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||
|
|
|
|||
|
|
@ -344,3 +344,205 @@ func TestQwen3VLThinkingToolParser(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLParserState(t *testing.T) {
|
||||
cases := []struct {
|
||||
desc string
|
||||
hasThinking bool
|
||||
last *api.Message
|
||||
wantState qwenParserState
|
||||
}{
|
||||
{
|
||||
desc: "no thinking support => CollectingContent",
|
||||
hasThinking: false,
|
||||
last: nil,
|
||||
wantState: CollectingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, no last message => CollectingThinkingContent",
|
||||
hasThinking: true,
|
||||
last: nil,
|
||||
wantState: CollectingThinkingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, last assistant with empty content => CollectingThinkingContent",
|
||||
hasThinking: true,
|
||||
last: &api.Message{Role: "assistant", Content: ""},
|
||||
wantState: CollectingThinkingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, last assistant with content => CollectingContent",
|
||||
hasThinking: true,
|
||||
last: &api.Message{Role: "assistant", Content: "hello"},
|
||||
wantState: CollectingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, last is user => CollectingThinkingContent",
|
||||
hasThinking: true,
|
||||
last: &api.Message{Role: "user", Content: "hi"},
|
||||
wantState: CollectingThinkingContent,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: tc.hasThinking}
|
||||
parser.Init(nil, tc.last)
|
||||
if parser.state != tc.wantState {
|
||||
t.Errorf("%s: got state %v, want %v", tc.desc, parser.state, tc.wantState)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "thinking prefill",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with content",
|
||||
steps: []step{
|
||||
{input: "abc</th", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "def"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with fakeout",
|
||||
steps: []step{
|
||||
{input: "abc</think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}}},
|
||||
{input: ">", wantEvents: []qwenEvent{}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with spaces",
|
||||
steps: []step{
|
||||
{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: "starting content"}}},
|
||||
},
|
||||
},
|
||||
}
|
||||
last := &api.Message{Role: "assistant", Thinking: "i am thinking"} // so if there is thinking the test is still thinking
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, last)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "thinking prefill",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with content",
|
||||
steps: []step{
|
||||
{input: "abc</th", wantEvents: []qwenEvent{qwenEventContent{content: "abc</th"}}},
|
||||
{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "ink> def"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with fakeout",
|
||||
steps: []step{
|
||||
{input: "abc</think", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}}},
|
||||
{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventContent{content: " fakeout </think"}}},
|
||||
{input: ">", wantEvents: []qwenEvent{qwenEventContent{content: ">"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with spaces",
|
||||
steps: []step{
|
||||
{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: " </think> starting content"}}},
|
||||
},
|
||||
},
|
||||
}
|
||||
last := &api.Message{Role: "assistant", Thinking: "i am thinking", Content: "i am content"} // so if there is thinking the test is still thinking
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, last)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
|
||||
// last message is assistant with content ⇒ start in CollectingContent
|
||||
last := &api.Message{Role: "assistant", Content: "has content"}
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, last)
|
||||
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
steps := []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
|
||||
{input: "<tool_call>{\"name\": \"x\", \"arguments\": {}}</tool_call>", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "{\"name\": \"x\", \"arguments\": {}}"}}},
|
||||
}
|
||||
|
||||
for i, s := range steps {
|
||||
parser.buffer.WriteString(s.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
|
||||
continue
|
||||
}
|
||||
if !reflect.DeepEqual(gotEvents, s.wantEvents) {
|
||||
t.Fatalf("step %d: input %q: got %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,9 @@ func rendererForName(name string) Renderer {
|
|||
case "qwen3-vl-instruct":
|
||||
renderer := &Qwen3VLRenderer{false}
|
||||
return renderer
|
||||
case "qwen3-vl-thinking":
|
||||
renderer := &Qwen3VLRenderer{true}
|
||||
return renderer
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user