thinking: turn on thinking mode for all reasoning models (#12533)

This commit is contained in:
Patrick Devine 2025-10-08 16:50:13 -07:00 committed by GitHub
parent 1fc35f1260
commit 90d429f5a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 30 additions and 20 deletions

View File

@ -936,7 +936,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
return nil
}
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
}
// MarshalJSON implements json.Marshaler

View File

@ -9,6 +9,7 @@ import (
"log/slog"
"math/rand"
"net/http"
"slices"
"strings"
"time"
@ -82,7 +83,7 @@ type StreamOptions struct {
}
type Reasoning struct {
Effort *string `json:"effort,omitempty"`
Effort string `json:"effort,omitempty"`
}
type ChatCompletionRequest struct {
@ -567,13 +568,17 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
var think *api.ThinkValue
if r.Reasoning != nil {
think = &api.ThinkValue{
Value: *r.Reasoning.Effort,
if !slices.Contains([]string{"high", "medium", "low", "none"}, r.Reasoning.Effort) {
return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", r.Reasoning.Effort)
}
if r.Reasoning.Effort == "none" {
think = &api.ThinkValue{Value: false}
} else {
think = &api.ThinkValue{Value: r.Reasoning.Effort}
}
} else if r.ReasoningEffort != nil {
think = &api.ThinkValue{
Value: *r.ReasoningEffort,
}
think = &api.ThinkValue{Value: *r.ReasoningEffort}
}
return &api.ChatRequest{

View File

@ -330,12 +330,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
if req.Suffix != "" {
caps = append(caps, model.CapabilityInsert)
}
if req.Think != nil && req.Think.Bool() {
modelCaps := m.Capabilities()
if req.Think != nil {
caps = append(caps, model.CapabilityThinking)
// TODO(drifkin): consider adding a warning if it's false and the model
// doesn't support thinking. It's not strictly required, but it can be a
// hint that the user is on an older qwen3/r1 model that doesn't have an
// updated template supporting thinking
} else {
// add thinking if the model supports it
if slices.Contains(modelCaps, model.CapabilityThinking) {
caps = append(caps, model.CapabilityThinking)
req.Think = &api.ThinkValue{Value: true}
}
}
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
@ -1871,8 +1875,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
if len(req.Tools) > 0 {
caps = append(caps, model.CapabilityTools)
}
if req.Think != nil && req.Think.Bool() {
modelCaps := m.Capabilities()
if req.Think != nil {
caps = append(caps, model.CapabilityThinking)
} else {
// add thinking if the model supports it
if slices.Contains(modelCaps, model.CapabilityThinking) {
caps = append(caps, model.CapabilityThinking)
req.Think = &api.ThinkValue{Value: true}
}
}
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)

View File

@ -1120,13 +1120,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
"The answer is 4.",
true)
testChatRequest(t, "thinking disabled but template still adds think tag",
"Simple question",
" My thoughts </think> The answer.",
"",
" My thoughts </think> The answer.",
false)
// Test streaming response with template-added <think>
t.Run("streaming with thinking", func(t *testing.T) {
var wg sync.WaitGroup