thinking: turn on thinking mode for all reasoning models (#12533)

2025-12-06 12:19:56 +01:00 · 2025-10-08 16:50:13 -07:00 · 2025-10-08 16:50:13 -07:00 · 90d429f5a8
commit 90d429f5a8
parent 1fc35f1260
4 changed files with 30 additions and 20 deletions
--- a/api/types.go
+++ b/api/types.go
@@ -936,7 +936,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
 		return nil
 	}

-	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
+	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
 }

 // MarshalJSON implements json.Marshaler
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math/rand"
 	"net/http"
+	"slices"
 	"strings"
 	"time"

@@ -82,7 +83,7 @@ type StreamOptions struct {
 }

 type Reasoning struct {
-	Effort *string `json:"effort,omitempty"`
+	Effort string `json:"effort,omitempty"`
 }

 type ChatCompletionRequest struct {
@@ -567,13 +568,17 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {

 	var think *api.ThinkValue
 	if r.Reasoning != nil {
-		think = &api.ThinkValue{
-			Value: *r.Reasoning.Effort,
+		if !slices.Contains([]string{"high", "medium", "low", "none"}, r.Reasoning.Effort) {
+			return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", r.Reasoning.Effort)
+		}
+
+		if r.Reasoning.Effort == "none" {
+			think = &api.ThinkValue{Value: false}
+		} else {
+			think = &api.ThinkValue{Value: r.Reasoning.Effort}
 		}
 	} else if r.ReasoningEffort != nil {
-		think = &api.ThinkValue{
-			Value: *r.ReasoningEffort,
-		}
+		think = &api.ThinkValue{Value: *r.ReasoningEffort}
 	}

 	return &api.ChatRequest{
--- a/server/routes.go
+++ b/server/routes.go
@@ -330,12 +330,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	if req.Suffix != "" {
 		caps = append(caps, model.CapabilityInsert)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
+	modelCaps := m.Capabilities()
+	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
-		// TODO(drifkin): consider adding a warning if it's false and the model
-		// doesn't support thinking. It's not strictly required, but it can be a
-		// hint that the user is on an older qwen3/r1 model that doesn't have an
-		// updated template supporting thinking
+	} else {
+		// add thinking if the model supports it
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
+			caps = append(caps, model.CapabilityThinking)
+			req.Think = &api.ThinkValue{Value: true}
+		}
 	}

 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
@@ -1871,8 +1875,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	if len(req.Tools) > 0 {
 		caps = append(caps, model.CapabilityTools)
 	}
-	if req.Think != nil && req.Think.Bool() {
+
+	modelCaps := m.Capabilities()
+	if req.Think != nil {
 		caps = append(caps, model.CapabilityThinking)
+	} else {
+		// add thinking if the model supports it
+		if slices.Contains(modelCaps, model.CapabilityThinking) {
+			caps = append(caps, model.CapabilityThinking)
+			req.Think = &api.ThinkValue{Value: true}
+		}
 	}

 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -1120,13 +1120,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 		"The answer is 4.",
 		true)

-	testChatRequest(t, "thinking disabled but template still adds think tag",
-		"Simple question",
-		" My thoughts </think> The answer.",
-		"",
-		" My thoughts </think> The answer.",
-		false)
-
 	// Test streaming response with template-added <think>
 	t.Run("streaming with thinking", func(t *testing.T) {
 		var wg sync.WaitGroup