feat: support adaptive thinking
This commit is contained in:
@@ -26,6 +26,7 @@ type AwsClaudeRequest struct {
|
|||||||
Tools any `json:"tools,omitempty"`
|
Tools any `json:"tools,omitempty"`
|
||||||
ToolChoice any `json:"tool_choice,omitempty"`
|
ToolChoice any `json:"tool_choice,omitempty"`
|
||||||
Thinking *dto.Thinking `json:"thinking,omitempty"`
|
Thinking *dto.Thinking `json:"thinking,omitempty"`
|
||||||
|
OutputConfig json.RawMessage `json:"output_config,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {
|
func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {
|
||||||
|
|||||||
@@ -21,7 +21,10 @@ var ModelList = []string{
|
|||||||
"claude-opus-4-5-20251101",
|
"claude-opus-4-5-20251101",
|
||||||
"claude-opus-4-5-20251101-thinking",
|
"claude-opus-4-5-20251101-thinking",
|
||||||
"claude-opus-4-6",
|
"claude-opus-4-6",
|
||||||
"claude-opus-4-6-thinking",
|
"claude-opus-4-6-max",
|
||||||
|
"claude-opus-4-6-high",
|
||||||
|
"claude-opus-4-6-medium",
|
||||||
|
"claude-opus-4-6-low",
|
||||||
}
|
}
|
||||||
|
|
||||||
var ChannelName = "claude"
|
var ChannelName = "claude"
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import (
|
|||||||
"github.com/QuantumNous/new-api/relay/reasonmap"
|
"github.com/QuantumNous/new-api/relay/reasonmap"
|
||||||
"github.com/QuantumNous/new-api/service"
|
"github.com/QuantumNous/new-api/service"
|
||||||
"github.com/QuantumNous/new-api/setting/model_setting"
|
"github.com/QuantumNous/new-api/setting/model_setting"
|
||||||
|
"github.com/QuantumNous/new-api/setting/reasoning"
|
||||||
"github.com/QuantumNous/new-api/types"
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
@@ -141,7 +142,16 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
|
|||||||
claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
|
claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
|
||||||
}
|
}
|
||||||
|
|
||||||
if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
|
||||||
|
strings.HasPrefix(textRequest.Model, "claude-opus-4-6") {
|
||||||
|
claudeRequest.Model = baseModel
|
||||||
|
claudeRequest.Thinking = &dto.Thinking{
|
||||||
|
Type: "adaptive",
|
||||||
|
}
|
||||||
|
claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
|
||||||
|
claudeRequest.TopP = 0
|
||||||
|
claudeRequest.Temperature = common.GetPointer[float64](1.0)
|
||||||
|
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
||||||
strings.HasSuffix(textRequest.Model, "-thinking") {
|
strings.HasSuffix(textRequest.Model, "-thinking") {
|
||||||
|
|
||||||
// because BudgetTokens must be greater than 1024
|
// because BudgetTokens must be greater than 1024
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package vertex
|
package vertex
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
"github.com/QuantumNous/new-api/dto"
|
"github.com/QuantumNous/new-api/dto"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,6 +19,7 @@ type VertexAIClaudeRequest struct {
|
|||||||
Tools any `json:"tools,omitempty"`
|
Tools any `json:"tools,omitempty"`
|
||||||
ToolChoice any `json:"tool_choice,omitempty"`
|
ToolChoice any `json:"tool_choice,omitempty"`
|
||||||
Thinking *dto.Thinking `json:"thinking,omitempty"`
|
Thinking *dto.Thinking `json:"thinking,omitempty"`
|
||||||
|
OutputConfig json.RawMessage `json:"output_config,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest {
|
func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest {
|
||||||
@@ -33,5 +36,6 @@ func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest
|
|||||||
Tools: req.Tools,
|
Tools: req.Tools,
|
||||||
ToolChoice: req.ToolChoice,
|
ToolChoice: req.ToolChoice,
|
||||||
Thinking: req.Thinking,
|
Thinking: req.Thinking,
|
||||||
|
OutputConfig: req.OutputConfig,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package relay
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -14,6 +15,7 @@ import (
|
|||||||
"github.com/QuantumNous/new-api/relay/helper"
|
"github.com/QuantumNous/new-api/relay/helper"
|
||||||
"github.com/QuantumNous/new-api/service"
|
"github.com/QuantumNous/new-api/service"
|
||||||
"github.com/QuantumNous/new-api/setting/model_setting"
|
"github.com/QuantumNous/new-api/setting/model_setting"
|
||||||
|
"github.com/QuantumNous/new-api/setting/reasoning"
|
||||||
"github.com/QuantumNous/new-api/types"
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
@@ -49,7 +51,15 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
|
|||||||
request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model))
|
request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model))
|
||||||
}
|
}
|
||||||
|
|
||||||
if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
|
||||||
|
strings.HasPrefix(request.Model, "claude-opus-4-6") {
|
||||||
|
request.Model = baseModel
|
||||||
|
request.Thinking = &dto.Thinking{
|
||||||
|
Type: "adaptive",
|
||||||
|
}
|
||||||
|
request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
|
||||||
|
info.UpstreamModelName = request.Model
|
||||||
|
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
||||||
strings.HasSuffix(request.Model, "-thinking") {
|
strings.HasSuffix(request.Model, "-thinking") {
|
||||||
if request.Thinking == nil {
|
if request.Thinking == nil {
|
||||||
// because BudgetTokens must be greater than 1024
|
// because BudgetTokens must be greater than 1024
|
||||||
|
|||||||
@@ -62,6 +62,10 @@ var defaultCacheRatio = map[string]float64{
|
|||||||
"claude-opus-4-5-20251101-thinking": 0.1,
|
"claude-opus-4-5-20251101-thinking": 0.1,
|
||||||
"claude-opus-4-6": 0.1,
|
"claude-opus-4-6": 0.1,
|
||||||
"claude-opus-4-6-thinking": 0.1,
|
"claude-opus-4-6-thinking": 0.1,
|
||||||
|
"claude-opus-4-6-max": 0.1,
|
||||||
|
"claude-opus-4-6-high": 0.1,
|
||||||
|
"claude-opus-4-6-medium": 0.1,
|
||||||
|
"claude-opus-4-6-low": 0.1,
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultCreateCacheRatio = map[string]float64{
|
var defaultCreateCacheRatio = map[string]float64{
|
||||||
@@ -86,6 +90,10 @@ var defaultCreateCacheRatio = map[string]float64{
|
|||||||
"claude-opus-4-5-20251101-thinking": 1.25,
|
"claude-opus-4-5-20251101-thinking": 1.25,
|
||||||
"claude-opus-4-6": 1.25,
|
"claude-opus-4-6": 1.25,
|
||||||
"claude-opus-4-6-thinking": 1.25,
|
"claude-opus-4-6-thinking": 1.25,
|
||||||
|
"claude-opus-4-6-max": 1.25,
|
||||||
|
"claude-opus-4-6-high": 1.25,
|
||||||
|
"claude-opus-4-6-medium": 1.25,
|
||||||
|
"claude-opus-4-6-low": 1.25,
|
||||||
}
|
}
|
||||||
|
|
||||||
//var defaultCreateCacheRatio = map[string]float64{}
|
//var defaultCreateCacheRatio = map[string]float64{}
|
||||||
|
|||||||
@@ -143,6 +143,10 @@ var defaultModelRatio = map[string]float64{
|
|||||||
"claude-sonnet-4-5-20250929": 1.5,
|
"claude-sonnet-4-5-20250929": 1.5,
|
||||||
"claude-opus-4-5-20251101": 2.5,
|
"claude-opus-4-5-20251101": 2.5,
|
||||||
"claude-opus-4-6": 2.5,
|
"claude-opus-4-6": 2.5,
|
||||||
|
"claude-opus-4-6-max": 2.5,
|
||||||
|
"claude-opus-4-6-high": 2.5,
|
||||||
|
"claude-opus-4-6-medium": 2.5,
|
||||||
|
"claude-opus-4-6-low": 2.5,
|
||||||
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
||||||
"claude-opus-4-20250514": 7.5,
|
"claude-opus-4-20250514": 7.5,
|
||||||
"claude-opus-4-1-20250805": 7.5,
|
"claude-opus-4-1-20250805": 7.5,
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import (
|
|||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
)
|
)
|
||||||
|
|
||||||
var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
|
var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"}
|
||||||
|
|
||||||
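// TrimEffortSuffix returns the model name, the effort level (e.g. "low"), and whether an effort suffix exists.
|
// TrimEffortSuffix returns the model name, the effort level (e.g. "low"), and whether an effort suffix exists.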
|
||||||
func TrimEffortSuffix(modelName string) (string, string, bool) {
|
func TrimEffortSuffix(modelName string) (string, string, bool) {
|
||||||
|
|||||||
Reference in New Issue
Block a user