feat: add minimax tts

This commit is contained in:
feitianbubu
2025-10-20 13:54:53 +08:00
parent 8ac801261d
commit 0a8e54de20
4 changed files with 363 additions and 1 deletions

View File

@@ -0,0 +1,212 @@
package minimax
import (
"encoding/base64"
"encoding/json"
"errors"
"io"
"net/http"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// MiniMaxTTSRequest models the JSON body of a MiniMax text-to-speech request.
// model, text and voice_id are always serialized; every other field carries
// omitempty and is therefore left out of the JSON when it holds its zero value.
// NOTE(review): units and accepted ranges of speed/vol/pitch/audio_sample_rate
// are not visible in this file — confirm against the MiniMax API reference.
type MiniMaxTTSRequest struct {
Model string `json:"model"`
Text string `json:"text"`
VoiceID string `json:"voice_id"`
Speed float64 `json:"speed,omitempty"`
Vol float64 `json:"vol,omitempty"`
Pitch int `json:"pitch,omitempty"`
AudioSampleRate int `json:"audio_sample_rate,omitempty"`
OutputFormat string `json:"output_format,omitempty"`
}
// MiniMaxTTSResponse is the JSON shape that handleTTSResponse decodes the
// upstream reply into. Within this file only Data[0].Audio and
// Usage.TotalTokens are read; the remaining fields are decoded but unused here.
type MiniMaxTTSResponse struct {
Created int `json:"created"`
Data []MiniMaxTTSData `json:"data"`
ID string `json:"id"`
Model string `json:"model"`
Object string `json:"object"`
Usage MiniMaxTTSUsage `json:"usage"`
}
// MiniMaxTTSData is one element of MiniMaxTTSResponse.Data.
// Audio carries the synthesized audio as standard base64 text;
// handleTTSResponse decodes it with base64.StdEncoding before writing it out.
type MiniMaxTTSData struct {
Index int `json:"index"`
Audio string `json:"audio"`
Text string `json:"text"`
FinishReason string `json:"finish_reason"`
}
// MiniMaxTTSUsage is the "usage" object of a MiniMaxTTSResponse.
// handleTTSResponse copies TotalTokens into the dto.Usage it returns.
type MiniMaxTTSUsage struct {
TotalTokens int `json:"total_tokens"`
}
// MiniMaxTTSErrorResponse is the error envelope handleTTSResponse tries to
// decode first; the reply is treated as a failure when Error.Code is non-empty.
type MiniMaxTTSErrorResponse struct {
Error MiniMaxTTSError `json:"error"`
}
// MiniMaxTTSError is the "error" object inside MiniMaxTTSErrorResponse.
// In this file a non-empty Code marks the reply as an error and Message becomes
// the text of the returned error; Type is decoded but not read here.
type MiniMaxTTSError struct {
Code string `json:"code"`
Message string `json:"message"`
Type string `json:"type"`
}
// openAIToMiniMaxVoiceMap translates a caller-supplied voice name into a
// MiniMax voice_id. It is consulted by mapVoiceType, which substitutes a
// default voice for any key that is not listed here.
var openAIToMiniMaxVoiceMap = map[string]string{
	// Names resolving to male voice IDs.
	"alloy":   "male-qn-qingse",
	"voice-1": "male-qn-qingse", // generic alias
	"echo":    "male-qn-jingying",
	"onyx":    "male-qn-badao",

	// Names resolving to female voice IDs.
	"fable":   "female-shaonv",
	"voice-2": "female-shaonv", // generic alias
	"nova":    "female-shaonv-jingpin",
	"shimmer": "female-yujie",
}
// responseFormatToOutputFormatMap converts an OpenAI-style response_format
// value into the output_format string placed in the MiniMax request.
// mapOutputFormat performs the lookup and supplies the fallback for keys that
// are not present.
var responseFormatToOutputFormatMap = map[string]string{
	// Formats forwarded under the same name.
	"aac":  "aac",
	"flac": "flac",
	"mp3":  "mp3",
	"pcm":  "pcm",
	"wav":  "wav",

	// opus is not forwarded as-is; mp3 is requested in its place.
	"opus": "mp3",
}
// modelToTTSModelMap resolves the requested model name to the MiniMax TTS
// model to call (speech-01 or speech-01-turbo). getTTSModel performs the lookup
// and picks a default for names that are not listed.
var modelToTTSModelMap = map[string]string{
	// OpenAI model aliases.
	"tts-1":    "speech-01-turbo",
	"tts-1-hd": "speech-01",

	// Native MiniMax model names map to themselves.
	"speech-01":       "speech-01",
	"speech-01-turbo": "speech-01-turbo",
}
// mapVoiceType returns the MiniMax voice_id registered for openAIVoice in
// openAIToMiniMaxVoiceMap, or "female-shaonv" when the name is not registered.
func mapVoiceType(openAIVoice string) string {
	voiceID, known := openAIToMiniMaxVoiceMap[openAIVoice]
	if !known {
		// Unregistered names (including the empty string) get the default voice.
		return "female-shaonv"
	}
	return voiceID
}
// mapOutputFormat returns the MiniMax output format registered for
// responseFormat in responseFormatToOutputFormatMap, or "mp3" when the value
// is not registered.
func mapOutputFormat(responseFormat string) string {
	outputFormat, known := responseFormatToOutputFormatMap[responseFormat]
	if !known {
		// Unregistered values (including the empty string) fall back to mp3.
		return "mp3"
	}
	return outputFormat
}
// getTTSModel returns the MiniMax TTS model registered for modelName in
// modelToTTSModelMap, or "speech-01-turbo" when the name is not registered.
func getTTSModel(modelName string) string {
	resolved, known := modelToTTSModelMap[modelName]
	if !known {
		// Unregistered names fall back to the turbo model.
		return "speech-01-turbo"
	}
	return resolved
}
// getContentTypeByFormat returns the Content-Type value to use for an audio
// payload in the given format. The match is exact and case-sensitive; "mp3"
// and every unrecognized value yield "audio/mpeg".
func getContentTypeByFormat(format string) string {
	switch format {
	case "wav":
		return "audio/wav"
	case "flac":
		return "audio/flac"
	case "aac":
		return "audio/aac"
	case "pcm":
		return "audio/pcm"
	default:
		// Covers "mp3" as well as anything not listed above.
		return "audio/mpeg"
	}
}
// handleTTSResponse turns a MiniMax text-to-speech reply into a raw audio
// response written to the gin context.
//
// The upstream body is read in full and always closed. It is first decoded as
// a MiniMaxTTSErrorResponse; a non-empty error code aborts with the upstream
// message. Otherwise it is decoded as a MiniMaxTTSResponse, the base64 audio of
// the first data entry is decoded, and the bytes are written with status 200
// and a Content-Type derived from the "response_format" context value
// (defaulting to mp3).
//
// Parameters:
//   - c:    gin context; supplies "response_format" and receives the audio.
//   - resp: upstream HTTP response; its Body is consumed and closed here.
//   - info: relay info; only PromptTokens is read.
//
// Returns a *dto.Usage (PromptTokens from info, TotalTokens from the upstream
// usage, CompletionTokens 0) on success. On failure usage is nil and the error
// is one of:
//   - ErrorCodeReadResponseBodyFailed / 500: the body could not be read.
//   - ErrorCodeBadResponse / 400: upstream reported an error, or no audio was
//     present in the reply.
//   - ErrorCodeBadResponseBody / 500: the body was not valid JSON for the
//     success shape, or the audio was not valid base64.
func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	// Register the close before reading: deferring it only after a successful
	// read would leave the body open on the read-error return path.
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to read minimax response"),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	// An error envelope is recognized by a non-empty error code. A body that
	// does not decode as the error shape simply falls through to the success
	// path below.
	var errorResp MiniMaxTTSErrorResponse
	if unmarshalErr := json.Unmarshal(body, &errorResp); unmarshalErr == nil && errorResp.Error.Code != "" {
		return nil, types.NewErrorWithStatusCode(
			errors.New(errorResp.Error.Message),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	var minimaxResp MiniMaxTTSResponse
	if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to parse minimax response"),
			types.ErrorCodeBadResponseBody,
			http.StatusInternalServerError,
		)
	}

	// Only the first data entry is used; it must carry audio.
	if len(minimaxResp.Data) == 0 || minimaxResp.Data[0].Audio == "" {
		return nil, types.NewErrorWithStatusCode(
			errors.New("no audio data in response"),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	audioData, decodeErr := base64.StdEncoding.DecodeString(minimaxResp.Data[0].Audio)
	if decodeErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to decode audio data"),
			types.ErrorCodeBadResponseBody,
			http.StatusInternalServerError,
		)
	}

	// The format comes from the request context; absent a value, mp3 is assumed.
	outputFormat := c.GetString("response_format")
	if outputFormat == "" {
		outputFormat = "mp3"
	}

	contentType := getContentTypeByFormat(outputFormat)
	c.Header("Content-Type", contentType)
	c.Data(http.StatusOK, contentType, audioData)

	usage = &dto.Usage{
		PromptTokens:     info.PromptTokens,
		CompletionTokens: 0,
		TotalTokens:      minimaxResp.Usage.TotalTokens,
	}
	return usage, nil
}
// handleChatCompletionResponse relays an upstream chat-completion reply to the
// client unchanged: the body is read in full, the upstream headers are copied
// onto the gin response, and the body is written with the upstream status code.
//
// Parameters:
//   - c:    gin context that receives the headers and body.
//   - resp: upstream HTTP response; its Body is consumed and closed here.
//   - info: relay info; not read by this function.
//
// Returns (nil, nil) on success — no usage is reported from this path. If the
// body cannot be read, returns an ErrorCodeReadResponseBodyFailed error with
// status 500 and writes nothing to the client.
func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	// Register the close before reading: deferring it only after a successful
	// read would leave the body open on the read-error return path.
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to read minimax response"),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	// Mirror the upstream headers onto the client response.
	// NOTE(review): c.Header appears to replace rather than append, so a header
	// with several upstream values may end up with only its last value —
	// confirm against gin's Context.Header before relying on multi-value headers.
	for key, values := range resp.Header {
		for _, value := range values {
			c.Header(key, value)
		}
	}

	c.Data(resp.StatusCode, "application/json", body)
	return nil, nil
}