feat: add doubao audio tts
This commit is contained in:
184
relay/channel/volcengine/tts.go
Normal file
184
relay/channel/volcengine/tts.go
Normal file
@@ -0,0 +1,184 @@
|
||||
package volcengine
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
type VolcengineTTSRequest struct {
|
||||
App VolcengineTTSApp `json:"app"`
|
||||
User VolcengineTTSUser `json:"user"`
|
||||
Audio VolcengineTTSAudio `json:"audio"`
|
||||
Request VolcengineTTSReqInfo `json:"request"`
|
||||
}
|
||||
|
||||
// VolcengineTTSApp is the "app" section of a VolcengineTTSRequest.
type VolcengineTTSApp struct {
	// AppID is serialized as "appid".
	AppID string `json:"appid"`
	// Token is serialized as "token".
	Token string `json:"token"`
	// Cluster is serialized as "cluster".
	// NOTE(review): presumably the Volcengine service cluster name; confirm
	// the expected value against the upstream API documentation.
	Cluster string `json:"cluster"`
}
|
||||
|
||||
// VolcengineTTSUser is the "user" section of a VolcengineTTSRequest.
type VolcengineTTSUser struct {
	// UID is serialized as "uid".
	UID string `json:"uid"`
}
|
||||
|
||||
// VolcengineTTSAudio is the "audio" section of a VolcengineTTSRequest and
// describes how the synthesized speech should be rendered.
type VolcengineTTSAudio struct {
	// VoiceType is the Volcengine voice identifier.
	VoiceType string `json:"voice_type"`
	// Encoding is the output audio encoding (for example "mp3" or "wav").
	Encoding string `json:"encoding"`
	// SpeedRatio is the playback speed multiplier.
	SpeedRatio float64 `json:"speed_ratio"`
	// Rate is serialized as "rate".
	// NOTE(review): presumably the sample rate in Hz; confirm upstream.
	Rate int `json:"rate"`
	// LoudnessRatio is omitted from the JSON payload when it is zero.
	LoudnessRatio float64 `json:"loudness_ratio,omitempty"`
}
|
||||
|
||||
// VolcengineTTSReqInfo is the "request" section of a VolcengineTTSRequest.
type VolcengineTTSReqInfo struct {
	// ReqID is the request identifier, serialized as "reqid".
	ReqID string `json:"reqid"`
	// Text is the input text to synthesize.
	Text string `json:"text"`
	// Operation is serialized as "operation".
	// NOTE(review): the accepted operation names are defined by the
	// upstream API and are not visible here; confirm before changing.
	Operation string `json:"operation"`
}
|
||||
|
||||
type VolcengineTTSResponse struct {
|
||||
ReqID string `json:"reqid"`
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
Sequence int `json:"sequence"`
|
||||
Data string `json:"data"`
|
||||
Addition *VolcengineTTSAdditionInfo `json:"addition,omitempty"`
|
||||
}
|
||||
|
||||
// VolcengineTTSAdditionInfo is the optional "addition" object of a
// VolcengineTTSResponse.
type VolcengineTTSAdditionInfo struct {
	// Duration is delivered by the upstream as a string.
	// NOTE(review): presumably the audio length in milliseconds; confirm
	// against the upstream API documentation.
	Duration string `json:"duration"`
}
|
||||
|
||||
// openAIToVolcengineVoiceMap translates the OpenAI voice names accepted by
// the speech API into Volcengine voice type identifiers. Names that are not
// listed here are handled by mapVoiceType's fallback.
var openAIToVolcengineVoiceMap = map[string]string{
	"alloy":   "zh_male_M392_conversation_wvae_bigtts",
	"echo":    "zh_male_wenhao_mars_bigtts",
	"fable":   "zh_female_tianmei_mars_bigtts",
	"onyx":    "zh_male_zhibei_mars_bigtts",
	"nova":    "zh_female_shuangkuaisisi_mars_bigtts",
	"shimmer": "zh_female_cancan_mars_bigtts",
}
|
||||
|
||||
// responseFormatToEncodingMap translates an OpenAI response_format value
// into the encoding name sent to Volcengine.
//
// "aac" and "flac" are deliberately mapped to "mp3".
// NOTE(review): presumably because the upstream offers no matching
// encoding; confirm before adding native entries for them.
var responseFormatToEncodingMap = map[string]string{
	"mp3":  "mp3",
	"opus": "ogg_opus",
	"aac":  "mp3",
	"flac": "mp3",
	"wav":  "wav",
	"pcm":  "pcm",
}
|
||||
|
||||
// parseVolcengineAuth splits a channel API key of the form
// "appid|access_token" into its application ID and access token.
//
// Surrounding whitespace is trimmed from both components. An error is
// returned when the key does not contain exactly one "|" separator or when
// either component is empty; in that case both string results are empty.
func parseVolcengineAuth(apiKey string) (appID, token string, err error) {
	// The message must name the real separator ("|"); advertising ":" sends
	// operators to a format that can never be accepted.
	const formatErr = "invalid api key format, expected: appid|access_token"

	parts := strings.Split(apiKey, "|")
	if len(parts) != 2 {
		return "", "", errors.New(formatErr)
	}

	id, tok := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])
	if id == "" || tok == "" {
		// A missing half can never authenticate, so fail here with a clear
		// message instead of letting the upstream reject the request.
		return "", "", errors.New(formatErr)
	}
	return id, tok, nil
}
|
||||
|
||||
func mapVoiceType(openAIVoice string) string {
|
||||
if voice, ok := openAIToVolcengineVoiceMap[openAIVoice]; ok {
|
||||
return voice
|
||||
}
|
||||
return "zh_male_M392_conversation_wvae_bigtts"
|
||||
}
|
||||
|
||||
// mapSpeedRatio converts a requested speed into the speed ratio sent
// upstream. A speed of exactly 0 (the unset value) yields 1.0; any other
// value is clamped into the range [0.1, 2.0].
func mapSpeedRatio(speed float64) float64 {
	const (
		defaultRatio = 1.0
		minRatio     = 0.1
		maxRatio     = 2.0
	)

	switch {
	case speed == 0:
		return defaultRatio
	case speed < minRatio:
		return minRatio
	case speed > maxRatio:
		return maxRatio
	default:
		return speed
	}
}
|
||||
|
||||
func mapEncoding(responseFormat string) string {
|
||||
if responseFormat == "" {
|
||||
return "mp3"
|
||||
}
|
||||
if encoding, ok := responseFormatToEncodingMap[responseFormat]; ok {
|
||||
return encoding
|
||||
}
|
||||
return "mp3"
|
||||
}
|
||||
|
||||
// getContentTypeByEncoding returns the HTTP Content-Type that matches a
// Volcengine audio encoding name. Encodings it does not recognise are
// reported as "application/octet-stream".
//
// A switch is used rather than a map literal so that no map is allocated
// on every call for what is a four-entry constant lookup.
func getContentTypeByEncoding(encoding string) string {
	switch encoding {
	case "mp3":
		return "audio/mpeg"
	case "ogg_opus":
		return "audio/ogg"
	case "wav":
		return "audio/wav"
	case "pcm":
		return "audio/pcm"
	default:
		return "application/octet-stream"
	}
}
|
||||
|
||||
func handleTTSResponse(c *gin.Context, resp *http.Response, encoding string) (usage any, err *types.NewAPIError) {
|
||||
body, readErr := io.ReadAll(resp.Body)
|
||||
if readErr != nil {
|
||||
return nil, types.NewErrorWithStatusCode(
|
||||
errors.New("failed to read volcengine response"),
|
||||
types.ErrorCodeReadResponseBodyFailed,
|
||||
http.StatusInternalServerError,
|
||||
)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var volcResp VolcengineTTSResponse
|
||||
if unmarshalErr := json.Unmarshal(body, &volcResp); unmarshalErr != nil {
|
||||
return nil, types.NewErrorWithStatusCode(
|
||||
errors.New("failed to parse volcengine response"),
|
||||
types.ErrorCodeBadResponseBody,
|
||||
http.StatusInternalServerError,
|
||||
)
|
||||
}
|
||||
|
||||
if volcResp.Code != 3000 {
|
||||
return nil, types.NewErrorWithStatusCode(
|
||||
errors.New(volcResp.Message),
|
||||
types.ErrorCodeBadResponse,
|
||||
http.StatusBadRequest,
|
||||
)
|
||||
}
|
||||
|
||||
audioData, decodeErr := base64.StdEncoding.DecodeString(volcResp.Data)
|
||||
if decodeErr != nil {
|
||||
return nil, types.NewErrorWithStatusCode(
|
||||
errors.New("failed to decode audio data"),
|
||||
types.ErrorCodeBadResponseBody,
|
||||
http.StatusInternalServerError,
|
||||
)
|
||||
}
|
||||
|
||||
contentType := getContentTypeByEncoding(encoding)
|
||||
c.Header("Content-Type", contentType)
|
||||
c.Data(http.StatusOK, contentType, audioData)
|
||||
|
||||
info := c.MustGet("relay_info").(*relaycommon.RelayInfo)
|
||||
usage = &dto.Usage{
|
||||
PromptTokens: info.PromptTokens,
|
||||
CompletionTokens: 0,
|
||||
TotalTokens: info.PromptTokens,
|
||||
}
|
||||
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
func generateRequestID() string {
|
||||
return uuid.New().String()
|
||||
}
|
||||
Reference in New Issue
Block a user