Compare commits
5 Commits
dev
...
v0.5.12-al
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b9d3cb0c45 | ||
|
|
ea407f0054 | ||
|
|
26e2e646cb | ||
|
|
4f214c48c6 | ||
|
|
2d760d4a01 |
@@ -134,12 +134,12 @@ The initial account username is `root` and password is `123456`.
|
||||
git clone https://github.com/songquanpeng/one-api.git
|
||||
|
||||
# Build the frontend
|
||||
cd one-api/web
|
||||
cd one-api/web/default
|
||||
npm install
|
||||
npm run build
|
||||
|
||||
# Build the backend
|
||||
cd ..
|
||||
cd ../..
|
||||
go mod download
|
||||
go build -ldflags "-s -w" -o one-api
|
||||
```
|
||||
|
||||
@@ -135,12 +135,12 @@ sudo service nginx restart
|
||||
git clone https://github.com/songquanpeng/one-api.git
|
||||
|
||||
# フロントエンドのビルド
|
||||
cd one-api/web
|
||||
cd one-api/web/default
|
||||
npm install
|
||||
npm run build
|
||||
|
||||
# バックエンドのビルド
|
||||
cd ..
|
||||
cd ../..
|
||||
go mod download
|
||||
go build -ldflags "-s -w" -o one-api
|
||||
```
|
||||
|
||||
@@ -174,12 +174,12 @@ docker-compose ps
|
||||
git clone https://github.com/songquanpeng/one-api.git
|
||||
|
||||
# 构建前端
|
||||
cd one-api/web
|
||||
cd one-api/web/default
|
||||
npm install
|
||||
npm run build
|
||||
|
||||
# 构建后端
|
||||
cd ..
|
||||
cd ../..
|
||||
go mod download
|
||||
go build -ldflags "-s -w" -o one-api
|
||||
````
|
||||
|
||||
@@ -45,6 +45,8 @@ var ModelRatio = map[string]float64{
|
||||
"gpt-4-32k-0314": 30,
|
||||
"gpt-4-32k-0613": 30,
|
||||
"gpt-4-1106-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-0125-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-turbo-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
|
||||
"gpt-3.5-turbo": 0.75, // $0.0015 / 1K tokens
|
||||
"gpt-3.5-turbo-0301": 0.75,
|
||||
@@ -53,6 +55,7 @@ var ModelRatio = map[string]float64{
|
||||
"gpt-3.5-turbo-16k-0613": 1.5,
|
||||
"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
|
||||
"gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
|
||||
"gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
|
||||
"davinci-002": 1, // $0.002 / 1K tokens
|
||||
"babbage-002": 0.2, // $0.0004 / 1K tokens
|
||||
"text-ada-001": 0.2,
|
||||
@@ -72,6 +75,8 @@ var ModelRatio = map[string]float64{
|
||||
"babbage": 10,
|
||||
"ada": 10,
|
||||
"text-embedding-ada-002": 0.05,
|
||||
"text-embedding-3-small": 0.01,
|
||||
"text-embedding-3-large": 0.065,
|
||||
"text-search-ada-doc-001": 10,
|
||||
"text-moderation-stable": 0.1,
|
||||
"text-moderation-latest": 0.1,
|
||||
@@ -130,8 +135,31 @@ func GetModelRatio(name string) float64 {
|
||||
return ratio
|
||||
}
|
||||
|
||||
var CompletionRatio = map[string]float64{}
|
||||
|
||||
func CompletionRatio2JSONString() string {
|
||||
jsonBytes, err := json.Marshal(CompletionRatio)
|
||||
if err != nil {
|
||||
logger.SysError("error marshalling completion ratio: " + err.Error())
|
||||
}
|
||||
return string(jsonBytes)
|
||||
}
|
||||
|
||||
func UpdateCompletionRatioByJSONString(jsonStr string) error {
|
||||
CompletionRatio = make(map[string]float64)
|
||||
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
|
||||
}
|
||||
|
||||
func GetCompletionRatio(name string) float64 {
|
||||
if ratio, ok := CompletionRatio[name]; ok {
|
||||
return ratio
|
||||
}
|
||||
if strings.HasPrefix(name, "gpt-3.5") {
|
||||
if strings.HasSuffix(name, "0125") {
|
||||
// https://openai.com/blog/new-embedding-models-and-api-updates
|
||||
// Updated GPT-3.5 Turbo model and lower pricing
|
||||
return 3
|
||||
}
|
||||
if strings.HasSuffix(name, "1106") {
|
||||
return 2
|
||||
}
|
||||
|
||||
@@ -171,6 +171,15 @@ func init() {
|
||||
Root: "gpt-3.5-turbo-1106",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "gpt-3.5-turbo-0125",
|
||||
Object: "model",
|
||||
Created: 1706232090,
|
||||
OwnedBy: "openai",
|
||||
Permission: permission,
|
||||
Root: "gpt-3.5-turbo-0125",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "gpt-3.5-turbo-instruct",
|
||||
Object: "model",
|
||||
@@ -243,6 +252,24 @@ func init() {
|
||||
Root: "gpt-4-1106-preview",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "gpt-4-0125-preview",
|
||||
Object: "model",
|
||||
Created: 1706232090,
|
||||
OwnedBy: "openai",
|
||||
Permission: permission,
|
||||
Root: "gpt-4-0125-preview",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "gpt-4-turbo-preview",
|
||||
Object: "model",
|
||||
Created: 1706232090,
|
||||
OwnedBy: "openai",
|
||||
Permission: permission,
|
||||
Root: "gpt-4-turbo-preview",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "gpt-4-vision-preview",
|
||||
Object: "model",
|
||||
@@ -261,6 +288,24 @@ func init() {
|
||||
Root: "text-embedding-ada-002",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "text-embedding-3-small",
|
||||
Object: "model",
|
||||
Created: 1706232090,
|
||||
OwnedBy: "openai",
|
||||
Permission: permission,
|
||||
Root: "text-embedding-3-small",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "text-embedding-3-large",
|
||||
Object: "model",
|
||||
Created: 1706232090,
|
||||
OwnedBy: "openai",
|
||||
Permission: permission,
|
||||
Root: "text-embedding-3-large",
|
||||
Parent: nil,
|
||||
},
|
||||
{
|
||||
Id: "text-davinci-003",
|
||||
Object: "model",
|
||||
|
||||
@@ -29,7 +29,7 @@ func Relay(c *gin.Context) {
|
||||
case constant.RelayModeAudioTranscription:
|
||||
err = controller.RelayAudioHelper(c, relayMode)
|
||||
default:
|
||||
err = controller.RelayTextHelper(c, relayMode)
|
||||
err = controller.RelayTextHelper(c)
|
||||
}
|
||||
if err != nil {
|
||||
requestId := c.GetString(logger.RequestIdKey)
|
||||
|
||||
@@ -66,6 +66,7 @@ func InitOptionMap() {
|
||||
config.OptionMap["PreConsumedQuota"] = strconv.Itoa(config.PreConsumedQuota)
|
||||
config.OptionMap["ModelRatio"] = common.ModelRatio2JSONString()
|
||||
config.OptionMap["GroupRatio"] = common.GroupRatio2JSONString()
|
||||
config.OptionMap["CompletionRatio"] = common.CompletionRatio2JSONString()
|
||||
config.OptionMap["TopUpLink"] = config.TopUpLink
|
||||
config.OptionMap["ChatLink"] = config.ChatLink
|
||||
config.OptionMap["QuotaPerUnit"] = strconv.FormatFloat(config.QuotaPerUnit, 'f', -1, 64)
|
||||
@@ -198,6 +199,8 @@ func updateOptionMap(key string, value string) (err error) {
|
||||
err = common.UpdateModelRatioByJSONString(value)
|
||||
case "GroupRatio":
|
||||
err = common.UpdateGroupRatioByJSONString(value)
|
||||
case "CompletionRatio":
|
||||
err = common.UpdateCompletionRatioByJSONString(value)
|
||||
case "TopUpLink":
|
||||
config.TopUpLink = value
|
||||
case "ChatLink":
|
||||
|
||||
146
relay/controller/helper.go
Normal file
146
relay/controller/helper.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package controller
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/gin-gonic/gin"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"one-api/common"
|
||||
"one-api/common/config"
|
||||
"one-api/common/logger"
|
||||
"one-api/model"
|
||||
"one-api/relay/channel/openai"
|
||||
"one-api/relay/constant"
|
||||
"one-api/relay/util"
|
||||
)
|
||||
|
||||
func getAndValidateTextRequest(c *gin.Context, relayMode int) (*openai.GeneralOpenAIRequest, error) {
|
||||
textRequest := &openai.GeneralOpenAIRequest{}
|
||||
err := common.UnmarshalBodyReusable(c, textRequest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if relayMode == constant.RelayModeModerations && textRequest.Model == "" {
|
||||
textRequest.Model = "text-moderation-latest"
|
||||
}
|
||||
if relayMode == constant.RelayModeEmbeddings && textRequest.Model == "" {
|
||||
textRequest.Model = c.Param("model")
|
||||
}
|
||||
err = util.ValidateTextRequest(textRequest, relayMode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return textRequest, nil
|
||||
}
|
||||
|
||||
func getPromptTokens(textRequest *openai.GeneralOpenAIRequest, relayMode int) int {
|
||||
switch relayMode {
|
||||
case constant.RelayModeChatCompletions:
|
||||
return openai.CountTokenMessages(textRequest.Messages, textRequest.Model)
|
||||
case constant.RelayModeCompletions:
|
||||
return openai.CountTokenInput(textRequest.Prompt, textRequest.Model)
|
||||
case constant.RelayModeModerations:
|
||||
return openai.CountTokenInput(textRequest.Input, textRequest.Model)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func getPreConsumedQuota(textRequest *openai.GeneralOpenAIRequest, promptTokens int, ratio float64) int {
|
||||
preConsumedTokens := config.PreConsumedQuota
|
||||
if textRequest.MaxTokens != 0 {
|
||||
preConsumedTokens = promptTokens + textRequest.MaxTokens
|
||||
}
|
||||
return int(float64(preConsumedTokens) * ratio)
|
||||
}
|
||||
|
||||
func preConsumeQuota(ctx context.Context, textRequest *openai.GeneralOpenAIRequest, promptTokens int, ratio float64, meta *util.RelayMeta) (int, *openai.ErrorWithStatusCode) {
|
||||
preConsumedQuota := getPreConsumedQuota(textRequest, promptTokens, ratio)
|
||||
|
||||
userQuota, err := model.CacheGetUserQuota(meta.UserId)
|
||||
if err != nil {
|
||||
return preConsumedQuota, openai.ErrorWrapper(err, "get_user_quota_failed", http.StatusInternalServerError)
|
||||
}
|
||||
if userQuota-preConsumedQuota < 0 {
|
||||
return preConsumedQuota, openai.ErrorWrapper(errors.New("user quota is not enough"), "insufficient_user_quota", http.StatusForbidden)
|
||||
}
|
||||
err = model.CacheDecreaseUserQuota(meta.UserId, preConsumedQuota)
|
||||
if err != nil {
|
||||
return preConsumedQuota, openai.ErrorWrapper(err, "decrease_user_quota_failed", http.StatusInternalServerError)
|
||||
}
|
||||
if userQuota > 100*preConsumedQuota {
|
||||
// in this case, we do not pre-consume quota
|
||||
// because the user has enough quota
|
||||
preConsumedQuota = 0
|
||||
logger.Info(ctx, fmt.Sprintf("user %d has enough quota %d, trusted and no need to pre-consume", meta.UserId, userQuota))
|
||||
}
|
||||
if preConsumedQuota > 0 {
|
||||
err := model.PreConsumeTokenQuota(meta.TokenId, preConsumedQuota)
|
||||
if err != nil {
|
||||
return preConsumedQuota, openai.ErrorWrapper(err, "pre_consume_token_quota_failed", http.StatusForbidden)
|
||||
}
|
||||
}
|
||||
return preConsumedQuota, nil
|
||||
}
|
||||
|
||||
func postConsumeQuota(ctx context.Context, usage *openai.Usage, meta *util.RelayMeta, textRequest *openai.GeneralOpenAIRequest, ratio float64, preConsumedQuota int, modelRatio float64, groupRatio float64) {
|
||||
if usage == nil {
|
||||
logger.Error(ctx, "usage is nil, which is unexpected")
|
||||
return
|
||||
}
|
||||
quota := 0
|
||||
completionRatio := common.GetCompletionRatio(textRequest.Model)
|
||||
promptTokens := usage.PromptTokens
|
||||
completionTokens := usage.CompletionTokens
|
||||
quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
|
||||
if ratio != 0 && quota <= 0 {
|
||||
quota = 1
|
||||
}
|
||||
totalTokens := promptTokens + completionTokens
|
||||
if totalTokens == 0 {
|
||||
// in this case, must be some error happened
|
||||
// we cannot just return, because we may have to return the pre-consumed quota
|
||||
quota = 0
|
||||
}
|
||||
quotaDelta := quota - preConsumedQuota
|
||||
err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "error consuming token remain quota: "+err.Error())
|
||||
}
|
||||
err = model.CacheUpdateUserQuota(meta.UserId)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "error update user quota cache: "+err.Error())
|
||||
}
|
||||
if quota != 0 {
|
||||
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f", modelRatio, groupRatio, completionRatio)
|
||||
model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, promptTokens, completionTokens, textRequest.Model, meta.TokenName, quota, logContent)
|
||||
model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
|
||||
model.UpdateChannelUsedQuota(meta.ChannelId, quota)
|
||||
}
|
||||
}
|
||||
|
||||
func doRequest(ctx context.Context, c *gin.Context, meta *util.RelayMeta, isStream bool, fullRequestURL string, requestBody io.Reader) (*http.Response, error) {
|
||||
req, err := http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
SetupRequestHeaders(c, req, meta, isStream)
|
||||
resp, err := util.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp == nil {
|
||||
return nil, errors.New("resp is nil")
|
||||
}
|
||||
err = req.Body.Close()
|
||||
if err != nil {
|
||||
logger.Warnf(ctx, "close req.Body failed: %+v", err)
|
||||
}
|
||||
err = c.Request.Body.Close()
|
||||
if err != nil {
|
||||
logger.Warnf(ctx, "close c.Request.Body failed: %+v", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
@@ -24,9 +24,9 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func GetRequestURL(requestURL string, apiType int, relayMode int, meta *util.RelayMeta, textRequest *openai.GeneralOpenAIRequest) (string, error) {
|
||||
func GetRequestURL(requestURL string, meta *util.RelayMeta, textRequest *openai.GeneralOpenAIRequest) (string, error) {
|
||||
fullRequestURL := util.GetFullRequestURL(meta.BaseURL, requestURL, meta.ChannelType)
|
||||
switch apiType {
|
||||
switch meta.APIType {
|
||||
case constant.APITypeOpenAI:
|
||||
if meta.ChannelType == common.ChannelTypeAzure {
|
||||
// https://learn.microsoft.com/en-us/azure/cognitive-services/openai/chatgpt-quickstart?pivots=rest-api&tabs=command-line#rest-api
|
||||
@@ -81,7 +81,7 @@ func GetRequestURL(requestURL string, apiType int, relayMode int, meta *util.Rel
|
||||
fullRequestURL = fmt.Sprintf("https://open.bigmodel.cn/api/paas/v3/model-api/%s/%s", textRequest.Model, method)
|
||||
case constant.APITypeAli:
|
||||
fullRequestURL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
|
||||
if relayMode == constant.RelayModeEmbeddings {
|
||||
if meta.Mode == constant.RelayModeEmbeddings {
|
||||
fullRequestURL = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding"
|
||||
}
|
||||
case constant.APITypeTencent:
|
||||
@@ -191,8 +191,8 @@ func GetRequestBody(c *gin.Context, textRequest openai.GeneralOpenAIRequest, isM
|
||||
return requestBody, nil
|
||||
}
|
||||
|
||||
func SetupRequestHeaders(c *gin.Context, req *http.Request, apiType int, meta *util.RelayMeta, isStream bool) {
|
||||
SetupAuthHeaders(c, req, apiType, meta, isStream)
|
||||
func SetupRequestHeaders(c *gin.Context, req *http.Request, meta *util.RelayMeta, isStream bool) {
|
||||
SetupAuthHeaders(c, req, meta, isStream)
|
||||
req.Header.Set("Content-Type", c.Request.Header.Get("Content-Type"))
|
||||
req.Header.Set("Accept", c.Request.Header.Get("Accept"))
|
||||
if isStream && c.Request.Header.Get("Accept") == "" {
|
||||
@@ -200,9 +200,9 @@ func SetupRequestHeaders(c *gin.Context, req *http.Request, apiType int, meta *u
|
||||
}
|
||||
}
|
||||
|
||||
func SetupAuthHeaders(c *gin.Context, req *http.Request, apiType int, meta *util.RelayMeta, isStream bool) {
|
||||
func SetupAuthHeaders(c *gin.Context, req *http.Request, meta *util.RelayMeta, isStream bool) {
|
||||
apiKey := meta.APIKey
|
||||
switch apiType {
|
||||
switch meta.APIType {
|
||||
case constant.APITypeOpenAI:
|
||||
if meta.ChannelType == common.ChannelTypeAzure {
|
||||
req.Header.Set("api-key", apiKey)
|
||||
@@ -1,115 +1,61 @@
|
||||
package controller
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/gin-gonic/gin"
|
||||
"math"
|
||||
"net/http"
|
||||
"one-api/common"
|
||||
"one-api/common/config"
|
||||
"one-api/common/logger"
|
||||
"one-api/model"
|
||||
"one-api/relay/channel/openai"
|
||||
"one-api/relay/constant"
|
||||
"one-api/relay/util"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func RelayTextHelper(c *gin.Context, relayMode int) *openai.ErrorWithStatusCode {
|
||||
func RelayTextHelper(c *gin.Context) *openai.ErrorWithStatusCode {
|
||||
ctx := c.Request.Context()
|
||||
meta := util.GetRelayMeta(c)
|
||||
var textRequest openai.GeneralOpenAIRequest
|
||||
err := common.UnmarshalBodyReusable(c, &textRequest)
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "bind_request_body_failed", http.StatusBadRequest)
|
||||
}
|
||||
if relayMode == constant.RelayModeModerations && textRequest.Model == "" {
|
||||
textRequest.Model = "text-moderation-latest"
|
||||
}
|
||||
if relayMode == constant.RelayModeEmbeddings && textRequest.Model == "" {
|
||||
textRequest.Model = c.Param("model")
|
||||
}
|
||||
err = util.ValidateTextRequest(&textRequest, relayMode)
|
||||
// get & validate textRequest
|
||||
textRequest, err := getAndValidateTextRequest(c, meta.Mode)
|
||||
if err != nil {
|
||||
logger.Errorf(ctx, "getAndValidateTextRequest failed: %s", err.Error())
|
||||
return openai.ErrorWrapper(err, "invalid_text_request", http.StatusBadRequest)
|
||||
}
|
||||
// map model name
|
||||
var isModelMapped bool
|
||||
textRequest.Model, isModelMapped = util.GetMappedModelName(textRequest.Model, meta.ModelMapping)
|
||||
apiType := constant.ChannelType2APIType(meta.ChannelType)
|
||||
fullRequestURL, err := GetRequestURL(c.Request.URL.String(), apiType, relayMode, meta, &textRequest)
|
||||
if err != nil {
|
||||
logger.Error(ctx, fmt.Sprintf("util.GetRequestURL failed: %s", err.Error()))
|
||||
return openai.ErrorWrapper(fmt.Errorf("util.GetRequestURL failed"), "get_request_url_failed", http.StatusInternalServerError)
|
||||
}
|
||||
var promptTokens int
|
||||
var completionTokens int
|
||||
switch relayMode {
|
||||
case constant.RelayModeChatCompletions:
|
||||
promptTokens = openai.CountTokenMessages(textRequest.Messages, textRequest.Model)
|
||||
case constant.RelayModeCompletions:
|
||||
promptTokens = openai.CountTokenInput(textRequest.Prompt, textRequest.Model)
|
||||
case constant.RelayModeModerations:
|
||||
promptTokens = openai.CountTokenInput(textRequest.Input, textRequest.Model)
|
||||
}
|
||||
preConsumedTokens := config.PreConsumedQuota
|
||||
if textRequest.MaxTokens != 0 {
|
||||
preConsumedTokens = promptTokens + textRequest.MaxTokens
|
||||
}
|
||||
// get model ratio & group ratio
|
||||
modelRatio := common.GetModelRatio(textRequest.Model)
|
||||
groupRatio := common.GetGroupRatio(meta.Group)
|
||||
ratio := modelRatio * groupRatio
|
||||
preConsumedQuota := int(float64(preConsumedTokens) * ratio)
|
||||
userQuota, err := model.CacheGetUserQuota(meta.UserId)
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "get_user_quota_failed", http.StatusInternalServerError)
|
||||
// pre-consume quota
|
||||
promptTokens := getPromptTokens(textRequest, meta.Mode)
|
||||
preConsumedQuota, bizErr := preConsumeQuota(ctx, textRequest, promptTokens, ratio, meta)
|
||||
if bizErr != nil {
|
||||
logger.Warnf(ctx, "preConsumeQuota failed: %+v", *bizErr)
|
||||
return bizErr
|
||||
}
|
||||
if userQuota-preConsumedQuota < 0 {
|
||||
return openai.ErrorWrapper(errors.New("user quota is not enough"), "insufficient_user_quota", http.StatusForbidden)
|
||||
}
|
||||
err = model.CacheDecreaseUserQuota(meta.UserId, preConsumedQuota)
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "decrease_user_quota_failed", http.StatusInternalServerError)
|
||||
}
|
||||
if userQuota > 100*preConsumedQuota {
|
||||
// in this case, we do not pre-consume quota
|
||||
// because the user has enough quota
|
||||
preConsumedQuota = 0
|
||||
logger.Info(c.Request.Context(), fmt.Sprintf("user %d has enough quota %d, trusted and no need to pre-consume", meta.UserId, userQuota))
|
||||
}
|
||||
if preConsumedQuota > 0 {
|
||||
err := model.PreConsumeTokenQuota(meta.TokenId, preConsumedQuota)
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "pre_consume_token_quota_failed", http.StatusForbidden)
|
||||
}
|
||||
}
|
||||
requestBody, err := GetRequestBody(c, textRequest, isModelMapped, apiType, relayMode)
|
||||
|
||||
// get request body
|
||||
requestBody, err := GetRequestBody(c, *textRequest, isModelMapped, meta.APIType, meta.Mode)
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "get_request_body_failed", http.StatusInternalServerError)
|
||||
}
|
||||
var req *http.Request
|
||||
// do request
|
||||
var resp *http.Response
|
||||
isStream := textRequest.Stream
|
||||
if meta.APIType != constant.APITypeXunfei { // cause xunfei use websocket
|
||||
fullRequestURL, err := GetRequestURL(c.Request.URL.String(), meta, textRequest)
|
||||
if err != nil {
|
||||
logger.Error(ctx, fmt.Sprintf("util.GetRequestURL failed: %s", err.Error()))
|
||||
return openai.ErrorWrapper(fmt.Errorf("util.GetRequestURL failed"), "get_request_url_failed", http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
if apiType != constant.APITypeXunfei { // cause xunfei use websocket
|
||||
req, err = http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "new_request_failed", http.StatusInternalServerError)
|
||||
}
|
||||
SetupRequestHeaders(c, req, apiType, meta, isStream)
|
||||
resp, err = util.HTTPClient.Do(req)
|
||||
resp, err = doRequest(ctx, c, meta, isStream, fullRequestURL, requestBody)
|
||||
if err != nil {
|
||||
logger.Errorf(ctx, "doRequest failed: %s", err.Error())
|
||||
return openai.ErrorWrapper(err, "do_request_failed", http.StatusInternalServerError)
|
||||
}
|
||||
err = req.Body.Close()
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "close_request_body_failed", http.StatusInternalServerError)
|
||||
}
|
||||
err = c.Request.Body.Close()
|
||||
if err != nil {
|
||||
return openai.ErrorWrapper(err, "close_request_body_failed", http.StatusInternalServerError)
|
||||
}
|
||||
isStream = isStream || strings.HasPrefix(resp.Header.Get("Content-Type"), "text/event-stream")
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
@@ -117,57 +63,14 @@ func RelayTextHelper(c *gin.Context, relayMode int) *openai.ErrorWithStatusCode
|
||||
return util.RelayErrorHandler(resp)
|
||||
}
|
||||
}
|
||||
|
||||
var respErr *openai.ErrorWithStatusCode
|
||||
var usage *openai.Usage
|
||||
|
||||
defer func(ctx context.Context) {
|
||||
// Why we use defer here? Because if error happened, we will have to return the pre-consumed quota.
|
||||
if respErr != nil {
|
||||
logger.Errorf(ctx, "respErr is not nil: %+v", respErr)
|
||||
util.ReturnPreConsumedQuota(ctx, preConsumedQuota, meta.TokenId)
|
||||
return
|
||||
}
|
||||
if usage == nil {
|
||||
logger.Error(ctx, "usage is nil, which is unexpected")
|
||||
return
|
||||
}
|
||||
|
||||
go func() {
|
||||
quota := 0
|
||||
completionRatio := common.GetCompletionRatio(textRequest.Model)
|
||||
promptTokens = usage.PromptTokens
|
||||
completionTokens = usage.CompletionTokens
|
||||
quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
|
||||
if ratio != 0 && quota <= 0 {
|
||||
quota = 1
|
||||
}
|
||||
totalTokens := promptTokens + completionTokens
|
||||
if totalTokens == 0 {
|
||||
// in this case, must be some error happened
|
||||
// we cannot just return, because we may have to return the pre-consumed quota
|
||||
quota = 0
|
||||
}
|
||||
quotaDelta := quota - preConsumedQuota
|
||||
err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "error consuming token remain quota: "+err.Error())
|
||||
}
|
||||
err = model.CacheUpdateUserQuota(meta.UserId)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "error update user quota cache: "+err.Error())
|
||||
}
|
||||
if quota != 0 {
|
||||
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
|
||||
model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, promptTokens, completionTokens, textRequest.Model, meta.TokenName, quota, logContent)
|
||||
model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
|
||||
model.UpdateChannelUsedQuota(meta.ChannelId, quota)
|
||||
}
|
||||
}()
|
||||
}(ctx)
|
||||
usage, respErr = DoResponse(c, &textRequest, resp, relayMode, apiType, isStream, promptTokens)
|
||||
// do response
|
||||
usage, respErr := DoResponse(c, textRequest, resp, meta.Mode, meta.APIType, isStream, promptTokens)
|
||||
if respErr != nil {
|
||||
logger.Errorf(ctx, "respErr is not nil: %+v", respErr)
|
||||
util.ReturnPreConsumedQuota(ctx, preConsumedQuota, meta.TokenId)
|
||||
return respErr
|
||||
}
|
||||
// post-consume quota
|
||||
go postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -3,10 +3,12 @@ package util
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"one-api/common"
|
||||
"one-api/relay/constant"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type RelayMeta struct {
|
||||
Mode int
|
||||
ChannelType int
|
||||
ChannelId int
|
||||
TokenId int
|
||||
@@ -17,11 +19,13 @@ type RelayMeta struct {
|
||||
BaseURL string
|
||||
APIVersion string
|
||||
APIKey string
|
||||
APIType int
|
||||
Config map[string]string
|
||||
}
|
||||
|
||||
func GetRelayMeta(c *gin.Context) *RelayMeta {
|
||||
meta := RelayMeta{
|
||||
Mode: constant.Path2RelayMode(c.Request.URL.Path),
|
||||
ChannelType: c.GetInt("channel"),
|
||||
ChannelId: c.GetInt("channel_id"),
|
||||
TokenId: c.GetInt("token_id"),
|
||||
@@ -40,5 +44,6 @@ func GetRelayMeta(c *gin.Context) *RelayMeta {
|
||||
if meta.BaseURL == "" {
|
||||
meta.BaseURL = common.ChannelBaseURLs[meta.ChannelType]
|
||||
}
|
||||
meta.APIType = constant.ChannelType2APIType(meta.ChannelType)
|
||||
return &meta
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ const OperationSetting = () => {
|
||||
QuotaRemindThreshold: 0,
|
||||
PreConsumedQuota: 0,
|
||||
ModelRatio: "",
|
||||
CompletionRatio: "",
|
||||
GroupRatio: "",
|
||||
TopUpLink: "",
|
||||
ChatLink: "",
|
||||
@@ -52,9 +53,12 @@ const OperationSetting = () => {
|
||||
if (success) {
|
||||
let newInputs = {};
|
||||
data.forEach((item) => {
|
||||
if (item.key === "ModelRatio" || item.key === "GroupRatio") {
|
||||
if (item.key === "ModelRatio" || item.key === "GroupRatio" || item.key === "CompletionRatio") {
|
||||
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
|
||||
}
|
||||
if (item.value === '{}') {
|
||||
item.value = '';
|
||||
}
|
||||
newInputs[item.key] = item.value;
|
||||
});
|
||||
setInputs(newInputs);
|
||||
@@ -133,6 +137,13 @@ const OperationSetting = () => {
|
||||
}
|
||||
await updateOption("GroupRatio", inputs.GroupRatio);
|
||||
}
|
||||
if (originInputs['CompletionRatio'] !== inputs.CompletionRatio) {
|
||||
if (!verifyJSON(inputs.CompletionRatio)) {
|
||||
showError('补全倍率不是合法的 JSON 字符串');
|
||||
return;
|
||||
}
|
||||
await updateOption('CompletionRatio', inputs.CompletionRatio);
|
||||
}
|
||||
break;
|
||||
case "quota":
|
||||
if (originInputs["QuotaForNewUser"] !== inputs.QuotaForNewUser) {
|
||||
@@ -500,7 +511,20 @@ const OperationSetting = () => {
|
||||
placeholder="为一个 JSON 文本,键为模型名称,值为倍率"
|
||||
/>
|
||||
</FormControl>
|
||||
|
||||
<FormControl fullWidth>
|
||||
<TextField
|
||||
multiline
|
||||
maxRows={15}
|
||||
id="channel-CompletionRatio-label"
|
||||
label="补全倍率"
|
||||
value={inputs.CompletionRatio}
|
||||
name="CompletionRatio"
|
||||
onChange={handleInputChange}
|
||||
aria-describedby="helper-text-channel-CompletionRatio-label"
|
||||
minRows={5}
|
||||
placeholder="为一个 JSON 文本,键为模型名称,值为倍率,此处的倍率设置是模型补全倍率相较于提示倍率的比例,使用该设置可强制覆盖 One API 的内部比例"
|
||||
/>
|
||||
</FormControl>
|
||||
<FormControl fullWidth>
|
||||
<TextField
|
||||
multiline
|
||||
|
||||
@@ -192,7 +192,7 @@ export default function TokensTableRow({ item, manageToken, handleOpenModal, set
|
||||
id={`switch-${item.id}`}
|
||||
checked={statusSwitch === 1}
|
||||
onChange={handleStatus}
|
||||
// disabled={statusSwitch !== 1 && statusSwitch !== 2}
|
||||
// disabled={statusSwitch !== 1 && statusSwitch !== 2}
|
||||
/>
|
||||
</Tooltip>
|
||||
</TableCell>
|
||||
@@ -222,7 +222,7 @@ export default function TokensTableRow({ item, manageToken, handleOpenModal, set
|
||||
</Button>
|
||||
</ButtonGroup>
|
||||
<ButtonGroup size="small" aria-label="split button">
|
||||
<Button color="primary">聊天</Button>
|
||||
<Button color="primary" onClick={(e) => handleCopy(COPY_OPTIONS[0], 'link')}>聊天</Button>
|
||||
<Button size="small" onClick={(e) => handleOpenMenu(e, 'link')}>
|
||||
<IconCaretDownFilled size={'16px'} />
|
||||
</Button>
|
||||
|
||||
@@ -11,6 +11,7 @@ const OperationSetting = () => {
|
||||
QuotaRemindThreshold: 0,
|
||||
PreConsumedQuota: 0,
|
||||
ModelRatio: '',
|
||||
CompletionRatio: '',
|
||||
GroupRatio: '',
|
||||
TopUpLink: '',
|
||||
ChatLink: '',
|
||||
@@ -34,9 +35,12 @@ const OperationSetting = () => {
|
||||
if (success) {
|
||||
let newInputs = {};
|
||||
data.forEach((item) => {
|
||||
if (item.key === 'ModelRatio' || item.key === 'GroupRatio') {
|
||||
if (item.key === 'ModelRatio' || item.key === 'GroupRatio' || item.key === 'CompletionRatio') {
|
||||
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
|
||||
}
|
||||
if (item.value === '{}') {
|
||||
item.value = '';
|
||||
}
|
||||
newInputs[item.key] = item.value;
|
||||
});
|
||||
setInputs(newInputs);
|
||||
@@ -101,6 +105,13 @@ const OperationSetting = () => {
|
||||
}
|
||||
await updateOption('GroupRatio', inputs.GroupRatio);
|
||||
}
|
||||
if (originInputs['CompletionRatio'] !== inputs.CompletionRatio) {
|
||||
if (!verifyJSON(inputs.CompletionRatio)) {
|
||||
showError('补全倍率不是合法的 JSON 字符串');
|
||||
return;
|
||||
}
|
||||
await updateOption('CompletionRatio', inputs.CompletionRatio);
|
||||
}
|
||||
break;
|
||||
case 'quota':
|
||||
if (originInputs['QuotaForNewUser'] !== inputs.QuotaForNewUser) {
|
||||
@@ -271,10 +282,10 @@ const OperationSetting = () => {
|
||||
onChange={handleInputChange}
|
||||
/>
|
||||
<Form.Checkbox
|
||||
checked={inputs.AutomaticEnableChannelEnabled === 'true'}
|
||||
label='成功时自动启用通道'
|
||||
name='AutomaticEnableChannelEnabled'
|
||||
onChange={handleInputChange}
|
||||
checked={inputs.AutomaticEnableChannelEnabled === 'true'}
|
||||
label='成功时自动启用通道'
|
||||
name='AutomaticEnableChannelEnabled'
|
||||
onChange={handleInputChange}
|
||||
/>
|
||||
</Form.Group>
|
||||
<Form.Button onClick={() => {
|
||||
@@ -344,6 +355,17 @@ const OperationSetting = () => {
|
||||
placeholder='为一个 JSON 文本,键为模型名称,值为倍率'
|
||||
/>
|
||||
</Form.Group>
|
||||
<Form.Group widths='equal'>
|
||||
<Form.TextArea
|
||||
label='补全倍率'
|
||||
name='CompletionRatio'
|
||||
onChange={handleInputChange}
|
||||
style={{ minHeight: 250, fontFamily: 'JetBrains Mono, Consolas' }}
|
||||
autoComplete='new-password'
|
||||
value={inputs.CompletionRatio}
|
||||
placeholder='为一个 JSON 文本,键为模型名称,值为倍率,此处的倍率设置是模型补全倍率相较于提示倍率的比例,使用该设置可强制覆盖 One API 的内部比例'
|
||||
/>
|
||||
</Form.Group>
|
||||
<Form.Group widths='equal'>
|
||||
<Form.TextArea
|
||||
label='分组倍率'
|
||||
|
||||
Reference in New Issue
Block a user