create transactions from AI receipt image recognition results

This commit is contained in:
MaysWind
2025-09-21 04:00:56 +08:00
parent 00f1d0418f
commit 5d88287ae2
50 changed files with 2356 additions and 22 deletions

View File

@@ -30,6 +30,7 @@ Live Demo: [https://ezbookkeeping-demo.mayswind.net](https://ezbookkeeping-demo.
- PWA support for native-like mobile experience
- Dark mode
- **AI-Powered Features**
- Receipt image recognition
- Supports MCP (Model Context Protocol) for AI integration
- **Powerful Bookkeeping**
- Two-level accounts and categories

View File

@@ -9,6 +9,7 @@ import (
"github.com/mayswind/ezbookkeeping/pkg/datastore"
"github.com/mayswind/ezbookkeeping/pkg/duplicatechecker"
"github.com/mayswind/ezbookkeeping/pkg/exchangerates"
"github.com/mayswind/ezbookkeeping/pkg/llm"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/mail"
"github.com/mayswind/ezbookkeeping/pkg/settings"
@@ -90,6 +91,15 @@ func initializeSystem(c *core.CliContext) (*settings.Config, error) {
return nil, err
}
err = llm.InitializeLargeLanguageModelProvider(config)
if err != nil {
if !isDisableBootLog {
log.BootErrorf(c, "[initializer.initializeSystem] initializes large language model provider failed, because %s", err.Error())
}
return nil, err
}
err = uuid.InitializeUuidGenerator(config)
if err != nil {
@@ -155,6 +165,9 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config {
clonedConfig.DatabaseConfig.DatabasePassword = "****"
clonedConfig.SMTPConfig.SMTPPasswd = "****"
clonedConfig.MinIOConfig.SecretAccessKey = "****"
clonedConfig.OpenAIAPIKey = "****"
clonedConfig.OpenAICompatibleAPIKey = "****"
clonedConfig.OpenRouterAPIKey = "****"
clonedConfig.SecretKey = "****"
clonedConfig.AmapApplicationSecret = "****"

View File

@@ -396,6 +396,13 @@ func startWebServer(c *core.CliContext) error {
apiV1Route.POST("/transaction/templates/move.json", bindApi(api.TransactionTemplates.TemplateMoveHandler))
apiV1Route.POST("/transaction/templates/delete.json", bindApi(api.TransactionTemplates.TemplateDeleteHandler))
// Large Language Models
if config.LLMProvider != "" {
if config.TransactionFromAIImageRecognition {
apiV1Route.POST("/llm/transactions/recognize_receipt_image.json", bindApi(api.LargeLanguageModels.RecognizeReceiptImageHandler))
}
}
// Exchange Rates
apiV1Route.GET("/exchange_rates/latest.json", bindApi(api.ExchangeRates.LatestExchangeRateHandler))
apiV1Route.POST("/exchange_rates/user_custom/update.json", bindApi(api.ExchangeRates.UserCustomExchangeRateUpdateHandler))

View File

@@ -164,6 +164,53 @@ webdav_proxy = system
# For "webdav" storage only, set to true to skip tls verification when connect webdav
webdav_skip_tls_verify = false
[llm]
# Large Language Model (LLM) provider, supports the following types: "openai", "openai_compatible", "openrouter", "ollama"
llm_provider =
# For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information
openai_api_key =
# For "openai" llm provider only, receipt image recognition model for creating transactions from images
openai_receipt_image_recognition_model_id =
# For "openai_compatible" llm provider only, OpenAI compatible API base url, e.g. "https://api.openai.com/v1/"
openai_compatible_base_url =
# For "openai_compatible" llm provider only, OpenAI compatible API secret key
openai_compatible_api_key =
# For "openai_compatible" llm provider only, receipt image recognition model for creating transactions from images
openai_compatible_receipt_image_recognition_model_id =
# For "openrouter" llm provider only, OpenRouter API key, please visit https://openrouter.ai/settings/keys for more information
openrouter_api_key =
# For "openrouter" llm provider only, receipt image recognition model for creating transactions from images
openrouter_receipt_image_recognition_model_id =
# For "ollama" llm provider only, Ollama server url, e.g. "http://127.0.0.1:11434/"
ollama_server_url =
# For "ollama" llm provider only, receipt image recognition model for creating transactions from images
ollama_receipt_image_recognition_model_id =
# Set to true to enable creating transactions from AI image recognition results, requires llm_provider and its related receipt image recognition model to be configured properly
transaction_from_ai_image_recognition = false
# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes)
max_ai_recognition_picture_size = 10485760
# Timeout for requesting the large language model api (0 - 4294967295 milliseconds)
# Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds)
request_timeout = 60000
# Proxy for ezbookkeeping server requesting large language model api, supports "system" (use system proxy), "none" (do not use proxy), or proxy URL which starts with "http://", "https://" or "socks5://", default is "system"
proxy = system
# Set to true to skip tls verification when requesting the large language model api
skip_tls_verify = false
[uuid]
# Uuid generator type, supports "internal" currently
generator_type = internal

View File

@@ -0,0 +1,345 @@
package api
import (
"bytes"
"encoding/json"
"io"
"strings"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/llm"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/models"
"github.com/mayswind/ezbookkeeping/pkg/services"
"github.com/mayswind/ezbookkeeping/pkg/settings"
"github.com/mayswind/ezbookkeeping/pkg/templates"
"github.com/mayswind/ezbookkeeping/pkg/utils"
)
// LargeLanguageModelsApi represents large language models api
type LargeLanguageModelsApi struct {
	ApiUsingConfig
	transactionCategories *services.TransactionCategoryService // used to resolve recognized category names to category ids
	transactionTags       *services.TransactionTagService      // used to resolve recognized tag names to tag ids
	accounts              *services.AccountService             // used to resolve recognized account names to account ids
	users                 *services.UserService                // used to load the current user and its feature restrictions
}

// Initialize a large language models api singleton instance
var (
	LargeLanguageModels = &LargeLanguageModelsApi{
		ApiUsingConfig: ApiUsingConfig{
			container: settings.Container,
		},
		transactionCategories: services.TransactionCategories,
		transactionTags:       services.TransactionTags,
		accounts:              services.Accounts,
		users:                 services.Users,
	}
)
// RecognizeReceiptImageHandler returns the recognized receipt image result.
// It validates the uploaded image, collects the user's visible accounts, categories
// and tags as context for the llm system prompt, sends the image to the configured
// large language model provider and maps the recognized names back to entity ids.
func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) {
	// the feature requires both an llm provider and the AI image recognition switch to be enabled
	if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition {
		return nil, errs.ErrLargeLanguageModelProviderNotEnabled
	}

	// the client timezone offset is needed later to interpret the recognized transaction time
	utcOffset, err := c.GetClientTimezoneOffset()

	if err != nil {
		log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] cannot get client timezone offset, because %s", err.Error())
		return nil, errs.ErrClientTimezoneOffsetInvalid
	}

	uid := c.GetCurrentUid()
	user, err := a.users.GetUserById(c, uid)

	if err != nil {
		if !errs.IsCustomError(err) {
			log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get user for user \"uid:%d\", because %s", uid, err.Error())
		}

		// return nil (not false) as the result value: the result is meaningless when an error is returned
		return nil, errs.ErrUserNotFound
	}

	if user.FeatureRestriction.Contains(core.USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION) {
		return nil, errs.ErrNotPermittedToPerformThisAction
	}

	form, err := c.MultipartForm()

	if err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get multi-part form data for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrParameterInvalid
	}

	imageFiles := form.File["image"]

	if len(imageFiles) < 1 {
		log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] there is no image in request for user \"uid:%d\"", uid)
		return nil, errs.ErrNoAIRecognitionImage
	}

	if imageFiles[0].Size < 1 {
		log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the size of image in request is zero for user \"uid:%d\"", uid)
		return nil, errs.ErrAIRecognitionImageIsEmpty
	}

	if imageFiles[0].Size > int64(a.CurrentConfig().MaxAIRecognitionPictureFileSize) {
		log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the upload file size \"%d\" exceeds the maximum size \"%d\" of image for user \"uid:%d\"", imageFiles[0].Size, a.CurrentConfig().MaxAIRecognitionPictureFileSize, uid)
		return nil, errs.ErrExceedMaxAIRecognitionImageFileSize
	}

	// only image file extensions with a known content type are accepted
	fileExtension := utils.GetFileNameExtension(imageFiles[0].Filename)

	if utils.GetImageContentType(fileExtension) == "" {
		log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the file extension \"%s\" of image in request is not supported for user \"uid:%d\"", fileExtension, uid)
		return nil, errs.ErrImageTypeNotSupported
	}

	imageFile, err := imageFiles[0].Open()

	if err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get image file from request for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrOperationFailed
	}

	defer imageFile.Close()

	imageData, err := io.ReadAll(imageFile)

	if err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to read image file from request for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrOperationFailed
	}

	// collect visible account names (hidden and multi-sub parent accounts are excluded)
	accounts, err := a.accounts.GetAllAccountsByUid(c, uid)

	if err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get all accounts for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	accountMap := a.accounts.GetVisibleAccountNameMapByList(accounts)
	accountNames := make([]string, 0, len(accounts))

	for i := 0; i < len(accounts); i++ {
		if accounts[i].Hidden || accounts[i].Type == models.ACCOUNT_TYPE_MULTI_SUB_ACCOUNTS {
			continue
		}

		accountNames = append(accountNames, accounts[i].Name)
	}

	// collect visible secondary category names per transaction type (level-one categories are excluded)
	categories, err := a.transactionCategories.GetAllCategoriesByUid(c, uid, 0, -1)

	if err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get categories for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	incomeCategoryMap := make(map[string]*models.TransactionCategory)
	incomeCategoryNames := make([]string, 0)
	expenseCategoryMap := make(map[string]*models.TransactionCategory)
	expenseCategoryNames := make([]string, 0)
	transferCategoryMap := make(map[string]*models.TransactionCategory)
	transferCategoryNames := make([]string, 0)

	for i := 0; i < len(categories); i++ {
		category := categories[i]

		if category.Hidden || category.ParentCategoryId == models.LevelOneTransactionCategoryParentId {
			continue
		}

		if category.Type == models.CATEGORY_TYPE_INCOME {
			incomeCategoryMap[category.Name] = category
			incomeCategoryNames = append(incomeCategoryNames, category.Name)
		} else if category.Type == models.CATEGORY_TYPE_EXPENSE {
			expenseCategoryMap[category.Name] = category
			expenseCategoryNames = append(expenseCategoryNames, category.Name)
		} else if category.Type == models.CATEGORY_TYPE_TRANSFER {
			transferCategoryMap[category.Name] = category
			transferCategoryNames = append(transferCategoryNames, category.Name)
		}
	}

	// collect visible tag names
	tags, err := a.transactionTags.GetAllTagsByUid(c, uid)

	if err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get tags for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	tagMap := a.transactionTags.GetVisibleTagNameMapByList(tags)
	tagNames := make([]string, 0, len(tags))

	for i := 0; i < len(tags); i++ {
		if tags[i].Hidden {
			continue
		}

		tagNames = append(tagNames, tags[i].Name)
	}

	// render the system prompt template with the user's entity names so the llm
	// can answer with names that map back to existing entities
	systemPrompt, err := templates.GetTemplate(templates.SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION)

	if err != nil {
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	systemPromptParams := map[string]any{
		"AllExpenseCategoryNames":  strings.Join(expenseCategoryNames, "\n"),
		"AllIncomeCategoryNames":   strings.Join(incomeCategoryNames, "\n"),
		"AllTransferCategoryNames": strings.Join(transferCategoryNames, "\n"),
		"AllAccountNames":          strings.Join(accountNames, "\n"),
		"AllTagNames":              strings.Join(tagNames, "\n"),
	}

	var bodyBuffer bytes.Buffer
	err = systemPrompt.Execute(&bodyBuffer, systemPromptParams)

	if err != nil {
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	llmRequest := &llm.LargeLanguageModelRequest{
		Stream: false,
		// normalize line endings so the prompt is identical regardless of template file encoding
		SystemPrompt:   strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"),
		UserPrompt:     imageData,
		UserPromptType: llm.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
	}

	llmResponse, err := llm.Container.GetJsonResponseByReceiptImageRecognitionModel(c, uid, a.CurrentConfig(), llmRequest)

	if err != nil {
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	var result *models.RecognizedReceiptImageResult

	if err := json.Unmarshal([]byte(llmResponse.Content), &result); err != nil {
		log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to unmarshal recognized receipt image result from llm response \"%s\" for user \"uid:%d\", because %s", llmResponse.Content, uid, err.Error())
		return nil, errs.Or(err, errs.ErrOperationFailed)
	}

	return a.parseRecognizedReceiptImageResponse(c, uid, utcOffset, result, accountMap, expenseCategoryMap, incomeCategoryMap, transferCategoryMap, tagMap)
}
// parseRecognizedReceiptImageResponse maps the llm recognition result to a response entity.
// Recognized category, account and tag names are resolved to ids via the provided name maps;
// unresolvable names are silently skipped, while an invalid transaction type or amount is an error.
// The recognized time is interpreted with the client utc offset; an unparsable time is only logged.
func (a *LargeLanguageModelsApi) parseRecognizedReceiptImageResponse(c *core.WebContext, uid int64, utcOffset int16, recognizedResult *models.RecognizedReceiptImageResult, accountMap map[string]*models.Account, expenseCategoryMap map[string]*models.TransactionCategory, incomeCategoryMap map[string]*models.TransactionCategory, transferCategoryMap map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (*models.RecognizedReceiptImageResponse, *errs.Error) {
	if recognizedResult == nil {
		log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized result is null")
		return nil, errs.ErrOperationFailed
	}

	recognizedReceiptImageResponse := &models.RecognizedReceiptImageResponse{
		Type: models.TRANSACTION_TYPE_EXPENSE,
	}

	// the category map to resolve the recognized category name depends on the transaction type
	var categoryMap map[string]*models.TransactionCategory

	switch recognizedResult.Type {
	case "income":
		recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_INCOME
		categoryMap = incomeCategoryMap
	case "expense":
		recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_EXPENSE
		categoryMap = expenseCategoryMap
	case "transfer":
		recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_TRANSFER
		categoryMap = transferCategoryMap
	default:
		log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized transaction type \"%s\" is invalid", recognizedResult.Type)
		return nil, errs.ErrOperationFailed
	}

	if len(recognizedResult.CategoryName) > 0 {
		if category, exists := categoryMap[recognizedResult.CategoryName]; exists {
			recognizedReceiptImageResponse.CategoryId = category.CategoryId
		}
	}

	if len(recognizedResult.Time) > 0 {
		timestamp, err := utils.ParseFromLongDateTime(recognizedResult.Time, utcOffset)

		if err != nil {
			// an unparsable time is not fatal, the user can still pick the time manually
			log.Warnf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized time \"%s\" is invalid", recognizedResult.Time)
		} else {
			recognizedReceiptImageResponse.Time = timestamp.Unix()
		}
	}

	if len(recognizedResult.Amount) > 0 {
		amount, err := utils.ParseAmount(recognizedResult.Amount)

		if err != nil {
			log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized amount \"%s\" is invalid", recognizedResult.Amount)
			return nil, errs.ErrOperationFailed
		}

		recognizedReceiptImageResponse.SourceAmount = amount

		// the destination amount is only meaningful for transfer transactions
		if recognizedReceiptImageResponse.Type == models.TRANSACTION_TYPE_TRANSFER && len(recognizedResult.DestinationAmount) > 0 {
			destinationAmount, err := utils.ParseAmount(recognizedResult.DestinationAmount)

			if err != nil {
				log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized destination amount \"%s\" is invalid", recognizedResult.DestinationAmount)
				return nil, errs.ErrOperationFailed
			}

			recognizedReceiptImageResponse.DestinationAmount = destinationAmount
		}
	}

	if len(recognizedResult.AccountName) > 0 {
		if account, exists := accountMap[recognizedResult.AccountName]; exists {
			recognizedReceiptImageResponse.SourceAccountId = account.AccountId
		}
	}

	if len(recognizedResult.DestinationAccountName) > 0 {
		if account, exists := accountMap[recognizedResult.DestinationAccountName]; exists {
			recognizedReceiptImageResponse.DestinationAccountId = account.AccountId
		}
	}

	if len(recognizedResult.TagNames) > 0 {
		tagIds := make([]string, 0, len(recognizedResult.TagNames))

		for i := 0; i < len(recognizedResult.TagNames); i++ {
			if tag, exists := tagMap[recognizedResult.TagNames[i]]; exists {
				tagIds = append(tagIds, utils.Int64ToString(tag.TagId))
			}
		}

		recognizedReceiptImageResponse.TagIds = tagIds
	}

	if len(recognizedResult.Description) > 0 {
		recognizedReceiptImageResponse.Comment = recognizedResult.Description
	}

	return recognizedReceiptImageResponse, nil
}

View File

@@ -47,6 +47,12 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext)
a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer)
}
if config.LLMProvider != "" {
if config.TransactionFromAIImageRecognition {
a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition)
}
}
if config.LoginPageTips.Enabled {
a.appendMultiLanguageTipSetting(builder, "lpt", config.LoginPageTips)
}

View File

@@ -76,19 +76,20 @@ type UserFeatureRestrictionType uint64
// User Feature Restriction Type
const (
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION UserFeatureRestrictionType = 14
)
const userFeatureRestrictionTypeMinValue UserFeatureRestrictionType = USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD

View File

@@ -40,6 +40,7 @@ const (
NormalSubcategoryConverter = 12
NormalSubcategoryUserCustomExchangeRate = 13
NormalSubcategoryModelContextProtocol = 14
NormalSubcategoryLargeLanguageModel = 15
)
// Error represents the specific error returned to user

View File

@@ -0,0 +1,11 @@
package errs
import "net/http"
// Error codes related to large language model features
var (
	// ErrLargeLanguageModelProviderNotEnabled is returned when no llm provider is configured or the feature is disabled
	ErrLargeLanguageModelProviderNotEnabled = NewNormalError(NormalSubcategoryLargeLanguageModel, 0, http.StatusBadRequest, "llm provider is not enabled")
	// ErrNoAIRecognitionImage is returned when the request contains no image file
	ErrNoAIRecognitionImage = NewNormalError(NormalSubcategoryLargeLanguageModel, 1, http.StatusBadRequest, "no image for AI recognition")
	// ErrAIRecognitionImageIsEmpty is returned when the uploaded image file has zero size
	ErrAIRecognitionImageIsEmpty = NewNormalError(NormalSubcategoryLargeLanguageModel, 2, http.StatusBadRequest, "image for AI recognition is empty")
	// ErrExceedMaxAIRecognitionImageFileSize is returned when the uploaded image exceeds the configured size limit
	ErrExceedMaxAIRecognitionImageFileSize = NewNormalError(NormalSubcategoryLargeLanguageModel, 3, http.StatusBadRequest, "exceed the maximum size of image file for AI recognition")
)

View File

@@ -24,4 +24,6 @@ var (
ErrInvalidPasswordResetTokenExpiredTime = NewSystemError(SystemSubcategorySetting, 17, http.StatusInternalServerError, "invalid password reset token expired time")
ErrInvalidExchangeRatesDataSource = NewSystemError(SystemSubcategorySetting, 18, http.StatusInternalServerError, "invalid exchange rates data source")
ErrInvalidIpAddressPattern = NewSystemError(SystemSubcategorySetting, 19, http.StatusInternalServerError, "invalid ip address pattern")
ErrInvalidLLMProvider = NewSystemError(SystemSubcategorySetting, 20, http.StatusInternalServerError, "invalid llm provider")
ErrInvalidLLMModelId = NewSystemError(SystemSubcategorySetting, 21, http.StatusInternalServerError, "invalid llm model id")
)

View File

@@ -0,0 +1,91 @@
package llm
import (
"crypto/tls"
"io"
"net/http"
"time"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/settings"
"github.com/mayswind/ezbookkeeping/pkg/utils"
)
// HttpLargeLanguageModelProvider defines the structure of http large language model provider
type HttpLargeLanguageModelProvider interface {
	// BuildTextualRequest returns the http request by the provider api definition
	BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error)

	// ParseTextualResponse returns the textual response entity by the provider api definition
	ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error)

	// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
	GetReceiptImageRecognitionModelID() string
}

// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider.
// It implements the transport concerns (proxy, timeout, tls) once and delegates request building
// and response parsing to the wrapped provider.
type CommonHttpLargeLanguageModelProvider struct {
	LargeLanguageModelProvider
	provider HttpLargeLanguageModelProvider // the provider-specific request builder / response parser
}

// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common compatible large language model provider
func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
	return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
}
// getTextualResponse sends the request to the llm http api (honoring the configured proxy,
// tls verification and timeout settings) and returns the parsed textual response.
// Returns ErrInvalidLLMModelId when no model id is configured for the requested capability.
func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
	if modelId == "" {
		return nil, errs.ErrInvalidLLMModelId
	}

	transport := http.DefaultTransport.(*http.Transport).Clone()
	utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy)

	if currentConfig.LargeLanguageModelAPISkipTLSVerify {
		transport.TLSClientConfig = &tls.Config{
			InsecureSkipVerify: true,
		}
	}

	client := &http.Client{
		Transport: transport,
		// a zero timeout disables the client timeout entirely
		Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond,
	}

	httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType)

	if err != nil {
		log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	httpRequest.Header.Set("User-Agent", settings.GetUserAgent())

	resp, err := client.Do(httpRequest)

	if err != nil {
		log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to request large language model api for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)

	// the read error was previously ignored, which could pass a truncated body to the parser
	if err != nil {
		log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to read large language model api response for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	log.Debugf(c, "[http_large_language_model_provider.getTextualResponse] response is %s", body)

	if resp.StatusCode != 200 {
		log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to get large language model api response for user \"uid:%d\", because response code is %d", uid, resp.StatusCode)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	return p.provider.ParseTextualResponse(c, uid, body, responseType)
}
// newCommonHttpLargeLanguageModelProvider wraps a provider-specific http implementation
// with the shared transport handling (proxy, timeout, tls and response reading).
func newCommonHttpLargeLanguageModelProvider(provider HttpLargeLanguageModelProvider) *CommonHttpLargeLanguageModelProvider {
	return &CommonHttpLargeLanguageModelProvider{
		provider: provider,
	}
}

View File

@@ -0,0 +1,33 @@
package llm
import "reflect"
// LargeLanguageModelRequestPromptType represents how the user prompt payload should be interpreted
type LargeLanguageModelRequestPromptType byte

// Large Language Model Request Prompt Type
const (
	LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_TEXT      LargeLanguageModelRequestPromptType = 0
	LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL LargeLanguageModelRequestPromptType = 1
)

// LargeLanguageModelResponseFormat represents the response format requested from the model
type LargeLanguageModelResponseFormat byte

// Large Language Model Response Format
const (
	LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_TEXT LargeLanguageModelResponseFormat = 0
	LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON LargeLanguageModelResponseFormat = 1
)

// LargeLanguageModelRequest represents a request to a large language model
type LargeLanguageModelRequest struct {
	Stream       bool   // whether to request a streaming response
	SystemPrompt string // system prompt text sent before the user prompt
	// UserPrompt holds raw bytes: plain text for the text prompt type,
	// or raw image bytes when the prompt type is image url
	UserPrompt     []byte
	UserPromptType LargeLanguageModelRequestPromptType
	// ResponseJsonObjectType optionally describes the expected json object type of the response
	ResponseJsonObjectType reflect.Type
}

// LargeLanguageModelTextualResponse represents a textual response from a large language model
type LargeLanguageModelTextualResponse struct {
	Content string // the textual content returned by the model
}

View File

@@ -0,0 +1,12 @@
package llm
import (
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// LargeLanguageModelProvider defines the structure of large language model provider
type LargeLanguageModelProvider interface {
	// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model
	GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error)
}

View File

@@ -0,0 +1,45 @@
package llm
import (
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// LargeLanguageModelProviderContainer contains the current large language model provider
type LargeLanguageModelProviderContainer struct {
	current LargeLanguageModelProvider // the active provider, set by InitializeLargeLanguageModelProvider
}

// Initialize a large language model provider container singleton instance
var (
	Container = &LargeLanguageModelProviderContainer{}
)
// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config
func InitializeLargeLanguageModelProvider(config *settings.Config) error {
	switch config.LLMProvider {
	case settings.OpenAILLMProvider:
		Container.current = NewOpenAILargeLanguageModelProvider(config)
	case settings.OpenAICompatibleLLMProvider:
		Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config)
	case settings.OpenRouterLLMProvider:
		Container.current = NewOpenRouterLargeLanguageModelProvider(config)
	case settings.OllamaLLMProvider:
		Container.current = NewOllamaLargeLanguageModelProvider(config)
	default:
		return errs.ErrInvalidLLMProvider
	}

	return nil
}
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model
func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
	// check the receiver, not the package-level singleton, so the method behaves
	// correctly for any container instance
	if l.current == nil {
		return nil, errs.ErrInvalidLLMProvider
	}

	return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request)
}

View File

@@ -0,0 +1,153 @@
package llm
import (
"bytes"
"encoding/base64"
"encoding/json"
"net/http"
"strings"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
const ollamaChatCompletionsPath = "api/chat"
// OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider
type OllamaLargeLanguageModelProvider struct {
CommonHttpLargeLanguageModelProvider
OllamaServerURL string
ReceiptImageRecognitionModelID string
}
// BuildTextualRequest returns the http request by Ollama provider
func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
	body, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)

	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(body))

	if err != nil {
		return nil, err
	}

	req.Header.Set("Content-Type", "application/json")

	return req, nil
}
// ParseTextualResponse returns the textual response by Ollama provider.
// For json responses, markdown code fences (```json ... ``` or ``` ... ```)
// that some models wrap around the payload are stripped, tolerating leading
// and trailing whitespace around the fences.
func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
	responseBody := make(map[string]any)
	err := json.Unmarshal(body, &responseBody)

	if err != nil {
		log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	message, ok := responseBody["message"].(map[string]any)

	if !ok {
		log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	content, ok := message["content"].(string)

	if !ok {
		log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
		// trim surrounding whitespace first so code fences are detected even when the
		// model emits leading/trailing newlines around them
		trimmedContent := strings.TrimSpace(content)

		if strings.HasPrefix(trimmedContent, "```json") && strings.HasSuffix(trimmedContent, "```") {
			content = strings.TrimSuffix(strings.TrimPrefix(trimmedContent, "```json"), "```")
		} else if strings.HasPrefix(trimmedContent, "```") && strings.HasSuffix(trimmedContent, "```") {
			content = strings.TrimSuffix(strings.TrimPrefix(trimmedContent, "```"), "```")
		}
	}

	textualResponse := &LargeLanguageModelTextualResponse{
		Content: content,
	}

	return textualResponse, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider
func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
	return p.ReceiptImageRecognitionModelID
}
// buildJsonRequestBody builds the JSON payload for the Ollama chat api from the
// given request, model id and expected response format, and returns the marshalled
// bytes or an error when marshalling fails
func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
	requestMessages := make([]any, 0)

	if request.SystemPrompt != "" {
		requestMessages = append(requestMessages, map[string]string{
			"role":    "system",
			"content": request.SystemPrompt,
		})
	}

	if len(request.UserPrompt) > 0 {
		if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
			// only base64-encode the prompt when it is actually image data
			// (previously the encoding was done unconditionally and discarded
			// for textual prompts)
			imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
			requestMessages = append(requestMessages, map[string]any{
				"role":    "user",
				"content": "",
				"images":  []string{imageBase64Data},
			})
		} else {
			requestMessages = append(requestMessages, map[string]string{
				"role":    "user",
				"content": string(request.UserPrompt),
			})
		}
	}

	requestBody := make(map[string]any)
	requestBody["model"] = modelId
	requestBody["stream"] = request.Stream
	requestBody["messages"] = requestMessages

	if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
		// ollama uses a dedicated top-level "format" field to force json output
		requestBody["format"] = "json"
	}

	requestBodyBytes, err := json.Marshal(requestBody)

	if err != nil {
		log.Errorf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrOperationFailed
	}

	log.Debugf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)

	return requestBodyBytes, nil
}
// getOllamaRequestUrl joins the configured Ollama server base url with the chat
// api path, tolerating a missing trailing slash and an empty base url
func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
	url := p.OllamaServerURL

	// guard the index below: an empty configured server url would otherwise panic
	if len(url) == 0 || url[len(url)-1] != '/' {
		url += "/"
	}

	return url + ollamaChatCompletionsPath
}
// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance
// (configured via the [llm] section items ollama_server_url and ollama_receipt_image_recognition_model_id)
func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
	return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{
		OllamaServerURL:                config.OllamaServerURL,
		ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID,
	})
}

View File

@@ -0,0 +1,138 @@
package llm
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/mayswind/ezbookkeeping/pkg/core"
)
// TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt verifies that a
// textual user prompt is serialized as a plain chat message with the json format flag set
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	request := &LargeLanguageModelRequest{
		SystemPrompt: "You are a helpful assistant.",
		UserPrompt:   []byte("Hello, how are you?"),
	}

	bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)

	// ensure the generated body is well-formed json before comparing the raw bytes
	var body map[string]any
	err = json.Unmarshal(bodyBytes, &body)
	assert.Nil(t, err)

	assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
}

// TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt verifies that an
// image user prompt is base64-encoded into the ollama-specific "images" field
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	request := &LargeLanguageModelRequest{
		SystemPrompt:   "What's in this image?",
		UserPrompt:     []byte("fakedata"),
		UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
	}

	bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)

	// ensure the generated body is well-formed json before comparing the raw bytes
	var body map[string]any
	err = json.Unmarshal(bodyBytes, &body)
	assert.Nil(t, err)

	assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
}
// TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse verifies that the
// assistant message content is extracted from a well-formed ollama chat response
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	response := `{
    "model": "test",
    "created_at": "2025-09-01T01:02:03.456789Z",
    "message": {
        "role": "assistant",
        "content": "This is a test response"
    }
}`

	result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)
	assert.Equal(t, "This is a test response", result.Content)
}

// TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse verifies that an empty
// (but present) content field is returned as-is rather than treated as an error
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	response := `{
    "model": "test",
    "created_at": "2025-09-01T01:02:03.456789Z",
    "message": {
        "role": "assistant",
        "content": ""
    }
}`

	result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)
	assert.Equal(t, "", result.Content)
}
// TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyMessage verifies that a response
// whose "message" object is empty is rejected. (Renamed from "_EmptyChoices": the ollama chat
// response carries a single "message" object, not an openai-style "choices" array.)
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyMessage(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	response := `{
    "model": "test",
    "created_at": "2025-09-01T01:02:03.456789Z",
    "message": {}
}`

	_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.EqualError(t, err, "failed to request third party api")
}

// TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoMessageContent verifies that a
// message without a "content" field is rejected (renamed from "_NoChoiceContent" for the same
// reason as above)
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoMessageContent(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	response := `{
    "model": "test",
    "created_at": "2025-09-01T01:02:03.456789Z",
    "message": {
        "role": "assistant"
    }
}`

	_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.EqualError(t, err, "failed to request third party api")
}

// TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson verifies that a
// non-json response body is rejected
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
	provider := &OllamaLargeLanguageModelProvider{}
	response := "error"

	_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.EqualError(t, err, "failed to request third party api")
}
// TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl verifies that the chat api path is
// appended to the configured server url regardless of whether it ends with a trailing slash
func TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl(t *testing.T) {
	// table-driven form replaces the three copy-pasted arrange/assert blocks
	testCases := []struct {
		serverUrl string
		expected  string
	}{
		{"http://localhost:11434/", "http://localhost:11434/api/chat"},
		{"http://localhost:11434", "http://localhost:11434/api/chat"},
		{"http://example.com/ollama/", "http://example.com/ollama/api/chat"},
	}

	for _, testCase := range testCases {
		provider := &OllamaLargeLanguageModelProvider{
			OllamaServerURL: testCase.serverUrl,
		}

		assert.Equal(t, testCase.expected, provider.getOllamaRequestUrl())
	}
}

View File

@@ -0,0 +1,187 @@
package llm
import (
"bytes"
"encoding/base64"
"encoding/json"
"io"
"net/http"
"strings"
"github.com/invopop/jsonschema"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
)
// OpenAIChatCompletionsLargeLanguageModelProvider defines the structure of OpenAI chat completions compatible large language model provider
// (implemented by the OpenAI, OpenAI-compatible and OpenRouter providers)
type OpenAIChatCompletionsLargeLanguageModelProvider interface {
	// BuildChatCompletionsHttpRequest returns the chat completions http request
	// (built without a body; the caller attaches the json payload afterwards)
	BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error)

	// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
	GetReceiptImageRecognitionModelID() string
}
// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api
type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct {
	CommonHttpLargeLanguageModelProvider
	// provider supplies the endpoint url, auth headers and the configured model id
	provider OpenAIChatCompletionsLargeLanguageModelProvider
}
// BuildTextualRequest returns the http request by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
	requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)

	if err != nil {
		return nil, err
	}

	httpRequest, err := p.provider.BuildChatCompletionsHttpRequest(c, uid)

	if err != nil {
		return nil, err
	}

	// the concrete provider builds the request without a body, so attach the json
	// payload here; ContentLength and GetBody are set explicitly because assigning
	// Body after http.NewRequest leaves ContentLength at 0, which makes the client
	// use chunked transfer encoding and prevents replaying the body on redirects
	httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody))
	httpRequest.ContentLength = int64(len(requestBody))
	httpRequest.GetBody = func() (io.ReadCloser, error) {
		return io.NopCloser(bytes.NewReader(requestBody)), nil
	}
	httpRequest.Header.Set("Content-Type", "application/json")

	return httpRequest, nil
}
// ParseTextualResponse returns the textual response by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
	parsed := make(map[string]any)

	if err := json.Unmarshal(body, &parsed); err != nil {
		log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	// chat completions responses carry the generated text in choices[0].message.content
	choices, hasChoices := parsed["choices"].([]any)

	if !hasChoices || len(choices) < 1 {
		log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	firstChoice, validChoice := choices[0].(map[string]any)

	if !validChoice {
		log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	message, validMessage := firstChoice["message"].(map[string]any)

	if !validMessage {
		log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	content, validContent := message["content"].(string)

	if !validContent {
		log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
		return nil, errs.ErrFailedToRequestRemoteApi
	}

	if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
		// strip a surrounding markdown code fence if the model wrapped its json output
		switch {
		case strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```"):
			content = strings.TrimSuffix(strings.TrimPrefix(content, "```json"), "```")
		case strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```"):
			content = strings.TrimSuffix(strings.TrimPrefix(content, "```"), "```")
		}
	}

	return &LargeLanguageModelTextualResponse{
		Content: content,
	}, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider
// (delegates to the wrapped concrete provider)
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
	return p.provider.GetReceiptImageRecognitionModelID()
}
// buildJsonRequestBody builds the chat completions JSON payload from the given
// request, model id and expected response format, and returns the marshalled bytes
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
	requestMessages := make([]any, 0)

	if request.SystemPrompt != "" {
		requestMessages = append(requestMessages, map[string]string{
			"role":    "system",
			"content": request.SystemPrompt,
		})
	}

	if len(request.UserPrompt) > 0 {
		if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
			// NOTE(review): the data-url mime type is hard-coded to image/png, but the
			// uploaded picture may be jpeg — most providers sniff the real type from the
			// payload, still this should be confirmed or derived from the actual file
			imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt)
			requestMessages = append(requestMessages, map[string]any{
				"role": "user",
				"content": []any{
					core.O{
						"type": "image_url",
						"image_url": core.O{
							"url": imageBase64Data,
						},
					},
				},
			})
		} else {
			requestMessages = append(requestMessages, map[string]string{
				"role":    "user",
				"content": string(request.UserPrompt),
			})
		}
	}

	requestBody := make(map[string]any)
	requestBody["model"] = modelId
	requestBody["stream"] = request.Stream
	requestBody["messages"] = requestMessages

	if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
		if request.ResponseJsonObjectType != nil {
			// generate an inline (non-referencing, expanded) json schema from the
			// expected response go type and request strict schema-conforming output
			schemeGenerator := jsonschema.Reflector{
				Anonymous:      true,
				DoNotReference: true,
				ExpandedStruct: true,
			}

			schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType)
			// clear the version so the $schema field is omitted from the generated schema
			schema.Version = ""

			requestBody["response_format"] = core.O{
				"type":        "json_schema",
				"json_schema": schema,
			}
		} else {
			// no concrete response type expected, just request any valid json object
			requestBody["response_format"] = core.O{
				"type": "json_object",
			}
		}
	}

	requestBodyBytes, err := json.Marshal(requestBody)

	if err != nil {
		log.Errorf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
		return nil, errs.ErrOperationFailed
	}

	log.Debugf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)

	return requestBodyBytes, nil
}
// newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider wraps a concrete chat
// completions provider with the common http large language model provider
func newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(provider OpenAIChatCompletionsLargeLanguageModelProvider) LargeLanguageModelProvider {
	return newCommonHttpLargeLanguageModelProvider(&OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: provider,
	})
}

View File

@@ -0,0 +1,157 @@
package llm
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/mayswind/ezbookkeeping/pkg/core"
)
// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt
// verifies that a textual user prompt is serialized as a plain chat message with a json_object response format
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	request := &LargeLanguageModelRequest{
		SystemPrompt: "You are a helpful assistant.",
		UserPrompt:   []byte("Hello, how are you?"),
	}

	bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)

	// ensure the generated body is well-formed json before comparing the raw bytes
	var body map[string]any
	err = json.Unmarshal(bodyBytes, &body)
	assert.Nil(t, err)

	assert.Equal(t, "{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
}

// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt
// verifies that an image user prompt is serialized as an image_url content part with a base64 data url
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	request := &LargeLanguageModelRequest{
		SystemPrompt:   "What's in this image?",
		UserPrompt:     []byte("fakedata"),
		UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
	}

	bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)

	// ensure the generated body is well-formed json before comparing the raw bytes
	var body map[string]any
	err = json.Unmarshal(bodyBytes, &body)
	assert.Nil(t, err)

	assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
}
// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse
// verifies that the content of the first choice's message is extracted from a well-formed response
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	response := `{
    "id": "test-123",
    "object": "chat.completion",
    "created": 1234567890,
    "model": "test",
    "usage": {
        "prompt_tokens": 13,
        "completion_tokens": 7,
        "total_tokens": 20
    },
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "This is a test response"
            }
        }
    ]
}`

	result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)
	assert.Equal(t, "This is a test response", result.Content)
}

// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse
// verifies that an empty (but present) content field is returned as-is rather than treated as an error
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	response := `{
    "id": "test-123",
    "object": "chat.completion",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {
                "role": "assistant",
                "content": ""
            }
        }
    ]
}`

	result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.Nil(t, err)
	assert.Equal(t, "", result.Content)
}
// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices
// verifies that a response with an empty choices array is rejected
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	response := `{
    "id": "test-123",
    "object": "chat.completion",
    "choices": []
}`

	_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.EqualError(t, err, "failed to request third party api")
}

// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent
// verifies that a choice message without a "content" field is rejected
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	response := `{
    "id": "chatcmpl-123",
    "object": "chat.completion",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {
                "role": "assistant"
            }
        }
    ]
}`

	_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.EqualError(t, err, "failed to request third party api")
}

// TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson
// verifies that a non-json response body is rejected
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
	provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
		provider: &OpenAILargeLanguageModelProvider{},
	}
	response := "error"

	_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
	assert.EqualError(t, err, "failed to request third party api")
}

View File

@@ -0,0 +1,58 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// openAICompatibleChatCompletionsPath is the chat completions api path appended to the configured base url
const openAICompatibleChatCompletionsPath = "chat/completions"

// OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider
type OpenAICompatibleLargeLanguageModelProvider struct {
	OpenAIChatCompletionsLargeLanguageModelProvider
	// OpenAICompatibleBaseURL is the base url of the openai compatible api server
	OpenAICompatibleBaseURL string
	// OpenAICompatibleAPIKey is the optional api key (may be empty, e.g. for self-hosted servers)
	OpenAICompatibleAPIKey string
	// ReceiptImageRecognitionModelID is the model id used for receipt image recognition
	ReceiptImageRecognitionModelID string
}
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider
func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
	httpRequest, err := http.NewRequest("POST", p.getFinalChatCompletionsRequestUrl(), nil)

	if err != nil {
		return nil, err
	}

	// the bearer token header is only attached when an api key is configured
	if p.OpenAICompatibleAPIKey != "" {
		httpRequest.Header.Set("Authorization", "Bearer "+p.OpenAICompatibleAPIKey)
	}

	return httpRequest, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider
// (may be an empty string when the model id is not set in the configuration)
func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
	return p.ReceiptImageRecognitionModelID
}
// getFinalChatCompletionsRequestUrl joins the configured base url with the chat
// completions path, tolerating a missing trailing slash and an empty base url
func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
	url := p.OpenAICompatibleBaseURL

	// guard the index below: an empty configured base url would otherwise panic
	if len(url) == 0 || url[len(url)-1] != '/' {
		url += "/"
	}

	return url + openAICompatibleChatCompletionsPath
}
// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
// (configured via the [llm] section items openai_compatible_base_url, openai_compatible_api_key
// and openai_compatible_receipt_image_recognition_model_id)
func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
	return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
		OpenAICompatibleBaseURL:        config.OpenAICompatibleBaseURL,
		OpenAICompatibleAPIKey:         config.OpenAICompatibleAPIKey,
		ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID,
	})
}

View File

@@ -0,0 +1,27 @@
package llm
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl verifies that the chat
// completions path is appended to the configured base url regardless of a trailing slash
func TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl(t *testing.T) {
	// table-driven form replaces the three copy-pasted arrange/assert blocks
	testCases := []struct {
		baseUrl  string
		expected string
	}{
		{"https://api.example.com/v1/", "https://api.example.com/v1/chat/completions"},
		{"https://api.example.com/v1", "https://api.example.com/v1/chat/completions"},
		{"https://example.com/api", "https://example.com/api/chat/completions"},
	}

	for _, testCase := range testCases {
		provider := &OpenAICompatibleLargeLanguageModelProvider{
			OpenAICompatibleBaseURL: testCase.baseUrl,
		}

		assert.Equal(t, testCase.expected, provider.getFinalChatCompletionsRequestUrl())
	}
}

View File

@@ -0,0 +1,43 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider
type OpenAILargeLanguageModelProvider struct {
	OpenAIChatCompletionsLargeLanguageModelProvider
	// OpenAIAPIKey is the api key sent as the bearer token to the OpenAI api
	OpenAIAPIKey string
	// ReceiptImageRecognitionModelID is the model id used for receipt image recognition
	ReceiptImageRecognitionModelID string
}

// openAIChatCompletionsUrl is the fixed OpenAI chat completions api endpoint
const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions"
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI provider
func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
	httpRequest, err := http.NewRequest("POST", openAIChatCompletionsUrl, nil)

	if err != nil {
		return nil, err
	}

	httpRequest.Header.Set("Authorization", "Bearer "+p.OpenAIAPIKey)

	return httpRequest, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider
// (may be an empty string when the model id is not set in the configuration)
func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
	return p.ReceiptImageRecognitionModelID
}
// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance
// (configured via the [llm] section items openai_api_key and openai_receipt_image_recognition_model_id)
func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
	return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{
		OpenAIAPIKey:                   config.OpenAIAPIKey,
		ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID,
	})
}

View File

@@ -0,0 +1,45 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider
type OpenRouterLargeLanguageModelProvider struct {
	OpenAIChatCompletionsLargeLanguageModelProvider
	// OpenRouterAPIKey is the api key sent as the bearer token to the OpenRouter api
	OpenRouterAPIKey string
	// ReceiptImageRecognitionModelID is the model id used for receipt image recognition
	ReceiptImageRecognitionModelID string
}

// openRouterChatCompletionsUrl is the fixed OpenRouter chat completions api endpoint
const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions"
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenRouter provider
func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
	httpRequest, err := http.NewRequest("POST", openRouterChatCompletionsUrl, nil)

	if err != nil {
		return nil, err
	}

	httpRequest.Header.Set("Authorization", "Bearer "+p.OpenRouterAPIKey)
	// extra headers used by openrouter to attribute requests to this application
	httpRequest.Header.Set("HTTP-Referer", "https://ezbookkeeping.mayswind.net/")
	httpRequest.Header.Set("X-Title", "ezBookkeeping")

	return httpRequest, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider
// (may be an empty string when the model id is not set in the configuration)
func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
	return p.ReceiptImageRecognitionModelID
}
// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance
// (configured via the [llm] section items openrouter_api_key and openrouter_receipt_image_recognition_model_id)
func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
	return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{
		OpenRouterAPIKey:               config.OpenRouterAPIKey,
		ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID,
	})
}

View File

@@ -0,0 +1,27 @@
package models
// RecognizedReceiptImageResponse represents a view-object of recognized receipt image response
type RecognizedReceiptImageResponse struct {
	// Type is the recognized transaction type
	Type TransactionType `json:"type"`
	// Time is the recognized transaction time (unix timestamp — presumably seconds, confirm with caller)
	Time int64 `json:"time,omitempty"`
	// CategoryId is the matched transaction category id (serialized as a string)
	CategoryId int64 `json:"categoryId,string,omitempty"`
	// SourceAccountId is the matched source account id (serialized as a string)
	SourceAccountId int64 `json:"sourceAccountId,string,omitempty"`
	// DestinationAccountId is the matched destination account id for transfers (serialized as a string)
	DestinationAccountId int64 `json:"destinationAccountId,string,omitempty"`
	// SourceAmount is the recognized source amount (presumably in minor currency units — confirm with caller)
	SourceAmount int64 `json:"sourceAmount,omitempty"`
	// DestinationAmount is the recognized destination amount for transfers
	DestinationAmount int64 `json:"destinationAmount,omitempty"`
	// TagIds is the list of matched tag ids
	TagIds []string `json:"tagIds,omitempty"`
	// Comment is the recognized transaction description
	Comment string `json:"comment,omitempty"`
}
// RecognizedReceiptImageResult represents the result of recognized receipt image
// (the raw object unmarshalled from the large language model json output; the jsonschema
// tags presumably drive the response schema sent to the model — confirm with caller)
type RecognizedReceiptImageResult struct {
	Type                   string   `json:"type,omitempty" jsonschema:"enum=income,enum=expense,enum=transfer" jsonschema_description:"Transaction type (income, expense, transfer)"`
	Time                   string   `json:"time" jsonschema:"format=date-time" jsonschema_description:"Transaction time in long date time format (YYYY-MM-DD HH:mm:ss, e.g. 2023-01-01 12:00:00)"`
	Amount                 string   `json:"amount,omitempty" jsonschema_description:"Transaction amount"`
	AccountName            string   `json:"account,omitempty" jsonschema_description:"Account name for the transaction"`
	CategoryName           string   `json:"category,omitempty" jsonschema_description:"Category name for the transaction"`
	TagNames               []string `json:"tags,omitempty" jsonschema_description:"List of tags associated with the transaction (maximum 10 tags allowed)"`
	Description            string   `json:"description,omitempty" jsonschema_description:"Transaction description"`
	DestinationAmount      string   `json:"destination_amount,omitempty" jsonschema_description:"Destination amount for transfer transactions"`
	DestinationAccountName string   `json:"destination_account,omitempty" jsonschema_description:"Destination account name for transfer transactions"`
}

View File

@@ -66,6 +66,13 @@ const (
WebDAVStorageType string = "webdav"
)
// Large language model provider types
const (
	OpenAILLMProvider           string = "openai"
	OpenAICompatibleLLMProvider string = "openai_compatible"
	OpenRouterLLMProvider       string = "openrouter"
	OllamaLLMProvider           string = "ollama"
)
// Uuid generator types
const (
InternalUuidGeneratorType string = "internal"
@@ -140,6 +147,9 @@ const (
defaultWebDAVRequestTimeout uint32 = 10000 // 10 seconds
defaultAIRecognitionPictureMaxSize uint32 = 10485760 // 10MB
defaultLargeLanguageModelAPIRequestTimeout uint32 = 60000 // 60 seconds
defaultInMemoryDuplicateCheckerCleanupInterval uint32 = 60 // 1 minutes
defaultDuplicateSubmissionsInterval uint32 = 300 // 5 minutes
@@ -281,6 +291,23 @@ type Config struct {
MinIOConfig *MinIOConfig
WebDAVConfig *WebDAVConfig
// Large Language Model
LLMProvider string
OpenAIAPIKey string
OpenAIReceiptImageRecognitionModelID string
OpenAICompatibleBaseURL string
OpenAICompatibleAPIKey string
OpenAICompatibleReceiptImageRecognitionModelID string
OpenRouterAPIKey string
OpenRouterReceiptImageRecognitionModelID string
OllamaServerURL string
OllamaReceiptImageRecognitionModelID string
TransactionFromAIImageRecognition bool
MaxAIRecognitionPictureFileSize uint32
LargeLanguageModelAPIRequestTimeout uint32
LargeLanguageModelAPIProxy string
LargeLanguageModelAPISkipTLSVerify bool
// Uuid
UuidGeneratorType string
UuidServerId uint8
@@ -426,6 +453,12 @@ func LoadConfiguration(configFilePath string) (*Config, error) {
return nil, err
}
err = loadLLMConfiguration(config, cfgFile, "llm")
if err != nil {
return nil, err
}
err = loadUuidConfiguration(config, cfgFile, "uuid")
if err != nil {
@@ -751,6 +784,46 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName
return nil
}
// loadLLMConfiguration loads the large language model related configuration items
// from the given section of the configuration file into config, returning
// errs.ErrInvalidLLMProvider when an unknown provider name is configured
func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error {
	llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider")

	// an empty value means the llm features are disabled; a switch replaces the
	// former if/else-if chain (each branch just assigned the same value back)
	switch llmProvider {
	case "", OpenAILLMProvider, OpenAICompatibleLLMProvider, OpenRouterLLMProvider, OllamaLLMProvider:
		config.LLMProvider = llmProvider
	default:
		return errs.ErrInvalidLLMProvider
	}

	config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key")
	config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id")

	config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url")
	config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key")
	config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id")

	config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key")
	config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id")

	config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url")
	config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id")

	config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false)
	config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize)

	config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system")
	config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout)
	config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false)

	return nil
}
func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error {
if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType {
config.UuidGeneratorType = InternalUuidGeneratorType

View File

@@ -4,6 +4,7 @@ type KnownTemplate string
// Known templates
const (
TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION KnownTemplate = "prompt/receipt_image_recognition"
)

View File

@@ -0,0 +1,181 @@
<template>
    <f7-sheet swipe-to-close swipe-handler=".swipe-handler" style="height:auto"
              :opened="show" @sheet:open="onSheetOpen" @sheet:closed="onSheetClosed">
        <f7-toolbar>
            <div class="swipe-handler"></div>
            <div class="left">
                <f7-link :class="{ 'disabled': loading || recognizing }" :text="tt('Choose from Library')" @click="showOpenImage"></f7-link>
            </div>
            <div class="right">
                <f7-link :class="{ 'disabled': loading || recognizing }" :text="tt('Take Photo')" @click="showCamera"></f7-link>
            </div>
        </f7-toolbar>
        <f7-page-content class="margin-top no-padding-top">
            <div class="padding-horizontal padding-bottom">
                <div class="image-container display-flex justify-content-center width-100 margin-bottom" style="height: 240px">
                    <img height="240px" :src="imageSrc" v-if="imageSrc" />
                    <div class="image-container-background display-flex justify-content-center align-items-center" v-if="!imageSrc">
                        <span>{{ tt('Please select a receipt or transaction image first') }}</span>
                    </div>
                </div>
                <f7-button large fill color="primary"
                           :class="{ 'disabled': loading || recognizing || !imageFile }"
                           :text="tt('Recognize')"
                           @click="confirm">
                </f7-button>
                <div class="margin-top text-align-center">
                    <f7-link :class="{ 'disabled': loading || recognizing }" @click="cancel" :text="tt('Cancel')"></f7-link>
                </div>
            </div>
        </f7-page-content>
        <input ref="imageInput" type="file" style="display: none" :accept="SUPPORTED_IMAGE_EXTENSIONS" @change="openImage($event)" />
        <input ref="cameraInput" type="file" style="display: none" :accept="SUPPORTED_IMAGE_EXTENSIONS" capture="environment" @change="openImage($event)" />
    </f7-sheet>
</template>

<script setup lang="ts">
// Bottom sheet that lets the user pick (or photograph) a receipt image,
// previews it, and sends it to the backend for AI transaction recognition.
import { ref, useTemplateRef } from 'vue';

import { useI18n } from '@/locales/helpers.ts';
import { useI18nUIComponents, showLoading, hideLoading } from '@/lib/ui/mobile.ts';

import { useTransactionsStore } from '@/stores/transaction.ts';

import { KnownFileType } from '@/core/file.ts';
import { SUPPORTED_IMAGE_EXTENSIONS } from '@/consts/file.ts';
import type { RecognizedReceiptImageResponse } from '@/models/large_language_model.ts';

import { compressJpgImage } from '@/lib/ui/common.ts';
import logger from '@/lib/logger.ts';

defineProps<{
    show: boolean;
}>();

const emit = defineEmits<{
    (e: 'update:show', value: boolean): void;
    // Emitted with the recognition result after a successful round trip.
    (e: 'recognition:change', value: RecognizedReceiptImageResponse): void;
}>();

const { tt } = useI18n();
const { showToast } = useI18nUIComponents();

const transactionsStore = useTransactionsStore();

const imageInput = useTemplateRef<HTMLInputElement>('imageInput');
const cameraInput = useTemplateRef<HTMLInputElement>('cameraInput');

const loading = ref<boolean>(false);
const recognizing = ref<boolean>(false);
const imageFile = ref<File | null>(null);
const imageSrc = ref<string | undefined>(undefined);

// Releases the current preview object URL (if any) so the underlying blob
// can be garbage-collected, and clears the selected image state.
// Without this, every new selection / sheet open leaked an object URL.
function releaseImage(): void {
    if (imageSrc.value) {
        URL.revokeObjectURL(imageSrc.value);
    }

    imageFile.value = null;
    imageSrc.value = undefined;
}

// Compresses the chosen file to a bounded JPEG and shows it as the preview.
function loadImage(file: File): void {
    compressJpgImage(file, 1280, 1280, 0.8).then(blob => {
        releaseImage(); // drop the previous preview URL before creating a new one
        imageFile.value = KnownFileType.JPG.createFileFromBlob(blob, "image");
        imageSrc.value = URL.createObjectURL(blob);
    }).catch(error => {
        releaseImage();
        logger.error('failed to compress image', error);
        showToast('Unable to load image');
    });
}

function showOpenImage(): void {
    if (loading.value || recognizing.value) {
        return;
    }

    imageInput.value?.click();
}

function showCamera(): void {
    if (loading.value || recognizing.value) {
        return;
    }

    cameraInput.value?.click();
}

// Shared change handler for both hidden file inputs.
function openImage(event: Event): void {
    if (!event || !event.target) {
        return;
    }

    const el = event.target as HTMLInputElement;

    if (!el.files || !el.files.length || !el.files[0]) {
        return;
    }

    const image = el.files[0] as File;
    el.value = ''; // reset so selecting the same file again still fires "change"
    loadImage(image);
}

// Sends the selected image to the store for recognition and emits the result.
function confirm(): void {
    if (loading.value || recognizing.value || !imageFile.value) {
        return;
    }

    recognizing.value = true;
    showLoading(() => recognizing.value);

    transactionsStore.recognizeReceiptImage({
        imageFile: imageFile.value
    }).then(response => {
        recognizing.value = false;
        hideLoading();

        emit('update:show', false);
        emit('recognition:change', response);
    }).catch(error => {
        recognizing.value = false;
        hideLoading();

        if (!error.processed) {
            showToast(error.message || error);
        }
    });
}

function cancel(): void {
    close();
}

function close(): void {
    emit('update:show', false);
    loading.value = false;
    recognizing.value = false;
    releaseImage();
}

// Start every open from a clean state (the sheet instance is reused).
function onSheetOpen(): void {
    loading.value = false;
    recognizing.value = false;
    releaseImage();
}

function onSheetClosed(): void {
    close();
}
</script>

<style>
.image-container {
    border: 1px solid var(--f7-page-master-border-color);
}

.image-container-background {
    width: 100%;
    height: 100%;
    background-color: var(--f7-page-bg-color);
}
</style>

View File

@@ -7,6 +7,7 @@ export const DEFAULT_API_TIMEOUT: number = 10000; // 10s
export const DEFAULT_UPLOAD_API_TIMEOUT: number = 30000; // 30s
export const DEFAULT_EXPORT_API_TIMEOUT: number = 180000; // 180s
export const DEFAULT_IMPORT_API_TIMEOUT: number = 1800000; // 1800s
export const DEFAULT_LLM_API_TIMEOUT: number = 600000; // 600s
export const GOOGLE_MAP_JAVASCRIPT_URL: string = 'https://maps.googleapis.com/maps/api/js';
export const BAIDU_MAP_JAVASCRIPT_URL: string = 'https://api.map.baidu.com/api?v=3.0';

View File

@@ -6,6 +6,7 @@ export class KnownFileType {
public static readonly TSV = new KnownFileType('tsv', 'text/tab-separated-values');
public static readonly MARKDOWN = new KnownFileType('md', 'text/markdown');
public static readonly JS = new KnownFileType('js', 'application/javascript');
public static readonly JPG = new KnownFileType('jpg', 'image/jpeg');
public readonly extension: string;
public readonly contentType: string;
@@ -37,6 +38,12 @@ export class KnownFileType {
});
}
// Wraps the given blob in a File whose name gets this type's extension
// appended (via formatFileName) and whose MIME type is this type's contentType.
public createFileFromBlob(blob: Blob, fileName: string): File {
    return new File([blob], this.formatFileName(fileName), {
        type: this.contentType,
    });
}
public static parse(extension: string): KnownFileType | undefined {
return KnownFileType.allInstancesByExtension[extension];
}

View File

@@ -35,6 +35,10 @@ export function isMCPServerEnabled(): boolean {
return getServerSetting('mcp') === 1;
}
// Whether the server enables creating transactions via AI receipt image
// recognition (server setting "llmt" set to 1).
export function isTransactionFromAIImageRecognitionEnabled(): boolean {
    const enabled = getServerSetting('llmt');
    return enabled === 1;
}
// Returns the per-language login page tips configured on the server
// (server setting "lpt"), keyed by language.
export function getLoginPageTips(): Record<string, string> {
    const tips = getServerSetting('lpt');
    return tips as Record<string, string>;
}

View File

@@ -21,6 +21,7 @@ import {
DEFAULT_UPLOAD_API_TIMEOUT,
DEFAULT_EXPORT_API_TIMEOUT,
DEFAULT_IMPORT_API_TIMEOUT,
DEFAULT_LLM_API_TIMEOUT,
GOOGLE_MAP_JAVASCRIPT_URL,
BAIDU_MAP_JAVASCRIPT_URL,
AMAP_JAVASCRIPT_URL
@@ -134,6 +135,9 @@ import type {
import type {
UserApplicationCloudSettingsUpdateRequest
} from '@/models/user_app_cloud_setting.ts';
import type {
RecognizedReceiptImageResponse
} from '@/models/large_language_model.ts';
import {
getCurrentToken,
@@ -635,6 +639,13 @@ export default {
deleteTransactionTemplate: (req: TransactionTemplateDeleteRequest): ApiResponsePromise<boolean> => {
return axios.post<ApiResponse<boolean>>('v1/transaction/templates/delete.json', req);
},
// Uploads a receipt image as multipart form data and asks the backend LLM
// endpoint to extract transaction fields. Uses the long LLM timeout because
// model inference can take far longer than ordinary API calls.
recognizeReceiptImage: ({ imageFile }: { imageFile: File }): ApiResponsePromise<RecognizedReceiptImageResponse> => {
    return axios.postForm<ApiResponse<RecognizedReceiptImageResponse>>('v1/llm/transactions/recognize_receipt_image.json', {
        image: imageFile
    }, {
        timeout: DEFAULT_LLM_API_TIMEOUT
    });
},
getLatestExchangeRates: (param: { ignoreError?: boolean }): ApiResponsePromise<LatestExchangeRateResponse> => {
return axios.get<ApiResponse<LatestExchangeRateResponse>>('v1/exchange_rates/latest.json', {
ignoreError: !!param.ignoreError,

View File

@@ -3,6 +3,7 @@ import Clipboard from 'clipboard';
import { ThemeType } from '@/core/theme.ts';
import { type AmountColor, PresetAmountColor } from '@/core/color.ts';
import { KnownFileType } from '@/core/file.ts';
import logger from '../logger.ts';
@@ -134,6 +135,64 @@ export function startDownloadFile(fileName: string, fileData: Blob): void {
dataLink.click();
}
// Loads an image file, scales it down (preserving aspect ratio, never
// upscaling) so it fits within maxWidth x maxHeight, and re-encodes it as a
// JPEG blob at the given quality (0..1). Resolves with the compressed blob;
// rejects if the file cannot be read, the image cannot be decoded, or the
// canvas cannot produce a blob.
export function compressJpgImage(file: File, maxWidth: number, maxHeight: number, quality: number): Promise<Blob> {
    return new Promise((resolve, reject) => {
        const fileReader = new FileReader();

        fileReader.onload = (loadEvent) => {
            const image = new Image();

            image.onload = () => {
                // Only shrink; images already within bounds keep their size.
                const scale = Math.min(maxWidth / image.width, maxHeight / image.height, 1);
                const targetWidth = Math.floor(image.width * scale);
                const targetHeight = Math.floor(image.height * scale);

                const canvas = document.createElement('canvas');
                const context = canvas.getContext('2d');

                if (!context) {
                    reject(new Error('failed to get canvas context'));
                    return;
                }

                canvas.width = targetWidth;
                canvas.height = targetHeight;
                context.drawImage(image, 0, 0, targetWidth, targetHeight);

                canvas.toBlob((compressedBlob) => {
                    if (!compressedBlob) {
                        reject(new Error('failed to compress image'));
                        return;
                    }

                    resolve(compressedBlob);
                }, KnownFileType.JPG.contentType, quality);
            };

            image.onerror = (error) => {
                reject(error);
            };

            const dataUrl = loadEvent.target ? loadEvent.target.result : null;

            if (dataUrl) {
                image.src = dataUrl as string;
            } else {
                reject(new Error('failed to read file'));
            }
        };

        fileReader.onerror = (error) => {
            reject(error);
        };

        fileReader.readAsDataURL(file);
    });
}
export function clearBrowserCaches(): Promise<void> {
if (!window.caches) {
logger.error('caches API is not supported in this browser');

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Abfrageelemente dürfen nicht leer sein",
"query items too much": "Zu viele Abfrageelemente",
"query items have invalid item": "Ungültiges Element in Abfrageelementen",
@@ -1389,6 +1393,7 @@
"Refresh": "Aktualisieren",
"Clear": "Löschen",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Keine",
"Unspecified": "Nicht angegeben",
"Not set": "Nicht festgelegt",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Kategorie",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "There are no query items",
"query items too much": "There are too many query items",
"query items have invalid item": "There is invalid item in query items",
@@ -1389,6 +1393,7 @@
"Refresh": "Refresh",
"Clear": "Clear",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "None",
"Unspecified": "Unspecified",
"Not set": "Not set",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Category",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "--",
"query items too much": "--",
"query items have invalid item": "Hay un elemento no válido en los elementos de consulta",
@@ -1389,6 +1393,7 @@
"Refresh": "Refrescar",
"Clear": "Claro",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Ninguno",
"Unspecified": "No especificado",
"Not set": "No establecido",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categoría",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Non ci sono elementi di query",
"query items too much": "Ci sono troppi elementi di query",
"query items have invalid item": "C'è un elemento non valido negli elementi di query",
@@ -1389,6 +1393,7 @@
"Refresh": "Aggiorna",
"Clear": "Pulisci",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Nessuno",
"Unspecified": "Non specificato",
"Not set": "Non impostato",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplica (con ora)",
"Duplicate (With Geographic Location)": "Duplica (con posizione geografica)",
"Duplicate (With Time and Geographic Location)": "Duplica (con ora e posizione geografica)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categoria",
"Secondary Category": "Categoria secondaria",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "クエリ項目がありません",
"query items too much": "クエリ項目が多すぎます",
"query items have invalid item": "クエリ項目に無効な項目があります",
@@ -1389,6 +1393,7 @@
"Refresh": "リフレッシュ",
"Clear": "消去",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "なし",
"Unspecified": "不特定",
"Not set": "セットしていない",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "複製(時間含む)",
"Duplicate (With Geographic Location)": "複製(地理座標を含む)",
"Duplicate (With Time and Geographic Location)": "複製(時間と地理座標を含む)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "カテゴリ",
"Secondary Category": "二次カテゴリ",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Wisselkoersgegevens voor basisvaluta kunnen niet worden bijgewerkt",
"cannot delete exchange rate data for base currency": "Wisselkoersgegevens voor basisvaluta kunnen niet worden verwijderd",
"mcp server is not enabled": "MCP-server is niet ingeschakeld",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Geen zoekitems opgegeven",
"query items too much": "Te veel zoekitems",
"query items have invalid item": "Ongeldig item in zoekitems",
@@ -1389,6 +1393,7 @@
"Refresh": "Vernieuwen",
"Clear": "Wissen",
"Generate": "Genereren",
"Recognize": "Recognize",
"None": "Geen",
"Unspecified": "Niet gespecificeerd",
"Not set": "Niet ingesteld",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Dupliceren (met tijd)",
"Duplicate (With Geographic Location)": "Dupliceren (met geografische locatie)",
"Duplicate (With Time and Geographic Location)": "Dupliceren (met tijd en locatie)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categorie",
"Secondary Category": "Secundaire categorie",
"Expense Category": "Uitgavecategorie",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Não é possível atualizar dados de taxa de câmbio para a moeda base",
"cannot delete exchange rate data for base currency": "Não é possível excluir dados de taxa de câmbio para a moeda base",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Não há itens de consulta",
"query items too much": "Há muitos itens de consulta",
"query items have invalid item": "Há item inválido nos itens de consulta",
@@ -1389,6 +1393,7 @@
"Refresh": "Atualizar",
"Clear": "Limpar",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Nenhum",
"Unspecified": "Não especificado",
"Not set": "Não definido",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicar (Com Tempo)",
"Duplicate (With Geographic Location)": "Duplicar (Com Localização Geográfica)",
"Duplicate (With Time and Geographic Location)": "Duplicar (Com Tempo e Localização Geográfica)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categoria",
"Secondary Category": "Categoria Secundária",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Нет элементов запроса",
"query items too much": "Слишком много элементов запроса",
"query items have invalid item": "В элементах запроса присутствует недопустимый элемент",
@@ -1389,6 +1393,7 @@
"Refresh": "Обновить",
"Clear": "Очистить",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Нет",
"Unspecified": "Не указано",
"Not set": "Не установлено",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Категория",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Елементи запиту не можуть бути порожніми",
"query items too much": "Занадто багато елементів запиту",
"query items have invalid item": "Запит містить недійсний елемент",
@@ -1389,6 +1393,7 @@
"Refresh": "Оновити",
"Clear": "Очистити",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Немає",
"Unspecified": "Не вказано",
"Not set": "Не встановлено",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Дублювати (з часом)",
"Duplicate (With Geographic Location)": "Дублювати (з геолокацією)",
"Duplicate (With Time and Geographic Location)": "Дублювати (з часом і геолокацією)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Категорія",
"Secondary Category": "Вторинна категорія",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
"llm provider is not enabled": "Large Language Model provider is not enabled",
"no image for AI recognition": "There is no image for AI recognition",
"image for AI recognition is empty": "Image for AI recognition file is empty",
"exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Không có mục truy vấn",
"query items too much": "Có quá nhiều mục truy vấn",
"query items have invalid item": "Có mục không hợp lệ trong các mục truy vấn",
@@ -1389,6 +1393,7 @@
"Refresh": "Làm mới",
"Clear": "Xóa",
"Generate": "Generate",
"Recognize": "Recognize",
"None": "Không có",
"Unspecified": "Không xác định",
"Not set": "Not set",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
"AI Image Recognition": "AI Image Recognition",
"Choose from Library": "Choose from Library",
"Take Photo": "Take Photo",
"Unable to load image": "Unable to load image",
"Unable to recognize image": "Unable to recognize image",
"Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
"Release to load image": "Release to load image",
"Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Danh mục",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "不能更新默认货币的汇率数据",
"cannot delete exchange rate data for base currency": "不能删除默认货币的汇率数据",
"mcp server is not enabled": "MCP 服务器没有启用",
"llm provider is not enabled": "大语言模型服务提供者没有启用",
"no image for AI recognition": "没有用于AI识别的图片",
"image for AI recognition is empty": "用于AI识别的图片为空",
"exceed the maximum size of image file for AI recognition": "用于AI识别的图片超出了允许的最大文件大小",
"query items cannot be blank": "请求项目不能为空",
"query items too much": "请求项目过多",
"query items have invalid item": "请求项目中有非法项目",
@@ -1389,6 +1393,7 @@
"Refresh": "刷新",
"Clear": "清除",
"Generate": "生成",
"Recognize": "识别",
"None": "无",
"Unspecified": "未指定",
"Not set": "未设置",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "复制 (含时间)",
"Duplicate (With Geographic Location)": "复制 (含地理位置)",
"Duplicate (With Time and Geographic Location)": "复制 (含时间和地理位置)",
"AI Image Recognition": "AI识图",
"Choose from Library": "从图库选择",
"Take Photo": "拍照",
"Unable to load image": "无法加载图片",
"Unable to recognize image": "无法识别图片",
"Drag and drop a receipt or transaction image here, or click to select one": "拖拽收据或交易图片到此处,或点击选择图片",
"Release to load image": "释放以加载图片",
"Please select a receipt or transaction image first": "请先选择收据或交易图片",
"Category": "分类",
"Secondary Category": "二级分类",
"Expense Category": "支出分类",

View File

@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "不能更新基準貨幣的匯率資料",
"cannot delete exchange rate data for base currency": "不能刪除基準貨幣的匯率資料",
"mcp server is not enabled": "MCP 伺服器未啟用",
"llm provider is not enabled": "大型語言模型服務提供者未啟用",
"no image for AI recognition": "沒有用於AI識別的圖片檔案",
"image for AI recognition is empty": "用於AI識別的圖片檔案為空",
"exceed the maximum size of image file for AI recognition": "用於AI識別的圖片超出了允許的最大檔案大小",
"query items cannot be blank": "查詢項目不能為空",
"query items too much": "查詢項目過多",
"query items have invalid item": "查詢項目中有非法項目",
@@ -1389,6 +1393,7 @@
"Refresh": "重新載入",
"Clear": "清除",
"Generate": "產生",
"Recognize": "識別",
"None": "無",
"Unspecified": "未指定",
"Not set": "未設置",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "複製 (含時間)",
"Duplicate (With Geographic Location)": "複製 (含地理位置)",
"Duplicate (With Time and Geographic Location)": "複製 (含時間和地理位置)",
"AI Image Recognition": "AI識圖",
"Choose from Library": "從相簿選擇",
"Take Photo": "拍照",
"Unable to load image": "無法載入圖片",
"Unable to recognize image": "無法識別圖片",
"Drag and drop a receipt or transaction image here, or click to select one": "將收據或交易圖片拖放到此處,或點擊以選擇圖片",
"Release to load image": "放開以載入圖片",
"Please select a receipt or transaction image first": "請先選擇收據或交易圖片",
"Category": "分類",
"Secondary Category": "次分類",
"Expense Category": "支出分類",

View File

@@ -79,6 +79,7 @@ import MapSheet from '@/components/mobile/MapSheet.vue';
import TransactionTagSelectionSheet from '@/components/mobile/TransactionTagSelectionSheet.vue';
import ScheduleFrequencySheet from '@/components/mobile/ScheduleFrequencySheet.vue';
import AccountBalanceTrendsBarChart from '@/components/mobile/AccountBalanceTrendsBarChart.vue';
import AIImageRecognitionSheet from '@/components/mobile/AIImageRecognitionSheet.vue';
import TextareaAutoSize from '@/directives/mobile/textareaAutoSize.ts';
@@ -170,8 +171,9 @@ app.component('InformationSheet', InformationSheet);
app.component('NumberPadSheet', NumberPadSheet);
app.component('MapSheet', MapSheet);
app.component('TransactionTagSelectionSheet', TransactionTagSelectionSheet);
app.component('ScheduleFrequencySheet', ScheduleFrequencySheet);
// Register each component exactly once: AccountBalanceTrendsBarChart was
// previously registered twice, which triggers a Vue duplicate-registration warning.
app.component('AccountBalanceTrendsBarChart', AccountBalanceTrendsBarChart);
app.component('AIImageRecognitionSheet', AIImageRecognitionSheet);

app.directive('TextareaAutoSize', TextareaAutoSize);

View File

@@ -0,0 +1,11 @@
// Transaction fields extracted by the LLM from a receipt / transaction image.
// Every field except "type" is optional because recognition may be partial.
export interface RecognizedReceiptImageResponse {
    // Transaction type code; exact enum values are defined server-side — TODO confirm mapping.
    readonly type: number;
    // Transaction time; presumably a unix timestamp — verify against the API.
    readonly time?: number;
    readonly categoryId?: string;
    readonly sourceAccountId?: string;
    // Destination account; NOTE(review): likely only set for transfers — confirm.
    readonly destinationAccountId?: string;
    readonly sourceAmount?: number;
    readonly destinationAmount?: number;
    readonly tagIds?: string[];
    readonly comment?: string;
}

View File

@@ -33,6 +33,9 @@ import {
import {
type ExportTransactionDataRequest
} from '@/models/data_management.ts';
import type {
RecognizedReceiptImageResponse
} from '@/models/large_language_model.ts';
import {
getUserTransactionDraft,
@@ -1157,6 +1160,31 @@ export const useTransactionsStore = defineStore('transactions', () => {
});
}
// Uploads the given receipt image to the recognition API and resolves with the
// extracted transaction fields. Rejects with a displayable error object when
// the server reports a failure, and with the raw error when it was already
// processed elsewhere.
function recognizeReceiptImage({ imageFile }: { imageFile: File }): Promise<RecognizedReceiptImageResponse> {
    return new Promise((resolve, reject) => {
        services.recognizeReceiptImage({ imageFile }).then(response => {
            const data = response.data;

            if (data && data.success && data.result) {
                resolve(data.result);
            } else {
                reject({ message: 'Unable to recognize image' });
            }
        }).catch(error => {
            logger.error('failed to recognize image', error);

            const responseData = error.response ? error.response.data : undefined;

            if (responseData && responseData.errorMessage) {
                reject({ error: responseData });
            } else if (error.processed) {
                reject(error);
            } else {
                reject({ message: 'Unable to recognize image' });
            }
        });
    });
}
function parseImportDsvFile({ fileType, fileEncoding, importFile }: { fileType: string, fileEncoding?: string, importFile: File }): Promise<string[][]> {
return new Promise((resolve, reject) => {
services.parseImportDsvFile({ fileType, fileEncoding, importFile }).then(response => {
@@ -1370,6 +1398,7 @@ export const useTransactionsStore = defineStore('transactions', () => {
getTransaction,
saveTransaction,
deleteTransaction,
recognizeReceiptImage,
parseImportDsvFile,
parseImportTransaction,
importTransactions,

View File

@@ -63,11 +63,16 @@
<v-btn class="ms-3" color="default" variant="outlined"
:disabled="loading || !canAddTransaction" @click="add()">
{{ tt('Add') }}
<v-menu activator="parent" :open-on-hover="true" v-if="allTransactionTemplates && allTransactionTemplates.length">
<v-menu activator="parent" :open-on-hover="true" v-if="isTransactionFromAIImageRecognitionEnabled() || (allTransactionTemplates && allTransactionTemplates.length)">
<v-list>
<v-list-item :title="template.name"
<v-list-item key="AIImageRecognition"
:title="tt('AI Image Recognition')"
:prepend-icon="mdiMagicStaff"
v-if="isTransactionFromAIImageRecognitionEnabled()"
@click="addByRecognizingImage"></v-list-item>
<v-list-item :key="template.id"
:title="template.name"
:prepend-icon="mdiTextBoxOutline"
:key="template.id"
v-for="template in allTransactionTemplates"
@click="add(template)"></v-list-item>
</v-list>
@@ -620,6 +625,7 @@
@error="onShowDateRangeError" />
<edit-dialog ref="editDialog" :type="TransactionEditPageType.Transaction" />
<a-i-image-recognition-dialog ref="aiImageRecognitionDialog" />
<import-dialog ref="importDialog" :persistent="true" />
<v-dialog width="800" v-model="showFilterAccountDialog">
@@ -647,6 +653,7 @@ import PaginationButtons from '@/components/desktop/PaginationButtons.vue';
import ConfirmDialog from '@/components/desktop/ConfirmDialog.vue';
import SnackBar from '@/components/desktop/SnackBar.vue';
import EditDialog from './list/dialogs/EditDialog.vue';
import AIImageRecognitionDialog from './list/dialogs/AIImageRecognitionDialog.vue';
import ImportDialog from './import/ImportDialog.vue';
import AccountFilterSettingsCard from '@/views/desktop/common/cards/AccountFilterSettingsCard.vue';
import CategoryFilterSettingsCard from '@/views/desktop/common/cards/CategoryFilterSettingsCard.vue';
@@ -716,7 +723,7 @@ import {
categoryTypeToTransactionType,
transactionTypeToCategoryType
} from '@/lib/category.ts';
import { isDataExportingEnabled, isDataImportingEnabled } from '@/lib/server_settings.ts';
import { isDataExportingEnabled, isDataImportingEnabled, isTransactionFromAIImageRecognitionEnabled } from '@/lib/server_settings.ts';
import { startDownloadFile } from '@/lib/ui/common.ts';
import { scrollToSelectedItem } from '@/lib/ui/desktop.ts';
import logger from '@/lib/logger.ts';
@@ -738,6 +745,7 @@ import {
mdiMinusBoxMultipleOutline,
mdiCloseBoxMultipleOutline,
mdiPound,
mdiMagicStaff,
mdiTextBoxOutline
} from '@mdi/js';
@@ -760,6 +768,7 @@ const props = defineProps<TransactionListProps>();
type ConfirmDialogType = InstanceType<typeof ConfirmDialog>;
type SnackBarType = InstanceType<typeof SnackBar>;
type EditDialogType = InstanceType<typeof EditDialog>;
type AIImageRecognitionDialogType = InstanceType<typeof AIImageRecognitionDialog>;
type ImportDialogType = InstanceType<typeof ImportDialog>;
interface TransactionTemplateWithIcon {
@@ -859,6 +868,7 @@ const tagFilterMenu = useTemplateRef<VMenu>('tagFilterMenu');
const confirmDialog = useTemplateRef<ConfirmDialogType>('confirmDialog');
const snackbar = useTemplateRef<SnackBarType>('snackbar');
const editDialog = useTemplateRef<EditDialogType>('editDialog');
const aiImageRecognitionDialog = useTemplateRef<AIImageRecognitionDialogType>('aiImageRecognitionDialog');
const importDialog = useTemplateRef<ImportDialogType>('importDialog');
const activeTab = ref<string>('transactionPage');
@@ -1597,6 +1607,33 @@ function add(template?: TransactionTemplate): void {
});
}
// Opens the AI image recognition dialog and, once the image has been
// recognized, pre-fills the transaction edit dialog with the extracted fields.
// The list is reloaded after the transaction is saved.
function addByRecognizingImage(): void {
    aiImageRecognitionDialog.value?.open().then(recognized => {
        const prefilledTransaction = {
            time: recognized.time,
            type: recognized.type,
            categoryId: recognized.categoryId,
            accountId: recognized.sourceAccountId,
            destinationAccountId: recognized.destinationAccountId,
            amount: recognized.sourceAmount,
            destinationAmount: recognized.destinationAmount,
            tagIds: recognized.tagIds ? recognized.tagIds.join(',') : undefined,
            comment: recognized.comment,
            noTransactionDraft: true
        };

        editDialog.value?.open(prefilledTransaction).then(editResult => {
            if (editResult && editResult.message) {
                snackbar.value?.showMessage(editResult.message);
            }

            reload(false, false);
        }).catch(error => {
            if (error) {
                snackbar.value?.showError(error);
            }
        });
    });
}
function importTransaction(): void {
importDialog.value?.open().then(() => {
reload(false, false);

View File

@@ -0,0 +1,208 @@
<template>
<v-dialog width="800" :persistent="loading || recognizing || !!imageFile" v-model="showState">
<v-card class="pa-2 pa-sm-4 pa-md-4">
<template #title>
<div class="d-flex align-center justify-center">
<h4 class="text-h4">{{ tt('AI Image Recognition') }}</h4>
</div>
</template>
<v-card-text class="d-flex justify-center w-100 my-md-4 pt-0">
<div class="w-100 border position-relative"
@dragenter.prevent="onDragEnter"
@dragover.prevent
@dragleave.prevent="onDragLeave"
@drop.prevent="onDrop">
<div class="d-flex w-100 fill-height justify-center align-center justify-content-center"
:class="{ 'dropzone': true, 'dropzone-dragover': isDragOver }" style="height: 480px">
<h3 v-if="!imageFile && !isDragOver">{{ tt('Drag and drop a receipt or transaction image here, or click to select one') }}</h3>
<h3 v-if="isDragOver">{{ tt('Release to load image') }}</h3>
</div>
<v-img height="480px" :class="{ 'cursor-pointer': !loading || !recognizing || !isDragOver }"
:src="imageSrc" @click="showOpenImageDialog">
<template #placeholder>
<div class="w-100 fill-height bg-grey-200"></div>
</template>
</v-img>
</div>
</v-card-text>
<v-card-text class="overflow-y-visible">
<div ref="buttonContainer" class="w-100 d-flex justify-center gap-4">
<v-btn :disabled="loading || recognizing || !imageFile" @click="recognize">
{{ tt('Recognize') }}
<v-progress-circular indeterminate size="22" class="ms-2" v-if="recognizing"></v-progress-circular>
</v-btn>
<v-btn color="secondary" variant="tonal" :disabled="loading || recognizing"
@click="cancel">{{ tt('Cancel') }}</v-btn>
</div>
</v-card-text>
</v-card>
</v-dialog>
<snack-bar ref="snackbar" />
<input ref="imageInput" type="file" style="display: none" :accept="SUPPORTED_IMAGE_EXTENSIONS" @change="openImage($event)" />
</template>
<script setup lang="ts">
import SnackBar from '@/components/desktop/SnackBar.vue';
import { ref, useTemplateRef } from 'vue';
import { useI18n } from '@/locales/helpers.ts';
import { useTransactionsStore } from '@/stores/transaction.ts';
import { KnownFileType } from '@/core/file.ts';
import { SUPPORTED_IMAGE_EXTENSIONS } from '@/consts/file.ts';
import type { RecognizedReceiptImageResponse } from '@/models/large_language_model.ts';
import { compressJpgImage } from '@/lib/ui/common.ts';
import logger from '@/lib/logger.ts';
type SnackBarType = InstanceType<typeof SnackBar>;
const { tt } = useI18n();
const transactionsStore = useTransactionsStore();
const snackbar = useTemplateRef<SnackBarType>('snackbar');
const imageInput = useTemplateRef<HTMLInputElement>('imageInput');
let resolveFunc: ((response: RecognizedReceiptImageResponse) => void) | null = null;
let rejectFunc: ((reason?: unknown) => void) | null = null;
const showState = ref<boolean>(false);
const loading = ref<boolean>(false);
const recognizing = ref<boolean>(false);
const imageFile = ref<File | null>(null);
const imageSrc = ref<string | undefined>(undefined);
const isDragOver = ref<boolean>(false);
// Compresses the selected image to a bounded-size JPEG for upload and shows it
// in the preview. The compressed file (not the original) is what is uploaded.
function loadImage(file: File): void {
    compressJpgImage(file, 1280, 1280, 0.8).then(blob => {
        // release the previous preview's object URL before replacing it,
        // otherwise every selected image leaks a blob URL for the page lifetime
        if (imageSrc.value) {
            URL.revokeObjectURL(imageSrc.value);
        }

        imageFile.value = KnownFileType.JPG.createFileFromBlob(blob, "image");
        imageSrc.value = URL.createObjectURL(blob);
    }).catch(error => {
        if (imageSrc.value) {
            URL.revokeObjectURL(imageSrc.value);
        }

        imageFile.value = null;
        imageSrc.value = undefined;
        logger.error('failed to compress image', error);
        snackbar.value?.showError('Unable to load image');
    });
}
// Resets the dialog state and shows it. Returns a promise that is fulfilled
// with the recognition result (see recognize()) or rejected when the user
// cancels (see cancel()).
function open(): Promise<RecognizedReceiptImageResponse> {
    loading.value = false;
    recognizing.value = false;
    imageFile.value = null;
    imageSrc.value = undefined;
    showState.value = true;

    return new Promise((resolve, reject) => {
        resolveFunc = resolve;
        rejectFunc = reject;
    });
}
// Forwards a click on the preview area to the hidden file input, unless the
// dialog is busy or a drag operation is in progress.
function showOpenImageDialog(): void {
    const busy = loading.value || recognizing.value || isDragOver.value;

    if (!busy) {
        imageInput.value?.click();
    }
}
// Handles the hidden file input's change event: grabs the chosen file, clears
// the input value (so re-selecting the same file fires change again) and loads
// the file into the preview.
function openImage(event: Event): void {
    const inputElement = event && event.target ? event.target as HTMLInputElement : null;

    if (!inputElement || !inputElement.files || !inputElement.files.length || !inputElement.files[0]) {
        return;
    }

    const selectedImage = inputElement.files[0] as File;
    inputElement.value = '';

    loadImage(selectedImage);
}
// Sends the compressed image for server-side recognition. On success the
// promise returned by open() is fulfilled and the dialog closes; on failure
// the dialog stays open so the user can retry.
function recognize(): void {
    const image = imageFile.value;

    if (!image || loading.value || recognizing.value) {
        return;
    }

    recognizing.value = true;

    transactionsStore.recognizeReceiptImage({
        imageFile: image
    }).then(result => {
        resolveFunc?.(result);
        showState.value = false;
        recognizing.value = false;
    }).catch(error => {
        recognizing.value = false;

        if (!error.processed) {
            snackbar.value?.showError(error);
        }
    });
}
// Rejects the promise returned by open() and resets the dialog state.
function cancel(): void {
    rejectFunc?.();
    showState.value = false;
    loading.value = false;
    recognizing.value = false;
    imageFile.value = null;

    // release the preview's object URL so the underlying blob can be freed;
    // without this every opened image leaks for the page lifetime
    if (imageSrc.value) {
        URL.revokeObjectURL(imageSrc.value);
    }

    imageSrc.value = undefined;
}
// Highlights the drop zone when a drag enters it, unless the dialog is busy.
function onDragEnter(): void {
    if (!loading.value && !recognizing.value) {
        isDragOver.value = true;
    }
}
// Removes the drop-zone highlight when the dragged item leaves the area.
function onDragLeave(): void {
    isDragOver.value = false;
}
// Loads the first dropped file as the receipt image, unless the dialog is busy.
function onDrop(event: DragEvent): void {
    if (loading.value || recognizing.value) {
        return;
    }

    isDragOver.value = false;

    const droppedFiles = event.dataTransfer ? event.dataTransfer.files : null;

    if (droppedFiles && droppedFiles.length && droppedFiles[0]) {
        loadImage(droppedFiles[0] as File);
    }
}
// open() is the component's only public API; callers await its promise for the
// recognition result.
defineExpose({
    open
});
</script>
<style>
/* Overlay covering the preview image. It takes no pointer events, so clicks
   fall through to the v-img underneath; only the drag highlight is drawn here. */
.dropzone {
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    pointer-events: none;
    border-radius: 8px;
    z-index: 10;
}

/* Dashed-border highlight shown while a file is dragged over the drop zone. */
.dropzone-dragover {
    border: 6px dashed rgba(var(--v-border-color),var(--v-border-opacity));
}
</style>

View File

@@ -188,7 +188,14 @@
<f7-popover class="template-popover-menu" target-el="#homepage-add-button"
v-model:opened="showTransactionTemplatePopover">
<f7-list dividers v-if="allTransactionTemplates">
<f7-list-item :title="template.name" :key="template.id"
<f7-list-item key="AIImageRecognition" :title="tt('AI Image Recognition')"
@click="showAIReceiptImageRecognitionSheet = true; showTransactionTemplatePopover = false"
v-if="isTransactionFromAIImageRecognitionEnabled()">
<template #media>
<f7-icon f7="wand_stars"></f7-icon>
</template>
</f7-list-item>
<f7-list-item :key="template.id" :title="template.name"
:link="'/transaction/add?templateId=' + template.id"
v-for="template in allTransactionTemplates">
<template #media>
@@ -197,11 +204,15 @@
</f7-list-item>
</f7-list>
</f7-popover>
<a-i-image-recognition-sheet v-model:show="showAIReceiptImageRecognitionSheet"
@recognition:change="onReceiptRecognitionChanged"/>
</f7-page>
</template>
<script setup lang="ts">
import { ref, computed } from 'vue';
import type { Router } from 'framework7/types';
import { useI18n } from '@/locales/helpers.ts';
import { useI18nUIComponents } from '@/lib/ui/mobile.ts';
@@ -215,8 +226,14 @@ import { useOverviewStore } from '@/stores/overview.ts';
import { DateRange } from '@/core/datetime.ts';
import { TemplateType } from '@/core/template.ts';
import { TransactionTemplate } from '@/models/transaction_template.ts';
import type { RecognizedReceiptImageResponse } from '@/models/large_language_model.ts';
import { isUserLogined, isUserUnlocked } from '@/lib/userstate.ts';
import { isTransactionFromAIImageRecognitionEnabled } from '@/lib/server_settings.ts';
const props = defineProps<{
f7router: Router.Router;
}>();
const { tt } = useI18n();
const { showToast } = useI18nUIComponents();
@@ -236,6 +253,7 @@ const overviewStore = useOverviewStore();
const loading = ref<boolean>(true);
const showTransactionTemplatePopover = ref<boolean>(false);
const showAIReceiptImageRecognitionSheet = ref<boolean>(false);
const allTransactionTemplates = computed<TransactionTemplate[]>(() => {
const allTemplates = transactionTemplatesStore.allVisibleTemplates;
@@ -243,7 +261,7 @@ const allTransactionTemplates = computed<TransactionTemplate[]>(() => {
});
function openTransactionTemplatePopover(): void {
if (allTransactionTemplates.value && allTransactionTemplates.value.length) {
if (isTransactionFromAIImageRecognitionEnabled() || (allTransactionTemplates.value && allTransactionTemplates.value.length)) {
showTransactionTemplatePopover.value = true;
}
}
@@ -291,6 +309,48 @@ function reload(done?: () => void): void {
});
}
// Builds the "add transaction" route query from the recognized receipt fields
// (only fields that were actually recognized are included) and navigates there.
function onReceiptRecognitionChanged(result: RecognizedReceiptImageResponse): void {
    const queryParts: string[] = [];
    const appendQueryPart = (name: string, value: string | number): void => {
        queryParts.push(name + '=' + value);
    };

    if (result.type) {
        appendQueryPart('type', result.type);
    }

    if (result.time) {
        appendQueryPart('time', result.time);
    }

    if (result.categoryId) {
        appendQueryPart('categoryId', result.categoryId);
    }

    if (result.sourceAccountId) {
        appendQueryPart('accountId', result.sourceAccountId);
    }

    if (result.destinationAccountId) {
        appendQueryPart('destinationAccountId', result.destinationAccountId);
    }

    if (result.sourceAmount) {
        appendQueryPart('amount', result.sourceAmount);
    }

    if (result.destinationAmount) {
        appendQueryPart('destinationAmount', result.destinationAmount);
    }

    if (result.tagIds) {
        appendQueryPart('tagIds', result.tagIds.join(','));
    }

    if (result.comment) {
        // only the free-text comment needs URI encoding; ids and numbers are safe
        appendQueryPart('comment', encodeURIComponent(result.comment));
    }

    props.f7router.navigate('/transaction/add?' + queryParts.join('&'));
}
function onPageAfterIn(): void {
if (!loading.value) {
reload();

View File

@@ -0,0 +1,44 @@
## Role
You are a financial assistant.
Your task is to extract structured transaction data from images provided by the user (such as receipts, transaction records, or vouchers).
## Output
1. Format: JSON only
2. No explanations, comments, or extra text outside JSON
## JSON Schema (with field descriptions)
```
{
"type": "string (transaction type: expense | income | transfer)",
"time": "string (transaction time, format: YYYY-MM-DD HH:mm:ss)",
"amount": "string (transaction amount, numeric, up to 2 decimals)",
"account": "string (source account name)",
"category": "string (transaction category)",
"tags": ["string (tag name, max 10 allowed)"],
"description": "string (transaction description)",
"destination_amount": "string (destination amount, numeric, up to 2 decimals, only for transfer)",
"destination_account": "string (destination account name, only for transfer)"
}
```
## Important rules
1. Only include fields you can confidently identify.
2. If unsure about a value, omit the field (do not guess).
3. If the image contains multiple line items, combine them into a single transaction with the total amount.
4. Always return valid JSON.
## Options
### Expense categories:
{{.AllExpenseCategoryNames}}
### Income categories:
{{.AllIncomeCategoryNames}}
### Transfer categories:
{{.AllTransferCategoryNames}}
### Account names:
{{.AllAccountNames}}
### Tags:
{{.AllTagNames}}