From 93c69a639ab802e73a62674273721ba67baae866 Mon Sep 17 00:00:00 2001
From: Oscar Shaw <ocetars@gmail.com>
Date: Sat, 29 Nov 2025 23:53:32 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E7=BE=A4=E8=81=8A?=
 =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=E7=9A=84=E4=B8=93=E7=94=A8=E5=9B=BE?=
 =?UTF-8?q?=E7=89=87=E8=BD=AC=E8=BF=B0=E6=A8=A1=E5=9E=8B=E9=85=8D=E7=BD=AE?=
 =?UTF-8?q?=20(#3822)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: add image caption provider configuration for group chat

- Introduced `image_caption_provider_id` to allow separate configuration for group chat image understanding.
- Updated metadata and hints in English and Chinese for clarity on new settings.
- Adjusted logic in long-term memory to read the image caption provider from the new `image_caption_provider_id` setting instead of `default_image_caption_provider_id`.

* fix: format

* Fix logic for image caption and active reply settings

* Fix indentation and formatting in long_term_memory.py

* chore: ruff format

---------

Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com>
Co-authored-by: Soulter <905617992@qq.com>
---
 astrbot/core/config/default.py                    | 15 ++++++++++++++-
 .../locales/en-US/features/config-metadata.json   |  6 +++++-
 .../locales/zh-CN/features/config-metadata.json   |  6 +++++-
 packages/astrbot/long_term_memory.py              | 15 ++++++---------
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 3af62d26..25ec2438 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -90,6 +90,7 @@ DEFAULT_CONFIG = {
         "group_icl_enable": False,
         "group_message_max_cnt": 300,
         "image_caption": False,
+        "image_caption_provider_id": "",
         "active_reply": {
             "enable": False,
             "method": "possibility_reply",
@@ -2109,6 +2110,9 @@ CONFIG_METADATA_2 = {
                     "image_caption": {
                         "type": "bool",
                     },
+                    "image_caption_provider_id": {
+                        "type": "string",
+                    },
                     "image_caption_prompt": {
                         "type": "string",
                     },
@@ -2785,7 +2789,16 @@ CONFIG_METADATA_3 = {
                     "provider_ltm_settings.image_caption": {
                         "description": "自动理解图片",
                         "type": "bool",
-                        "hint": "需要设置默认图片转述模型。",
+                        "hint": "需要设置群聊图片转述模型。",
+                    },
+                    "provider_ltm_settings.image_caption_provider_id": {
+                        "description": "群聊图片转述模型",
+                        "type": "string",
+                        "_special": "select_provider",
+                        "hint": "用于群聊上下文感知的图片理解，与默认图片转述模型分开配置。",
+                        "condition": {
+                            "provider_ltm_settings.image_caption": True,
+                        },
                     },
                     "provider_ltm_settings.active_reply.enable": {
                         "description": "主动回复",
diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
index afa0ad09..a4a72f61 100644
--- a/dashboard/src/i18n/locales/en-US/features/config-metadata.json
+++ b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -379,7 +379,11 @@
         },
         "image_caption": {
           "description": "Auto-understand Images",
-          "hint": "Requires setting a default image caption model."
+          "hint": "Requires setting a group chat image caption model."
+        },
+        "image_caption_provider_id": {
+          "description": "Group Chat Image Caption Model",
+          "hint": "Used for image understanding in group chat context awareness, configured separately from the default image caption model."
         },
         "active_reply": {
           "enable": {
diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
index b7da8c34..0aee49df 100644
--- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
+++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
@@ -379,7 +379,11 @@
         },
         "image_caption": {
           "description": "自动理解图片",
-          "hint": "需要设置默认图片转述模型。"
+          "hint": "需要设置群聊图片转述模型。"
+        },
+        "image_caption_provider_id": {
+          "description": "群聊图片转述模型",
+          "hint": "用于群聊上下文感知的图片理解，与默认图片转述模型分开配置。"
         },
         "active_reply": {
           "enable": {
diff --git a/packages/astrbot/long_term_memory.py b/packages/astrbot/long_term_memory.py
index 1e5ea4f1..e0a601be 100644
--- a/packages/astrbot/long_term_memory.py
+++ b/packages/astrbot/long_term_memory.py
@@ -30,16 +30,13 @@ class LongTermMemory:
         except BaseException as e:
             logger.error(e)
             max_cnt = 300
-        image_caption = (
-            True
-            if cfg["provider_settings"]["default_image_caption_provider_id"]
-            and cfg["provider_ltm_settings"]["image_caption"]
-            else False
-        )
         image_caption_prompt = cfg["provider_settings"]["image_caption_prompt"]
-        image_caption_provider_id = cfg["provider_settings"][
-            "default_image_caption_provider_id"
-        ]
+        image_caption_provider_id = cfg["provider_ltm_settings"].get(
+            "image_caption_provider_id"
+        )
+        image_caption = cfg["provider_ltm_settings"]["image_caption"] and bool(
+            image_caption_provider_id
+        )
         active_reply = cfg["provider_ltm_settings"]["active_reply"]
         enable_active_reply = active_reply.get("enable", False)
         ar_method = active_reply["method"]