feat: 新增群聊模式下的专用图片转述模型配置 (#3822)

* feat: add image caption provider configuration for group chat
  - Introduced `image_caption_provider_id` to allow separate configuration for group chat image understanding.
  - Updated metadata and hints in English and Chinese for clarity on new settings.
  - Adjusted logic in long term memory to utilize the new provider ID for image captioning.
* fix: format
* Fix logic for image caption and active reply settings
* Fix indentation and formatting in long_term_memory.py
* chore: ruff format

---------

Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com>
Co-authored-by: Soulter <905617992@qq.com>
2025-11-29 23:53:32 +08:00
parent a7fdc98b29
commit 93c69a639a
4 changed files with 30 additions and 12 deletions
@@ -90,6 +90,7 @@ DEFAULT_CONFIG = {
        "group_icl_enable": False,
        "group_message_max_cnt": 300,
        "image_caption": False,
+        "image_caption_provider_id": "",
        "active_reply": {
            "enable": False,
            "method": "possibility_reply",
@@ -2109,6 +2110,9 @@ CONFIG_METADATA_2 = {
                    "image_caption": {
                        "type": "bool",
                    },
+                    "image_caption_provider_id": {
+                        "type": "string",
+                    },
                    "image_caption_prompt": {
                        "type": "string",
                    },
@@ -2785,7 +2789,16 @@ CONFIG_METADATA_3 = {
                    "provider_ltm_settings.image_caption": {
                        "description": "自动理解图片",
                        "type": "bool",
-                        "hint": "需要设置默认图片转述模型。",
+                        "hint": "需要设置群聊图片转述模型。",
+                    },
+                    "provider_ltm_settings.image_caption_provider_id": {
+                        "description": "群聊图片转述模型",
+                        "type": "string",
+                        "_special": "select_provider",
+                        "hint": "用于群聊上下文感知的图片理解，与默认图片转述模型分开配置。",
+                        "condition": {
+                            "provider_ltm_settings.image_caption": True,
+                        },
                    },
                    "provider_ltm_settings.active_reply.enable": {
                        "description": "主动回复",
@@ -379,7 +379,11 @@
        },
        "image_caption": {
          "description": "Auto-understand Images",
-          "hint": "Requires setting a default image caption model."
+          "hint": "Requires setting a group chat image caption model."
+        },
+        "image_caption_provider_id": {
+          "description": "Group Chat Image Caption Model",
+          "hint": "Used for image understanding in group chat context awareness, configured separately from the default image caption model."
        },
        "active_reply": {
          "enable": {
@@ -379,7 +379,11 @@
        },
        "image_caption": {
          "description": "自动理解图片",
-          "hint": "需要设置默认图片转述模型。"
+          "hint": "需要设置群聊图片转述模型。"
+        },
+        "image_caption_provider_id": {
+          "description": "群聊图片转述模型",
+          "hint": "用于群聊上下文感知的图片理解，与默认图片转述模型分开配置。"
        },
        "active_reply": {
          "enable": {
@@ -30,16 +30,13 @@ class LongTermMemory:
        except BaseException as e:
            logger.error(e)
            max_cnt = 300
-        image_caption = (
-            True
-            if cfg["provider_settings"]["default_image_caption_provider_id"]
-            and cfg["provider_ltm_settings"]["image_caption"]
-            else False
-        )
        image_caption_prompt = cfg["provider_settings"]["image_caption_prompt"]
-        image_caption_provider_id = cfg["provider_settings"][
-            "default_image_caption_provider_id"
-        ]
+        image_caption_provider_id = cfg["provider_ltm_settings"].get(
+            "image_caption_provider_id"
+        )
+        image_caption = cfg["provider_ltm_settings"]["image_caption"] and bool(
+            image_caption_provider_id
+        )
        active_reply = cfg["provider_ltm_settings"]["active_reply"]
        enable_active_reply = active_reply.get("enable", False)
        ar_method = active_reply["method"]