✨feat: 支持 gemini-2.0-flash-exp-image-generation 对图片模态的输入 #1017
This commit is contained in:
@@ -519,8 +519,9 @@ CONFIG_METADATA_2 = {
|
||||
"api_base": "https://generativelanguage.googleapis.com/",
|
||||
"timeout": 120,
|
||||
"model_config": {
|
||||
"model": "gemini-1.5-flash",
|
||||
"model": "gemini-2.0-flash-exp",
|
||||
},
|
||||
"gm_resp_image_modal": False,
|
||||
},
|
||||
"DeepSeek": {
|
||||
"id": "deepseek_default",
|
||||
@@ -672,6 +673,11 @@ CONFIG_METADATA_2 = {
|
||||
},
|
||||
},
|
||||
"items": {
|
||||
"gm_resp_image_modal": {
|
||||
"description": "启用图片模态",
|
||||
"type": "bool",
|
||||
"hint": "启用后,将支持返回图片内容。需要模型支持,否则会报错。具体支持模型请查看 Google Gemini 官方网站。温馨提示,如果您需要生成图片,请关闭 `启用群员识别` 配置获得更好的效果。",
|
||||
},
|
||||
"rag_options": {
|
||||
"description": "RAG 选项",
|
||||
"type": "object",
|
||||
|
||||
@@ -2,6 +2,8 @@ import base64
|
||||
import aiohttp
|
||||
import json
|
||||
import random
|
||||
import astrbot.core.message.components as Comp
|
||||
from astrbot.core.message.message_event_result import MessageChain
|
||||
from astrbot.core.utils.io import download_image_by_url
|
||||
from astrbot.core.db import BaseDatabase
|
||||
from astrbot.api.provider import Provider, Personality
|
||||
@@ -39,6 +41,7 @@ class SimpleGoogleGenAIClient:
|
||||
model: str = "gemini-1.5-flash",
|
||||
system_instruction: str = "",
|
||||
tools: dict = None,
|
||||
modalities: List[str] = ["Text"],
|
||||
):
|
||||
payload = {}
|
||||
if system_instruction:
|
||||
@@ -46,6 +49,9 @@ class SimpleGoogleGenAIClient:
|
||||
if tools:
|
||||
payload["tools"] = [tools]
|
||||
payload["contents"] = contents
|
||||
payload["generationConfig"] = {
|
||||
"responseModalities": modalities,
|
||||
}
|
||||
logger.debug(f"payload: {payload}")
|
||||
request_url = (
|
||||
f"{self.api_base}/v1beta/models/{model}:generateContent?key={self.api_key}"
|
||||
@@ -185,22 +191,53 @@ class ProviderGoogleGenAI(Provider):
|
||||
|
||||
logger.debug(f"google_genai_conversation: {google_genai_conversation}")
|
||||
|
||||
modalites = ["Text"]
|
||||
if self.provider_config.get("gm_resp_image_modal", False):
|
||||
modalites.append("Image")
|
||||
|
||||
loop = True
|
||||
while loop:
|
||||
loop = False
|
||||
result = await self.client.generate_content(
|
||||
contents=google_genai_conversation,
|
||||
model=self.get_model(),
|
||||
system_instruction=system_instruction,
|
||||
tools=tool,
|
||||
modalities=modalites,
|
||||
)
|
||||
logger.debug(f"result: {result}")
|
||||
|
||||
if "candidates" not in result:
|
||||
# Developer instruction is not enabled for models/gemini-2.0-flash-exp
|
||||
if "Developer instruction is not enabled" in str(result):
|
||||
logger.warning(
|
||||
f"{self.get_model()} 不支持 system prompt, 已自动去除, 将会影响人格设置。"
|
||||
)
|
||||
system_instruction = ""
|
||||
loop = True
|
||||
|
||||
elif "Function calling is not enabled" in str(result):
|
||||
logger.warning(
|
||||
f"{self.get_model()} 不支持函数调用,已自动去除,不影响使用。"
|
||||
)
|
||||
tool = None
|
||||
loop = True
|
||||
|
||||
elif "Multi-modal output is not supported" in str(result):
|
||||
logger.warning(
|
||||
f"{self.get_model()} 不支持多模态输出,降级为文本模态重新请求。"
|
||||
)
|
||||
modalites = ["Text"]
|
||||
loop = True
|
||||
|
||||
elif "candidates" not in result:
|
||||
raise Exception("Gemini 返回异常结果: " + str(result))
|
||||
|
||||
candidates = result["candidates"][0]["content"]["parts"]
|
||||
llm_response = LLMResponse("assistant")
|
||||
chain = []
|
||||
for candidate in candidates:
|
||||
if "text" in candidate:
|
||||
llm_response.completion_text += candidate["text"]
|
||||
chain.append(Comp.Plain(candidate["text"]))
|
||||
elif "functionCall" in candidate:
|
||||
llm_response.role = "tool"
|
||||
llm_response.tools_call_args.append(candidate["functionCall"]["args"])
|
||||
@@ -208,8 +245,12 @@ class ProviderGoogleGenAI(Provider):
|
||||
llm_response.tools_call_ids.append(
|
||||
candidate["functionCall"]["name"]
|
||||
) # 没有 tool id
|
||||
elif "inlineData" in candidate:
|
||||
mime_type: str = candidate["inlineData"]["mimeType"]
|
||||
if mime_type.startswith("image/"):
|
||||
chain.append(Comp.Image.fromBase64(candidate["inlineData"]["data"]))
|
||||
|
||||
llm_response.completion_text = llm_response.completion_text.strip()
|
||||
llm_response.result_chain = MessageChain(chain=chain)
|
||||
return llm_response
|
||||
|
||||
async def text_chat(
|
||||
@@ -253,34 +294,7 @@ class ProviderGoogleGenAI(Provider):
|
||||
llm_response = await self._query(payloads, func_tool)
|
||||
break
|
||||
except Exception as e:
|
||||
if "maximum context length" in str(e):
|
||||
retry_cnt = 20
|
||||
while retry_cnt > 0:
|
||||
logger.warning(
|
||||
f"请求失败:{e}。上下文长度超过限制。尝试弹出最早的记录然后重试。当前记录条数: {len(context_query)}"
|
||||
)
|
||||
try:
|
||||
await self.pop_record(context_query)
|
||||
llm_response = await self._query(payloads, func_tool)
|
||||
break
|
||||
except Exception as e:
|
||||
if "maximum context length" in str(e):
|
||||
retry_cnt -= 1
|
||||
else:
|
||||
raise e
|
||||
if retry_cnt == 0:
|
||||
llm_response = LLMResponse(
|
||||
"err", "err: 请尝试 /reset 重置会话"
|
||||
)
|
||||
elif "Function calling is not enabled" in str(e):
|
||||
logger.info(
|
||||
f"{self.get_model()} 不支持函数工具调用,已自动去除,不影响使用。"
|
||||
)
|
||||
if "tools" in payloads:
|
||||
del payloads["tools"]
|
||||
llm_response = await self._query(payloads, None)
|
||||
break
|
||||
elif "429" in str(e) or "API key not valid" in str(e):
|
||||
if "429" in str(e) or "API key not valid" in str(e):
|
||||
keys.remove(chosen_key)
|
||||
if len(keys) > 0:
|
||||
chosen_key = random.choice(keys)
|
||||
@@ -292,7 +306,7 @@ class ProviderGoogleGenAI(Provider):
|
||||
logger.error(
|
||||
f"检测到 Key 异常({str(e)}),且已没有可用的 Key。 当前 Key: {chosen_key[:12]}..."
|
||||
)
|
||||
raise Exception("API 资源已耗尽,且没有可用的 Key 重试...")
|
||||
raise Exception("达到了 Gemini 速率限制, 请稍后再试...")
|
||||
else:
|
||||
logger.error(
|
||||
f"发生了错误(gemini_source)。Provider 配置如下: {self.provider_config}"
|
||||
|
||||
Reference in New Issue
Block a user