perf: 优化bing网页搜索

This commit is contained in:
Soulter
2024-01-10 16:48:46 +08:00
parent e2f1362a1f
commit 570ff4e8b6
3 changed files with 68 additions and 79 deletions
+23 -24
View File
@@ -8,7 +8,8 @@ from nakuru import (
GroupMessage,
FriendMessage,
GroupMemberIncrease,
Notify
Notify,
Member
)
from typing import Union
import time
@@ -37,12 +38,10 @@ class QQGOCQ(Platform):
try:
self.nick_qq = cfg['nick_qq']
except:
self.nick_qq = ("ai","!","")
self.nick_qq = ["ai","!",""]
nick_qq = self.nick_qq
if isinstance(nick_qq, str):
nick_qq = (nick_qq,)
if isinstance(nick_qq, list):
nick_qq = tuple(nick_qq)
nick_qq = [nick_qq]
self.unique_session = cfg['uniqueSessionMode']
self.pic_mode = cfg['qq_pic_mode']
@@ -60,11 +59,9 @@ class QQGOCQ(Platform):
async def _(app: CQHTTP, source: GroupMessage):
if self.cc.get("gocq_react_group", True):
if isinstance(source.message[0], Plain):
# await self.handle_msg(source, True)
self.new_sub_thread(self.handle_msg, (source, True))
elif isinstance(source.message[0], At):
if source.message[0].qq == source.self_id:
# await self.handle_msg(source, True)
self.new_sub_thread(self.handle_msg, (source, True))
else:
return
@@ -73,7 +70,6 @@ class QQGOCQ(Platform):
async def _(app: CQHTTP, source: FriendMessage):
if self.cc.get("gocq_react_friend", True):
if isinstance(source.message[0], Plain):
# await self.handle_msg(source, False)
self.new_sub_thread(self.handle_msg, (source, False))
else:
return
@@ -112,22 +108,25 @@ class QQGOCQ(Platform):
async def handle_msg(self, message: Union[GroupMessage, FriendMessage, GuildMessage, Notify], is_group: bool):
# 判断是否响应消息
resp = False
for i in message.message:
if isinstance(i, At):
if message.type == "GuildMessage":
if i.qq == message.user_id or i.qq == message.self_tiny_id:
resp = True
if message.type == "FriendMessage":
if i.qq == message.self_id:
resp = True
if message.type == "GroupMessage":
if i.qq == message.self_id:
resp = True
elif isinstance(i, Plain):
for nick in self.nick_qq:
if nick != '' and i.text.strip().startswith(nick):
resp = True
break
if not is_group:
resp = True
else:
for i in message.message:
if isinstance(i, At):
if message.type == "GuildMessage":
if i.qq == message.user_id or i.qq == message.self_tiny_id:
resp = True
if message.type == "FriendMessage":
if i.qq == message.self_id:
resp = True
if message.type == "GroupMessage":
if i.qq == message.self_id:
resp = True
elif isinstance(i, Plain):
for nick in self.nick_qq:
if nick != '' and i.text.strip().startswith(nick):
resp = True
break
if not resp: return
+2 -2
View File
@@ -29,7 +29,7 @@ class ProviderOpenAIOfficial(Provider):
if cfg['key'] != '' and cfg['key'] != None:
self.key_list = cfg['key']
else:
input("[System] 请先去完善ChatGPT的Key。详情请前往https://beta.openai.com/account/api-keys")
input("[System] 请先填写 Key。详情请前往 https://beta.openai.com/account/api-keys 或使用中转 Key 方案。")
if len(self.key_list) == 0:
raise Exception("您打开了 OpenAI 模型服务,但是未填写 key。请前往填写。")
@@ -239,7 +239,7 @@ class ProviderOpenAIOfficial(Provider):
err = str(e)
retry += 1
if retry >= 10:
gu.log(r"如果报错, 且您的机器在中国大陆内, 请确保您的电脑已经设置好代理软件(梯子), 并在配置文件设置了系统代理地址。详见 https://github.com/Soulter/QQChannelChatGPT/wiki", max_len=999)
gu.log(r"如果报错, 且您的机器在中国大陆内且未使用国内中转Key服务, 请确保您的电脑已经设置好代理软件(梯子), 并在配置文件设置了系统代理地址。", max_len=999)
raise BaseException("连接出错: "+str(err))
assert isinstance(response, ChatCompletion)
gu.log(f"OPENAI RESPONSE: {response.usage}", level=gu.LEVEL_DEBUG, max_len=9999)
+43 -53
View File
@@ -53,6 +53,7 @@ def google_web_search(keyword) -> str:
for i in ls:
desc = i.description
try:
gu.log(f"搜索网页: {i.url}", tag="网页搜索", level=gu.LEVEL_INFO)
desc = fetch_website_content(i.url)
except BaseException as e:
print(f"(google) fetch_website_content err: {str(e)}")
@@ -74,51 +75,54 @@ def web_keyword_search_via_bing(keyword) -> str:
}
url = "https://www.bing.com/search?q="+keyword
_cnt = 0
_detail_store = []
# _detail_store = []
while _cnt < 5:
try:
response = requests.get(url, headers=headers)
response.encoding = "utf-8"
gu.log(f"bing response: {response.text}", tag="bing", level=gu.LEVEL_DEBUG, max_len=9999)
soup = BeautifulSoup(response.text, "html.parser")
res = []
res = ""
result_cnt = 0
ols = soup.find(id="b_results")
for i in ols.find_all("li", class_="b_algo"):
try:
title = i.find("h2").text
desc = i.find("p").text
link = i.find("h2").find("a").get("href")
res.append({
"title": title,
"desc": desc,
"link": link,
})
if len(res) >= 5: # 限制5条
break
if len(_detail_store) >= 3:
continue
# res.append({
# "title": title,
# "desc": desc,
# "link": link,
# })
try:
gu.log(f"搜索网页: {link}", tag="网页搜索", level=gu.LEVEL_INFO)
desc = fetch_website_content(link)
except BaseException as e:
print(f"(bing) fetch_website_content err: {str(e)}")
# 爬取前两条的网页内容
if "zhihu.com" in link:
try:
_detail_store.append(special_fetch_zhihu(link))
except BaseException as e:
print(f"zhihu parse err: {str(e)}")
else:
try:
_detail_store.append(fetch_website_content(link))
except BaseException as e:
print(f"fetch_website_content err: {str(e)}")
res += f"# No.{str(result_cnt + 1)}\ntitle: {title}\nurl: {link}\ncontent: {desc}\n\n"
result_cnt += 1
if result_cnt > 5: break
# if len(_detail_store) >= 3:
# continue
# # 爬取前两条的网页内容
# if "zhihu.com" in link:
# try:
# _detail_store.append(special_fetch_zhihu(link))
# except BaseException as e:
# print(f"zhihu parse err: {str(e)}")
# else:
# try:
# _detail_store.append(fetch_website_content(link))
# except BaseException as e:
# print(f"fetch_website_content err: {str(e)}")
except Exception as e:
print(f"bing parse err: {str(e)}")
if len(res) == 0:
break
if len(_detail_store) > 0:
ret = f"{str(res)} \n具体网页内容: {str(_detail_store)}"
else:
ret = f"{str(res)}"
return str(ret)
if result_cnt == 0: break
return res
except Exception as e:
gu.log(f"bing fetch err: {str(e)}")
_cnt += 1
@@ -175,26 +179,6 @@ def fetch_website_content(url):
}
response = requests.get(url, headers=headers, timeout=3)
response.encoding = "utf-8"
# soup = BeautifulSoup(response.text, "html.parser")
# # 如果有container / content / main等的话,就只取这些部分
# has = False
# beleive_ls = ["container", "content", "main"]
# res = ""
# for cls in beleive_ls:
# for i in soup.find_all(class_=cls):
# has = True
# res += i.text
# if not has:
# res = soup.text
# res = res.replace("\n", "").replace(" ", " ").replace("\r", "").replace("\t", "")
# if not has:
# res = res[300:1100]
# else:
# res = res[100:800]
# # with open(f"temp_{time.time()}.html", "w", encoding="utf-8") as f:
# # f.write(res)
# gu.log(f"fetch_website_content: end", tag="fetch_website_content", level=gu.LEVEL_DEBUG)
# return res
doc = Document(response.content)
# print('title:', doc.title())
ret = doc.summary(html_partial=True)
@@ -213,7 +197,7 @@ def web_search(question, provider: Provider, session_id, official_fc=False):
"description": "google search query (分词,尽量保留所有信息)"
}],
"通过搜索引擎搜索。如果问题需要在网页上搜索(如天气、新闻或任何需要通过网页获取信息的问题),则调用此函数;如果没有,不要调用此函数。",
google_web_search
web_keyword_search_via_bing
)
new_func_call.add_func("fetch_website_content", [{
"type": "string",
@@ -259,13 +243,20 @@ def web_search(question, provider: Provider, session_id, official_fc=False):
if has_func:
provider.forget(session_id)
question3 = f"""请你用活泼的语气回答`{question}`问题。\n以下是相关材料,请直接拿此材料针对问题进行总结回答。在文章末尾加上各参考链接,如`[1] <title> <url>`;不要提到任何函数调用的信息;在总结的末尾加上1或2个相关的emoji。```\n{function_invoked_ret}\n```\n"""
question3 = f"""
以下是相关材料,你的任务是:
1. 根据材料对问题`{question}`做切题的总结回答;
2. 发表你对这个问题的看法.
你的总结末尾应当有对材料的引用, 如果有链接, 请在末尾附上引用网页链接。引用格式严格按照 `\n[1] title url \n`。
不要提到任何函数调用的信息。以下是相关材料:
"""
gu.log(f"web_search: {question3}", tag="web_search", level=gu.LEVEL_DEBUG, max_len=99999)
_c = 0
while _c < 3:
try:
print('text chat')
final_ret = provider.text_chat(question3)
final_ret = provider.text_chat(question3 + "```" + function_invoked_ret + "```", session_id)
return final_ret
except Exception as e:
print(e)
@@ -275,5 +266,4 @@ def web_search(question, provider: Provider, session_id, official_fc=False):
provider.forget(session_id)
function_invoked_ret = function_invoked_ret[:int(len(function_invoked_ret) / 2)]
time.sleep(3)
question3 = f"""请回答`{question}`问题。\n以下是相关材料,请直接拿此材料针对问题进行回答,再给参考链接, 参考链接首末有空格。```\n{function_invoked_ret}\n```\n"""
return function_invoked_ret