perf: 优化bing网页搜索

2024-01-10 16:48:46 +08:00
parent e2f1362a1f
commit 570ff4e8b6
3 changed files with 68 additions and 79 deletions
@@ -8,7 +8,8 @@ from nakuru import (
    GroupMessage,
    FriendMessage,
    GroupMemberIncrease,
-    Notify
+    Notify,
+    Member
 )
 from typing import Union
 import time
@@ -37,12 +38,10 @@ class QQGOCQ(Platform):
        try:
            self.nick_qq = cfg['nick_qq']
        except:
-            self.nick_qq = ("ai","!","！")
+            self.nick_qq = ["ai","!","！"]
        nick_qq = self.nick_qq
        if isinstance(nick_qq, str):
-            nick_qq = (nick_qq,)
-        if isinstance(nick_qq, list):
-            nick_qq = tuple(nick_qq)
+            nick_qq = [nick_qq]
        
        self.unique_session = cfg['uniqueSessionMode']
        self.pic_mode = cfg['qq_pic_mode']
@@ -60,11 +59,9 @@ class QQGOCQ(Platform):
        async def _(app: CQHTTP, source: GroupMessage):
            if self.cc.get("gocq_react_group", True):
                if isinstance(source.message[0], Plain):
-                    # await self.handle_msg(source, True)
                    self.new_sub_thread(self.handle_msg, (source, True))
                elif isinstance(source.message[0], At):
                    if source.message[0].qq == source.self_id:
-                        # await self.handle_msg(source, True)
                        self.new_sub_thread(self.handle_msg, (source, True))
                else:
                    return
@@ -73,7 +70,6 @@ class QQGOCQ(Platform):
        async def _(app: CQHTTP, source: FriendMessage):
            if self.cc.get("gocq_react_friend", True):
                if isinstance(source.message[0], Plain):
-                    # await self.handle_msg(source, False)
                    self.new_sub_thread(self.handle_msg, (source, False))
                else:
                    return
@@ -112,22 +108,25 @@ class QQGOCQ(Platform):
    async def handle_msg(self, message: Union[GroupMessage, FriendMessage, GuildMessage, Notify], is_group: bool):
        # 判断是否响应消息
        resp = False
-        for i in message.message:
-            if isinstance(i, At):
-                if message.type == "GuildMessage":
-                    if i.qq == message.user_id or i.qq == message.self_tiny_id:
-                        resp = True
-                if message.type == "FriendMessage":
-                    if i.qq == message.self_id:
-                        resp = True
-                if message.type == "GroupMessage":
-                    if i.qq == message.self_id:
-                        resp = True
-            elif isinstance(i, Plain):
-                for nick in self.nick_qq:
-                    if nick != '' and i.text.strip().startswith(nick):
-                        resp = True
-                        break
+        if not is_group:
+            resp = True
+        else:
+            for i in message.message:
+                if isinstance(i, At):
+                    if message.type == "GuildMessage":
+                        if i.qq == message.user_id or i.qq == message.self_tiny_id:
+                            resp = True
+                    if message.type == "FriendMessage":
+                        if i.qq == message.self_id:
+                            resp = True
+                    if message.type == "GroupMessage":
+                        if i.qq == message.self_id:
+                            resp = True
+                elif isinstance(i, Plain):
+                    for nick in self.nick_qq:
+                        if nick != '' and i.text.strip().startswith(nick):
+                            resp = True
+                            break
                
        if not resp: return
        
@@ -29,7 +29,7 @@ class ProviderOpenAIOfficial(Provider):
        if cfg['key'] != '' and cfg['key'] != None:
            self.key_list = cfg['key']
        else:
-            input("[System] 请先去完善ChatGPT的Key。详情请前往https://beta.openai.com/account/api-keys")
+            input("[System] 请先填写 Key。详情请前往 https://beta.openai.com/account/api-keys 或使用中转 Key 方案。")
        if len(self.key_list) == 0:
            raise Exception("您打开了 OpenAI 模型服务，但是未填写 key。请前往填写。")
        
@@ -239,7 +239,7 @@ class ProviderOpenAIOfficial(Provider):
                err = str(e)
                retry += 1
        if retry >= 10:
-            gu.log(r"如果报错, 且您的机器在中国大陆内, 请确保您的电脑已经设置好代理软件(梯子), 并在配置文件设置了系统代理地址。详见 https://github.com/Soulter/QQChannelChatGPT/wiki", max_len=999)
+            gu.log(r"如果报错, 且您的机器在中国大陆内且未使用国内中转Key服务, 请确保您的电脑已经设置好代理软件(梯子), 并在配置文件设置了系统代理地址。", max_len=999)
            raise BaseException("连接出错: "+str(err))
        assert isinstance(response, ChatCompletion)
        gu.log(f"OPENAI RESPONSE: {response.usage}", level=gu.LEVEL_DEBUG, max_len=9999)
@@ -53,6 +53,7 @@ def google_web_search(keyword) -> str:
        for i in ls:
            desc = i.description
            try:
+                gu.log(f"搜索网页: {i.url}", tag="网页搜索", level=gu.LEVEL_INFO)
                desc = fetch_website_content(i.url)
            except BaseException as e:
                print(f"(google) fetch_website_content err: {str(e)}")
@@ -74,51 +75,54 @@ def web_keyword_search_via_bing(keyword) -> str:
    }
    url = "https://www.bing.com/search?q="+keyword
    _cnt = 0
-    _detail_store = []
+    # _detail_store = []
    while _cnt < 5:
        try:
            response = requests.get(url, headers=headers)
            response.encoding = "utf-8"
            gu.log(f"bing response: {response.text}", tag="bing", level=gu.LEVEL_DEBUG, max_len=9999)
            soup = BeautifulSoup(response.text, "html.parser")
-            res = []
+            res = ""
+            result_cnt = 0
            ols = soup.find(id="b_results")
            for i in ols.find_all("li", class_="b_algo"):
                try:
                    title = i.find("h2").text
                    desc = i.find("p").text
                    link = i.find("h2").find("a").get("href")
-                    res.append({
-                        "title": title,
-                        "desc": desc,
-                        "link": link,
-                    })
-                    if len(res) >= 5: # 限制5条
-                        break
-                    if len(_detail_store) >= 3:
-                        continue
+                    # res.append({
+                    #     "title": title,
+                    #     "desc": desc,
+                    #     "link": link,
+                    # })
+                    try:
+                        gu.log(f"搜索网页: {link}", tag="网页搜索", level=gu.LEVEL_INFO)
+                        desc = fetch_website_content(link)
+                    except BaseException as e:
+                        print(f"(bing) fetch_website_content err: {str(e)}")

-                    # 爬取前两条的网页内容
-                    if "zhihu.com" in link:
-                        try:
-                            _detail_store.append(special_fetch_zhihu(link))
-                        except BaseException as e:
-                            print(f"zhihu parse err: {str(e)}")
-                    else:
-                        try:
-                            _detail_store.append(fetch_website_content(link))
-                        except BaseException as e:
-                            print(f"fetch_website_content err: {str(e)}")
+                    res += f"# No.{str(result_cnt + 1)}\ntitle: {title}\nurl: {link}\ncontent: {desc}\n\n"
+                    result_cnt += 1
+                    if result_cnt > 5: break
+
+                    # if len(_detail_store) >= 3:
+                    #     continue
+                    # # 爬取前两条的网页内容
+                    # if "zhihu.com" in link:
+                    #     try:
+                    #         _detail_store.append(special_fetch_zhihu(link))
+                    #     except BaseException as e:
+                    #         print(f"zhihu parse err: {str(e)}")
+                    # else:
+                    #     try:
+                    #         _detail_store.append(fetch_website_content(link))
+                    #     except BaseException as e:
+                    #         print(f"fetch_website_content err: {str(e)}")

                except Exception as e:
                    print(f"bing parse err: {str(e)}")
-            if len(res) == 0:
-                break
-            if len(_detail_store) > 0:
-                ret = f"{str(res)} \n具体网页内容: {str(_detail_store)}"
-            else:
-                ret = f"{str(res)}"
-            return str(ret)
+            if result_cnt == 0: break
+            return res
        except Exception as e:
            gu.log(f"bing fetch err: {str(e)}")
            _cnt += 1
@@ -175,26 +179,6 @@ def fetch_website_content(url):
    }
    response = requests.get(url, headers=headers, timeout=3)
    response.encoding = "utf-8"
-    # soup = BeautifulSoup(response.text, "html.parser")
-    # # 如果有container / content / main等的话，就只取这些部分
-    # has = False
-    # beleive_ls = ["container", "content", "main"]
-    # res = ""
-    # for cls in beleive_ls:
-    #     for i in soup.find_all(class_=cls):
-    #         has = True
-    #         res += i.text
-    # if not has:
-    #     res = soup.text
-    # res = res.replace("\n", "").replace("  ", " ").replace("\r", "").replace("\t", "")
-    # if not has:
-    #     res = res[300:1100]
-    # else:
-    #     res = res[100:800]
-    # # with open(f"temp_{time.time()}.html", "w", encoding="utf-8") as f:
-    # #     f.write(res)
-    # gu.log(f"fetch_website_content: end", tag="fetch_website_content", level=gu.LEVEL_DEBUG)
-    # return res
    doc = Document(response.content)
    # print('title:', doc.title())
    ret = doc.summary(html_partial=True)
@@ -213,7 +197,7 @@ def web_search(question, provider: Provider, session_id, official_fc=False):
        "description": "google search query (分词，尽量保留所有信息)"
        }],
    "通过搜索引擎搜索。如果问题需要在网页上搜索(如天气、新闻或任何需要通过网页获取信息的问题)，则调用此函数；如果没有，不要调用此函数。",
-    google_web_search
+    web_keyword_search_via_bing
    )
    new_func_call.add_func("fetch_website_content", [{
        "type": "string",
@@ -259,13 +243,20 @@ def web_search(question, provider: Provider, session_id, official_fc=False):

    if has_func:
        provider.forget(session_id)
-        question3 = f"""请你用活泼的语气回答`{question}`问题。\n以下是相关材料，请直接拿此材料针对问题进行总结回答。在文章末尾加上各参考链接，如`[1] <title> <url>`；不要提到任何函数调用的信息；在总结的末尾加上1或2个相关的emoji。```\n{function_invoked_ret}\n```\n"""
+        question3 = f"""
+以下是相关材料，你的任务是：
+1. 根据材料对问题`{question}`做切题的总结回答;
+2. 发表你对这个问题的看法.
+你的总结末尾应当有对材料的引用, 如果有链接, 请在末尾附上引用网页链接。引用格式严格按照 `\n[1] title url \n`。
+不要提到任何函数调用的信息。以下是相关材料：
+"""
+        
        gu.log(f"web_search: {question3}", tag="web_search", level=gu.LEVEL_DEBUG, max_len=99999)
        _c = 0
        while _c < 3:
            try:
                print('text chat')
-                final_ret = provider.text_chat(question3)
+                final_ret = provider.text_chat(question3 + "```" + function_invoked_ret + "```", session_id)
                return final_ret
            except Exception as e:
                print(e)
@@ -275,5 +266,4 @@ def web_search(question, provider: Provider, session_id, official_fc=False):
                    provider.forget(session_id)
                    function_invoked_ret = function_invoked_ret[:int(len(function_invoked_ret) / 2)]
                    time.sleep(3)
-                    question3 = f"""请回答`{question}`问题。\n以下是相关材料，请直接拿此材料针对问题进行回答，再给参考链接, 参考链接首末有空格。```\n{function_invoked_ret}\n```\n"""
    return function_invoked_ret