add model catalogs

2025-07-06 21:27:27 +08:00
608 changed files with 29605 additions and 38773 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,9 +1,9 @@
-root = true
-
-[*]
-charset = utf-8
-indent_style = space
-indent_size = 2
-end_of_line = lf
-insert_final_newline = true
-trim_trailing_whitespace = true
+root = true
+
+[*]
+charset = utf-8
+indent_style = space
+indent_size = 2
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,2 +0,0 @@
-# ignore #7923 eol change and code formatting
-4ac8a388347ff35f34de42c3ef4a2f81f03fb3b1
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,2 @@
-* text=auto eol=lf
 /.yarn/**            linguist-vendored
 /.yarn/releases/*    binary
--- a/.github/ISSUE_TEMPLATE/#3_others.yml
+++ b/.github/ISSUE_TEMPLATE/#3_others.yml
@@ -73,4 +73,4 @@ body:
    id: additional
    attributes:
      label: 附加信息
-      description: 任何能让我们对您的问题有更多了解的信息，包括截图或相关链接
+      description: 任何能让我们对您的问题有更多了解的信息，包括截图或相关链接
--- a/.github/ISSUE_TEMPLATE/3_others.yml
+++ b/.github/ISSUE_TEMPLATE/3_others.yml
@@ -73,4 +73,4 @@ body:
    id: additional
    attributes:
      label: Additional Information
-      description: Any other information that could help us better understand your question, including screenshots or relevant links
+      description: Any other information that could help us better understand your question, including screenshots or relevant links
--- a/.github/issue-checker.yml
+++ b/.github/issue-checker.yml
@@ -9,115 +9,115 @@ labels:
  # skips and removes
  - name: skip all
    content:
-    regexes: '[Ss]kip (?:[Aa]ll |)[Ll]abels?'
+    regexes: "[Ss]kip (?:[Aa]ll |)[Ll]abels?"
  - name: remove all
    content:
-    regexes: '[Rr]emove (?:[Aa]ll |)[Ll]abels?'
+    regexes: "[Rr]emove (?:[Aa]ll |)[Ll]abels?"

  - name: skip kind/bug
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)kind/bug(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)kind/bug(?:`|)"
  - name: remove kind/bug
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)kind/bug(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)kind/bug(?:`|)"

  - name: skip kind/enhancement
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)kind/enhancement(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)kind/enhancement(?:`|)"
  - name: remove kind/enhancement
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)kind/enhancement(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)kind/enhancement(?:`|)"

  - name: skip kind/question
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)kind/question(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)kind/question(?:`|)"
  - name: remove kind/question
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)kind/question(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)kind/question(?:`|)"

  - name: skip area/Connectivity
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)area/Connectivity(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)area/Connectivity(?:`|)"
  - name: remove area/Connectivity
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)area/Connectivity(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)area/Connectivity(?:`|)"

  - name: skip area/UI/UX
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)area/UI/UX(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)area/UI/UX(?:`|)"
  - name: remove area/UI/UX
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)area/UI/UX(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)area/UI/UX(?:`|)"

  - name: skip kind/documentation
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)kind/documentation(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)kind/documentation(?:`|)"
  - name: remove kind/documentation
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)kind/documentation(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)kind/documentation(?:`|)"

  - name: skip client:linux
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)client:linux(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)client:linux(?:`|)"
  - name: remove client:linux
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)client:linux(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)client:linux(?:`|)"

  - name: skip client:mac
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)client:mac(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)client:mac(?:`|)"
  - name: remove client:mac
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)client:mac(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)client:mac(?:`|)"

  - name: skip client:win
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)client:win(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)client:win(?:`|)"
  - name: remove client:win
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)client:win(?:`|)'
-
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)client:win(?:`|)"
+  
  - name: skip sig/Assistant
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)sig/Assistant(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)sig/Assistant(?:`|)"
  - name: remove sig/Assistant
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)sig/Assistant(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)sig/Assistant(?:`|)"

  - name: skip sig/Data
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)sig/Data(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)sig/Data(?:`|)"
  - name: remove sig/Data
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)sig/Data(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)sig/Data(?:`|)"

  - name: skip sig/MCP
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)sig/MCP(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)sig/MCP(?:`|)"
  - name: remove sig/MCP
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)sig/MCP(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)sig/MCP(?:`|)"

  - name: skip sig/RAG
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)sig/RAG(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)sig/RAG(?:`|)"
  - name: remove sig/RAG
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)sig/RAG(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)sig/RAG(?:`|)"

  - name: skip lgtm
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)lgtm(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)lgtm(?:`|)"
  - name: remove lgtm
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)lgtm(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)lgtm(?:`|)"

  - name: skip License
    content:
-    regexes: '[Ss]kip (?:[Ll]abels? |)(?:`|)License(?:`|)'
+    regexes: "[Ss]kip (?:[Ll]abels? |)(?:`|)License(?:`|)"
  - name: remove License
    content:
-    regexes: '[Rr]emove (?:[Ll]abels? |)(?:`|)License(?:`|)'
+    regexes: "[Rr]emove (?:[Ll]abels? |)(?:`|)License(?:`|)"

  # `Dev Team`
  - name: Dev Team
@@ -129,7 +129,7 @@ labels:
  # Area labels
  - name: area/Connectivity
    content: area/Connectivity
-    regexes: '代理|[Pp]roxy'
+    regexes: "代理|[Pp]roxy"
    skip-if:
      - skip all
      - skip area/Connectivity
@@ -139,7 +139,7 @@ labels:

  - name: area/UI/UX
    content: area/UI/UX
-    regexes: '界面|[Uu][Ii]|重叠|按钮|图标|组件|渲染|菜单|栏目|头像|主题|样式|[Cc][Ss][Ss]'
+    regexes: "界面|[Uu][Ii]|重叠|按钮|图标|组件|渲染|菜单|栏目|头像|主题|样式|[Cc][Ss][Ss]"
    skip-if:
      - skip all
      - skip area/UI/UX
@@ -150,7 +150,7 @@ labels:
  # Kind labels
  - name: kind/documentation
    content: kind/documentation
-    regexes: '文档|教程|[Dd]oc(s|umentation)|[Rr]eadme'
+    regexes: "文档|教程|[Dd]oc(s|umentation)|[Rr]eadme"
    skip-if:
      - skip all
      - skip kind/documentation
@@ -161,7 +161,7 @@ labels:
  # Client labels
  - name: client:linux
    content: client:linux
-    regexes: '(?:[Ll]inux|[Uu]buntu|[Dd]ebian)'
+    regexes: "(?:[Ll]inux|[Uu]buntu|[Dd]ebian)"
    skip-if:
      - skip all
      - skip client:linux
@@ -171,7 +171,7 @@ labels:

  - name: client:mac
    content: client:mac
-    regexes: '(?:[Mm]ac|[Mm]acOS|[Oo]SX)'
+    regexes: "(?:[Mm]ac|[Mm]acOS|[Oo]SX)"
    skip-if:
      - skip all
      - skip client:mac
@@ -181,7 +181,7 @@ labels:

  - name: client:win
    content: client:win
-    regexes: '(?:[Ww]in|[Ww]indows)'
+    regexes: "(?:[Ww]in|[Ww]indows)"
    skip-if:
      - skip all
      - skip client:win
@@ -192,7 +192,7 @@ labels:
  # SIG labels
  - name: sig/Assistant
    content: sig/Assistant
-    regexes: '快捷助手|[Aa]ssistant'
+    regexes: "快捷助手|[Aa]ssistant"
    skip-if:
      - skip all
      - skip sig/Assistant
@@ -202,7 +202,7 @@ labels:

  - name: sig/Data
    content: sig/Data
-    regexes: '[Ww]ebdav|坚果云|备份|同步|数据|Obsidian|Notion|Joplin|思源'
+    regexes: "[Ww]ebdav|坚果云|备份|同步|数据|Obsidian|Notion|Joplin|思源"
    skip-if:
      - skip all
      - skip sig/Data
@@ -212,7 +212,7 @@ labels:

  - name: sig/MCP
    content: sig/MCP
-    regexes: '[Mm][Cc][Pp]'
+    regexes: "[Mm][Cc][Pp]"
    skip-if:
      - skip all
      - skip sig/MCP
@@ -222,7 +222,7 @@ labels:

  - name: sig/RAG
    content: sig/RAG
-    regexes: '知识库|[Rr][Aa][Gg]'
+    regexes: "知识库|[Rr][Aa][Gg]"
    skip-if:
      - skip all
      - skip sig/RAG
@@ -233,7 +233,7 @@ labels:
  # Other labels
  - name: lgtm
    content: lgtm
-    regexes: '(?:[Ll][Gg][Tt][Mm]|[Ll]ooks [Gg]ood [Tt]o [Mm]e)'
+    regexes: "(?:[Ll][Gg][Tt][Mm]|[Ll]ooks [Gg]ood [Tt]o [Mm]e)"
    skip-if:
      - skip all
      - skip lgtm
@@ -243,7 +243,7 @@ labels:

  - name: License
    content: License
-    regexes: '(?:[Ll]icense|[Cc]opyright|[Mm][Ii][Tt]|[Aa]pache)'
+    regexes: "(?:[Ll]icense|[Cc]opyright|[Mm][Ii][Tt]|[Aa]pache)"
    skip-if:
      - skip all
      - skip License
--- a/.github/workflows/issue-checker.yml
+++ b/.github/workflows/issue-checker.yml
@@ -1,4 +1,4 @@
-name: 'Issue Checker'
+name: "Issue Checker"

 on:
  issues:
@@ -19,7 +19,7 @@ jobs:
    steps:
      - uses: MaaAssistantArknights/issue-checker@v1.14
        with:
-          repo-token: '${{ secrets.GITHUB_TOKEN }}'
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          configuration-path: .github/issue-checker.yml
          not-before: 2022-08-05T00:00:00Z
-          include-title: 1
+          include-title: 1
--- a/.github/workflows/issue-management.yml
+++ b/.github/workflows/issue-management.yml
@@ -1,8 +1,8 @@
-name: 'Stale Issue Management'
+name: "Stale Issue Management"

 on:
  schedule:
-    - cron: '0 0 * * *'
+    - cron: "0 0 * * *"
  workflow_dispatch:

 env:
@@ -24,18 +24,18 @@ jobs:
        uses: actions/stale@v9
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          only-labels: 'needs-more-info'
+          only-labels: "needs-more-info"
          days-before-stale: ${{ env.daysBeforeStale }}
-          days-before-close: 0 # Close immediately after stale
-          stale-issue-label: 'inactive'
-          close-issue-label: 'closed:no-response'
+          days-before-close: 0  # Close immediately after stale
+          stale-issue-label: "inactive"
+          close-issue-label: "closed:no-response"
          stale-issue-message: |
            This issue has been labeled as needing more information and has been inactive for ${{ env.daysBeforeStale }} days. 
            It will be closed now due to lack of additional information.
-
+            
            该问题被标记为"需要更多信息"且已经 ${{ env.daysBeforeStale }} 天没有任何活动，将立即关闭。
          operations-per-run: 50
-          exempt-issue-labels: 'pending, Dev Team'
+          exempt-issue-labels: "pending, Dev Team"
          days-before-pr-stale: -1
          days-before-pr-close: -1

@@ -45,11 +45,11 @@ jobs:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          days-before-stale: ${{ env.daysBeforeStale }}
          days-before-close: ${{ env.daysBeforeClose }}
-          stale-issue-label: 'inactive'
+          stale-issue-label: "inactive"
          stale-issue-message: |
            This issue has been inactive for a prolonged period and will be closed automatically in ${{ env.daysBeforeClose }} days.
            该问题已长时间处于闲置状态，${{ env.daysBeforeClose }} 天后将自动关闭。
-          exempt-issue-labels: 'pending, Dev Team, kind/enhancement'
+          exempt-issue-labels: "pending, Dev Team, kind/enhancement"
          days-before-pr-stale: -1 # Completely disable stalling for PRs
          days-before-pr-close: -1 # Completely disable closing for PRs

--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -77,10 +77,9 @@ jobs:

        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }}
          NODE_OPTIONS: --max-old-space-size=8192
          MAIN_VITE_MINERU_API_KEY: ${{ vars.MAIN_VITE_MINERU_API_KEY }}
-          RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }}
-          RENDERER_VITE_PPIO_APP_SECRET: ${{ vars.RENDERER_VITE_PPIO_APP_SECRET }}

      - name: Build Mac
        if: matrix.os == 'macos-latest'
@@ -94,11 +93,10 @@ jobs:
          APPLE_ID: ${{ vars.APPLE_ID }}
          APPLE_APP_SPECIFIC_PASSWORD: ${{ vars.APPLE_APP_SPECIFIC_PASSWORD }}
          APPLE_TEAM_ID: ${{ vars.APPLE_TEAM_ID }}
+          RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NODE_OPTIONS: --max-old-space-size=8192
          MAIN_VITE_MINERU_API_KEY: ${{ vars.MAIN_VITE_MINERU_API_KEY }}
-          RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }}
-          RENDERER_VITE_PPIO_APP_SECRET: ${{ vars.RENDERER_VITE_PPIO_APP_SECRET }}

      - name: Build Windows
        if: matrix.os == 'windows-latest'
@@ -107,10 +105,9 @@ jobs:
          yarn build:win
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }}
          NODE_OPTIONS: --max-old-space-size=8192
          MAIN_VITE_MINERU_API_KEY: ${{ vars.MAIN_VITE_MINERU_API_KEY }}
-          RENDERER_VITE_AIHUBMIX_SECRET: ${{ vars.RENDERER_VITE_AIHUBMIX_SECRET }}
-          RENDERER_VITE_PPIO_APP_SECRET: ${{ vars.RENDERER_VITE_PPIO_APP_SECRET }}

      - name: Release
        uses: ncipollo/release-action@v1
@@ -120,4 +117,4 @@ jobs:
          makeLatest: false
          tag: ${{ steps.get-tag.outputs.tag }}
          artifacts: 'dist/*.exe,dist/*.zip,dist/*.dmg,dist/*.AppImage,dist/*.snap,dist/*.deb,dist/*.rpm,dist/*.tar.gz,dist/latest*.yml,dist/rc*.yml,dist/*.blockmap'
-          token: ${{ secrets.GITHUB_TOKEN }}
+          token: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -46,10 +46,6 @@ local
 .aider*
 .cursorrules
 .cursor/*
-.claude/*
-.gemini/*
-.trae/*
-.claude-code-router/*

 # vitest
 coverage
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -1,3 +1,3 @@
 {
-  "recommendations": ["dbaeumer.vscode-eslint", "esbenp.prettier-vscode", "editorconfig.editorconfig"]
+  "recommendations": ["dbaeumer.vscode-eslint"]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -4,7 +4,6 @@
    "source.fixAll.eslint": "explicit",
    "source.organizeImports": "never"
  },
-  "files.eol": "\n",
  "search.exclude": {
    "**/dist/**": true,
    ".yarn/releases/**": true
--- a/.yarn/patches/@google-genai-npm-1.0.1-e26f0f9af7.patch
+++ b/.yarn/patches/@google-genai-npm-1.0.1-e26f0f9af7.patch
--- a/docs/features/memory-guide-zh.md
+++ b/docs/features/memory-guide-zh.md
@@ -1,222 +0,0 @@
-# Cherry Studio 记忆功能指南
-
-## 功能介绍
-
-Cherry Studio 的记忆功能是一个强大的工具，能够帮助 AI 助手记住对话中的重要信息、用户偏好和上下文。通过记忆功能，您的 AI 助手可以：
-
-- 📝 **记住重要信息**：自动从对话中提取并存储关键事实和信息
-- 🧠 **个性化响应**：基于存储的记忆提供更加个性化和相关的回答
-- 🔍 **智能检索**：在需要时自动搜索相关记忆，增强对话的连贯性
-- 👥 **多用户支持**：为不同用户维护独立的记忆上下文
-
-记忆功能特别适用于需要长期保持上下文的场景，例如个人助手、客户服务、教育辅导等。
-
-## 如何启用记忆功能
-
-### 1. 全局配置（首次设置）
-
-在使用记忆功能之前，您需要先进行全局配置：
-
-1. 点击侧边栏的 **记忆** 图标（记忆棒图标）进入记忆管理页面
-2. 点击右上角的 **更多** 按钮（三个点），选择 **设置**
-3. 在设置弹窗中配置以下必要项：
-   - **LLM 模型**：选择用于处理记忆的语言模型（推荐使用 GPT-4 或 Claude 等高级模型）
-   - **嵌入模型**：选择用于生成向量嵌入的模型（如 text-embedding-3-small）
-   - **嵌入维度**：输入嵌入模型的维度（通常为 1536）
-4. 点击 **确定** 保存配置
-
-> ⚠️ **注意**：嵌入模型和维度一旦设置后无法更改，请谨慎选择。
-
-### 2. 为助手启用记忆
-
-完成全局配置后，您可以为特定助手启用记忆功能：
-
-1. 进入 **助手** 页面
-2. 选择要启用记忆的助手，点击 **编辑**
-3. 在助手设置中找到 **记忆** 部分
-4. 打开记忆功能开关
-5. 保存助手设置
-
-启用后，该助手将在对话过程中自动提取和使用记忆。
-
-## 使用方法
-
-### 查看记忆
-
-1. 点击侧边栏的 **记忆** 图标进入记忆管理页面
-2. 您可以看到所有存储的记忆卡片，包括：
-   - 记忆内容
-   - 创建时间
-   - 所属用户
-
-### 添加记忆
-
-手动添加记忆有两种方式：
-
-**方式一：在记忆管理页面添加**
-
-1. 点击右上角的 **添加记忆** 按钮
-2. 在弹窗中输入记忆内容
-3. 点击 **添加** 保存
-
-**方式二：在对话中自动提取**
-
-- 当助手启用记忆功能后，系统会自动从对话中提取重要信息并存储为记忆
-
-### 编辑记忆
-
-1. 在记忆卡片上点击 **更多** 按钮（三个点）
-2. 选择 **编辑**
-3. 修改记忆内容
-4. 点击 **保存**
-
-### 删除记忆
-
-1. 在记忆卡片上点击 **更多** 按钮
-2. 选择 **删除**
-3. 确认删除操作
-
-## 记忆搜索
-
-记忆管理页面提供了强大的搜索功能：
-
-1. 在页面顶部的搜索框中输入关键词
-2. 系统会实时过滤显示匹配的记忆
-3. 搜索支持模糊匹配，可以搜索记忆内容的任何部分
-
-## 用户管理
-
-记忆功能支持多用户，您可以为不同的用户维护独立的记忆库：
-
-### 切换用户
-
-1. 在记忆管理页面，点击右上角的用户选择器
-2. 选择要切换到的用户
-3. 页面会自动加载该用户的记忆
-
-### 添加新用户
-
-1. 点击用户选择器
-2. 选择 **添加新用户**
-3. 输入用户 ID（支持字母、数字、下划线和连字符）
-4. 点击 **添加**
-
-### 删除用户
-
-1. 切换到要删除的用户
-2. 点击右上角的 **更多** 按钮
-3. 选择 **删除用户**
-4. 确认删除（注意：这将删除该用户的所有记忆）
-
-> 💡 **提示**：默认用户（default-user）无法删除。
-
-## 设置说明
-
-### LLM 模型
-
-- 用于处理记忆提取和更新的语言模型
-- 建议选择能力较强的模型以获得更好的记忆提取效果
-- 可随时更改
-
-### 嵌入模型
-
-- 用于将文本转换为向量，支持语义搜索
-- 一旦设置后无法更改（为了保证现有记忆的兼容性）
-- 推荐使用 OpenAI 的 text-embedding 系列模型
-
-### 嵌入维度
-
-- 嵌入向量的维度，需要与选择的嵌入模型匹配
-- 常见维度：
-  - text-embedding-3-small: 1536
-  - text-embedding-3-large: 3072
-  - text-embedding-ada-002: 1536
-
-### 自定义提示词（可选）
-
-- **事实提取提示词**：自定义如何从对话中提取信息
-- **记忆更新提示词**：自定义如何更新现有记忆
-
-## 最佳实践
-
-### 1. 合理组织记忆
-
-- 保持记忆简洁明了，每条记忆专注于一个具体信息
-- 使用清晰的语言描述事实，避免模糊表达
-- 定期审查和清理过时或不准确的记忆
-
-### 2. 多用户场景
-
-- 为不同的使用场景创建独立用户（如工作、个人、学习等）
-- 使用有意义的用户 ID，便于识别和管理
-- 定期备份重要用户的记忆数据
-
-### 3. 模型选择建议
-
-- **LLM 模型**：GPT-4、Claude 3 等高级模型能更准确地提取和理解信息
-- **嵌入模型**：选择与您的主要使用语言匹配的模型
-
-### 4. 性能优化
-
-- 避免存储过多冗余记忆，这可能影响搜索性能
-- 定期整理和合并相似的记忆
-- 对于大量记忆的场景，考虑按主题或时间进行分类管理
-
-## 常见问题
-
-### Q: 为什么我无法启用记忆功能？
-
-A: 请确保您已经完成全局配置，包括选择 LLM 模型和嵌入模型。
-
-### Q: 记忆会自动同步到所有助手吗？
-
-A: 不会。每个助手的记忆功能需要单独启用，且记忆是按用户隔离的。
-
-### Q: 如何导出我的记忆数据？
-
-A: 目前系统暂不支持直接导出功能，但所有记忆都存储在本地数据库中。
-
-### Q: 删除的记忆可以恢复吗？
-
-A: 删除操作是永久的，无法恢复。建议在删除前仔细确认。
-
-### Q: 记忆功能会影响对话速度吗？
-
-A: 记忆功能在后台异步处理，不会明显影响对话响应速度。但过多的记忆可能会略微增加搜索时间。
-
-### Q: 如何清空所有记忆？
-
-A: 您可以删除当前用户并重新创建，或者手动删除所有记忆条目。
-
-## 注意事项
-
-### 隐私保护
-
-- 所有记忆数据都存储在您的本地设备上，不会上传到云端
-- 请勿在记忆中存储敏感信息（如密码、私钥等）
-- 定期审查记忆内容，确保没有意外存储的隐私信息
-
-### 数据安全
-
-- 记忆数据存储在本地数据库中
-- 建议定期备份重要数据
-- 更换设备时请注意迁移记忆数据
-
-### 使用限制
-
-- 单条记忆的长度建议不超过 500 字
-- 每个用户的记忆数量建议控制在 1000 条以内
-- 过多的记忆可能影响系统性能
-
-## 技术细节
-
-记忆功能使用了先进的 RAG（检索增强生成）技术：
-
-1. **信息提取**：使用 LLM 从对话中智能提取关键信息
-2. **向量化存储**：通过嵌入模型将文本转换为向量，支持语义搜索
-3. **智能检索**：在对话时自动搜索相关记忆，提供给 AI 作为上下文
-4. **持续学习**：随着对话进行，不断更新和完善记忆库
-
----
-
-💡 **提示**：记忆功能是 Cherry Studio 的高级特性，合理使用可以大大提升 AI 助手的智能程度和用户体验。如有更多问题，欢迎查阅文档或联系支持团队。
--- a/docs/technical/db.settings.md
+++ b/docs/technical/db.settings.md
@@ -1,11 +0,0 @@
-# 数据库设置字段
-
-此文档包含部分字段的数据类型说明。
-
-## 字段
-
-| 字段名                         | 类型                           | 说明         |
-| ------------------------------ | ------------------------------ | ------------ |
-| `translate:target:language`    | `LanguageCode`                 | 翻译目标语言 |
-| `translate:source:language`    | `LanguageCode`                 | 翻译源语言   |
-| `translate:bidirectional:pair` | `[LanguageCode, LanguageCode]` | 双向翻译对   |
--- a/electron-builder.yml
+++ b/electron-builder.yml
@@ -117,8 +117,9 @@ afterSign: scripts/notarize.js
 artifactBuildCompleted: scripts/artifact-build-completed.js
 releaseInfo:
  releaseNotes: |
-    新增全局记忆功能
-    MCP 支持 DXT 格式导入
-    全局快捷键支持 Linux 系统
-    模型思考过程增加动画效果
-    错误修复和性能优化
+    划词助手：支持 macOS 系统
+    文档处理：增加 MinerU、Doc2x、Mistral 等服务商支持
+    知识库：新的知识库界面，增加扫描版 PDF 支持
+    OCR：macOS 增加系统 OCR 支持
+    服务商：支持一键添加服务商，新增 PH8 大模型开放平台，支持 PPIO OAuth 登录
+    修复：Linux 下数据目录移动问题
--- a/electron.vite.config.ts
+++ b/electron.vite.config.ts
@@ -8,9 +8,6 @@ const visualizerPlugin = (type: 'renderer' | 'main') => {
  return process.env[`VISUALIZER_${type.toUpperCase()}`] ? [visualizer({ open: true })] : []
 }

-const isDev = process.env.NODE_ENV === 'development'
-const isProd = process.env.NODE_ENV === 'production'
-
 export default defineConfig({
  main: {
    plugins: [externalizeDepsPlugin(), ...visualizerPlugin('main')],
@@ -25,15 +22,16 @@ export default defineConfig({
      rollupOptions: {
        external: ['@libsql/client', 'bufferutil', 'utf-8-validate', '@cherrystudio/mac-system-ocr'],
        output: {
-          manualChunks: undefined, // 彻底禁用代码分割 - 返回 null 强制单文件打包
-          inlineDynamicImports: true // 内联所有动态导入，这是关键配置
+          // 彻底禁用代码分割 - 返回 null 强制单文件打包
+          manualChunks: undefined,
+          // 内联所有动态导入，这是关键配置
+          inlineDynamicImports: true
        }
      },
-      sourcemap: isDev
+      sourcemap: process.env.NODE_ENV === 'development'
    },
-    esbuild: isProd ? { legalComments: 'none' } : {},
    optimizeDeps: {
-      noDiscovery: isDev
+      noDiscovery: process.env.NODE_ENV === 'development'
    }
  },
  preload: {
@@ -44,7 +42,7 @@ export default defineConfig({
      }
    },
    build: {
-      sourcemap: isDev
+      sourcemap: process.env.NODE_ENV === 'development'
    }
  },
  renderer: {
@@ -62,7 +60,14 @@ export default defineConfig({
          ]
        ]
      }),
-      ...(isDev ? [CodeInspectorPlugin({ bundler: 'vite' })] : []), // 只在开发环境下启用 CodeInspectorPlugin
+      // 只在开发环境下启用 CodeInspectorPlugin
+      ...(process.env.NODE_ENV === 'development'
+        ? [
+            CodeInspectorPlugin({
+              bundler: 'vite'
+            })
+          ]
+        : []),
      ...visualizerPlugin('renderer')
    ],
    resolve: {
@@ -90,7 +95,6 @@ export default defineConfig({
          selectionAction: resolve(__dirname, 'src/renderer/selectionAction.html')
        }
      }
-    },
-    esbuild: isProd ? { legalComments: 'none' } : {}
+    }
  }
 })
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -26,7 +26,7 @@ export default defineConfig([
      'simple-import-sort/exports': 'error',
      'unused-imports/no-unused-imports': 'error',
      '@eslint-react/no-prop-types': 'error',
-      'prettier/prettier': ['error']
+      'prettier/prettier': ['error', { endOfLine: 'auto' }]
    }
  },
  // Configuration for ensuring compatibility with the original ESLint(8.x) rules
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "CherryStudio",
-  "version": "1.5.0",
+  "version": "1.4.8",
  "private": true,
  "description": "A powerful AI assistant for producer.",
  "main": "./out/main/index.js",
@@ -27,12 +27,12 @@
    "build:win": "dotenv npm run build && electron-builder --win --x64 --arm64",
    "build:win:x64": "dotenv npm run build && electron-builder --win --x64",
    "build:win:arm64": "dotenv npm run build && electron-builder --win --arm64",
-    "build:mac": "dotenv npm run build && electron-builder --mac --arm64 --x64",
-    "build:mac:arm64": "dotenv npm run build && electron-builder --mac --arm64",
-    "build:mac:x64": "dotenv npm run build && electron-builder --mac --x64",
-    "build:linux": "dotenv npm run build && electron-builder --linux --x64 --arm64",
-    "build:linux:arm64": "dotenv npm run build && electron-builder --linux --arm64",
-    "build:linux:x64": "dotenv npm run build && electron-builder --linux --x64",
+    "build:mac": "dotenv electron-vite build && electron-builder --mac --arm64 --x64",
+    "build:mac:arm64": "dotenv electron-vite build && electron-builder --mac --arm64",
+    "build:mac:x64": "dotenv electron-vite build && electron-builder --mac --x64",
+    "build:linux": "dotenv electron-vite build && electron-builder --linux --x64 --arm64",
+    "build:linux:arm64": "dotenv electron-vite build && electron-builder --linux --arm64",
+    "build:linux:x64": "dotenv electron-vite build && electron-builder --linux --x64",
    "build:npm": "node scripts/build-npm.js",
    "release": "node scripts/version.js",
    "publish": "yarn build:check && yarn release patch push",
@@ -55,24 +55,20 @@
    "test:lint": "eslint . --ext .js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts",
    "format": "prettier --write .",
    "lint": "eslint . --ext .js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix",
-    "prepare": "git config blame.ignoreRevsFile .git-blame-ignore-revs && husky"
+    "prepare": "husky"
  },
  "dependencies": {
-    "@aws-sdk/client-s3": "^3.840.0",
    "@cherrystudio/pdf-to-img-napi": "^0.0.1",
    "@libsql/client": "0.14.0",
    "@libsql/win32-x64-msvc": "^0.4.7",
    "@strongtz/win32-arm64-msvc": "^0.4.7",
-    "iconv-lite": "^0.6.3",
-    "jaison": "^2.0.2",
-    "jschardet": "^3.1.4",
    "jsdom": "26.1.0",
    "macos-release": "^3.4.0",
    "node-stream-zip": "^1.15.0",
    "notion-helper": "^1.3.22",
    "os-proxy-config": "^1.1.2",
    "pdfjs-dist": "4.10.38",
-    "selection-hook": "^1.0.6",
+    "selection-hook": "^1.0.4",
    "turndown": "7.2.0"
  },
  "devDependencies": {
@@ -93,7 +89,6 @@
    "@cherrystudio/embedjs-loader-xml": "^0.1.31",
    "@cherrystudio/embedjs-ollama": "^0.1.31",
    "@cherrystudio/embedjs-openai": "^0.1.31",
-    "@codemirror/view": "^6.0.0",
    "@electron-toolkit/eslint-config-prettier": "^3.0.0",
    "@electron-toolkit/eslint-config-ts": "^3.0.0",
    "@electron-toolkit/preload": "^3.0.0",
@@ -109,7 +104,7 @@
    "@langchain/community": "^0.3.36",
    "@langchain/ollama": "^0.2.1",
    "@mistralai/mistralai": "^1.6.0",
-    "@modelcontextprotocol/sdk": "^1.12.3",
+    "@modelcontextprotocol/sdk": "^1.11.4",
    "@mozilla/readability": "^0.6.0",
    "@notionhq/client": "^2.2.15",
    "@playwright/test": "^1.52.0",
@@ -143,8 +138,6 @@
    "@vitest/coverage-v8": "^3.1.4",
    "@vitest/ui": "^3.1.4",
    "@vitest/web-worker": "^3.1.4",
-    "@viz-js/lang-dot": "^1.0.5",
-    "@viz-js/viz": "^3.14.0",
    "@xyflow/react": "^12.4.4",
    "antd": "patch:antd@npm%3A5.24.7#~/.yarn/patches/antd-npm-5.24.7-356a553ae5.patch",
    "archiver": "^7.0.1",
@@ -229,7 +222,6 @@
    "tiny-pinyin": "^1.3.2",
    "tokenx": "^1.1.0",
    "typescript": "^5.6.2",
-    "unified": "^11.0.5",
    "uuid": "^10.0.0",
    "vite": "6.2.6",
    "vitest": "^3.1.4",
--- a/packages/shared/IpcChannel.ts
+++ b/packages/shared/IpcChannel.ts
@@ -74,10 +74,6 @@ export enum IpcChannel {
  Mcp_ServersChanged = 'mcp:servers-changed',
  Mcp_ServersUpdated = 'mcp:servers-updated',
  Mcp_CheckConnectivity = 'mcp:check-connectivity',
-  Mcp_UploadDxt = 'mcp:upload-dxt',
-  Mcp_SetProgress = 'mcp:set-progress',
-  Mcp_AbortTool = 'mcp:abort-tool',
-  Mcp_GetServerVersion = 'mcp:get-server-version',

  // Python
  Python_Execute = 'python:execute',
@@ -149,7 +145,6 @@ export enum IpcChannel {
  File_Base64File = 'file:base64File',
  File_GetPdfInfo = 'file:getPdfInfo',
  Fs_Read = 'fs:read',
-  File_OpenWithRelativePath = 'file:openWithRelativePath',

  // file service
  FileService_Upload = 'file-service:upload',
@@ -170,16 +165,6 @@ export enum IpcChannel {
  Backup_CheckConnection = 'backup:checkConnection',
  Backup_CreateDirectory = 'backup:createDirectory',
  Backup_DeleteWebdavFile = 'backup:deleteWebdavFile',
-  Backup_BackupToLocalDir = 'backup:backupToLocalDir',
-  Backup_RestoreFromLocalBackup = 'backup:restoreFromLocalBackup',
-  Backup_ListLocalBackupFiles = 'backup:listLocalBackupFiles',
-  Backup_DeleteLocalBackupFile = 'backup:deleteLocalBackupFile',
-  Backup_SetLocalBackupDir = 'backup:setLocalBackupDir',
-  Backup_BackupToS3 = 'backup:backupToS3',
-  Backup_RestoreFromS3 = 'backup:restoreFromS3',
-  Backup_ListS3Files = 'backup:listS3Files',
-  Backup_DeleteS3File = 'backup:deleteS3File',
-  Backup_CheckS3Connection = 'backup:checkS3Connection',

  // zip
  Zip_Compress = 'zip:compress',
@@ -244,17 +229,5 @@ export enum IpcChannel {
  Selection_ActionWindowMinimize = 'selection:action-window-minimize',
  Selection_ActionWindowPin = 'selection:action-window-pin',
  Selection_ProcessAction = 'selection:process-action',
-  Selection_UpdateActionData = 'selection:update-action-data',
-
-  // Memory
-  Memory_Add = 'memory:add',
-  Memory_Search = 'memory:search',
-  Memory_List = 'memory:list',
-  Memory_Delete = 'memory:delete',
-  Memory_Update = 'memory:update',
-  Memory_Get = 'memory:get',
-  Memory_SetConfig = 'memory:set-config',
-  Memory_DeleteUser = 'memory:delete-user',
-  Memory_DeleteAllMemoriesForUser = 'memory:delete-all-memories-for-user',
-  Memory_GetUsersList = 'memory:get-users-list'
+  Selection_UpdateActionData = 'selection:update-action-data'
 }
--- a/packages/shared/config/constant.ts
+++ b/packages/shared/config/constant.ts
@@ -193,7 +193,6 @@ const textExtsByCategory = new Map([
      '.htm',
      '.xhtml', // HTML
      '.xml', // XML
-      '.fxml', // JavaFX XML
      '.org', // Org-mode
      '.wiki', // Wiki
      '.tex',
--- a/resources/model-catalogs/01-ai/yi-large.yaml
+++ b/resources/model-catalogs/01-ai/yi-large.yaml
@@ -0,0 +1,47 @@
+id: 01-ai/yi-large
+canonical_slug: 01-ai/yi-large
+hugging_face_id: ''
+name: '01.AI: Yi Large'
+type: chat
+created: 1719273600
+description: |-
+  The Yi Large model was designed by 01.AI with the following use cases in mind: knowledge search, data classification, human-like chat bots, and customer service.
+
+  It stands out for its multilingual proficiency, particularly in Spanish, Chinese, Japanese, German, and French.
+
+  Check out the [launch announcement](https://01-ai.github.io/blog/01.ai-yi-large-llm-launch) to learn more.
+context_length: 32768
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Yi
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000003'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - response_format
+  - structured_outputs
+  - logit_bias
+  - logprobs
+  - top_logprobs
+model_provider: 01-ai
--- a/resources/model-catalogs/aetherwiing/mn-starcannon-12b.yaml
+++ b/resources/model-catalogs/aetherwiing/mn-starcannon-12b.yaml
@@ -0,0 +1,42 @@
+id: aetherwiing/mn-starcannon-12b
+canonical_slug: aetherwiing/mn-starcannon-12b
+hugging_face_id: aetherwiing/MN-12B-Starcannon-v2
+name: 'Aetherwiing: Starcannon 12B'
+type: chat
+created: 1723507200
+description: |-
+  Starcannon 12B v2 is a creative roleplay and story writing model, based on Mistral Nemo, using [nothingiisreal/mn-celeste-12b](/nothingiisreal/mn-celeste-12b) as a base, with [intervitens/mini-magnum-12b-v1.1](https://huggingface.co/intervitens/mini-magnum-12b-v1.1) merged in using the [TIES](https://arxiv.org/abs/2306.01708) method.
+
+  Although more similar to Magnum overall, the model remains very creative, with a pleasant writing style. It is recommended for people wanting more variety than Magnum, and yet more verbose prose than Celeste.
+context_length: 16384
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Mistral
+  instruct_type: chatml
+pricing:
+  prompt: '0.0000008'
+  completion: '0.0000012'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - top_k
+  - min_p
+  - seed
+model_provider: aetherwiing
--- a/resources/model-catalogs/ai21/jamba-1.6-large.yaml
+++ b/resources/model-catalogs/ai21/jamba-1.6-large.yaml
@@ -0,0 +1,38 @@
+id: ai21/jamba-1.6-large
+canonical_slug: ai21/jamba-1.6-large
+hugging_face_id: ai21labs/AI21-Jamba-Large-1.6
+name: 'AI21: Jamba 1.6 Large'
+type: chat
+created: 1741905173
+description: |-
+  AI21 Jamba Large 1.6 is a high-performance hybrid foundation model combining State Space Models (Mamba) with Transformer attention mechanisms. Developed by AI21, it excels in extremely long-context handling (256K tokens), demonstrates superior inference efficiency (up to 2.5x faster than comparable models), and supports structured JSON output and tool-use capabilities. It has 94 billion active parameters (398 billion total), optimized quantization support (ExpertsInt8), and multilingual proficiency in languages such as English, Spanish, French, Portuguese, Italian, Dutch, German, Arabic, and Hebrew.
+
+  Usage of this model is subject to the [Jamba Open Model License](https://www.ai21.com/licenses/jamba-open-model-license).
+context_length: 256000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.000002'
+  completion: '0.000008'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+model_provider: ai21
--- a/resources/model-catalogs/ai21/jamba-1.6-mini.yaml
+++ b/resources/model-catalogs/ai21/jamba-1.6-mini.yaml
@@ -0,0 +1,38 @@
+id: ai21/jamba-1.6-mini
+canonical_slug: ai21/jamba-1.6-mini
+hugging_face_id: ai21labs/AI21-Jamba-Mini-1.6
+name: 'AI21: Jamba Mini 1.6'
+type: chat
+created: 1741905171
+description: |-
+  AI21 Jamba Mini 1.6 is a hybrid foundation model combining State Space Models (Mamba) with Transformer attention mechanisms. With 12 billion active parameters (52 billion total), this model excels in extremely long-context tasks (up to 256K tokens) and achieves superior inference efficiency, outperforming comparable open models on tasks such as retrieval-augmented generation (RAG) and grounded question answering. Jamba Mini 1.6 supports multilingual tasks across English, Spanish, French, Portuguese, Italian, Dutch, German, Arabic, and Hebrew, along with structured JSON output and tool-use capabilities.
+
+  Usage of this model is subject to the [Jamba Open Model License](https://www.ai21.com/licenses/jamba-open-model-license).
+context_length: 256000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000002'
+  completion: '0.0000004'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+model_provider: ai21
--- a/resources/model-catalogs/aion-labs/aion-1.0-mini.yaml
+++ b/resources/model-catalogs/aion-labs/aion-1.0-mini.yaml
@@ -0,0 +1,34 @@
+id: aion-labs/aion-1.0-mini
+canonical_slug: aion-labs/aion-1.0-mini
+hugging_face_id: FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview
+name: 'AionLabs: Aion-1.0-Mini'
+type: chat
+created: 1738697107
+description: Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B, with benchmark results available on its [Hugging Face page](https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview), independently replicated for verification.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000007'
+  completion: '0.0000014'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+model_provider: aion-labs
--- a/resources/model-catalogs/aion-labs/aion-1.0.yaml
+++ b/resources/model-catalogs/aion-labs/aion-1.0.yaml
@@ -0,0 +1,34 @@
+id: aion-labs/aion-1.0
+canonical_slug: aion-labs/aion-1.0
+hugging_face_id: ''
+name: 'AionLabs: Aion-1.0'
+type: chat
+created: 1738697557
+description: Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). It is Aion Labs' most powerful reasoning model.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.000004'
+  completion: '0.000008'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+model_provider: aion-labs
--- a/resources/model-catalogs/aion-labs/aion-rp-llama-3.1-8b.yaml
+++ b/resources/model-catalogs/aion-labs/aion-rp-llama-3.1-8b.yaml
@@ -0,0 +1,32 @@
+id: aion-labs/aion-rp-llama-3.1-8b
+canonical_slug: aion-labs/aion-rp-llama-3.1-8b
+hugging_face_id: ''
+name: 'AionLabs: Aion-RP 1.0 (8B)'
+type: chat
+created: 1738696718
+description: Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.
+context_length: 32768
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000002'
+  completion: '0.0000002'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+model_provider: aion-labs
--- a/resources/model-catalogs/alfredpros/codellama-7b-instruct-solidity.yaml
+++ b/resources/model-catalogs/alfredpros/codellama-7b-instruct-solidity.yaml
@@ -0,0 +1,39 @@
+id: alfredpros/codellama-7b-instruct-solidity
+canonical_slug: alfredpros/codellama-7b-instruct-solidity
+hugging_face_id: AlfredPros/CodeLlama-7b-Instruct-Solidity
+name: 'AlfredPros: CodeLLaMa 7B Instruct Solidity'
+type: chat
+created: 1744641874
+description: A fine-tuned 7-billion-parameter Code LLaMA - Instruct model for generating Solidity smart contracts, trained with 4-bit QLoRA fine-tuning provided by the PEFT library.
+context_length: 4096
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: alpaca
+pricing:
+  prompt: '0.0000008'
+  completion: '0.0000012'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - top_k
+  - min_p
+  - seed
+model_provider: alfredpros
--- a/resources/model-catalogs/all-hands/openhands-lm-32b-v0.1.yaml
+++ b/resources/model-catalogs/all-hands/openhands-lm-32b-v0.1.yaml
@@ -0,0 +1,44 @@
+id: all-hands/openhands-lm-32b-v0.1
+canonical_slug: all-hands/openhands-lm-32b-v0.1
+hugging_face_id: all-hands/openhands-lm-32b-v0.1
+name: OpenHands LM 32B V0.1
+type: chat
+created: 1743613013
+description: |-
+  OpenHands LM v0.1 is a 32B open-source coding model fine-tuned from Qwen2.5-Coder-32B-Instruct using reinforcement learning techniques outlined in SWE-Gym. It is optimized for autonomous software development agents and achieves strong performance on SWE-Bench Verified, with a 37.2% resolve rate. The model supports a 128K token context window, making it well-suited for long-horizon code reasoning and large codebase tasks.
+
+  OpenHands LM is designed for local deployment and runs on consumer-grade GPUs such as a single 3090. It enables fully offline agent workflows without dependency on proprietary APIs. This release is intended as a research preview, and future updates aim to improve generalizability, reduce repetition, and offer smaller variants.
+context_length: 16384
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000026'
+  completion: '0.0000034'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - top_k
+  - min_p
+  - seed
+model_provider: all-hands
--- a/resources/model-catalogs/alpindale/goliath-120b.yaml
+++ b/resources/model-catalogs/alpindale/goliath-120b.yaml
@@ -0,0 +1,48 @@
+id: alpindale/goliath-120b
+canonical_slug: alpindale/goliath-120b
+hugging_face_id: alpindale/goliath-120b
+name: Goliath 120B
+type: chat
+created: 1699574400
+description: |-
+  A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.
+
+  Credits to
+  - [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).
+  - [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.
+
+  #merge
+context_length: 6144
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Llama2
+  instruct_type: airoboros
+pricing:
+  prompt: '0.00001'
+  completion: '0.0000125'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - logit_bias
+  - top_k
+  - min_p
+  - seed
+  - top_a
+model_provider: alpindale
--- a/resources/model-catalogs/alpindale/magnum-72b.yaml
+++ b/resources/model-catalogs/alpindale/magnum-72b.yaml
@@ -0,0 +1,42 @@
+id: alpindale/magnum-72b
+canonical_slug: alpindale/magnum-72b
+hugging_face_id: alpindale/magnum-72b-v1
+name: Magnum 72B
+type: chat
+created: 1720656000
+description: |-
+  From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the first in a new family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.
+
+  The model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.
+context_length: 16384
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: chatml
+pricing:
+  prompt: '0.000004'
+  completion: '0.000006'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - top_k
+  - min_p
+  - seed
+model_provider: alpindale
--- a/resources/model-catalogs/amazon/nova-lite-v1.yaml
+++ b/resources/model-catalogs/amazon/nova-lite-v1.yaml
@@ -0,0 +1,39 @@
+id: amazon/nova-lite-v1
+canonical_slug: amazon/nova-lite-v1
+hugging_face_id: ''
+name: 'Amazon: Nova Lite 1.0'
+type: chat
+created: 1733437363
+description: |-
+  Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.
+
+  With an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.
+context_length: 300000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Nova
+  instruct_type: null
+pricing:
+  prompt: '0.00000006'
+  completion: '0.00000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0.00009'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: amazon
--- a/resources/model-catalogs/amazon/nova-micro-v1.yaml
+++ b/resources/model-catalogs/amazon/nova-micro-v1.yaml
@@ -0,0 +1,35 @@
+id: amazon/nova-micro-v1
+canonical_slug: amazon/nova-micro-v1
+hugging_face_id: ''
+name: 'Amazon: Nova Micro 1.0'
+type: chat
+created: 1733437237
+description: Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Nova
+  instruct_type: null
+pricing:
+  prompt: '0.000000035'
+  completion: '0.00000014'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: amazon
--- a/resources/model-catalogs/amazon/nova-pro-v1.yaml
+++ b/resources/model-catalogs/amazon/nova-pro-v1.yaml
@@ -0,0 +1,41 @@
+id: amazon/nova-pro-v1
+canonical_slug: amazon/nova-pro-v1
+hugging_face_id: ''
+name: 'Amazon: Nova Pro 1.0'
+type: chat
+created: 1733436303
+description: |-
+  Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).
+
+  Amazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.
+
+  **NOTE**: Video input is not supported at this time.
+context_length: 300000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Nova
+  instruct_type: null
+pricing:
+  prompt: '0.0000008'
+  completion: '0.0000032'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0.0012'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: amazon
--- a/resources/model-catalogs/anthracite-org/magnum-v2-72b.yaml
+++ b/resources/model-catalogs/anthracite-org/magnum-v2-72b.yaml
@@ -0,0 +1,43 @@
+id: anthracite-org/magnum-v2-72b
+canonical_slug: anthracite-org/magnum-v2-72b
+hugging_face_id: anthracite-org/magnum-v2-72b
+name: Magnum v2 72B
+type: chat
+created: 1727654400
+description: |-
+  From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the seventh in a family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.
+
+  The model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.
+context_length: 32768
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: chatml
+pricing:
+  prompt: '0.000003'
+  completion: '0.000003'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - logit_bias
+  - top_k
+  - min_p
+  - seed
+model_provider: anthracite-org
--- a/resources/model-catalogs/anthracite-org/magnum-v4-72b.yaml
+++ b/resources/model-catalogs/anthracite-org/magnum-v4-72b.yaml
@@ -0,0 +1,44 @@
+id: anthracite-org/magnum-v4-72b
+canonical_slug: anthracite-org/magnum-v4-72b
+hugging_face_id: anthracite-org/magnum-v4-72b
+name: Magnum v4 72B
+type: chat
+created: 1729555200
+description: |-
+  This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically [Sonnet](https://openrouter.ai/anthropic/claude-3.5-sonnet) and [Opus](https://openrouter.ai/anthropic/claude-3-opus).
+
+  The model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).
+context_length: 16384
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: chatml
+pricing:
+  prompt: '0.0000025'
+  completion: '0.000003'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - repetition_penalty
+  - top_k
+  - min_p
+  - seed
+  - logit_bias
+  - top_a
+model_provider: anthracite-org
--- a/resources/model-catalogs/anthropic/claude-2-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-2-beta.yaml
@@ -0,0 +1,34 @@
+id: anthropic/claude-2:beta
+canonical_slug: anthropic/claude-2
+hugging_face_id: ''
+name: 'Anthropic: Claude v2 (self-moderated)'
+type: chat
+created: 1700611200
+description: 'Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.'
+context_length: 200000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000008'
+  completion: '0.000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-2.0-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-2.0-beta.yaml
@@ -0,0 +1,34 @@
+id: anthropic/claude-2.0:beta
+canonical_slug: anthropic/claude-2.0
+hugging_face_id: ''
+name: 'Anthropic: Claude v2.0 (self-moderated)'
+type: chat
+created: 1690502400
+description: Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.
+context_length: 100000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000008'
+  completion: '0.000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-2.0.yaml
+++ b/resources/model-catalogs/anthropic/claude-2.0.yaml
@@ -0,0 +1,34 @@
+id: anthropic/claude-2.0
+canonical_slug: anthropic/claude-2.0
+hugging_face_id: ''
+name: 'Anthropic: Claude v2.0'
+type: chat
+created: 1690502400
+description: Anthropic's flagship model. Superior performance on tasks that require complex reasoning. Supports hundreds of pages of text.
+context_length: 100000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000008'
+  completion: '0.000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-2.1-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-2.1-beta.yaml
@@ -0,0 +1,34 @@
+id: anthropic/claude-2.1:beta
+canonical_slug: anthropic/claude-2.1
+hugging_face_id: ''
+name: 'Anthropic: Claude v2.1 (self-moderated)'
+type: chat
+created: 1700611200
+description: 'Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.'
+context_length: 200000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000008'
+  completion: '0.000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-2.1.yaml
+++ b/resources/model-catalogs/anthropic/claude-2.1.yaml
@@ -0,0 +1,34 @@
+id: anthropic/claude-2.1
+canonical_slug: anthropic/claude-2.1
+hugging_face_id: ''
+name: 'Anthropic: Claude v2.1'
+type: chat
+created: 1700611200
+description: 'Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.'
+context_length: 200000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000008'
+  completion: '0.000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-2.yaml
+++ b/resources/model-catalogs/anthropic/claude-2.yaml
@@ -0,0 +1,34 @@
+id: anthropic/claude-2
+canonical_slug: anthropic/claude-2
+hugging_face_id: ''
+name: 'Anthropic: Claude v2'
+type: chat
+created: 1700611200
+description: 'Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.'
+context_length: 200000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000008'
+  completion: '0.000024'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3-haiku-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3-haiku-beta.yaml
@@ -0,0 +1,43 @@
+id: anthropic/claude-3-haiku:beta
+canonical_slug: anthropic/claude-3-haiku
+hugging_face_id: ''
+name: 'Anthropic: Claude 3 Haiku (self-moderated)'
+type: chat
+created: 1710288000
+description: |-
+  Claude 3 Haiku is Anthropic's fastest and most compact model for
+  near-instant responsiveness. Quick and accurate targeted performance.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.00000025'
+  completion: '0.00000125'
+  input_cache_read: '0.00000003'
+  input_cache_write: '0.0000003'
+  request: '0'
+  image: '0.0004'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3-haiku.yaml
+++ b/resources/model-catalogs/anthropic/claude-3-haiku.yaml
@@ -0,0 +1,43 @@
+id: anthropic/claude-3-haiku
+canonical_slug: anthropic/claude-3-haiku
+hugging_face_id: ''
+name: 'Anthropic: Claude 3 Haiku'
+type: chat
+created: 1710288000
+description: |-
+  Claude 3 Haiku is Anthropic's fastest and most compact model for
+  near-instant responsiveness. Quick and accurate targeted performance.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.00000025'
+  completion: '0.00000125'
+  input_cache_read: '0.00000003'
+  input_cache_write: '0.0000003'
+  request: '0'
+  image: '0.0004'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3-opus-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3-opus-beta.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3-opus:beta
+canonical_slug: anthropic/claude-3-opus
+hugging_face_id: ''
+name: 'Anthropic: Claude 3 Opus (self-moderated)'
+type: chat
+created: 1709596800
+description: |-
+  Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000015'
+  completion: '0.000075'
+  input_cache_read: '0.0000015'
+  input_cache_write: '0.00001875'
+  request: '0'
+  image: '0.024'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3-opus.yaml
+++ b/resources/model-catalogs/anthropic/claude-3-opus.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3-opus
+canonical_slug: anthropic/claude-3-opus
+hugging_face_id: ''
+name: 'Anthropic: Claude 3 Opus'
+type: chat
+created: 1709596800
+description: |-
+  Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000015'
+  completion: '0.000075'
+  input_cache_read: '0.0000015'
+  input_cache_write: '0.00001875'
+  request: '0'
+  image: '0.024'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3-sonnet-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3-sonnet-beta.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3-sonnet:beta
+canonical_slug: anthropic/claude-3-sonnet
+hugging_face_id: ''
+name: 'Anthropic: Claude 3 Sonnet (self-moderated)'
+type: chat
+created: 1709596800
+description: |-
+  Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3-sonnet.yaml
+++ b/resources/model-catalogs/anthropic/claude-3-sonnet.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3-sonnet
+canonical_slug: anthropic/claude-3-sonnet
+hugging_face_id: ''
+name: 'Anthropic: Claude 3 Sonnet'
+type: chat
+created: 1709596800
+description: |-
+  Claude 3 Sonnet is an ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-haiku-20241022-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-haiku-20241022-beta.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3.5-haiku-20241022:beta
+canonical_slug: anthropic/claude-3-5-haiku-20241022
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Haiku (2024-10-22) (self-moderated)'
+type: chat
+created: 1730678400
+description: |-
+  Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.
+
+  It does not support image inputs.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.0000008'
+  completion: '0.000004'
+  input_cache_read: '0.00000008'
+  input_cache_write: '0.000001'
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-haiku-20241022.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-haiku-20241022.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3.5-haiku-20241022
+canonical_slug: anthropic/claude-3-5-haiku-20241022
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Haiku (2024-10-22)'
+type: chat
+created: 1730678400
+description: |-
+  Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.
+
+  It does not support image inputs.
+
+  See the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.0000008'
+  completion: '0.000004'
+  input_cache_read: '0.00000008'
+  input_cache_write: '0.000001'
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-haiku-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-haiku-beta.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3.5-haiku:beta
+canonical_slug: anthropic/claude-3-5-haiku
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Haiku (self-moderated)'
+type: chat
+created: 1730678400
+description: |-
+  Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.
+
+  This makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.
+
+  This model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.0000008'
+  completion: '0.000004'
+  input_cache_read: '0.00000008'
+  input_cache_write: '0.000001'
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-haiku.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-haiku.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-3.5-haiku
+canonical_slug: anthropic/claude-3-5-haiku
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Haiku'
+type: chat
+created: 1730678400
+description: |-
+  Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.
+
+  This makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.
+
+  This model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.0000008'
+  completion: '0.000004'
+  input_cache_read: '0.00000008'
+  input_cache_write: '0.000001'
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-sonnet-20240620-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-sonnet-20240620-beta.yaml
@@ -0,0 +1,47 @@
+id: anthropic/claude-3.5-sonnet-20240620:beta
+canonical_slug: anthropic/claude-3.5-sonnet-20240620
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Sonnet (2024-06-20) (self-moderated)'
+type: chat
+created: 1718841600
+description: |-
+  Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
+
+  - Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting
+  - Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights
+  - Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone
+  - Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)
+
+  For the latest version (2024-10-23), check out [Claude 3.5 Sonnet](/anthropic/claude-3.5-sonnet).
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-sonnet-20240620.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-sonnet-20240620.yaml
@@ -0,0 +1,47 @@
+id: anthropic/claude-3.5-sonnet-20240620
+canonical_slug: anthropic/claude-3.5-sonnet-20240620
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Sonnet (2024-06-20)'
+type: chat
+created: 1718841600
+description: |-
+  Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
+
+  - Coding: Autonomously writes, edits, and runs code with reasoning and troubleshooting
+  - Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights
+  - Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone
+  - Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)
+
+  For the latest version (2024-10-23), check out [Claude 3.5 Sonnet](/anthropic/claude-3.5-sonnet).
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-sonnet-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-sonnet-beta.yaml
@@ -0,0 +1,45 @@
+id: anthropic/claude-3.5-sonnet:beta
+canonical_slug: anthropic/claude-3.5-sonnet
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Sonnet (self-moderated)'
+type: chat
+created: 1729555200
+description: |-
+  New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
+
+  - Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding
+  - Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights
+  - Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone
+  - Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.5-sonnet.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.5-sonnet.yaml
@@ -0,0 +1,45 @@
+id: anthropic/claude-3.5-sonnet
+canonical_slug: anthropic/claude-3.5-sonnet
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.5 Sonnet'
+type: chat
+created: 1729555200
+description: |-
+  New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:
+
+  - Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding
+  - Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights
+  - Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone
+  - Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)
+
+  #multimodal
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - top_k
+  - stop
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.7-sonnet-beta.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.7-sonnet-beta.yaml
@@ -0,0 +1,37 @@
+id: anthropic/claude-3.7-sonnet:beta
+canonical_slug: anthropic/claude-3-7-sonnet-20250219
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.7 Sonnet (self-moderated)'
+type: chat
+created: 1740422110
+description: "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)"
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.7-sonnet-thinking.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.7-sonnet-thinking.yaml
@@ -0,0 +1,37 @@
+id: anthropic/claude-3.7-sonnet:thinking
+canonical_slug: anthropic/claude-3-7-sonnet-20250219
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.7 Sonnet (thinking)'
+type: chat
+created: 1740422110
+description: "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)"
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-3.7-sonnet.yaml
+++ b/resources/model-catalogs/anthropic/claude-3.7-sonnet.yaml
@@ -0,0 +1,39 @@
+id: anthropic/claude-3.7-sonnet
+canonical_slug: anthropic/claude-3-7-sonnet-20250219
+hugging_face_id: ''
+name: 'Anthropic: Claude 3.7 Sonnet'
+type: chat
+created: 1740422110
+description: "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)"
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+  - top_p
+  - top_k
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-opus-4.yaml
+++ b/resources/model-catalogs/anthropic/claude-opus-4.yaml
@@ -0,0 +1,39 @@
+id: anthropic/claude-opus-4
+canonical_slug: anthropic/claude-4-opus-20250522
+hugging_face_id: ''
+name: 'Anthropic: Claude Opus 4'
+type: chat
+created: 1747931245
+description: "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)"
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - image
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000015'
+  completion: '0.000075'
+  input_cache_read: '0.0000015'
+  input_cache_write: '0.00001875'
+  request: '0'
+  image: '0.024'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+  - top_p
+  - top_k
+model_provider: anthropic
--- a/resources/model-catalogs/anthropic/claude-sonnet-4.yaml
+++ b/resources/model-catalogs/anthropic/claude-sonnet-4.yaml
@@ -0,0 +1,42 @@
+id: anthropic/claude-sonnet-4
+canonical_slug: anthropic/claude-4-sonnet-20250522
+hugging_face_id: ''
+name: 'Anthropic: Claude Sonnet 4'
+type: chat
+created: 1747930371
+description: |-
+  Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.
+
+  Read more at the [blog post here](https://www.anthropic.com/news/claude-4)
+context_length: 200000
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - image
+    - text
+  output_modalities:
+    - text
+  tokenizer: Claude
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: '0.0000003'
+  input_cache_write: '0.00000375'
+  request: '0'
+  image: '0.0048'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+  - top_p
+  - top_k
+model_provider: anthropic
--- a/resources/model-catalogs/arcee-ai/arcee-blitz.yaml
+++ b/resources/model-catalogs/arcee-ai/arcee-blitz.yaml
@@ -0,0 +1,40 @@
+id: arcee-ai/arcee-blitz
+canonical_slug: arcee-ai/arcee-blitz
+hugging_face_id: arcee-ai/arcee-blitz
+name: 'Arcee AI: Arcee Blitz'
+type: chat
+created: 1746470100
+description: 'Arcee Blitz is a 24 B‑parameter dense model distilled from DeepSeek and built on Mistral architecture for "everyday" chat. The distillation‑plus‑refinement pipeline trims compute while keeping DeepSeek‑style reasoning, so Blitz punches above its weight on MMLU, GSM‑8K and BBH compared with other mid‑size open models. With a default 128 k context window and competitive throughput, it serves as a cost‑efficient workhorse for summarization, brainstorming and light code help. Internally, Arcee uses Blitz as the default writer in Conductor pipelines when the heavier Virtuoso line is not required. Users therefore get near‑70 B quality at ~⅓ the latency and price. '
+context_length: 32768
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.00000045'
+  completion: '0.00000075'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/arcee-ai/caller-large.yaml
+++ b/resources/model-catalogs/arcee-ai/caller-large.yaml
@@ -0,0 +1,42 @@
+id: arcee-ai/caller-large
+canonical_slug: arcee-ai/caller-large
+hugging_face_id: ''
+name: 'Arcee AI: Caller Large'
+type: chat
+created: 1746487869
+description: 'Caller Large is Arcee''s specialist "function‑calling" SLM built to orchestrate external tools and APIs. Instead of maximizing next‑token accuracy, training focuses on structured JSON outputs, parameter extraction and multi‑step tool chains, making Caller a natural choice for retrieval‑augmented generation, robotic process automation or data‑pull chatbots. It incorporates a routing head that decides when (and how) to invoke a tool versus answering directly, reducing hallucinated calls. The model is already the backbone of Arcee Conductor''s auto‑tool mode, where it parses user intent, emits clean function signatures and hands control back once the tool response is ready. Developers thus gain an OpenAI‑style function‑calling UX without handing requests to a frontier‑scale model. '
+context_length: 32768
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.00000055'
+  completion: '0.00000085'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/arcee-ai/coder-large.yaml
+++ b/resources/model-catalogs/arcee-ai/coder-large.yaml
@@ -0,0 +1,40 @@
+id: arcee-ai/coder-large
+canonical_slug: arcee-ai/coder-large
+hugging_face_id: ''
+name: 'Arcee AI: Coder Large'
+type: chat
+created: 1746478663
+description: 'Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. It supports a 32k context window, enabling multi‑file refactoring or long diff review in a single call, and understands 30‑plus programming languages with special attention to TypeScript, Go and Terraform. Internal benchmarks show 5–8 pt gains over CodeLlama‑34 B‑Python on HumanEval and competitive BugFix scores thanks to a reinforcement pass that rewards compilable output. The model emits structured explanations alongside code blocks by default, making it suitable for educational tooling as well as production copilot scenarios. Cost‑wise, Together AI prices it well below proprietary incumbents, so teams can scale interactive coding without runaway spend. '
+context_length: 32768
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000005'
+  completion: '0.0000008'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/arcee-ai/maestro-reasoning.yaml
+++ b/resources/model-catalogs/arcee-ai/maestro-reasoning.yaml
@@ -0,0 +1,40 @@
+id: arcee-ai/maestro-reasoning
+canonical_slug: arcee-ai/maestro-reasoning
+hugging_face_id: ''
+name: 'Arcee AI: Maestro Reasoning'
+type: chat
+created: 1746481269
+description: 'Maestro Reasoning is Arcee''s flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B preview, the production 32 B release widens the context window to 128 k tokens and doubles pass‑rate on MATH and GSM‑8K, while also lifting code completion accuracy. Its instruction style encourages structured "thought → answer" traces that can be parsed or hidden according to user preference. That transparency pairs well with audit‑focused industries like finance or healthcare where seeing the reasoning path matters. In Arcee Conductor, Maestro is automatically selected for complex, multi‑constraint queries that smaller SLMs bounce. '
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000009'
+  completion: '0.0000033'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/arcee-ai/spotlight.yaml
+++ b/resources/model-catalogs/arcee-ai/spotlight.yaml
@@ -0,0 +1,41 @@
+id: arcee-ai/spotlight
+canonical_slug: arcee-ai/spotlight
+hugging_face_id: ''
+name: 'Arcee AI: Spotlight'
+type: chat
+created: 1746481552
+description: 'Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal conversations that combine lengthy documents with one or more images. Training emphasized fast inference on consumer GPUs while retaining strong captioning, visual‐question‑answering, and diagram‑analysis accuracy. As a result, Spotlight slots neatly into agent workflows where screenshots, charts or UI mock‑ups need to be interpreted on the fly. Early benchmarks show it matching or out‑scoring larger VLMs such as LLaVA‑1.6 13 B on popular VQA and POPE alignment tests. '
+context_length: 131072
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - image
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.00000018'
+  completion: '0.00000018'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/arcee-ai/virtuoso-large.yaml
+++ b/resources/model-catalogs/arcee-ai/virtuoso-large.yaml
@@ -0,0 +1,42 @@
+id: arcee-ai/virtuoso-large
+canonical_slug: arcee-ai/virtuoso-large
+hugging_face_id: ''
+name: 'Arcee AI: Virtuoso Large'
+type: chat
+created: 1746478885
+description: Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the "fallback" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.00000075'
+  completion: '0.0000012'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/arcee-ai/virtuoso-medium-v2.yaml
+++ b/resources/model-catalogs/arcee-ai/virtuoso-medium-v2.yaml
@@ -0,0 +1,42 @@
+id: arcee-ai/virtuoso-medium-v2
+canonical_slug: arcee-ai/virtuoso-medium-v2
+hugging_face_id: arcee-ai/Virtuoso-Medium-v2
+name: 'Arcee AI: Virtuoso Medium V2'
+type: chat
+created: 1746478434
+description: 'Virtuoso‑Medium‑v2 is a 32 B model distilled from DeepSeek‑v3 logits and merged back onto a Qwen 2.5 backbone, yielding a sharper, more factual successor to the original Virtuoso Medium. The team harvested ~1.1 B logit tokens and applied "fusion‑merging" plus DPO alignment, which pushed scores past Arcee‑Nova 2024 and many 40 B‑plus peers on MMLU‑Pro, MATH and HumanEval. With a 128 k context and aggressive quantization options (from BF16 down to 4‑bit GGUF), it balances capability with deployability on single‑GPU nodes. Typical use cases include enterprise chat assistants, technical writing aids and medium‑complexity code drafting where Virtuoso‑Large would be overkill. '
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000005'
+  completion: '0.0000008'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: arcee-ai
--- a/resources/model-catalogs/bytedance/doubao-embedding-large-text-240715.yaml
+++ b/resources/model-catalogs/bytedance/doubao-embedding-large-text-240715.yaml
@@ -0,0 +1,24 @@
+id: bytedance/doubao-embedding-large-text-240715
+canonical_slug: bytedance/doubao-embedding-large-text-240715
+type: embedding
+hugging_face_id: null
+name: 'ByteDance: Doubao Embedding Large Text (240715)'
+description: |-
+  Doubao Embedding Large 是字节跳动语义向量化模型的最新升级版，模型以豆包语言模型为基座，具备强大的语言理解能力；主要面向向量检索的使用场景，支持中、英双语。
+context_length: 4000
+dimensions:
+  - 512
+  - 1024
+  - 2048
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Doubao
+pricing:
+  prompt: '0.7'
+  unit: 1000000
+  currency: CNY
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-embedding-large-text-240915.yaml
+++ b/resources/model-catalogs/bytedance/doubao-embedding-large-text-240915.yaml
@@ -0,0 +1,25 @@
+id: bytedance/doubao-embedding-large-text-240915
+canonical_slug: bytedance/doubao-embedding-large-text-240915
+type: embedding
+hugging_face_id: null
+name: 'ByteDance: Doubao Embedding Large Text (240915)'
+description: |-
+  Doubao Embedding Large 是字节跳动语义向量化模型的最新升级版，模型以豆包语言模型为基座，具备强大的语言理解能力；主要面向向量检索的使用场景，支持中、英双语。
+context_length: 4000
+dimensions:
+  - 512
+  - 1024
+  - 2048
+  - 4096
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Doubao
+pricing:
+  prompt: '0.7'
+  unit: 1000000
+  currency: CNY
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-embedding-text-240515.yaml
+++ b/resources/model-catalogs/bytedance/doubao-embedding-text-240515.yaml
@@ -0,0 +1,24 @@
+id: bytedance/doubao-embedding-text-240515
+canonical_slug: bytedance/doubao-embedding-text-240515
+type: embedding
+hugging_face_id: null
+name: 'ByteDance: Doubao Embedding'
+description: |-
+  由字节跳动研发的语义向量化模型，主要面向向量检索的使用场景，支持中、英双语，最长 4K 上下文长度。向量维度 2048 维，支持 512、1024 降维使用。
+context_length: 4000
+dimensions:
+  - 512
+  - 1024
+  - 2048
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Doubao
+pricing:
+  prompt: '0.5'
+  unit: 1000000
+  currency: CNY
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-embedding-text-240715.yaml
+++ b/resources/model-catalogs/bytedance/doubao-embedding-text-240715.yaml
@@ -0,0 +1,25 @@
+id: bytedance/doubao-embedding-text-240715
+canonical_slug: bytedance/doubao-embedding-text-240715
+type: embedding
+hugging_face_id: null
+name: 'ByteDance: Doubao Embedding'
+description: |-
+  由字节跳动研发的语义向量化模型，主要面向向量检索的使用场景，支持中、英双语，最长 4K 上下文长度。向量维度 2560 维，支持 512、1024、2048 降维使用。
+context_length: 4000
+dimensions:
+  - 512
+  - 1024
+  - 2048
+  - 2560
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Doubao
+pricing:
+  prompt: '0.5'
+  unit: 1000000
+  currency: CNY
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-embedding-vision-241215.yaml
+++ b/resources/model-catalogs/bytedance/doubao-embedding-vision-241215.yaml
@@ -0,0 +1,24 @@
+id: bytedance/doubao-embedding-vision-241215
+canonical_slug: bytedance/doubao-embedding-vision-241215
+type: embedding
+hugging_face_id: null
+name: 'ByteDance: Doubao Embedding Vision'
+description: |-
+  Doubao-embedding-vision，全新升级图文多模态向量化模型，主要面向图文多模向量检索的使用场景，支持图片输入及中、英双语文本输入，最长 8K 上下文长度。
+context_length: 8000
+dimensions:
+  - 3072
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Doubao
+pricing:
+  prompt: '0.7'
+  prompt_image: '1.8'
+  unit: 1000000
+  currency: CNY
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-embedding-vision-250328.yaml
+++ b/resources/model-catalogs/bytedance/doubao-embedding-vision-250328.yaml
@@ -0,0 +1,25 @@
+id: bytedance/doubao-embedding-vision-250328
+canonical_slug: bytedance/doubao-embedding-vision-250328
+type: embedding
+hugging_face_id: null
+name: 'ByteDance: Doubao Embedding Vision'
+description: |-
+  Doubao-embedding-vision，全新升级图文多模态向量化模型，主要面向图文多模向量检索的使用场景，支持图片输入及中、英双语文本输入，最长 8K 上下文长度。
+context_length: 8000
+dimensions:
+  - 1024
+  - 2048
+architecture:
+  modality: text+image->text
+  input_modalities:
+    - text
+    - image
+  output_modalities:
+    - text
+  tokenizer: Doubao
+pricing:
+  prompt: '0.7'
+  prompt_image: '1.8'
+  unit: 1000000
+  currency: CNY
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-seed-1.6-flash.yaml
+++ b/resources/model-catalogs/bytedance/doubao-seed-1.6-flash.yaml
@@ -0,0 +1,41 @@
+id: bytedance/doubao-seed-1.6-flash
+canonical_slug: bytedance/doubao-seed-1.6-flash
+type: chat
+hugging_face_id: ''
+name: 'ByteDance: Doubao Seed 1.6 Flash'
+created: 1738402289
+description: 有极致推理速度的多模态深度思考模型；同时支持文本和视觉理解。文本理解能力超过上一代 Lite 系列模型，视觉理解比肩友商 Pro 系列模型。
+context_length: 256000
+architecture:
+  modality: text+image+video->text
+  input_modalities:
+    - text
+    - image
+    - video
+  output_modalities:
+    - text
+  tokenizer: Doubao
+  instruct_type: null
+pricing:
+  prompt: '0.15'
+  completion: '1.5'
+  input_cache_read: '0.03'
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  currency: CNY
+  unit: 1000000
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+  - top_p
+  - top_k
+  - structured_outputs
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-seed-1.6-thinking.yaml
+++ b/resources/model-catalogs/bytedance/doubao-seed-1.6-thinking.yaml
@@ -0,0 +1,41 @@
+id: bytedance/doubao-seed-1.6-thinking
+canonical_slug: bytedance/doubao-seed-1.6-thinking
+type: chat
+hugging_face_id: ''
+name: 'ByteDance: Doubao Seed 1.6 Thinking'
+created: 1738402289
+description: 在思考能力上进行了大幅强化， 对比 doubao 1.5 代深度理解模型，在编程、数学、逻辑推理等基础能力上进一步提升， 支持视觉理解。
+context_length: 256000
+architecture:
+  modality: text+image+video->text
+  input_modalities:
+    - text
+    - image
+    - video
+  output_modalities:
+    - text
+  tokenizer: Doubao
+  instruct_type: null
+pricing:
+  prompt: '0.8'
+  completion: '8.0'
+  input_cache_read: '0.16'
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  currency: CNY
+  unit: 1000000
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+  - top_p
+  - top_k
+  - structured_outputs
+model_provider: bytedance
--- a/resources/model-catalogs/bytedance/doubao-seed-1.6.yaml
+++ b/resources/model-catalogs/bytedance/doubao-seed-1.6.yaml
@@ -0,0 +1,41 @@
+id: bytedance/doubao-seed-1.6
+canonical_slug: bytedance/doubao-seed-1.6
+type: chat
+hugging_face_id: ''
+name: 'ByteDance: Doubao Seed 1.6'
+created: 1738402289
+description: 全新多模态深度思考模型，同时支持 thinking、non-thinking、auto三种思考模式。其中 non-thinking 模型对比 doubao-1.5-pro-32k-250115 模型大幅提升。
+context_length: 256000
+architecture:
+  modality: text+image+video->text
+  input_modalities:
+    - text
+    - image
+    - video
+  output_modalities:
+    - text
+  tokenizer: Doubao
+  instruct_type: null
+pricing:
+  prompt: '0.8'
+  completion: '8.0'
+  input_cache_read: '0.16'
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  currency: CNY
+  unit: 1000000
+supported_parameters:
+  - max_tokens
+  - temperature
+  - stop
+  - reasoning
+  - include_reasoning
+  - tools
+  - tool_choice
+  - top_p
+  - top_k
+  - structured_outputs
+model_provider: bytedance
--- a/resources/model-catalogs/cognitivecomputations/dolphin-mixtral-8x22b.yaml
+++ b/resources/model-catalogs/cognitivecomputations/dolphin-mixtral-8x22b.yaml
@@ -0,0 +1,47 @@
+id: cognitivecomputations/dolphin-mixtral-8x22b
+canonical_slug: cognitivecomputations/dolphin-mixtral-8x22b
+hugging_face_id: cognitivecomputations/dolphin-2.9.2-mixtral-8x22b
+name: "Dolphin 2.9.2 Mixtral 8x22B \U0001F42C"
+type: chat
+created: 1717804800
+description: |-
+  Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of [Mixtral 8x22B Instruct](/models/mistralai/mixtral-8x22b-instruct). It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.
+
+  This model is a successor to [Dolphin Mixtral 8x7B](/models/cognitivecomputations/dolphin-mixtral-8x7b).
+
+  The model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use. Users are cautioned to use this highly compliant model responsibly, as detailed in a blog post about uncensored models at [erichartford.com/uncensored-models](https://erichartford.com/uncensored-models).
+
+  #moe #uncensored
+context_length: 16000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Mistral
+  instruct_type: chatml
+pricing:
+  prompt: '0.0000009'
+  completion: '0.0000009'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - seed
+  - top_k
+  - min_p
+  - repetition_penalty
+  - logit_bias
+model_provider: cognitivecomputations
--- a/resources/model-catalogs/cohere/command-a.yaml
+++ b/resources/model-catalogs/cohere/command-a.yaml
@@ -0,0 +1,41 @@
+id: cohere/command-a
+canonical_slug: cohere/command-a-03-2025
+hugging_face_id: CohereForAI/c4ai-command-a-03-2025
+name: 'Cohere: Command A'
+type: chat
+created: 1741894342
+description: |-
+  Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases.
+  Compared to other leading proprietary and open-weights models Command A delivers maximum performance with minimum hardware costs, excelling on business-critical agentic and multilingual tasks.
+context_length: 256000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: null
+pricing:
+  prompt: '0.0000025'
+  completion: '0.00001'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r-03-2024.yaml
+++ b/resources/model-catalogs/cohere/command-r-03-2024.yaml
@@ -0,0 +1,45 @@
+id: cohere/command-r-03-2024
+canonical_slug: cohere/command-r-03-2024
+hugging_face_id: ''
+name: 'Cohere: Command R (03-2024)'
+type: chat
+created: 1709341200
+description: |-
+  Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.
+
+  Read the launch post [here](https://txt.cohere.com/command-r/).
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.0000005'
+  completion: '0.0000015'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r-08-2024.yaml
+++ b/resources/model-catalogs/cohere/command-r-08-2024.yaml
@@ -0,0 +1,45 @@
+id: cohere/command-r-08-2024
+canonical_slug: cohere/command-r-08-2024
+hugging_face_id: ''
+name: 'Cohere: Command R (08-2024)'
+type: chat
+created: 1724976000
+description: |-
+  command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.
+
+  Read the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.00000015'
+  completion: '0.0000006'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r-plus-04-2024.yaml
+++ b/resources/model-catalogs/cohere/command-r-plus-04-2024.yaml
@@ -0,0 +1,45 @@
+id: cohere/command-r-plus-04-2024
+canonical_slug: cohere/command-r-plus-04-2024
+hugging_face_id: ''
+name: 'Cohere: Command R+ (04-2024)'
+type: chat
+created: 1712016000
+description: |-
+  Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).
+
+  It offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r-plus-08-2024.yaml
+++ b/resources/model-catalogs/cohere/command-r-plus-08-2024.yaml
@@ -0,0 +1,45 @@
+id: cohere/command-r-plus-08-2024
+canonical_slug: cohere/command-r-plus-08-2024
+hugging_face_id: ''
+name: 'Cohere: Command R+ (08-2024)'
+type: chat
+created: 1724976000
+description: |-
+  command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.
+
+  Read the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.0000025'
+  completion: '0.00001'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r-plus.yaml
+++ b/resources/model-catalogs/cohere/command-r-plus.yaml
@@ -0,0 +1,45 @@
+id: cohere/command-r-plus
+canonical_slug: cohere/command-r-plus
+hugging_face_id: ''
+name: 'Cohere: Command R+'
+type: chat
+created: 1712188800
+description: |-
+  Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).
+
+  It offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.000003'
+  completion: '0.000015'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r.yaml
+++ b/resources/model-catalogs/cohere/command-r.yaml
@@ -0,0 +1,45 @@
+id: cohere/command-r
+canonical_slug: cohere/command-r
+hugging_face_id: ''
+name: 'Cohere: Command R'
+type: chat
+created: 1710374400
+description: |-
+  Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.
+
+  Read the launch post [here](https://txt.cohere.com/command-r/).
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.0000005'
+  completion: '0.0000015'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command-r7b-12-2024.yaml
+++ b/resources/model-catalogs/cohere/command-r7b-12-2024.yaml
@@ -0,0 +1,42 @@
+id: cohere/command-r7b-12-2024
+canonical_slug: cohere/command-r7b-12-2024
+hugging_face_id: ''
+name: 'Cohere: Command R7B (12-2024)'
+type: chat
+created: 1734158152
+description: |-
+  Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.0000000375'
+  completion: '0.00000015'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/cohere/command.yaml
+++ b/resources/model-catalogs/cohere/command.yaml
@@ -0,0 +1,42 @@
+id: cohere/command
+canonical_slug: cohere/command
+hugging_face_id: ''
+name: 'Cohere: Command'
+type: chat
+created: 1710374400
+description: |-
+  Command is an instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.
+
+  Use of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).
+context_length: 4096
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Cohere
+  instruct_type: null
+pricing:
+  prompt: '0.000001'
+  completion: '0.000002'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - seed
+  - response_format
+  - structured_outputs
+model_provider: cohere
--- a/resources/model-catalogs/deepseek/deepseek-chat-v3-0324.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-chat-v3-0324.yaml
@@ -0,0 +1,49 @@
+id: deepseek/deepseek-chat-v3-0324
+canonical_slug: deepseek/deepseek-chat-v3-0324
+hugging_face_id: deepseek-ai/DeepSeek-V3-0324
+name: 'DeepSeek: DeepSeek V3 0324'
+type: chat
+created: 1742824755
+description: |-
+  DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.
+
+  It succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.
+context_length: 163840
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: DeepSeek
+  instruct_type: null
+pricing:
+  prompt: '0.0000003'
+  completion: '0.00000088'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - structured_outputs
+  - response_format
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - logprobs
+  - top_logprobs
+  - seed
+  - min_p
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-chat.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-chat.yaml
@@ -0,0 +1,49 @@
+id: deepseek/deepseek-chat
+canonical_slug: deepseek/deepseek-chat-v3
+hugging_face_id: deepseek-ai/DeepSeek-V3
+name: 'DeepSeek: DeepSeek V3'
+type: chat
+created: 1735241320
+description: |-
+  DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.
+
+  For model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).
+context_length: 163840
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: DeepSeek
+  instruct_type: null
+pricing:
+  prompt: '0.00000038'
+  completion: '0.00000089'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - tools
+  - tool_choice
+  - max_tokens
+  - temperature
+  - top_p
+  - structured_outputs
+  - response_format
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - logprobs
+  - top_logprobs
+  - seed
+  - min_p
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-prover-v2.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-prover-v2.yaml
@@ -0,0 +1,41 @@
+id: deepseek/deepseek-prover-v2
+canonical_slug: deepseek/deepseek-prover-v2
+hugging_face_id: deepseek-ai/DeepSeek-Prover-V2-671B
+name: 'DeepSeek: DeepSeek Prover V2'
+type: chat
+created: 1746013094
+description: DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL). Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: DeepSeek
+  instruct_type: null
+pricing:
+  prompt: '0.0000005'
+  completion: '0.00000218'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - seed
+  - top_k
+  - min_p
+  - repetition_penalty
+  - logit_bias
+  - response_format
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-0528-qwen3-8b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-0528-qwen3-8b.yaml
@@ -0,0 +1,45 @@
+id: deepseek/deepseek-r1-0528-qwen3-8b
+canonical_slug: deepseek/deepseek-r1-0528-qwen3-8b
+hugging_face_id: deepseek-ai/deepseek-r1-0528-qwen3-8b
+name: 'DeepSeek: Deepseek R1 0528 Qwen3 8B'
+type: chat
+created: 1748538543
+description: |-
+  DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and Gemini 2.5 Pro.
+  It now tops math, programming, and logic leaderboards, showcasing a step-change in depth-of-thought.
+  The distilled variant, DeepSeek-R1-0528-Qwen3-8B, transfers this chain-of-thought into an 8 B-parameter form, beating standard Qwen3 8B by +10 pp and tying the 235 B “thinking” giant on AIME 2024.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.00000005'
+  completion: '0.0000001'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - presence_penalty
+  - frequency_penalty
+  - repetition_penalty
+  - top_k
+  - stop
+  - seed
+  - min_p
+  - logit_bias
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-0528.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-0528.yaml
@@ -0,0 +1,51 @@
+id: deepseek/deepseek-r1-0528
+canonical_slug: deepseek/deepseek-r1-0528
+hugging_face_id: deepseek-ai/DeepSeek-R1-0528
+name: 'DeepSeek: R1 0528'
+type: chat
+created: 1748455170
+description: |-
+  May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1). Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.
+
+  Fully open-source model.
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: DeepSeek
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.0000005'
+  completion: '0.00000215'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+  - logprobs
+  - top_logprobs
+  - tools
+  - tool_choice
+  - seed
+  - structured_outputs
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-distill-llama-70b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-distill-llama-70b.yaml
@@ -0,0 +1,55 @@
+id: deepseek/deepseek-r1-distill-llama-70b
+canonical_slug: deepseek/deepseek-r1-distill-llama-70b
+hugging_face_id: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+name: 'DeepSeek: R1 Distill Llama 70B'
+type: chat
+created: 1737663169
+description: |-
+  DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:
+
+  - AIME 2024 pass@1: 70.0
+  - MATH-500 pass@1: 94.5
+  - CodeForces Rating: 1633
+
+  The model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Llama3
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.0000001'
+  completion: '0.0000004'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - seed
+  - top_k
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - logit_bias
+  - logprobs
+  - top_logprobs
+  - min_p
+  - repetition_penalty
+  - tools
+  - tool_choice
+  - response_format
+  - structured_outputs
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-distill-llama-8b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-distill-llama-8b.yaml
@@ -0,0 +1,42 @@
+id: deepseek/deepseek-r1-distill-llama-8b
+canonical_slug: deepseek/deepseek-r1-distill-llama-8b
+hugging_face_id: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+name: 'DeepSeek: R1 Distill Llama 8B'
+type: chat
+created: 1738937718
+description: "DeepSeek R1 Distill Llama 8B is a distilled large language model based on [Llama-3.1-8B-Instruct](/meta-llama/llama-3.1-8b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 50.4\n- MATH-500 pass@1: 89.1\n- CodeForces Rating: 1205\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.\n\nHugging Face:\n- [Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B)\n- [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B)"
+context_length: 32000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Llama3
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.00000004'
+  completion: '0.00000004'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - seed
+  - top_k
+  - min_p
+  - repetition_penalty
+  - logit_bias
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-1.5b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-1.5b.yaml
@@ -0,0 +1,51 @@
+id: deepseek/deepseek-r1-distill-qwen-1.5b
+canonical_slug: deepseek/deepseek-r1-distill-qwen-1.5b
+hugging_face_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+name: 'DeepSeek: R1 Distill Qwen 1.5B'
+type: chat
+created: 1738328067
+description: |-
+  DeepSeek R1 Distill Qwen 1.5B is a distilled large language model based on [Qwen 2.5 Math 1.5B](https://huggingface.co/Qwen/Qwen2.5-Math-1.5B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It's a very small and efficient model which outperforms [GPT 4o 0513](/openai/gpt-4o-2024-05-13) on Math Benchmarks.
+
+  Other benchmark results include:
+
+  - AIME 2024 pass@1: 28.9
+  - AIME 2024 cons@64: 52.7
+  - MATH-500 pass@1: 83.9
+
+  The model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Other
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.00000018'
+  completion: '0.00000018'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - repetition_penalty
+  - logit_bias
+  - min_p
+  - response_format
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-14b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-14b.yaml
@@ -0,0 +1,52 @@
+id: deepseek/deepseek-r1-distill-qwen-14b
+canonical_slug: deepseek/deepseek-r1-distill-qwen-14b
+hugging_face_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
+name: 'DeepSeek: R1 Distill Qwen 14B'
+type: chat
+created: 1738193940
+description: |-
+  DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/Qwen/Qwen2.5-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.
+
+  Other benchmark results include:
+
+  - AIME 2024 pass@1: 69.7
+  - MATH-500 pass@1: 93.9
+  - CodeForces Rating: 1481
+
+  The model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.
+context_length: 64000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.00000015'
+  completion: '0.00000015'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - seed
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - min_p
+  - repetition_penalty
+  - logit_bias
+  - response_format
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-32b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-32b.yaml
@@ -0,0 +1,43 @@
+id: deepseek/deepseek-r1-distill-qwen-32b
+canonical_slug: deepseek/deepseek-r1-distill-qwen-32b
+hugging_face_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
+name: 'DeepSeek: R1 Distill Qwen 32B'
+type: chat
+created: 1738194830
+description: "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 72.6\n- MATH-500 pass@1: 94.3\n- CodeForces Rating: 1691\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models."
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.00000012'
+  completion: '0.00000018'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - seed
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - top_k
+  - min_p
+  - repetition_penalty
+  - logit_bias
+  - response_format
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-7b.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1-distill-qwen-7b.yaml
@@ -0,0 +1,35 @@
+id: deepseek/deepseek-r1-distill-qwen-7b
+canonical_slug: deepseek/deepseek-r1-distill-qwen-7b
+hugging_face_id: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+name: 'DeepSeek: R1 Distill Qwen 7B'
+type: chat
+created: 1748628237
+description: DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.
+context_length: 131072
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: Qwen
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.0000001'
+  completion: '0.0000002'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - seed
+model_provider: deepseek
--- a/resources/model-catalogs/deepseek/deepseek-r1.yaml
+++ b/resources/model-catalogs/deepseek/deepseek-r1.yaml
@@ -0,0 +1,53 @@
+id: deepseek/deepseek-r1
+canonical_slug: deepseek/deepseek-r1
+hugging_face_id: deepseek-ai/DeepSeek-R1
+name: 'DeepSeek: R1'
+type: chat
+created: 1737381095
+description: |-
+  DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.
+
+  Fully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).
+
+  MIT licensed: Distill & commercialize freely!
+context_length: 128000
+architecture:
+  modality: text->text
+  input_modalities:
+    - text
+  output_modalities:
+    - text
+  tokenizer: DeepSeek
+  instruct_type: deepseek-r1
+pricing:
+  prompt: '0.00000045'
+  completion: '0.00000215'
+  input_cache_read: ''
+  input_cache_write: ''
+  request: '0'
+  image: '0'
+  web_search: '0'
+  internal_reasoning: '0'
+  unit: 1
+  currency: USD
+supported_parameters:
+  - max_tokens
+  - temperature
+  - top_p
+  - reasoning
+  - include_reasoning
+  - stop
+  - frequency_penalty
+  - presence_penalty
+  - seed
+  - top_k
+  - min_p
+  - logit_bias
+  - top_logprobs
+  - response_format
+  - structured_outputs
+  - logprobs
+  - repetition_penalty
+  - tools
+  - tool_choice
+model_provider: deepseek
--- a/Show More
+++ b/Show More