Co-authored-by: Dt8333 <25431943+Dt8333@users.noreply.github.com> Co-authored-by: Soulter <905617992@qq.com>
121 lines
3.9 KiB
Python
121 lines
3.9 KiB
Python
import uuid
|
|
from datetime import datetime, timezone
|
|
|
|
from sqlmodel import Field, MetaData, SQLModel, Text, UniqueConstraint
|
|
|
|
|
|
class BaseKBModel(SQLModel, table=False):
    """Common non-table base class for the knowledge-base models in this module."""

    # A dedicated MetaData instance for the subclasses declared below;
    # presumably this keeps their tables apart from SQLModel's default
    # metadata -- confirm against the code that creates the schema.
    metadata = MetaData()
|
|
|
|
|
|
class KnowledgeBase(BaseKBModel, table=True):
    """Knowledge base table.

    Stores the basic information and statistics of a knowledge base.
    """

    __tablename__ = "knowledge_bases"  # type: ignore

    # Auto-incrementing integer primary key; defaults to None on new objects.
    id: int | None = Field(
        primary_key=True,
        sa_column_kwargs={"autoincrement": True},
        default=None,
    )
    # Unique, indexed identifier; a UUID4 string is generated when omitted.
    kb_id: str = Field(
        max_length=36,
        nullable=False,
        unique=True,
        default_factory=lambda: str(uuid.uuid4()),
        index=True,
    )
    kb_name: str = Field(max_length=100, nullable=False)
    description: str | None = Field(default=None, sa_type=Text)
    emoji: str | None = Field(default="📚", max_length=10)
    embedding_provider_id: str | None = Field(default=None, max_length=100)
    rerank_provider_id: str | None = Field(default=None, max_length=100)
    # Chunking configuration parameters
    chunk_size: int | None = Field(default=512, nullable=True)
    chunk_overlap: int | None = Field(default=50, nullable=True)
    # Retrieval configuration parameters
    top_k_dense: int | None = Field(default=50, nullable=True)
    top_k_sparse: int | None = Field(default=50, nullable=True)
    top_m_final: int | None = Field(default=5, nullable=True)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        # `onupdate` must be a callable. A plain `datetime.now(...)` value is
        # evaluated once when this class body runs, so every UPDATE would
        # write the same stale import-time timestamp.
        sa_column_kwargs={"onupdate": lambda: datetime.now(timezone.utc)},
    )
    doc_count: int = Field(default=0, nullable=False)
    chunk_count: int = Field(default=0, nullable=False)

    # Knowledge base names must be unique.
    __table_args__ = (
        UniqueConstraint(
            "kb_name",
            name="uix_kb_name",
        ),
    )
|
|
|
|
|
|
class KBDocument(BaseKBModel, table=True):
    """Document table.

    Stores metadata of the documents uploaded to a knowledge base.
    """

    __tablename__ = "kb_documents"  # type: ignore

    # Auto-incrementing integer primary key; defaults to None on new objects.
    id: int | None = Field(
        primary_key=True,
        sa_column_kwargs={"autoincrement": True},
        default=None,
    )
    # Unique, indexed identifier; a UUID4 string is generated when omitted.
    doc_id: str = Field(
        max_length=36,
        nullable=False,
        unique=True,
        default_factory=lambda: str(uuid.uuid4()),
        index=True,
    )
    # Indexed 36-character string; no foreign-key constraint is declared here.
    kb_id: str = Field(max_length=36, nullable=False, index=True)
    doc_name: str = Field(max_length=255, nullable=False)
    file_type: str = Field(max_length=20, nullable=False)
    file_size: int = Field(nullable=False)
    file_path: str = Field(max_length=512, nullable=False)
    chunk_count: int = Field(default=0, nullable=False)
    media_count: int = Field(default=0, nullable=False)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        # `onupdate` must be a callable. A plain `datetime.now(...)` value is
        # evaluated once when this class body runs, so every UPDATE would
        # write the same stale import-time timestamp.
        sa_column_kwargs={"onupdate": lambda: datetime.now(timezone.utc)},
    )
|
|
|
|
|
|
class KBMedia(BaseKBModel, table=True):
    """Multimedia resource table.

    Stores multimedia resources, such as images and videos, extracted from
    documents.
    """

    __tablename__ = "kb_media"  # type: ignore

    # --- identifiers -------------------------------------------------------
    id: int | None = Field(
        default=None,
        primary_key=True,
        sa_column_kwargs={"autoincrement": True},
    )
    media_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        max_length=36,
        nullable=False,
        unique=True,
        index=True,
    )
    doc_id: str = Field(
        max_length=36,
        nullable=False,
        index=True,
    )
    kb_id: str = Field(
        max_length=36,
        nullable=False,
        index=True,
    )

    # --- file description --------------------------------------------------
    media_type: str = Field(nullable=False, max_length=20)
    file_name: str = Field(nullable=False, max_length=255)
    file_path: str = Field(nullable=False, max_length=512)
    file_size: int = Field(nullable=False)
    mime_type: str = Field(nullable=False, max_length=100)

    # --- bookkeeping -------------------------------------------------------
    # Timezone-aware UTC timestamp taken when the object is constructed.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
|