mirror of
https://github.com/clawdbot/clawdbot.git
synced 2026-03-17 17:33:45 +01:00
feat(plugins): tighten media runtime integration
This commit is contained in:
@@ -8,10 +8,7 @@ const {
|
||||
createAudioPlayerMock,
|
||||
resolveAgentRouteMock,
|
||||
agentCommandMock,
|
||||
buildProviderRegistryMock,
|
||||
createMediaAttachmentCacheMock,
|
||||
normalizeMediaAttachmentsMock,
|
||||
runCapabilityMock,
|
||||
transcribeAudioFileMock,
|
||||
} = vi.hoisted(() => {
|
||||
type EventHandler = (...args: unknown[]) => unknown;
|
||||
type MockConnection = {
|
||||
@@ -68,14 +65,7 @@ const {
|
||||
})),
|
||||
resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })),
|
||||
agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })),
|
||||
buildProviderRegistryMock: vi.fn(() => ({})),
|
||||
createMediaAttachmentCacheMock: vi.fn(() => ({
|
||||
cleanup: vi.fn(async () => undefined),
|
||||
})),
|
||||
normalizeMediaAttachmentsMock: vi.fn(() => [{ kind: "audio", path: "/tmp/test.wav" }]),
|
||||
runCapabilityMock: vi.fn(async () => ({
|
||||
outputs: [{ kind: "audio.transcription", text: "hello from voice" }],
|
||||
})),
|
||||
transcribeAudioFileMock: vi.fn(async () => ({ text: "hello from voice" })),
|
||||
};
|
||||
});
|
||||
|
||||
@@ -103,11 +93,8 @@ vi.mock("../../../../src/commands/agent.js", () => ({
|
||||
agentCommandFromIngress: agentCommandMock,
|
||||
}));
|
||||
|
||||
vi.mock("../../../../src/media-understanding/runner.js", () => ({
|
||||
buildProviderRegistry: buildProviderRegistryMock,
|
||||
createMediaAttachmentCache: createMediaAttachmentCacheMock,
|
||||
normalizeMediaAttachments: normalizeMediaAttachmentsMock,
|
||||
runCapability: runCapabilityMock,
|
||||
vi.mock("../../../../src/media-understanding/runtime.js", () => ({
|
||||
transcribeAudioFile: transcribeAudioFileMock,
|
||||
}));
|
||||
|
||||
let managerModule: typeof import("./manager.js");
|
||||
@@ -149,15 +136,8 @@ describe("DiscordVoiceManager", () => {
|
||||
resolveAgentRouteMock.mockClear();
|
||||
agentCommandMock.mockReset();
|
||||
agentCommandMock.mockResolvedValue({ payloads: [] });
|
||||
buildProviderRegistryMock.mockReset();
|
||||
buildProviderRegistryMock.mockReturnValue({});
|
||||
createMediaAttachmentCacheMock.mockClear();
|
||||
normalizeMediaAttachmentsMock.mockReset();
|
||||
normalizeMediaAttachmentsMock.mockReturnValue([{ kind: "audio", path: "/tmp/test.wav" }]);
|
||||
runCapabilityMock.mockReset();
|
||||
runCapabilityMock.mockResolvedValue({
|
||||
outputs: [{ kind: "audio.transcription", text: "hello from voice" }],
|
||||
});
|
||||
transcribeAudioFileMock.mockReset();
|
||||
transcribeAudioFileMock.mockResolvedValue({ text: "hello from voice" });
|
||||
});
|
||||
|
||||
const createManager = (
|
||||
|
||||
@@ -17,7 +17,6 @@ import {
|
||||
type VoiceConnection,
|
||||
} from "@discordjs/voice";
|
||||
import { resolveAgentDir } from "../../../../src/agents/agent-scope.js";
|
||||
import type { MsgContext } from "../../../../src/auto-reply/templating.js";
|
||||
import { agentCommandFromIngress } from "../../../../src/commands/agent.js";
|
||||
import type { OpenClawConfig } from "../../../../src/config/config.js";
|
||||
import { isDangerousNameMatchingEnabled } from "../../../../src/config/dangerous-name-matching.js";
|
||||
@@ -26,12 +25,7 @@ import { logVerbose, shouldLogVerbose } from "../../../../src/globals.js";
|
||||
import { formatErrorMessage } from "../../../../src/infra/errors.js";
|
||||
import { resolvePreferredOpenClawTmpDir } from "../../../../src/infra/tmp-openclaw-dir.js";
|
||||
import { createSubsystemLogger } from "../../../../src/logging/subsystem.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "../../../../src/media-understanding/runner.js";
|
||||
import { transcribeAudioFile } from "../../../../src/media-understanding/runtime.js";
|
||||
import { resolveAgentRoute } from "../../../../src/routing/resolve-route.js";
|
||||
import type { RuntimeEnv } from "../../../../src/runtime.js";
|
||||
import { parseTtsDirectives } from "../../../../src/tts/tts-core.js";
|
||||
@@ -236,33 +230,13 @@ async function transcribeAudio(params: {
|
||||
agentId: string;
|
||||
filePath: string;
|
||||
}): Promise<string | undefined> {
|
||||
const ctx: MsgContext = {
|
||||
MediaPath: params.filePath,
|
||||
MediaType: "audio/wav",
|
||||
};
|
||||
const attachments = normalizeMediaAttachments(ctx);
|
||||
if (attachments.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
const cache = createMediaAttachmentCache(attachments);
|
||||
const providerRegistry = buildProviderRegistry();
|
||||
try {
|
||||
const result = await runCapability({
|
||||
capability: "audio",
|
||||
cfg: params.cfg,
|
||||
ctx,
|
||||
attachments: cache,
|
||||
media: attachments,
|
||||
agentDir: resolveAgentDir(params.cfg, params.agentId),
|
||||
providerRegistry,
|
||||
config: params.cfg.tools?.media?.audio,
|
||||
});
|
||||
const output = result.outputs.find((entry) => entry.kind === "audio.transcription");
|
||||
const text = output?.text?.trim();
|
||||
return text || undefined;
|
||||
} finally {
|
||||
await cache.cleanup();
|
||||
}
|
||||
const result = await transcribeAudioFile({
|
||||
cfg: params.cfg,
|
||||
filePath: params.filePath,
|
||||
mime: "audio/wav",
|
||||
agentDir: resolveAgentDir(params.cfg, params.agentId),
|
||||
});
|
||||
return result.text?.trim() || undefined;
|
||||
}
|
||||
|
||||
export class DiscordVoiceManager {
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
pathExists,
|
||||
splitSetupEntries,
|
||||
setSetupChannelEnabled,
|
||||
type DmPolicy,
|
||||
type OpenClawConfig,
|
||||
} from "../../../src/plugin-sdk-internal/setup.js";
|
||||
import type { ChannelSetupWizard } from "../../../src/plugin-sdk-internal/setup.js";
|
||||
|
||||
@@ -41,7 +41,9 @@ export async function probeZaloAccount(params: {
|
||||
|
||||
export async function startZaloGatewayAccount(
|
||||
ctx: Parameters<
|
||||
NonNullable<import("openclaw/plugin-sdk/zalo").ChannelPlugin["gateway"]>["startAccount"]
|
||||
NonNullable<
|
||||
NonNullable<import("openclaw/plugin-sdk/zalo").ChannelPlugin["gateway"]>["startAccount"]
|
||||
>
|
||||
>[0],
|
||||
) {
|
||||
const account = ctx.account;
|
||||
|
||||
@@ -47,26 +47,20 @@ type RegistrablePlugin = {
|
||||
register: (api: ReturnType<typeof createCapturedPluginRegistration>["api"]) => void;
|
||||
};
|
||||
|
||||
type ProviderContractEntry = {
|
||||
type CapabilityContractEntry<T> = {
|
||||
pluginId: string;
|
||||
provider: ProviderPlugin;
|
||||
provider: T;
|
||||
};
|
||||
|
||||
type WebSearchProviderContractEntry = {
|
||||
pluginId: string;
|
||||
provider: WebSearchProviderPlugin;
|
||||
type ProviderContractEntry = CapabilityContractEntry<ProviderPlugin>;
|
||||
|
||||
type WebSearchProviderContractEntry = CapabilityContractEntry<WebSearchProviderPlugin> & {
|
||||
credentialValue: unknown;
|
||||
};
|
||||
|
||||
type SpeechProviderContractEntry = {
|
||||
pluginId: string;
|
||||
provider: SpeechProviderPlugin;
|
||||
};
|
||||
|
||||
type MediaUnderstandingProviderContractEntry = {
|
||||
pluginId: string;
|
||||
provider: MediaUnderstandingProviderPlugin;
|
||||
};
|
||||
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
|
||||
type MediaUnderstandingProviderContractEntry =
|
||||
CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
|
||||
|
||||
type PluginRegistrationContractEntry = {
|
||||
pluginId: string;
|
||||
@@ -138,15 +132,23 @@ function captureRegistrations(plugin: RegistrablePlugin) {
|
||||
return captured;
|
||||
}
|
||||
|
||||
export const providerContractRegistry: ProviderContractEntry[] = bundledProviderPlugins.flatMap(
|
||||
(plugin) => {
|
||||
function buildCapabilityContractRegistry<T>(params: {
|
||||
plugins: RegistrablePlugin[];
|
||||
select: (captured: ReturnType<typeof createCapturedPluginRegistration>) => T[];
|
||||
}): CapabilityContractEntry<T>[] {
|
||||
return params.plugins.flatMap((plugin) => {
|
||||
const captured = captureRegistrations(plugin);
|
||||
return captured.providers.map((provider) => ({
|
||||
return params.select(captured).map((provider) => ({
|
||||
pluginId: plugin.id,
|
||||
provider,
|
||||
}));
|
||||
},
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
export const providerContractRegistry: ProviderContractEntry[] = buildCapabilityContractRegistry({
|
||||
plugins: bundledProviderPlugins,
|
||||
select: (captured) => captured.providers,
|
||||
});
|
||||
|
||||
export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] =
|
||||
bundledWebSearchPlugins.flatMap((plugin) => {
|
||||
@@ -159,21 +161,15 @@ export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[]
|
||||
});
|
||||
|
||||
export const speechProviderContractRegistry: SpeechProviderContractEntry[] =
|
||||
bundledSpeechPlugins.flatMap((plugin) => {
|
||||
const captured = captureRegistrations(plugin);
|
||||
return captured.speechProviders.map((provider) => ({
|
||||
pluginId: plugin.id,
|
||||
provider,
|
||||
}));
|
||||
buildCapabilityContractRegistry({
|
||||
plugins: bundledSpeechPlugins,
|
||||
select: (captured) => captured.speechProviders,
|
||||
});
|
||||
|
||||
export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] =
|
||||
bundledMediaUnderstandingPlugins.flatMap((plugin) => {
|
||||
const captured = captureRegistrations(plugin);
|
||||
return captured.mediaUnderstandingProviders.map((provider) => ({
|
||||
pluginId: plugin.id,
|
||||
provider,
|
||||
}));
|
||||
buildCapabilityContractRegistry({
|
||||
plugins: bundledMediaUnderstandingPlugins,
|
||||
select: (captured) => captured.mediaUnderstandingProviders,
|
||||
});
|
||||
|
||||
const bundledPluginRegistrationList = [
|
||||
|
||||
@@ -104,29 +104,20 @@ export type PluginProviderRegistration = {
|
||||
rootDir?: string;
|
||||
};
|
||||
|
||||
export type PluginWebSearchProviderRegistration = {
|
||||
type PluginOwnedProviderRegistration<T extends { id: string }> = {
|
||||
pluginId: string;
|
||||
pluginName?: string;
|
||||
provider: WebSearchProviderPlugin;
|
||||
provider: T;
|
||||
source: string;
|
||||
rootDir?: string;
|
||||
};
|
||||
|
||||
export type PluginSpeechProviderRegistration = {
|
||||
pluginId: string;
|
||||
pluginName?: string;
|
||||
provider: SpeechProviderPlugin;
|
||||
source: string;
|
||||
rootDir?: string;
|
||||
};
|
||||
|
||||
export type PluginMediaUnderstandingProviderRegistration = {
|
||||
pluginId: string;
|
||||
pluginName?: string;
|
||||
provider: MediaUnderstandingProviderPlugin;
|
||||
source: string;
|
||||
rootDir?: string;
|
||||
};
|
||||
export type PluginSpeechProviderRegistration =
|
||||
PluginOwnedProviderRegistration<SpeechProviderPlugin>;
|
||||
export type PluginMediaUnderstandingProviderRegistration =
|
||||
PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
|
||||
export type PluginWebSearchProviderRegistration =
|
||||
PluginOwnedProviderRegistration<WebSearchProviderPlugin>;
|
||||
|
||||
export type PluginHookRegistration = {
|
||||
pluginId: string;
|
||||
@@ -576,13 +567,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
|
||||
|
||||
const registerUniqueProviderLike = <
|
||||
T extends { id: string },
|
||||
R extends {
|
||||
pluginId: string;
|
||||
pluginName?: string;
|
||||
provider: T;
|
||||
source: string;
|
||||
rootDir?: string;
|
||||
},
|
||||
R extends PluginOwnedProviderRegistration<T>,
|
||||
>(params: {
|
||||
record: PluginRecord;
|
||||
provider: T;
|
||||
|
||||
@@ -55,6 +55,14 @@ describe("plugin runtime command execution", () => {
|
||||
expect(runtime.events.onSessionTranscriptUpdate).toBe(onSessionTranscriptUpdate);
|
||||
});
|
||||
|
||||
it("exposes runtime.mediaUnderstanding helpers and keeps stt as an alias", () => {
|
||||
const runtime = createPluginRuntime();
|
||||
expect(typeof runtime.mediaUnderstanding.runFile).toBe("function");
|
||||
expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function");
|
||||
expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function");
|
||||
expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile);
|
||||
});
|
||||
|
||||
it("exposes runtime.system.requestHeartbeatNow", () => {
|
||||
const runtime = createPluginRuntime();
|
||||
expect(runtime.system.requestHeartbeatNow).toBe(requestHeartbeatNow);
|
||||
|
||||
@@ -26,7 +26,7 @@ export default defineConfig({
|
||||
pool: "forks",
|
||||
maxWorkers: e2eWorkers,
|
||||
silent: !verboseE2E,
|
||||
include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts"],
|
||||
include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts", "extensions/**/*.e2e.test.ts"],
|
||||
exclude,
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user