feat(plugins): tighten media runtime integration

This commit is contained in:
Peter Steinberger
2026-03-16 21:13:38 -07:00
parent 45cb02b1dd
commit f4fa84aea7
8 changed files with 63 additions and 117 deletions

View File

@@ -8,10 +8,7 @@ const {
createAudioPlayerMock,
resolveAgentRouteMock,
agentCommandMock,
buildProviderRegistryMock,
createMediaAttachmentCacheMock,
normalizeMediaAttachmentsMock,
runCapabilityMock,
transcribeAudioFileMock,
} = vi.hoisted(() => {
type EventHandler = (...args: unknown[]) => unknown;
type MockConnection = {
@@ -68,14 +65,7 @@ const {
})),
resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })),
agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })),
buildProviderRegistryMock: vi.fn(() => ({})),
createMediaAttachmentCacheMock: vi.fn(() => ({
cleanup: vi.fn(async () => undefined),
})),
normalizeMediaAttachmentsMock: vi.fn(() => [{ kind: "audio", path: "/tmp/test.wav" }]),
runCapabilityMock: vi.fn(async () => ({
outputs: [{ kind: "audio.transcription", text: "hello from voice" }],
})),
transcribeAudioFileMock: vi.fn(async () => ({ text: "hello from voice" })),
};
});
@@ -103,11 +93,8 @@ vi.mock("../../../../src/commands/agent.js", () => ({
agentCommandFromIngress: agentCommandMock,
}));
vi.mock("../../../../src/media-understanding/runner.js", () => ({
buildProviderRegistry: buildProviderRegistryMock,
createMediaAttachmentCache: createMediaAttachmentCacheMock,
normalizeMediaAttachments: normalizeMediaAttachmentsMock,
runCapability: runCapabilityMock,
vi.mock("../../../../src/media-understanding/runtime.js", () => ({
transcribeAudioFile: transcribeAudioFileMock,
}));
let managerModule: typeof import("./manager.js");
@@ -149,15 +136,8 @@ describe("DiscordVoiceManager", () => {
resolveAgentRouteMock.mockClear();
agentCommandMock.mockReset();
agentCommandMock.mockResolvedValue({ payloads: [] });
buildProviderRegistryMock.mockReset();
buildProviderRegistryMock.mockReturnValue({});
createMediaAttachmentCacheMock.mockClear();
normalizeMediaAttachmentsMock.mockReset();
normalizeMediaAttachmentsMock.mockReturnValue([{ kind: "audio", path: "/tmp/test.wav" }]);
runCapabilityMock.mockReset();
runCapabilityMock.mockResolvedValue({
outputs: [{ kind: "audio.transcription", text: "hello from voice" }],
});
transcribeAudioFileMock.mockReset();
transcribeAudioFileMock.mockResolvedValue({ text: "hello from voice" });
});
const createManager = (

View File

@@ -17,7 +17,6 @@ import {
type VoiceConnection,
} from "@discordjs/voice";
import { resolveAgentDir } from "../../../../src/agents/agent-scope.js";
import type { MsgContext } from "../../../../src/auto-reply/templating.js";
import { agentCommandFromIngress } from "../../../../src/commands/agent.js";
import type { OpenClawConfig } from "../../../../src/config/config.js";
import { isDangerousNameMatchingEnabled } from "../../../../src/config/dangerous-name-matching.js";
@@ -26,12 +25,7 @@ import { logVerbose, shouldLogVerbose } from "../../../../src/globals.js";
import { formatErrorMessage } from "../../../../src/infra/errors.js";
import { resolvePreferredOpenClawTmpDir } from "../../../../src/infra/tmp-openclaw-dir.js";
import { createSubsystemLogger } from "../../../../src/logging/subsystem.js";
import {
buildProviderRegistry,
createMediaAttachmentCache,
normalizeMediaAttachments,
runCapability,
} from "../../../../src/media-understanding/runner.js";
import { transcribeAudioFile } from "../../../../src/media-understanding/runtime.js";
import { resolveAgentRoute } from "../../../../src/routing/resolve-route.js";
import type { RuntimeEnv } from "../../../../src/runtime.js";
import { parseTtsDirectives } from "../../../../src/tts/tts-core.js";
@@ -236,33 +230,13 @@ async function transcribeAudio(params: {
agentId: string;
filePath: string;
}): Promise<string | undefined> {
const ctx: MsgContext = {
MediaPath: params.filePath,
MediaType: "audio/wav",
};
const attachments = normalizeMediaAttachments(ctx);
if (attachments.length === 0) {
return undefined;
}
const cache = createMediaAttachmentCache(attachments);
const providerRegistry = buildProviderRegistry();
try {
const result = await runCapability({
capability: "audio",
cfg: params.cfg,
ctx,
attachments: cache,
media: attachments,
agentDir: resolveAgentDir(params.cfg, params.agentId),
providerRegistry,
config: params.cfg.tools?.media?.audio,
});
const output = result.outputs.find((entry) => entry.kind === "audio.transcription");
const text = output?.text?.trim();
return text || undefined;
} finally {
await cache.cleanup();
}
const result = await transcribeAudioFile({
cfg: params.cfg,
filePath: params.filePath,
mime: "audio/wav",
agentDir: resolveAgentDir(params.cfg, params.agentId),
});
return result.text?.trim() || undefined;
}
export class DiscordVoiceManager {

View File

@@ -9,6 +9,7 @@ import {
pathExists,
splitSetupEntries,
setSetupChannelEnabled,
type DmPolicy,
type OpenClawConfig,
} from "../../../src/plugin-sdk-internal/setup.js";
import type { ChannelSetupWizard } from "../../../src/plugin-sdk-internal/setup.js";

View File

@@ -41,7 +41,9 @@ export async function probeZaloAccount(params: {
export async function startZaloGatewayAccount(
ctx: Parameters<
NonNullable<import("openclaw/plugin-sdk/zalo").ChannelPlugin["gateway"]>["startAccount"]
NonNullable<
NonNullable<import("openclaw/plugin-sdk/zalo").ChannelPlugin["gateway"]>["startAccount"]
>
>[0],
) {
const account = ctx.account;

View File

@@ -47,26 +47,20 @@ type RegistrablePlugin = {
register: (api: ReturnType<typeof createCapturedPluginRegistration>["api"]) => void;
};
type ProviderContractEntry = {
type CapabilityContractEntry<T> = {
pluginId: string;
provider: ProviderPlugin;
provider: T;
};
type WebSearchProviderContractEntry = {
pluginId: string;
provider: WebSearchProviderPlugin;
type ProviderContractEntry = CapabilityContractEntry<ProviderPlugin>;
type WebSearchProviderContractEntry = CapabilityContractEntry<WebSearchProviderPlugin> & {
credentialValue: unknown;
};
type SpeechProviderContractEntry = {
pluginId: string;
provider: SpeechProviderPlugin;
};
type MediaUnderstandingProviderContractEntry = {
pluginId: string;
provider: MediaUnderstandingProviderPlugin;
};
// Contract entry whose `provider` is a SpeechProviderPlugin.
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
// Contract entry whose `provider` is a MediaUnderstandingProviderPlugin.
type MediaUnderstandingProviderContractEntry =
CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
type PluginRegistrationContractEntry = {
pluginId: string;
@@ -138,15 +132,23 @@ function captureRegistrations(plugin: RegistrablePlugin) {
return captured;
}
export const providerContractRegistry: ProviderContractEntry[] = bundledProviderPlugins.flatMap(
(plugin) => {
function buildCapabilityContractRegistry<T>(params: {
plugins: RegistrablePlugin[];
select: (captured: ReturnType<typeof createCapturedPluginRegistration>) => T[];
}): CapabilityContractEntry<T>[] {
return params.plugins.flatMap((plugin) => {
const captured = captureRegistrations(plugin);
return captured.providers.map((provider) => ({
return params.select(captured).map((provider) => ({
pluginId: plugin.id,
provider,
}));
},
);
});
}
/**
 * Contract entries built from `bundledProviderPlugins`.
 *
 * Delegates to `buildCapabilityContractRegistry`, selecting the `providers`
 * list from each plugin's captured registration. Evaluated once, when this
 * module is loaded.
 */
export const providerContractRegistry: ProviderContractEntry[] = buildCapabilityContractRegistry({
plugins: bundledProviderPlugins,
// Pick the capability list this registry is responsible for.
select: (captured) => captured.providers,
});
export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[] =
bundledWebSearchPlugins.flatMap((plugin) => {
@@ -159,21 +161,15 @@ export const webSearchProviderContractRegistry: WebSearchProviderContractEntry[]
});
export const speechProviderContractRegistry: SpeechProviderContractEntry[] =
bundledSpeechPlugins.flatMap((plugin) => {
const captured = captureRegistrations(plugin);
return captured.speechProviders.map((provider) => ({
pluginId: plugin.id,
provider,
}));
buildCapabilityContractRegistry({
plugins: bundledSpeechPlugins,
select: (captured) => captured.speechProviders,
});
export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProviderContractEntry[] =
bundledMediaUnderstandingPlugins.flatMap((plugin) => {
const captured = captureRegistrations(plugin);
return captured.mediaUnderstandingProviders.map((provider) => ({
pluginId: plugin.id,
provider,
}));
buildCapabilityContractRegistry({
plugins: bundledMediaUnderstandingPlugins,
select: (captured) => captured.mediaUnderstandingProviders,
});
const bundledPluginRegistrationList = [

View File

@@ -104,29 +104,20 @@ export type PluginProviderRegistration = {
rootDir?: string;
};
export type PluginWebSearchProviderRegistration = {
type PluginOwnedProviderRegistration<T extends { id: string }> = {
pluginId: string;
pluginName?: string;
provider: WebSearchProviderPlugin;
provider: T;
source: string;
rootDir?: string;
};
export type PluginSpeechProviderRegistration = {
pluginId: string;
pluginName?: string;
provider: SpeechProviderPlugin;
source: string;
rootDir?: string;
};
export type PluginMediaUnderstandingProviderRegistration = {
pluginId: string;
pluginName?: string;
provider: MediaUnderstandingProviderPlugin;
source: string;
rootDir?: string;
};
/** Registration record whose `provider` is a SpeechProviderPlugin. */
export type PluginSpeechProviderRegistration =
PluginOwnedProviderRegistration<SpeechProviderPlugin>;
/** Registration record whose `provider` is a MediaUnderstandingProviderPlugin. */
export type PluginMediaUnderstandingProviderRegistration =
PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
/** Registration record whose `provider` is a WebSearchProviderPlugin. */
export type PluginWebSearchProviderRegistration =
PluginOwnedProviderRegistration<WebSearchProviderPlugin>;
export type PluginHookRegistration = {
pluginId: string;
@@ -576,13 +567,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
const registerUniqueProviderLike = <
T extends { id: string },
R extends {
pluginId: string;
pluginName?: string;
provider: T;
source: string;
rootDir?: string;
},
R extends PluginOwnedProviderRegistration<T>,
>(params: {
record: PluginRecord;
provider: T;

View File

@@ -55,6 +55,14 @@ describe("plugin runtime command execution", () => {
expect(runtime.events.onSessionTranscriptUpdate).toBe(onSessionTranscriptUpdate);
});
// Checks the shape of the runtime returned by createPluginRuntime(): the
// mediaUnderstanding namespace must expose its file helpers as functions, and
// stt.transcribeAudioFile must be the same function reference as
// mediaUnderstanding.transcribeAudioFile.
it("exposes runtime.mediaUnderstanding helpers and keeps stt as an alias", () => {
const runtime = createPluginRuntime();
expect(typeof runtime.mediaUnderstanding.runFile).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function");
// toBe compares by identity, so this fails if stt wraps or re-creates the helper.
expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile);
});
it("exposes runtime.system.requestHeartbeatNow", () => {
const runtime = createPluginRuntime();
expect(runtime.system.requestHeartbeatNow).toBe(requestHeartbeatNow);

View File

@@ -26,7 +26,7 @@ export default defineConfig({
pool: "forks",
maxWorkers: e2eWorkers,
silent: !verboseE2E,
include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts"],
include: ["test/**/*.e2e.test.ts", "src/**/*.e2e.test.ts", "extensions/**/*.e2e.test.ts"],
exclude,
},
});