diff --git a/CHANGELOG.md b/CHANGELOG.md index d32a9f7a3..bb52cc8e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Status: beta. - Branding: update launchd labels, mobile bundle IDs, and logging subsystems to bot.molt (legacy com.clawdbot migrations). Thanks @thewilloftheshadow. - Tools: add per-sender group tool policies and fix precedence. (#1757) Thanks @adam91holt. - Agents: summarize dropped messages during compaction safeguard pruning. (#2509) Thanks @jogi47. +- Memory Search: allow extra paths for memory indexing (ignores symlinks). (#3600) Thanks @kira-ariaki. - Skills: add multi-image input support to Nano Banana Pro skill. (#1958) Thanks @tyler6204. - Agents: honor tools.exec.safeBins in exec allowlist checks. (#2281) - Matrix: switch plugin SDK to @vector-im/matrix-bot-sdk. diff --git a/docs/cli/memory.md b/docs/cli/memory.md index 3dc79932f..513b7ef07 100644 --- a/docs/cli/memory.md +++ b/docs/cli/memory.md @@ -39,3 +39,4 @@ Notes: - `memory status --deep` probes vector + embedding availability. - `memory status --deep --index` runs a reindex if the store is dirty. - `memory index --verbose` prints per-phase details (provider, model, sources, batch activity). +- `memory status` includes any extra paths configured via `memorySearch.extraPaths`. diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index 8a386aba9..f2bca461a 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -75,8 +75,9 @@ For the full compaction lifecycle, see ## Vector memory search -Moltbot can build a small vector index over `MEMORY.md` and `memory/*.md` so -semantic queries can find related notes even when wording differs. +Moltbot can build a small vector index over `MEMORY.md` and `memory/*.md` (plus +any extra directories or files you opt in) so semantic queries can find related +notes even when wording differs. Defaults: - Enabled by default. @@ -96,6 +97,27 @@ embeddings for memory search. 
For Gemini, use `GEMINI_API_KEY` or `models.providers.google.apiKey`. When using a custom OpenAI-compatible endpoint, set `memorySearch.remote.apiKey` (and optional `memorySearch.remote.headers`). +### Additional memory paths + +If you want to index Markdown files outside the default workspace layout, add +explicit paths: + +```json5 +agents: { + defaults: { + memorySearch: { + extraPaths: ["../team-docs", "/srv/shared-notes/overview.md"] + } + } +} +``` + +Notes: +- Paths can be absolute or workspace-relative. +- Directories are scanned recursively for `.md` files. +- Only Markdown files are indexed. +- Symlinks are ignored (files or directories). + ### Gemini embeddings (native) Set the provider to `gemini` to use the Gemini embeddings API directly: @@ -189,14 +211,14 @@ Local mode: ### How the memory tools work - `memory_search` semantically searches Markdown chunks (~400 token target, 80-token overlap) from `MEMORY.md` + `memory/**/*.md`. It returns snippet text (capped ~700 chars), file path, line range, score, provider/model, and whether we fell back from local → remote embeddings. No full file payload is returned. -- `memory_get` reads a specific memory Markdown file (workspace-relative), optionally from a starting line and for N lines. Paths outside `MEMORY.md` / `memory/` are rejected. +- `memory_get` reads a specific memory Markdown file (workspace-relative), optionally from a starting line and for N lines. Paths outside `MEMORY.md` / `memory/` are allowed only when explicitly listed in `memorySearch.extraPaths`. - Both tools are enabled only when `memorySearch.enabled` resolves true for the agent. ### What gets indexed (and when) -- File type: Markdown only (`MEMORY.md`, `memory/**/*.md`). +- File type: Markdown only (`MEMORY.md`, `memory/**/*.md`, plus any `.md` files under `memorySearch.extraPaths`). 
- Index storage: per-agent SQLite at `~/.clawdbot/memory/<agentId>.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token). -- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync is scheduled on session start, on search, or on an interval and runs asynchronously. Session transcripts use delta thresholds to trigger background sync. +- Freshness: watcher on `MEMORY.md`, `memory/`, and `memorySearch.extraPaths` marks the index dirty (debounce 1.5s). Sync is scheduled on session start, on search, or on an interval and runs asynchronously. Session transcripts use delta thresholds to trigger background sync. - Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Moltbot automatically resets and reindexes the entire store. ### Hybrid search (BM25 + vector) diff --git a/docs/gateway/configuration-examples.md b/docs/gateway/configuration-examples.md index 11ac14337..470689673 100644 --- a/docs/gateway/configuration-examples.md +++ b/docs/gateway/configuration-examples.md @@ -267,7 +267,8 @@ Save to `~/.clawdbot/moltbot.json` and you can DM the bot from that number. 
model: "gemini-embedding-001", remote: { apiKey: "${GEMINI_API_KEY}" - } + }, + extraPaths: ["../team-docs", "/srv/shared-notes"] }, sandbox: { mode: "non-main", diff --git a/src/agents/memory-search.test.ts b/src/agents/memory-search.test.ts index e6b86ea3d..c3165815f 100644 --- a/src/agents/memory-search.test.ts +++ b/src/agents/memory-search.test.ts @@ -82,6 +82,29 @@ describe("memory search config", () => { expect(resolved?.store.vector.extensionPath).toBe("/opt/sqlite-vec.dylib"); }); + it("merges extra memory paths from defaults and overrides", () => { + const cfg = { + agents: { + defaults: { + memorySearch: { + extraPaths: ["/shared/notes", " docs "], + }, + }, + list: [ + { + id: "main", + default: true, + memorySearch: { + extraPaths: ["/shared/notes", "../team-notes"], + }, + }, + ], + }, + }; + const resolved = resolveMemorySearchConfig(cfg, "main"); + expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]); + }); + it("includes batch defaults for openai without remote overrides", () => { const cfg = { agents: { diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index d9838e15e..25aeb7cac 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -9,7 +9,7 @@ import { resolveAgentConfig } from "./agent-scope.js"; export type ResolvedMemorySearchConfig = { enabled: boolean; sources: Array<"memory" | "sessions">; - paths: string[]; + extraPaths: string[]; provider: "openai" | "local" | "gemini" | "auto"; remote?: { baseUrl?: string; @@ -163,9 +163,10 @@ function mergeConfig( modelCacheDir: overrides?.local?.modelCacheDir ?? defaults?.local?.modelCacheDir, }; const sources = normalizeSources(overrides?.sources ?? defaults?.sources, sessionMemory); - // Merge paths from defaults and overrides (both arrays combined, deduped) - const pathsSet = new Set([...(defaults?.paths ?? []), ...(overrides?.paths ?? [])]); - const paths = Array.from(pathsSet); + const rawPaths = [...(defaults?.extraPaths ?? 
[]), ...(overrides?.extraPaths ?? [])] + .map((value) => value.trim()) + .filter(Boolean); + const extraPaths = Array.from(new Set(rawPaths)); const vector = { enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true, extensionPath: @@ -240,7 +241,7 @@ function mergeConfig( return { enabled, sources, - paths, + extraPaths, provider, remote, experimental: { diff --git a/src/agents/tools/memory-tool.ts b/src/agents/tools/memory-tool.ts index b7b619af3..274af4c02 100644 --- a/src/agents/tools/memory-tool.ts +++ b/src/agents/tools/memory-tool.ts @@ -83,7 +83,7 @@ export function createMemoryGetTool(options: { label: "Memory Get", name: "memory_get", description: - "Safe snippet read from MEMORY.md or memory/*.md with optional from/lines; use after memory_search to pull only the needed lines and keep context small.", + "Safe snippet read from MEMORY.md, memory/*.md, or configured memorySearch.extraPaths with optional from/lines; use after memory_search to pull only the needed lines and keep context small.", parameters: MemoryGetSchema, execute: async (_toolCallId, params) => { const relPath = readStringParam(params, "path", { required: true }); diff --git a/src/cli/memory-cli.ts b/src/cli/memory-cli.ts index 68894adf5..b72267a2a 100644 --- a/src/cli/memory-cli.ts +++ b/src/cli/memory-cli.ts @@ -12,7 +12,7 @@ import { setVerbose } from "../globals.js"; import { withProgress, withProgressTotals } from "./progress.js"; import { formatErrorMessage, withManager } from "./cli-utils.js"; import { getMemorySearchManager, type MemorySearchManagerResult } from "../memory/index.js"; -import { listMemoryFiles } from "../memory/internal.js"; +import { listMemoryFiles, normalizeExtraMemoryPaths } from "../memory/internal.js"; import { defaultRuntime } from "../runtime.js"; import { formatDocsLink } from "../terminal/links.js"; import { colorize, isRich, theme } from "../terminal/theme.js"; @@ -74,6 +74,10 @@ function resolveAgentIds(cfg: ReturnType, 
agent?: string): st return [resolveDefaultAgentId(cfg)]; } +function formatExtraPaths(workspaceDir: string, extraPaths: string[]): string[] { + return normalizeExtraMemoryPaths(workspaceDir, extraPaths).map((entry) => shortenHomePath(entry)); +} + async function checkReadableFile(pathname: string): Promise<{ exists: boolean; issue?: string }> { try { await fs.access(pathname, fsSync.constants.R_OK); @@ -110,7 +114,10 @@ async function scanSessionFiles(agentId: string): Promise { } } -async function scanMemoryFiles(workspaceDir: string): Promise { +async function scanMemoryFiles( + workspaceDir: string, + extraPaths: string[] = [], +): Promise { const issues: string[] = []; const memoryFile = path.join(workspaceDir, "MEMORY.md"); const altMemoryFile = path.join(workspaceDir, "memory.md"); @@ -121,6 +128,25 @@ async function scanMemoryFiles(workspaceDir: string): Promise { if (primary.issue) issues.push(primary.issue); if (alt.issue) issues.push(alt.issue); + const resolvedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths); + for (const extraPath of resolvedExtraPaths) { + try { + const stat = await fs.lstat(extraPath); + if (stat.isSymbolicLink()) continue; + const extraCheck = await checkReadableFile(extraPath); + if (extraCheck.issue) issues.push(extraCheck.issue); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code === "ENOENT") { + issues.push(`additional memory path missing (${shortenHomePath(extraPath)})`); + } else { + issues.push( + `additional memory path not accessible (${shortenHomePath(extraPath)}): ${code ?? 
"error"}`, + ); + } + } + } + let dirReadable: boolean | null = null; try { await fs.access(memoryDir, fsSync.constants.R_OK); @@ -141,7 +167,7 @@ async function scanMemoryFiles(workspaceDir: string): Promise { let listed: string[] = []; let listedOk = false; try { - listed = await listMemoryFiles(workspaceDir); + listed = await listMemoryFiles(workspaceDir, resolvedExtraPaths); listedOk = true; } catch (err) { const code = (err as NodeJS.ErrnoException).code; @@ -176,11 +202,13 @@ async function scanMemorySources(params: { workspaceDir: string; agentId: string; sources: MemorySourceName[]; + extraPaths?: string[]; }): Promise { const scans: SourceScan[] = []; + const extraPaths = params.extraPaths ?? []; for (const source of params.sources) { if (source === "memory") { - scans.push(await scanMemoryFiles(params.workspaceDir)); + scans.push(await scanMemoryFiles(params.workspaceDir, extraPaths)); } if (source === "sessions") { scans.push(await scanSessionFiles(params.agentId)); @@ -268,6 +296,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) { workspaceDir: status.workspaceDir, agentId, sources, + extraPaths: status.extraPaths, }); allResults.push({ agentId, status, embeddingProbe, indexError, scan }); }, @@ -299,6 +328,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) { const line = indexError ? `Memory index failed: ${indexError}` : "Memory index complete."; defaultRuntime.log(line); } + const extraPaths = formatExtraPaths(status.workspaceDir, status.extraPaths ?? []); const lines = [ `${heading("Memory Search")} ${muted(`(${agentId})`)}`, `${label("Provider")} ${info(status.provider)} ${muted( @@ -306,6 +336,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) { )}`, `${label("Model")} ${info(status.model)}`, status.sources?.length ? `${label("Sources")} ${info(status.sources.join(", "))}` : null, + extraPaths.length ? 
`${label("Extra paths")} ${info(extraPaths.join(", "))}` : null, `${label("Indexed")} ${success(indexedLabel)}`, `${label("Dirty")} ${status.dirty ? warn("yes") : muted("no")}`, `${label("Store")} ${info(shortenHomePath(status.dbPath))}`, @@ -469,6 +500,7 @@ export function registerMemoryCli(program: Command) { const sourceLabels = status.sources.map((source) => formatSourceLabel(source, status.workspaceDir, agentId), ); + const extraPaths = formatExtraPaths(status.workspaceDir, status.extraPaths ?? []); const lines = [ `${heading("Memory Index")} ${muted(`(${agentId})`)}`, `${label("Provider")} ${info(status.provider)} ${muted( @@ -478,6 +510,9 @@ export function registerMemoryCli(program: Command) { sourceLabels.length ? `${label("Sources")} ${info(sourceLabels.join(", "))}` : null, + extraPaths.length + ? `${label("Extra paths")} ${info(extraPaths.join(", "))}` + : null, ].filter(Boolean) as string[]; if (status.fallback) { lines.push(`${label("Fallback")} ${warn(status.fallback.from)}`); diff --git a/src/config/schema.ts b/src/config/schema.ts index 3be0b8320..28c994f3d 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -222,7 +222,7 @@ const FIELD_LABELS: Record = { "agents.defaults.memorySearch": "Memory Search", "agents.defaults.memorySearch.enabled": "Enable Memory Search", "agents.defaults.memorySearch.sources": "Memory Search Sources", - "agents.defaults.memorySearch.paths": "Additional Memory Paths", + "agents.defaults.memorySearch.extraPaths": "Extra Memory Paths", "agents.defaults.memorySearch.experimental.sessionMemory": "Memory Search Session Index (Experimental)", "agents.defaults.memorySearch.provider": "Memory Search Provider", @@ -500,8 +500,8 @@ const FIELD_HELP: Record = { "Vector search over MEMORY.md and memory/*.md (per-agent overrides supported).", "agents.defaults.memorySearch.sources": 'Sources to index for memory search (default: ["memory"]; add "sessions" to include session transcripts).', - 
"agents.defaults.memorySearch.paths": - "Additional paths to include in memory search (directories or .md files; relative paths resolved from workspace).", + "agents.defaults.memorySearch.extraPaths": + "Extra paths to include in memory search (directories or .md files; relative paths resolved from workspace).", "agents.defaults.memorySearch.experimental.sessionMemory": "Enable experimental session transcript indexing for memory search (default: false).", "agents.defaults.memorySearch.provider": 'Embedding provider ("openai", "gemini", or "local").', diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index ab5e15dcb..db32cb59d 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -226,8 +226,8 @@ export type MemorySearchConfig = { enabled?: boolean; /** Sources to index and search (default: ["memory"]). */ sources?: Array<"memory" | "sessions">; - /** Additional paths to include in memory search (directories or .md files). */ - paths?: string[]; + /** Extra paths to include in memory search (directories or .md files). */ + extraPaths?: string[]; /** Experimental memory search settings. */ experimental?: { /** Enable session transcript indexing (experimental, default: false). 
*/ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index d3dc056e4..7e95c3538 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -304,7 +304,7 @@ export const MemorySearchSchema = z .object({ enabled: z.boolean().optional(), sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(), - paths: z.array(z.string()).optional(), + extraPaths: z.array(z.string()).optional(), experimental: z .object({ sessionMemory: z.boolean().optional(), diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index 58a98e580..cccd1fa49 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -412,4 +412,52 @@ describe("memory index", () => { manager = result.manager; await expect(result.manager.readFile({ relPath: "NOTES.md" })).rejects.toThrow("path required"); }); + + it("allows reading from additional memory paths and blocks symlinks", async () => { + const extraDir = path.join(workspaceDir, "extra"); + await fs.mkdir(extraDir, { recursive: true }); + await fs.writeFile(path.join(extraDir, "extra.md"), "Extra content."); + + const cfg = { + agents: { + defaults: { + workspace: workspaceDir, + memorySearch: { + provider: "openai", + model: "mock-embed", + store: { path: indexPath }, + sync: { watch: false, onSessionStart: false, onSearch: true }, + extraPaths: [extraDir], + }, + }, + list: [{ id: "main", default: true }], + }, + }; + const result = await getMemorySearchManager({ cfg, agentId: "main" }); + expect(result.manager).not.toBeNull(); + if (!result.manager) throw new Error("manager missing"); + manager = result.manager; + await expect(result.manager.readFile({ relPath: "extra/extra.md" })).resolves.toEqual({ + path: "extra/extra.md", + text: "Extra content.", + }); + + const linkPath = path.join(extraDir, "linked.md"); + let symlinkOk = true; + try { + await fs.symlink(path.join(extraDir, "extra.md"), linkPath, "file"); + } catch (err) 
{ + const code = (err as NodeJS.ErrnoException).code; + if (code === "EPERM" || code === "EACCES") { + symlinkOk = false; + } else { + throw err; + } + } + if (symlinkOk) { + await expect(result.manager.readFile({ relPath: "extra/linked.md" })).rejects.toThrow( + "path required", + ); + } + }); }); diff --git a/src/memory/internal.test.ts b/src/memory/internal.test.ts index 38b580ea6..7530d8e44 100644 --- a/src/memory/internal.test.ts +++ b/src/memory/internal.test.ts @@ -4,7 +4,22 @@ import path from "node:path"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import { chunkMarkdown, listMemoryFiles } from "./internal.js"; +import { chunkMarkdown, listMemoryFiles, normalizeExtraMemoryPaths } from "./internal.js"; + +describe("normalizeExtraMemoryPaths", () => { + it("trims, resolves, and dedupes paths", () => { + const workspaceDir = path.join(os.tmpdir(), "memory-test-workspace"); + const absPath = path.resolve(path.sep, "shared-notes"); + const result = normalizeExtraMemoryPaths(workspaceDir, [ + " notes ", + "./notes", + absPath, + absPath, + "", + ]); + expect(result).toEqual([path.resolve(workspaceDir, "notes"), absPath]); + }); +}); describe("listMemoryFiles", () => { let tmpDir: string; @@ -18,10 +33,7 @@ describe("listMemoryFiles", () => { }); it("includes files from additional paths (directory)", async () => { - // Create default memory file await fs.writeFile(path.join(tmpDir, "MEMORY.md"), "# Default memory"); - - // Create additional directory with files const extraDir = path.join(tmpDir, "extra-notes"); await fs.mkdir(extraDir, { recursive: true }); await fs.writeFile(path.join(extraDir, "note1.md"), "# Note 1"); @@ -29,11 +41,11 @@ describe("listMemoryFiles", () => { await fs.writeFile(path.join(extraDir, "ignore.txt"), "Not a markdown file"); const files = await listMemoryFiles(tmpDir, [extraDir]); - expect(files).toHaveLength(3); // MEMORY.md + 2 notes - expect(files.some((f) => f.endsWith("MEMORY.md"))).toBe(true); - 
expect(files.some((f) => f.endsWith("note1.md"))).toBe(true); - expect(files.some((f) => f.endsWith("note2.md"))).toBe(true); - expect(files.some((f) => f.endsWith("ignore.txt"))).toBe(false); + expect(files).toHaveLength(3); + expect(files.some((file) => file.endsWith("MEMORY.md"))).toBe(true); + expect(files.some((file) => file.endsWith("note1.md"))).toBe(true); + expect(files.some((file) => file.endsWith("note2.md"))).toBe(true); + expect(files.some((file) => file.endsWith("ignore.txt"))).toBe(false); }); it("includes files from additional paths (single file)", async () => { @@ -43,7 +55,7 @@ describe("listMemoryFiles", () => { const files = await listMemoryFiles(tmpDir, [singleFile]); expect(files).toHaveLength(2); - expect(files.some((f) => f.endsWith("standalone.md"))).toBe(true); + expect(files.some((file) => file.endsWith("standalone.md"))).toBe(true); }); it("handles relative paths in additional paths", async () => { @@ -52,10 +64,9 @@ describe("listMemoryFiles", () => { await fs.mkdir(extraDir, { recursive: true }); await fs.writeFile(path.join(extraDir, "nested.md"), "# Nested"); - // Use relative path const files = await listMemoryFiles(tmpDir, ["subdir"]); expect(files).toHaveLength(2); - expect(files.some((f) => f.endsWith("nested.md"))).toBe(true); + expect(files.some((file) => file.endsWith("nested.md"))).toBe(true); }); it("ignores non-existent additional paths", async () => { @@ -64,6 +75,42 @@ describe("listMemoryFiles", () => { const files = await listMemoryFiles(tmpDir, ["/does/not/exist"]); expect(files).toHaveLength(1); }); + + it("ignores symlinked files and directories", async () => { + await fs.writeFile(path.join(tmpDir, "MEMORY.md"), "# Default memory"); + const extraDir = path.join(tmpDir, "extra"); + await fs.mkdir(extraDir, { recursive: true }); + await fs.writeFile(path.join(extraDir, "note.md"), "# Note"); + + const targetFile = path.join(tmpDir, "target.md"); + await fs.writeFile(targetFile, "# Target"); + const linkFile = 
path.join(extraDir, "linked.md"); + + const targetDir = path.join(tmpDir, "target-dir"); + await fs.mkdir(targetDir, { recursive: true }); + await fs.writeFile(path.join(targetDir, "nested.md"), "# Nested"); + const linkDir = path.join(tmpDir, "linked-dir"); + + let symlinksOk = true; + try { + await fs.symlink(targetFile, linkFile, "file"); + await fs.symlink(targetDir, linkDir, "dir"); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code === "EPERM" || code === "EACCES") { + symlinksOk = false; + } else { + throw err; + } + } + + const files = await listMemoryFiles(tmpDir, [extraDir, linkDir]); + expect(files.some((file) => file.endsWith("note.md"))).toBe(true); + if (symlinksOk) { + expect(files.some((file) => file.endsWith("linked.md"))).toBe(false); + expect(files.some((file) => file.endsWith("nested.md"))).toBe(false); + } + }); }); describe("chunkMarkdown", () => { diff --git a/src/memory/internal.ts b/src/memory/internal.ts index bf69aedb9..b2ab8c0a4 100644 --- a/src/memory/internal.ts +++ b/src/memory/internal.ts @@ -30,6 +30,17 @@ export function normalizeRelPath(value: string): string { return trimmed.replace(/\\/g, "/"); } +export function normalizeExtraMemoryPaths(workspaceDir: string, extraPaths?: string[]): string[] { + if (!extraPaths?.length) return []; + const resolved = extraPaths + .map((value) => value.trim()) + .filter(Boolean) + .map((value) => + path.isAbsolute(value) ? 
path.resolve(value) : path.resolve(workspaceDir, value), + ); + return Array.from(new Set(resolved)); +} + export function isMemoryPath(relPath: string): boolean { const normalized = normalizeRelPath(relPath); if (!normalized) return false; @@ -37,19 +48,11 @@ export function isMemoryPath(relPath: string): boolean { return normalized.startsWith("memory/"); } -async function exists(filePath: string): Promise { - try { - await fs.access(filePath); - return true; - } catch { - return false; - } -} - async function walkDir(dir: string, files: string[]) { const entries = await fs.readdir(dir, { withFileTypes: true }); for (const entry of entries) { const full = path.join(dir, entry.name); + if (entry.isSymbolicLink()) continue; if (entry.isDirectory()) { await walkDir(full, files); continue; @@ -62,28 +65,45 @@ async function walkDir(dir: string, files: string[]) { export async function listMemoryFiles( workspaceDir: string, - additionalPaths?: string[], + extraPaths?: string[], ): Promise { const result: string[] = []; const memoryFile = path.join(workspaceDir, "MEMORY.md"); const altMemoryFile = path.join(workspaceDir, "memory.md"); - if (await exists(memoryFile)) result.push(memoryFile); - if (await exists(altMemoryFile)) result.push(altMemoryFile); const memoryDir = path.join(workspaceDir, "memory"); - if (await exists(memoryDir)) { - await walkDir(memoryDir, result); - } - // Include files from additional explicit paths - if (additionalPaths && additionalPaths.length > 0) { - for (const p of additionalPaths) { - const resolved = path.isAbsolute(p) ? 
p : path.resolve(workspaceDir, p); - if (!(await exists(resolved))) continue; - const stat = await fs.stat(resolved); - if (stat.isDirectory()) { - await walkDir(resolved, result); - } else if (stat.isFile() && resolved.endsWith(".md")) { - result.push(resolved); - } + + const addMarkdownFile = async (absPath: string) => { + try { + const stat = await fs.lstat(absPath); + if (stat.isSymbolicLink() || !stat.isFile()) return; + if (!absPath.endsWith(".md")) return; + result.push(absPath); + } catch {} + }; + + await addMarkdownFile(memoryFile); + await addMarkdownFile(altMemoryFile); + try { + const dirStat = await fs.lstat(memoryDir); + if (!dirStat.isSymbolicLink() && dirStat.isDirectory()) { + await walkDir(memoryDir, result); + } + } catch {} + + const normalizedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths); + if (normalizedExtraPaths.length > 0) { + for (const inputPath of normalizedExtraPaths) { + try { + const stat = await fs.lstat(inputPath); + if (stat.isSymbolicLink()) continue; + if (stat.isDirectory()) { + await walkDir(inputPath, result); + continue; + } + if (stat.isFile() && inputPath.endsWith(".md")) { + result.push(inputPath); + } + } catch {} } } if (result.length <= 1) return result; diff --git a/src/memory/manager-cache-key.ts b/src/memory/manager-cache-key.ts index 9fbe3e436..d143a9057 100644 --- a/src/memory/manager-cache-key.ts +++ b/src/memory/manager-cache-key.ts @@ -13,6 +13,7 @@ export function computeMemoryManagerCacheKey(params: { JSON.stringify({ enabled: settings.enabled, sources: [...settings.sources].sort((a, b) => a.localeCompare(b)), + extraPaths: [...settings.extraPaths].sort((a, b) => a.localeCompare(b)), provider: settings.provider, model: settings.model, fallback: settings.fallback, diff --git a/src/memory/manager.ts b/src/memory/manager.ts index b43305676..a799a5e0f 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -1,4 +1,5 @@ import { randomUUID } from "node:crypto"; +import fsSync from 
"node:fs"; import fs from "node:fs/promises"; import path from "node:path"; @@ -35,9 +36,9 @@ import { hashText, isMemoryPath, listMemoryFiles, + normalizeExtraMemoryPaths, type MemoryChunk, type MemoryFileEntry, - normalizeRelPath, parseEmbedding, } from "./internal.js"; import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js"; @@ -396,13 +397,52 @@ export class MemoryIndexManager { from?: number; lines?: number; }): Promise<{ text: string; path: string }> { - const relPath = normalizeRelPath(params.relPath); - if (!relPath || !isMemoryPath(relPath)) { + const rawPath = params.relPath.trim(); + if (!rawPath) { throw new Error("path required"); } - const absPath = path.resolve(this.workspaceDir, relPath); - if (!absPath.startsWith(this.workspaceDir)) { - throw new Error("path escapes workspace"); + const absPath = path.isAbsolute(rawPath) + ? path.resolve(rawPath) + : path.resolve(this.workspaceDir, rawPath); + const relPath = path.relative(this.workspaceDir, absPath).replace(/\\/g, "/"); + const inWorkspace = + relPath.length > 0 && !relPath.startsWith("..") && !path.isAbsolute(relPath); + const allowedWorkspace = inWorkspace && isMemoryPath(relPath); + let allowedAdditional = false; + if (!allowedWorkspace && this.settings.extraPaths.length > 0) { + const additionalPaths = normalizeExtraMemoryPaths( + this.workspaceDir, + this.settings.extraPaths, + ); + for (const additionalPath of additionalPaths) { + try { + const stat = await fs.lstat(additionalPath); + if (stat.isSymbolicLink()) continue; + if (stat.isDirectory()) { + if (absPath === additionalPath || absPath.startsWith(`${additionalPath}${path.sep}`)) { + allowedAdditional = true; + break; + } + continue; + } + if (stat.isFile()) { + if (absPath === additionalPath && absPath.endsWith(".md")) { + allowedAdditional = true; + break; + } + } + } catch {} + } + } + if (!allowedWorkspace && !allowedAdditional) { + throw new Error("path required"); + } + if (!absPath.endsWith(".md")) { + 
throw new Error("path required"); + } + const stat = await fs.lstat(absPath); + if (stat.isSymbolicLink() || !stat.isFile()) { + throw new Error("path required"); } const content = await fs.readFile(absPath, "utf-8"); if (!params.from && !params.lines) { @@ -425,6 +465,7 @@ export class MemoryIndexManager { model: string; requestedProvider: string; sources: MemorySource[]; + extraPaths: string[]; sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>; cache?: { enabled: boolean; entries?: number; maxEntries?: number }; fts?: { enabled: boolean; available: boolean; error?: string }; @@ -498,6 +539,7 @@ export class MemoryIndexManager { model: this.provider.model, requestedProvider: this.requestedProvider, sources: Array.from(this.sources), + extraPaths: this.settings.extraPaths, sourceCounts, cache: this.cache.enabled ? { @@ -769,11 +811,23 @@ export class MemoryIndexManager { private ensureWatcher() { if (!this.sources.has("memory") || !this.settings.sync.watch || this.watcher) return; - const watchPaths = [ + const additionalPaths = normalizeExtraMemoryPaths(this.workspaceDir, this.settings.extraPaths) + .map((entry) => { + try { + const stat = fsSync.lstatSync(entry); + return stat.isSymbolicLink() ? 
null : entry; + } catch { + return null; + } + }) + .filter((entry): entry is string => Boolean(entry)); + const watchPaths = new Set([ path.join(this.workspaceDir, "MEMORY.md"), + path.join(this.workspaceDir, "memory.md"), path.join(this.workspaceDir, "memory"), - ]; - this.watcher = chokidar.watch(watchPaths, { + ...additionalPaths, + ]); + this.watcher = chokidar.watch(Array.from(watchPaths), { ignoreInitial: true, awaitWriteFinish: { stabilityThreshold: this.settings.sync.watchDebounceMs, @@ -975,7 +1029,7 @@ export class MemoryIndexManager { needsFullReindex: boolean; progress?: MemorySyncProgressState; }) { - const files = await listMemoryFiles(this.workspaceDir, this.settings.paths); + const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths); const fileEntries = await Promise.all( files.map(async (file) => buildFileEntry(file, this.workspaceDir)), ); diff --git a/src/memory/sync-memory-files.ts b/src/memory/sync-memory-files.ts index 6e88eb179..c5073dc50 100644 --- a/src/memory/sync-memory-files.ts +++ b/src/memory/sync-memory-files.ts @@ -14,7 +14,7 @@ type ProgressState = { export async function syncMemoryFiles(params: { workspaceDir: string; - additionalPaths?: string[]; + extraPaths?: string[]; db: DatabaseSync; needsFullReindex: boolean; progress?: ProgressState; @@ -28,7 +28,7 @@ export async function syncMemoryFiles(params: { ftsAvailable: boolean; model: string; }) { - const files = await listMemoryFiles(params.workspaceDir, params.additionalPaths); + const files = await listMemoryFiles(params.workspaceDir, params.extraPaths); const fileEntries = await Promise.all( files.map(async (file) => buildFileEntry(file, params.workspaceDir)), );