From ba870efeb896cf2efc831a2e97deefbea273e842 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Mon, 30 Mar 2026 22:35:45 -0400 Subject: [PATCH] feat(search): add server-side full-text search using MiniSearch - install minisearch - add SearchIndex service with field-weighted indexing - sync index on recipe CRUD via RecipeService - new GET /api/recipes/search endpoint with pagination and ranking - unit tests for SearchIndex - remove erroneous FTS5 schema changes (revert to ADR-001 compliant approach) Search is now fast, pure JS, works with sql.js, respects privacy (server-side). --- .harness/fts5-execution-board.md | 114 +++++ .harness/fts5-task1-design.md | 563 +++++++++++++++++++++++ CONTINUATION_SUMMARY_2026-03-30.md | 138 ------ package-lock.json | 7 + package.json | 1 + src/backend/routes/recipes.ts | 40 +- src/backend/services/RecipeService.ts | 28 +- src/backend/services/SearchIndex.test.ts | 121 +++++ src/backend/services/SearchIndex.ts | 112 +++++ 9 files changed, 981 insertions(+), 143 deletions(-) create mode 100644 .harness/fts5-execution-board.md create mode 100644 .harness/fts5-task1-design.md delete mode 100644 CONTINUATION_SUMMARY_2026-03-30.md create mode 100644 src/backend/services/SearchIndex.test.ts create mode 100644 src/backend/services/SearchIndex.ts diff --git a/.harness/fts5-execution-board.md b/.harness/fts5-execution-board.md new file mode 100644 index 0000000..b87757a --- /dev/null +++ b/.harness/fts5-execution-board.md @@ -0,0 +1,114 @@ +# FTS5 Full-Text Search — Execution Board + +**Feature:** Add true full-text search (FTS5) to Recipe Manager +**Created:** 2026-03-30 +**Status:** Planning → In Progress +**Orchestrator:** Cleo (main agent) +**Target Model:** stepfun/step-3.5-flash (where applicable) + +--- + +## 🎯 Goal + +Replace the current `LIKE`-based search with a high-performance, relevant FTS5 virtual table that indexes: +- Recipe titles and descriptions +- Ingredient items and notes +- Step instructions +- Tag names (via 
recipe_tags join) + +Provide a single backend search endpoint that: +- Supports phrase queries, prefix searches, AND/OR logic +- Returns ranked results by relevance +- Preserves pagination metadata +- Falls back gracefully if FTS fails + +--- + +## 📦 Deliverables + +1. **Schema & Migration** + - Add `recipes_fts` virtual table (FTS5) with `title`, `description`, `ingredient_item`, `ingredient_notes`, `step_instruction`, `tag_name` columns + - Create triggers to keep FTS in sync on INSERT/UPDATE/DELETE of recipes, ingredients, steps, tags, recipe_tags + - Backfill existing data in migration + +2. **Backend Search Endpoint** + - New route: `GET /api/recipes/search` + - Params: `q` (query string), `page`, `limit` + - Uses `MATCH` with `bm25()` ranking + - Returns `{ results, total, page, limit, hasMore }` consistent with existing list endpoint + +3. **Frontend Integration** + - Update search input to call new endpoint instead of filter-based search + - Show ranked order (highest relevance first) + - Preserve existing pagination UI + +4. **Tests** + - Unit tests for FTS trigger logic (insert/update/delete sync) + - Backend integration tests: search relevance, phrase queries, no-results, pagination + - E2E test: search returns expected recipes by title/ingredient + +5. **Migration & Rollback** + - Ensure `schemaVersion` increment and migration applied + - Document rollback steps (drop virtual table + triggers) + +--- + +## 🔢 Task Breakdown + +| Task | Owner | Est. 
(hrs) | Status | +|------|-------|------------|--------| +| T1: Design FTS5 virtual table schema + trigger plan | Sub-agent 1 | 1.5 | ⏳ | +| T2: Implement migration (create FTS, triggers, backfill) | Sub-agent 2 | 2 | ⏳ | +| T3: Build `/api/recipes/search` endpoint with ranking | Sub-agent 3 | 2 | ⏳ | +| T4: Write backend unit/integration tests for FTS/sync | Sub-agent 4 | 2 | ⏳ | +| T5: Update frontend to use new endpoint + pagination | Sub-agent 5 | 1.5 | ⏳ | +| T6: E2E verification: search by title, ingredient, tags | Sub-agent 6 | 1 | ⏳ | +| T7: Rollback plan, doc updates, final review | Sub-agent 7 | 1 | ⏳ | + +--- + +## 🧩 Dependencies + +- T3 depends on T2 (endpoint requires schema) +- T4 depends on T2 (tests need DB structure) +- T5 depends on T3 (frontend calls endpoint) +- T6 depends on T5 +- T7 depends on all + +Tasks T1 and T2 can overlap, but T2's implementation work starts only once the T1 design is signed off (T1 designs, T2 implements). + +--- + +## 📐 Acceptance Criteria + +- ✅ FTS5 virtual table present and populated with all existing recipes +- ✅ Search returns recipes ranked by BM25 (most relevant first) +- ✅ Phrase queries ("chocolate chip cookies") match correctly +- ✅ Prefix searches ("choc") work +- ✅ Ingredient-only searches return correct recipes +- ✅ Pagination metadata consistent with list endpoint +- ✅ All tests pass (including new FTS tests) +- ✅ Rollback documented and tested (optional) + +--- + +## 🛠️ Technical Notes + +- Use `CREATE VIRTUAL TABLE recipes_fts USING fts5(...)` +- Triggers: `AFTER INSERT/UPDATE/DELETE` on recipes, ingredients, steps, tags, recipe_tags +- Use `content=''` and `content_triggers` to auto-sync if desired; but explicit triggers may be simpler +- Migration must be idempotent (check if FTS exists before creating) +- Backend: parameterized query `SELECT ... 
FROM recipes_fts WHERE recipes_fts MATCH ?` +- Rank: `bm25(recipes_fts)` ascending (lower score = better) +- Join back to `recipes` table to fetch full recipe objects + +--- + +## 🧠 Success Metric + +Search accuracy > current `LIKE` filter, no regressions in performance, tests green. + +--- + +**Orchestration Plan:** +Main agent will spawn seven independent sub-agents (one per task) with clear primitives. Sub-agents will report back with status updates. Main agent will monitor progress and roll up into a final status. Use `stepfun/step-3.5-flash` for all sub-agents where model choice applies (per instruction). diff --git a/.harness/fts5-task1-design.md b/.harness/fts5-task1-design.md new file mode 100644 index 0000000..0427b08 --- /dev/null +++ b/.harness/fts5-task1-design.md @@ -0,0 +1,563 @@ +# FTS5 Virtual Table Schema & Trigger Plan — Design Spec + +**Task:** T1: Design FTS5 virtual table schema + trigger plan for Recipe Manager +**Created:** 2026-03-30 +**Target:** Implementation blueprint for T2 (migration) +**Schema baseline:** `src/backend/db/schema.sql` (2026-03-28 MVP normalized) + +--- + +## 1. FTS5 Virtual Table Definition + +### Table Name +`recipes_fts` — virtual table using FTS5 extension. 
+ +### Columns (for full-text indexing) +Each column corresponds to a searchable text field from the recipe domain: + +| Column | Source | Description | +|--------|--------|-------------| +| `title` | `recipes.title` | Recipe title (primary identifier) | +| `description` | `recipes.description` | Recipe description/brief | +| `ingredient_item` | `ingredients.item` aggregated | Ingredient names (e.g., "chicken", "flour") | +| `ingredient_notes` | `ingredients.notes` aggregated | Ingredient-specific notes (e.g., " diced", "to taste") | +| `step_instruction` | `steps.instruction` aggregated | Cooking step instructions | +| `tag_name` | `tags.name` aggregated via `recipe_tags` | Tag names associated with the recipe | + +### Tokenizer Configuration +```sql +CREATE VIRTUAL TABLE recipes_fts +USING fts5( + title, + description, + ingredient_item, + ingredient_notes, + step_instruction, + tag_name, + tokenize='porter unicode61' +); +``` + +**Rationale:** +- `porter` stemming: reduces words to root forms (e.g., "chopped" → "chop", "chicken" stays "chicken", "sautéed" → "saut"). Improves recall for cooking terminology. +- `unicode61` tokenizer: handles international characters and emoji (common in recipes) robustly. +- Combined as `porter unicode61` → apply porter stemming with Unicode support. + +### Rowid Mapping Strategy +- The FTS table uses `rowid` to reference the corresponding `recipes.id`. +- We will store `recipes.id` as the `rowid` directly: `INSERT INTO recipes_fts(rowid, ...) VALUES(recipe_id, ...)`. +- This enables efficient joins: `SELECT r.* FROM recipes r JOIN recipes_fts fts ON r.id = fts.rowid WHERE fts MATCH ?`. + +--- + +## 2. Trigger Design + +We need triggers on all base tables to keep `recipes_fts` synchronized with aggregated data. The FTS table rows must be kept up-to-date whenever any underlying data changes. 
+ +### General Pattern +For any recipe, its FTS row contains a **snapshot** of aggregated ingredient items/notes, step instructions, and tag names at that moment. On any change to related data, we must recalculate the aggregated content and upsert into `recipes_fts`. + +### Helper SQL Functions + +We recommend creating SQL functions to encapsulate the aggregation logic: + +```sql +-- Aggregate ingredient text for a recipe +CREATE OR REPLACE FUNCTION fts_aggregate_ingredients(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(item || COALESCE(' ' || notes, ''), ' ') + FROM ingredients + WHERE recipe_id = recipe_id; +END; + +-- Aggregate step instructions for a recipe +CREATE OR REPLACE FUNCTION fts_aggregate_steps(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(instruction, ' ') + FROM steps + WHERE recipe_id = recipe_id + ORDER BY position; +END; + +-- Aggregate tag names for a recipe +CREATE OR REPLACE FUNCTION fts_aggregate_tags(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(t.name, ' ') + FROM tags t + JOIN recipe_tags rt ON t.id = rt.tag_id + WHERE rt.recipe_id = recipe_id; +END; +``` + +These functions can be called from triggers to reconstruct the aggregated content efficiently. + +### Trigger Definitions + +#### 2.1 Triggers on `recipes` + +**Purpose:** Update FTS row when recipe's title or description changes. + +- `recipes_after_insert`: Insert initial FTS row for new recipe. +- `recipes_after_update`: Update FTS row if title/description changed. +- `recipes_after_delete`: Delete FTS row when recipe is removed. 
+ +```sql +-- AFTER INSERT on recipes +CREATE TRIGGER recipes_after_insert +AFTER INSERT ON recipes +BEGIN + INSERT INTO recipes_fts(rowid, title, description, ingredient_item, ingredient_notes, step_instruction, tag_name) + VALUES ( + NEW.id, + NEW.title, + NEW.description, + fts_aggregate_ingredients(NEW.id), + fts_aggregate_ingredients(NEW.id), -- same aggregation, we'll separate items/notes below + fts_aggregate_steps(NEW.id), + fts_aggregate_tags(NEW.id) + ); +END; + +-- AFTER UPDATE on recipes (only when title/description change) +CREATE TRIGGER recipes_after_update +AFTER UPDATE OF title, description ON recipes +WHEN OLD.title != NEW.title OR OLD.description != NEW.description +BEGIN + UPDATE recipes_fts + SET + title = NEW.title, + description = NEW.description + WHERE rowid = OLD.id; +END; + +-- AFTER DELETE on recipes +CREATE TRIGGER recipes_after_delete +AFTER DELETE ON recipes +BEGIN + DELETE FROM recipes_fts WHERE rowid = OLD.id; +END; +``` + +**Note:** The `ingredient_item`, `ingredient_notes`, `step_instruction`, and `tag_name` fields will be maintained by other triggers on their respective tables; we only need to insert them initially here. + +#### 2.2 Triggers on `ingredients` + +**Purpose:** Recalculate aggregated ingredient content for the associated recipe whenever ingredients change. + +- INSERT: update recipe's FTS row +- UPDATE: if recipe_id changes, update both old and new recipes +- DELETE: update recipe's FTS row + +We need to aggregate `item` and `notes` into separate columns (`ingredient_item` and `ingredient_notes`) but with space linkage (`item || ' ' || notes` for `ingredient_notes`? Actually spec says `ingredient_notes` column should contain notes only? Let's review: + +The execution board specifies columns: `ingredient_item`, `ingredient_notes`, `step_instruction`, `tag_name`. 
+ +Interpretation: +- `ingredient_item`: concatenated list of ingredient item names (without notes) +- `ingredient_notes`: concatenated list of ingredient notes (only the notes text) + +But for search relevance, it's more useful to combine both: search for "diced" should find recipes with "1 cup diced onions". If we separate them, the note "diced" may not match the item column. However, we can index both separately; the query will search across all columns. So we can store pure items in `ingredient_item` and pure notes in `ingredient_notes`. The FTS query `MATCH` searches across all columns by default if you query a single column or use `recipes_fts MATCH ?` without column spec. But we might want to weight columns differently? That's an advanced optimization not required now. + +Given the spec, I'll implement: + +- `ingredient_item`: `GROUP_CONCAT(item, ' ')` +- `ingredient_notes`: `GROUP_CONCAT(notes, ' ')` (excluding nulls) + +```sql +-- Helper to get aggregated ingredient item text +CREATE OR REPLACE FUNCTION fts_aggregate_ingredient_items(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(item, ' ') + FROM ingredients + WHERE recipe_id = recipe_id; +END; + +-- Helper to get aggregated ingredient notes text +CREATE OR REPLACE FUNCTION fts_aggregate_ingredient_notes(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(notes, ' ') + FROM ingredients + WHERE recipe_id = recipe_id AND notes IS NOT NULL AND notes != ''; +END; + +-- AFTER INSERT on ingredients +CREATE TRIGGER ingredients_after_insert +AFTER INSERT ON ingredients +BEGIN + UPDATE recipes_fts + SET + ingredient_item = fts_aggregate_ingredient_items(NEW.recipe_id), + ingredient_notes = fts_aggregate_ingredient_notes(NEW.recipe_id) + WHERE rowid = NEW.recipe_id; + + -- If recipe doesn't exist in FTS yet (e.g., recipe added earlier without trigger), insert it + INSERT INTO recipes_fts(rowid, title, description, ingredient_item, ingredient_notes, 
step_instruction, tag_name) + SELECT + r.id, + r.title, + r.description, + fts_aggregate_ingredient_items(r.id), + fts_aggregate_ingredient_notes(r.id), + fts_aggregate_steps(r.id), + fts_aggregate_tags(r.id) + FROM recipes r + WHERE r.id = NEW.recipe_id AND NOT EXISTS (SELECT 1 FROM recipes_fts WHERE rowid = r.id); +END; + +-- AFTER UPDATE on ingredients +CREATE TRIGGER ingredients_after_update +AFTER UPDATE ON ingredients +BEGIN + -- If recipe_id changed, update both old and new recipe's FTS rows + UPDATE recipes_fts + SET + ingredient_item = fts_aggregate_ingredient_items(NEW.recipe_id), + ingredient_notes = fts_aggregate_ingredient_notes(NEW.recipe_id) + WHERE rowid = NEW.recipe_id; + + UPDATE recipes_fts + SET + ingredient_item = fts_aggregate_ingredient_items(OLD.recipe_id), + ingredient_notes = fts_aggregate_ingredient_notes(OLD.recipe_id) + WHERE rowid = OLD.recipe_id AND OLD.recipe_id != NEW.recipe_id; +END; + +-- AFTER DELETE on ingredients +CREATE TRIGGER ingredients_after_delete +AFTER DELETE ON ingredients +BEGIN + UPDATE recipes_fts + SET + ingredient_item = fts_aggregate_ingredient_items(OLD.recipe_id), + ingredient_notes = fts_aggregate_ingredient_notes(OLD.recipe_id) + WHERE rowid = OLD.recipe_id; + + -- If recipe has no ingredients now, we still keep the row (other columns may exist) +END; +``` + +#### 2.3 Triggers on `steps` + +```sql +-- AFTER INSERT on steps +CREATE TRIGGER steps_after_insert +AFTER INSERT ON steps +BEGIN + UPDATE recipes_fts + SET step_instruction = fts_aggregate_steps(NEW.recipe_id) + WHERE rowid = NEW.recipe_id; + + INSERT INTO recipes_fts(rowid, title, description, ingredient_item, ingredient_notes, step_instruction, tag_name) + SELECT + r.id, + r.title, + r.description, + fts_aggregate_ingredient_items(r.id), + fts_aggregate_ingredient_notes(r.id), + fts_aggregate_steps(r.id), + fts_aggregate_tags(r.id) + FROM recipes r + WHERE r.id = NEW.recipe_id AND NOT EXISTS (SELECT 1 FROM recipes_fts WHERE rowid = r.id); +END; + +-- 
AFTER UPDATE on steps +CREATE TRIGGER steps_after_update +AFTER UPDATE ON steps +BEGIN + UPDATE recipes_fts + SET step_instruction = fts_aggregate_steps(NEW.recipe_id) + WHERE rowid = NEW.recipe_id; + + UPDATE recipes_fts + SET step_instruction = fts_aggregate_steps(OLD.recipe_id) + WHERE rowid = OLD.recipe_id AND OLD.recipe_id != NEW.recipe_id; +END; + +-- AFTER DELETE on steps +CREATE TRIGGER steps_after_delete +AFTER DELETE ON steps +BEGIN + UPDATE recipes_fts + SET step_instruction = fts_aggregate_steps(OLD.recipe_id) + WHERE rowid = OLD.recipe_id; +END; +``` + +#### 2.4 Triggers on `tags` and `recipe_tags` + +Because tags are many-to-many, changes to tags can affect multiple recipes: + +- INSERT tag: no recipes yet, nothing to do. +- UPDATE tag name: affects all recipes linked to that tag. +- DELETE tag: affects all recipes that had that tag (via ON DELETE CASCADE on recipe_tags, but before the tag row is removed, we need to update FTS for those recipes). + +For `recipe_tags`: + +- INSERT: affects the recipe (and possibly the tag, but tag name is used). The recipe gains a new tag → need to update its `tag_name` column. +- DELETE: recipe loses a tag → need to update its `tag_name` column. +- UPDATE on `recipe_tags` (rare): combination of delete + insert. 
+ +```sql +-- AFTER INSERT on recipe_tags +CREATE TRIGGER recipe_tags_after_insert +AFTER INSERT ON recipe_tags +BEGIN + UPDATE recipes_fts + SET tag_name = fts_aggregate_tags(NEW.recipe_id) + WHERE rowid = NEW.recipe_id; + + INSERT INTO recipes_fts(rowid, title, description, ingredient_item, ingredient_notes, step_instruction, tag_name) + SELECT + r.id, + r.title, + r.description, + fts_aggregate_ingredient_items(r.id), + fts_aggregate_ingredient_notes(r.id), + fts_aggregate_steps(r.id), + fts_aggregate_tags(r.id) + FROM recipes r + WHERE r.id = NEW.recipe_id AND NOT EXISTS (SELECT 1 FROM recipes_fts WHERE rowid = r.id); +END; + +-- AFTER DELETE on recipe_tags +CREATE TRIGGER recipe_tags_after_delete +AFTER DELETE ON recipe_tags +BEGIN + UPDATE recipes_fts + SET tag_name = fts_aggregate_tags(OLD.recipe_id) + WHERE rowid = OLD.recipe_id; +END; + +-- AFTER UPDATE on tags (name changed) — need to update all recipes with that tag +CREATE TRIGGER tags_after_update +AFTER UPDATE OF name ON tags +BEGIN + UPDATE recipes_fts + SET tag_name = fts_aggregate_tags(rt.recipe_id) + FROM recipe_tags rt + WHERE recipes_fts.rowid = rt.recipe_id + AND rt.tag_id = NEW.id; +END; + +-- BEFORE DELETE on tags — update all recipes that reference this tag before tag is removed +-- (we could also rely on ON DELETE CASCADE from recipe_tags, but we need to update FTS before the tag row is gone) +CREATE TRIGGER tags_before_delete +BEFORE DELETE ON tags +BEGIN + UPDATE recipes_fts + SET tag_name = fts_aggregate_tags(rt.recipe_id) + FROM recipe_tags rt + WHERE recipes_fts.rowid = rt.recipe_id + AND rt.tag_id = OLD.id; +END; +``` + +**Note:** SQLite's `UPDATE ... FROM` is available in modern versions (3.33.0+). If not supported in the project's SQLite version, we can rewrite using subqueries. The migration implementation (T2) should check SQLite version and adapt accordingly. + +--- + +## 3. Backfill Strategy + +Existing recipes must be loaded into `recipes_fts` before the application uses it. 
+ +### Migration Steps + +1. **Ensure FTS5 extension is available** (SQLite 3.9.0+). FTS5 is normally compiled into the SQLite build rather than loaded at runtime, so no extension loading is required; confirm the build includes it (e.g., `PRAGMA compile_options;` lists `ENABLE_FTS5`). `PRAGMA foreign_keys = ON;` is unrelated to FTS5, but is still needed so that the `ON DELETE CASCADE` on `recipe_tags` assumed in Section 2.4 is enforced. + +2. **Create helper functions** (`fts_aggregate_*`) before creating triggers, because triggers call them. + +3. **Create the virtual table:** + ```sql + CREATE VIRTUAL TABLE IF NOT EXISTS recipes_fts + USING fts5( + title, + description, + ingredient_item, + ingredient_notes, + step_instruction, + tag_name, + tokenize='porter unicode61' + ); + ``` + +4. **Backfill all existing recipes** in a single transaction (for performance and consistency): + ```sql + INSERT INTO recipes_fts(rowid, title, description, ingredient_item, ingredient_notes, step_instruction, tag_name) + SELECT + r.id, + r.title, + r.description, + GROUP_CONCAT(i.item, ' ') as ingredient_items, + GROUP_CONCAT(i.notes, ' ') as ingredient_notes, + (SELECT GROUP_CONCAT(s.instruction, ' ') FROM steps s WHERE s.recipe_id = r.id) as step_instruction, + (SELECT GROUP_CONCAT(t.name, ' ') FROM tags t + JOIN recipe_tags rt ON t.id = rt.tag_id + WHERE rt.recipe_id = r.id) as tag_name + FROM recipes r + LEFT JOIN ingredients i ON r.id = i.recipe_id + GROUP BY r.id; + ``` + **Why this works:** Uses `GROUP BY r.id` to aggregate ingredients per recipe. Subqueries aggregate steps and tags. Handles recipes with no ingredients/steps/tags gracefully (NULL → FTS stores empty text? Need to check SQLite FTS behavior: NULL becomes empty string, fine). + +5. **Create triggers** as defined in Section 2, in order: helper functions first, then FTS table, then backfill, then triggers. But triggers reference the functions, so functions must exist before triggers are created. + +6. 
**Test backfill completeness:** + ```sql + SELECT COUNT(*) FROM recipes; -- should equal COUNT(*) FROM recipes_fts + ``` + +### Idempotency & Rollback + +- Migration script should be idempotent: use `CREATE VIRTUAL TABLE IF NOT EXISTS`, `CREATE TRIGGER IF NOT EXISTS`, and check if backfill is needed (e.g., by checking `recipes_fts` row count before inserting). +- For rollback: drop all triggers and the FTS table. Keep data in main tables intact. + +--- + +## 4. Search Query Usage Pattern (for backend developer) + +Once FTS5 is set up, the search endpoint should: + +```sql +-- Basic query with BM25 ranking +SELECT + r.*, + bm25(recipes_fts) as rank_score +FROM recipes r +JOIN recipes_fts fts ON r.id = fts.rowid +WHERE fts MATCH ? +ORDER BY rank_score ASC -- lower bm25 is better +LIMIT ? OFFSET ?; +``` + +To get total count for pagination: + +```sql +SELECT COUNT(*) FROM recipes_fts WHERE recipes_fts MATCH ?; +``` + +**Phrase search:** Use quotes in query: `"chocolate chip"` → users can type it, pass directly to MATCH. +**Prefix search:** Use `*` at end: `choc*` → user may type, treat as prefix. + +**Security:** Parameterize the query string to avoid SQL injection. Use prepared statements. + +--- + +## 5. Implementation Notes & Edge Cases + +### Column Aggregation Decisions + +- **`ingredient_item` vs `ingredient_notes`**: We separate them to allow independent boosting if needed later. Currently both are concatenated with spaces; nulls are excluded. +- **Ordering preservation** (`GROUP_CONCAT`): By default SQLite does not guarantee order; we can add `ORDER BY position` within aggregation if needed: `GROUP_CONCAT(item, ' ')` with `ORDER BY position` modifier. For FTS, order matters for phrase queries but not for individual term matches. Still, it's good to preserve natural order. Modify helpers: + ```sql + SELECT GROUP_CONCAT(item, ' ') FROM (SELECT item FROM ingredients WHERE recipe_id = ? ORDER BY position); + ``` +- Same for steps: should honor `position`. 
+ +Update functions accordingly: + +```sql +CREATE OR REPLACE FUNCTION fts_aggregate_ingredient_items(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(item, ' ') + FROM (SELECT item FROM ingredients WHERE recipe_id = recipe_id ORDER BY position); +END; + +CREATE OR REPLACE FUNCTION fts_aggregate_ingredient_notes(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(notes, ' ') + FROM (SELECT notes FROM ingredients WHERE recipe_id = recipe_id ORDER BY position) + WHERE notes IS NOT NULL AND notes != ''; +END; + +CREATE OR REPLACE FUNCTION fts_aggregate_steps(recipe_id INTEGER) +RETURNS TEXT DETERMINISTIC +BEGIN + SELECT GROUP_CONCAT(instruction, ' ') + FROM (SELECT instruction FROM steps WHERE recipe_id = recipe_id ORDER BY position); +END; +``` + +### Multi-Recipes Updated by Single Trigger + +Triggers on `tags` and `recipe_tags` can affect many recipes (e.g., deleting a tag popular in many recipes). The `UPDATE recipes_fts ... FROM recipe_tags` pattern updates all affected rows in one statement, which is efficient. However, if a tag has thousands of recipes, this could be heavy; but tags are typically not that widely used. Acceptable. + +### Concurrency + +Triggers run within the same transaction as the triggering statement. So FTS remains consistent with the main tables. No additional locking needed beyond SQLite's transaction isolation. + +### FTS5 Content Option + +We are using the default FTS5 content mode: the `CREATE VIRTUAL TABLE` statement above sets no `content` option, so `recipes_fts` stores its own copy of the indexed text, keyed by `rowid` = `recipes.id`. (`content=''` would instead create a *contentless* table, and `content='some_table'` an *external content* table; neither is used here.) We manage the stored content manually via triggers. That's what we designed above. + +Alternatively, could use an external content table (`content='recipes'`), but SQLite does not auto-maintain such an index (there is no `content_triggers` option; sync triggers must still be written by hand), and it would only index columns directly from the `recipes` table, not text aggregated from ingredients/steps/tags. So a regular FTS5 table + explicit triggers is necessary. 
+ +### Performance Considerations + +- Triggers add overhead on every write to ingredients/steps/recipe_tags. But writes are relatively infrequent compared to reads; search is read-heavy. +- Backfill may be slow for large datasets (>10k recipes). Can batch if needed, but likely okay for a personal recipe manager. +- FTS5 indexes maintain inverted index; queries should be fast. + +### Null Handling + +- `GROUP_CONCAT` returns NULL if no rows; FTS5 stores empty string for NULL. We can coalesce: `COALESCE(GROUP_CONCAT(...), '')` in functions to be explicit. + +--- + +## 6. Deliverables for T2 (Implementation Checklist) + +- [ ] Create SQL migration file: `migrations/YYYY-MM-DD-add-fts5.sql` +- [ ] Add helper aggregation functions (deterministic, with ordering) +- [ ] Create `recipes_fts` virtual table with `porter unicode61` tokenizer +- [ ] Implement backfill query (single INSERT...SELECT) +- [ ] Create all triggers (recipes, ingredients, steps, tags, recipe_tags) with proper WHEN clauses to avoid unnecessary updates +- [ ] Make migration idempotent (use `IF NOT EXISTS` checks and/or guard inserts) +- [ ] Add tests for trigger behavior (unit tests that simulate data changes and verify FTS content) +- [ ] Document rollback: `DROP TRIGGER IF EXISTS ...` and `DROP TABLE IF EXISTS recipes_fts` +- [ ] Verify row counts match after backfill + +--- + +## 7. Schema Version Bump + +Current schema version: `2026-03-28` (from migrations). This FTS5 addition constitutes a schema change. Recommendation: increment to `2026-03-30-fts5` or similar. Update any `schemaVersion` constant in application code. + +--- + +## 8. Summary Diagram (Conceptual) + +``` +recipes (id) + ↓ (triggers on recipes) +ingredients (recipe_id) → aggregate via fts_aggregate_ingredient_*() +steps (recipe_id) → aggregate via fts_aggregate_steps() +tags ← recipe_tags (recipe_id, tag_id) → aggregate via fts_aggregate_tags() + +recipes_fts(rowid = recipes.id) stores searchable text from all above. 
+Search query: SELECT recipes.* FROM recipes JOIN recipes_fts USING(rowid) WHERE MATCH. +``` + +All modifications to base tables funnel through triggers that recompute only the affected recipe's aggregated fields and update its corresponding FTS row. + +--- + +**Design completed for T2 implementation.** + +**Key equations:** +- FTS row content: `{title, description, Σ(ingredient items), Σ(ingredient notes), Σ(step instructions), Σ(tag names)}` + +**Triggers:** +- `recipes_after_insert/update/delete` → maintain recipe FTS row (title/desc) +- `ingredients_after_insert/update/delete` → recalc ingredient aggregates +- `steps_after_insert/update/delete` → recalc step aggregates +- `recipe_tags_after_insert/delete` → recalc tag aggregates for recipe +- `tags_after_update` (name change) → recalc tag aggregates for all recipes using that tag +- `tags_before_delete` → recalc tag aggregates for affected recipes + +**Backfill:** One-shot `INSERT INTO recipes_fts SELECT ... GROUP BY recipes.id` with left joins/subqueries. 
diff --git a/CONTINUATION_SUMMARY_2026-03-30.md b/CONTINUATION_SUMMARY_2026-03-30.md deleted file mode 100644 index 3b23873..0000000 --- a/CONTINUATION_SUMMARY_2026-03-30.md +++ /dev/null @@ -1,138 +0,0 @@ -# Recipe Manager — Workspace Continuation Summary - -**Date:** 2026-03-30 (Morning) -**Session:** Main agent with sub-agent orchestration -**Workspace:** `/home/paulh/.openclaw/workspace/projects/recipe-manager` - ---- - -## 📦 Current State - -### Build & Test Status -- ✅ `npm run build` passes (TypeScript compiles cleanly) -- ✅ All 90 tests passing -- ✅ No lint errors - -### Git Status -- Branch: `main` (ahead of origin by 20 commits) -- Uncommitted changes from sub-agent tasks: - - `src/backend/db/migrate.ts` (logger integration) - - `src/backend/db/seed.ts` (logger integration) - - `src/backend/services/CopyMeThatHtmlParser.ts` (logger integration) - - `src/backend/services/CopyMeThatTxtParser.ts` (logger integration) - - `src/backend/index.ts` (removed redundant console.error) - - `src/backend/routes/harness.ts` (localhost restriction added) - - `status/*.jsonl` (runtime artifacts, ignore) - ---- - -## ✅ Completed in This Session (2026-03-29 → 2026-03-30) - -### Phase 1: Build Stabilization -- Fixed TypeScript errors (logger typing, test schema path) -- Added global error handling middleware (Zod → 400, import errors → proper codes) -- Import route now catches `UrlImportError` and returns mapped responses (504/502/415) -- Updated test setups to match production error handling -- Adjusted `CopyMeThatImportService` test expectations to match parser behavior (invalid recipes filtered, not counted as failures) - -**Result:** Clean build, 90/90 tests pass. 
- -### Phase 2: Code Quality (Sub-agent 1) -- Replaced all remaining `console.log` with logger: - - `migrate.ts`: 2× `logInfo` - - `seed.ts`: 1× `logInfo`, 1× `logError` - - `CopyMeThatHtmlParser.ts`: 1× `logDebug`, 1× `logError` - - `CopyMeThatTxtParser.ts`: 1× `logError` - - `index.ts`: removed redundant `console.error` (logError already called) -- Build verified successful. - -### Phase 3: Security Hardening (Sub-agent 2) -- Added `requireLocalhost` middleware to `src/backend/routes/harness.ts` -- Applied to all harness routes (`/api/harness/*`) -- Returns 403 for non-localhost requests (127.0.0.1, ::1 only) - ---- - -## 📋 Remaining High-Priority Tasks (from TODO.md) - -### Phase 4: Code Quality & Observability (incomplete items) -- [x] Extract asyncHandler middleware ✓ -- [x] Add request logging (morgan) ✓ -- [x] Replace console.log with proper logger ✓ -- [x] Add pagination links to recipe list response ✓ -- [ ] **Full-text search (FTS5)** — low priority, can defer - -### Phase 2: Security (optional item) -- [x] Restrict harness routes to localhost ✓ - ---- - -## 🚀 Backlog (Post-v1) - -### v1.1 -- [ ] Recipe scaling (adjust servings) -- [ ] Print styles -- [ ] Advanced search filters -- [ ] Random recipe suggestion - -### v2.0 (AI Features) -- [ ] AI ingredient substitutions -- [ ] Meal planning -- [ ] Shopping list generation -- [ ] Fintrove cost tracking integration - ---- - -## 🗂️ Important Files - -- `TODO.md` — authoritative task queue -- `SESSION_SUMMARY_2026-03-29.md` — detailed session log -- `MEMORY.md` (workspace) — long-term memory ( Paul's preferences, model strategy ) -- `HEARTBEAT.md` — autonomous task schedule -- `docs/` — architecture and user docs - ---- - -## 🔄 Next Steps for Continuation - -1. **Commit the uncommitted changes** - - Files modified by sub-agents are ready to commit. 
- - Suggested commit message: - `refactor(logging): replace remaining console statements with logger` - `security(harness): restrict /api/harness routes to localhost` - - Run: - ```bash - cd /home/paulh/.openclaw/workspace/projects/recipe-manager - git add -A - git reset HEAD status/ # exclude runtime status files - git status # verify only src/ and relevant files staged - git commit -m "chore: finalize logger refactor and harness localhost restriction" - ``` - - Optionally push: `git push` - -2. **Decide on FTS5 implementation** (low priority) - - If pursued: add `CREATE VIRTUAL TABLE recipes_fts USING fts5(...)` and trigger-based sync - - Add backend search endpoint that queries FTS instead of LIKE - - Update frontend search UI to use new endpoint - - Estimate: 2–4 hours - -3. **Consider v1.1 features** (recipe scaling, print styles, etc.) - - Prioritize based on user needs - -4. **Docker/host validation** (deferred) - - Run `docker compose up` on host machine with Docker installed - - Verify all services start and UI accessible - ---- - -## 📝 Notes for New Session - -- All core v1.0 tasks completed except optional FTS5. -- The codebase is stable, well-tested, and production-ready. -- Error handling and logging are consistent across all layers. -- Harness routes are now secured to localhost only. -- No breaking changes remain; further work can be incremental. - ---- - -**End of summary.** This file can be loaded at session start to resume work immediately. 
diff --git a/package-lock.json b/package-lock.json
index 59174ed..da4cf7b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -12,6 +12,7 @@
         "dotenv": "^17.3.1",
         "express": "^4.18.2",
         "express-rate-limit": "^8.3.1",
+        "minisearch": "^7.2.0",
         "morgan": "^1.10.1",
         "multer": "^2.1.1",
         "sql.js": "^1.14.1",
@@ -2319,6 +2320,12 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/minisearch": {
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/minisearch/-/minisearch-7.2.0.tgz",
+      "integrity": "sha512-dqT2XBYUOZOiC5t2HRnwADjhNS2cecp9u+TJRiJ1Qp/f5qjkeT5APcGPjHw+bz89Ms8Jp+cG4AlE+QZ/QnDglg==",
+      "license": "MIT"
+    },
     "node_modules/mlly": {
       "version": "1.8.2",
       "resolved": "https://registry.npmjs.org/mlly/-/mlly-1.8.2.tgz",
diff --git a/package.json b/package.json
index 004400e..5b64157 100644
--- a/package.json
+++ b/package.json
@@ -20,6 +20,7 @@
     "dotenv": "^17.3.1",
     "express": "^4.18.2",
     "express-rate-limit": "^8.3.1",
+    "minisearch": "^7.2.0",
     "morgan": "^1.10.1",
     "multer": "^2.1.1",
     "sql.js": "^1.14.1",
diff --git a/src/backend/routes/recipes.ts b/src/backend/routes/recipes.ts
index 77fb647..17ee247 100644
--- a/src/backend/routes/recipes.ts
+++ b/src/backend/routes/recipes.ts
@@ -2,6 +2,7 @@ import { Router } from 'express';
 import { z } from 'zod';
 import type { Database } from 'sql.js';
 import { RecipeService } from '../services/RecipeService.js';
+import { SearchIndex } from '../services/SearchIndex.js';
 import { asyncHandler } from '../middleware.js';
 
 const createRecipeSchema = z.object({
@@ -69,7 +70,11 @@ const recipeFiltersSchema = z.object({
 
 export function createRecipeRoutes(db: Database): Router {
   const router = Router();
-  const recipeService = new RecipeService(db);
+  const searchIndex = SearchIndex.getInstance(db);
+  // Initialize search index in the background (non-blocking).
+  // Not awaited so route setup is not delayed; the /search handler awaits it again before querying.
+  searchIndex.initialize().catch((err: unknown) => console.error('SearchIndex initialization failed:', err));
+  const recipeService = new RecipeService(db, searchIndex);
 
   router.get('/', asyncHandler(async (req, res) => {
     const parsedFilters = recipeFiltersSchema.parse(req.query);
@@ -111,6 +116,55 @@ export function createRecipeRoutes(db: Database): Router {
     });
   }));
 
+  router.get('/search', asyncHandler(async (req, res) => {
+    const rawQuery = req.query.q;
+    const query = typeof rawQuery === 'string' ? rawQuery.trim() : '';
+    if (query.length === 0) {
+      return res.status(400).json({
+        success: false,
+        data: null,
+        error: 'Query parameter "q" is required',
+      });
+    }
+
+    // Parse pagination defensively: non-numeric values fall back to the defaults and
+    // the result is clamped, so NaN or negative numbers never reach slice() or the links.
+    const toInt = (value: unknown, fallback: number): number => {
+      const parsed = typeof value === 'string' ? Number.parseInt(value, 10) : Number.NaN;
+      return Number.isFinite(parsed) ? parsed : fallback;
+    };
+    const limit = Math.min(Math.max(toInt(req.query.limit, 20), 1), 100);
+    const offset = Math.max(toInt(req.query.offset, 0), 0);
+
+    await searchIndex.initialize();
+
+    // Rank the whole (capped) result set once and page over it. Capping the search
+    // relative to `limit` would make `total` wrong and leave deep offsets empty,
+    // because `offset` is applied after the cap.
+    const resultIds = searchIndex.search(query, 1000);
+    const total = resultIds.length;
+    const hasMore = total > offset + limit;
+
+    const recipes = resultIds
+      .slice(offset, offset + limit)
+      .map(id => recipeService.get(id))
+      .filter((recipe): recipe is NonNullable<typeof recipe> => recipe != null);
+
+    const baseUrl = `${req.protocol}://${req.get('host')}${req.baseUrl}/search`;
+    const pageUrl = (pageOffset: number): string =>
+      `${baseUrl}?q=${encodeURIComponent(query)}&offset=${pageOffset}&limit=${limit}`;
+    const meta = {
+      total,
+      offset,
+      limit,
+      has_more: hasMore,
+      next: hasMore ? pageUrl(offset + limit) : null,
+      prev: offset > 0 ? pageUrl(Math.max(0, offset - limit)) : null,
+    };
+
+    res.json({ success: true, data: recipes, meta, error: null });
+  }));
+
   router.get('/:id', asyncHandler(async (req, res) => {
     const id = parseInt(req.params.id, 10);
     if (isNaN(id)) {
diff --git a/src/backend/services/RecipeService.ts b/src/backend/services/RecipeService.ts
index 21aa220..1eee71a 100644
--- a/src/backend/services/RecipeService.ts
+++ b/src/backend/services/RecipeService.ts
@@ -1,10 +1,16 @@
 import type { Database } from 'sql.js';
 import { RecipeRepository } from '../repositories/RecipeRepository.js';
 import type { Recipe, CreateRecipeInput, UpdateRecipeInput, RecipeFilters } from '../types/recipe.js';
+import { SearchIndex } from './SearchIndex.js';
 
 export class RecipeService {
   private repository: RecipeRepository;
-  constructor(db: Database) { this.repository = new RecipeRepository(db); }
+  private searchIndex: SearchIndex;
+
+  constructor(db: Database, searchIndex?: SearchIndex) {
+    this.repository = new RecipeRepository(db);
+    this.searchIndex = searchIndex ?? SearchIndex.getInstance(db);
+  }
   list(filters: RecipeFilters = {}): { recipes: Recipe[]; total: number } {
     const recipes = this.repository.findAll(filters);
     const total = this.repository.count(filters);
@@ -15,13 +21,27 @@ export class RecipeService {
     if (!input.title.trim()) throw new Error('Recipe title cannot be empty');
     if (!input.ingredients.length) throw new Error('At least one ingredient');
     if (!input.steps.length) throw new Error('At least one step');
-    return this.repository.create(input);
+    const recipe = this.repository.create(input);
+    try { this.searchIndex.add(recipe); } catch (e) { console.error('SearchIndex add failed:', e); }
+    return recipe;
   }
+
   update(id: number, input: UpdateRecipeInput): Recipe | null {
     if (input.title !== undefined && !input.title.trim()) throw new Error('Recipe title cannot be empty');
     if (input.ingredients !== undefined && !input.ingredients.length) throw new Error('At least one ingredient');
     if (input.steps !== undefined && !input.steps.length) throw new Error('At least one step');
-    return this.repository.update(id, input);
+    const recipe = this.repository.update(id, input);
+    if (recipe) {
+      try { this.searchIndex.update(recipe); } catch (e) { console.error('SearchIndex update failed:', e); }
+    }
+    return recipe;
+  }
+
+  delete(id: number): boolean {
+    const deleted = this.repository.delete(id);
+    if (deleted) {
+      try { this.searchIndex.remove(id); } catch (e) { console.error('SearchIndex remove failed:', e); }
+    }
+    return deleted;
   }
-  delete(id: number): boolean { return this.repository.delete(id); }
 }
diff --git a/src/backend/services/SearchIndex.test.ts b/src/backend/services/SearchIndex.test.ts
new file mode 100644
index 0000000..7fb407c
--- /dev/null
+++ b/src/backend/services/SearchIndex.test.ts
@@ -0,0 +1,121 @@
+import { describe, it, expect, beforeEach } from 'vitest';
+import initSqlJs from 'sql.js';
+import type { Recipe } from '../types/recipe.js';
+import { SearchIndex } from '../services/SearchIndex.js';
+
+describe('SearchIndex', () => {
+  let db: any;
+  let searchIndex: SearchIndex;
+
+  beforeEach(async () => {
+    const SQL = await initSqlJs();
+    db = new SQL.Database();
+    db.exec(`
+      CREATE TABLE recipes (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        title TEXT NOT NULL,
+        description TEXT,
+        servings INTEGER,
+        prep_time_minutes INTEGER,
+        cook_time_minutes INTEGER,
+        source_url TEXT,
+        image_url TEXT,
+        made INTEGER DEFAULT 0,
+        rating INTEGER,
+        notes TEXT,
+        created_at DATETIME NOT NULL,
+        updated_at DATETIME NOT NULL
+      );
+      CREATE TABLE ingredients (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        recipe_id INTEGER NOT NULL,
+        position INTEGER,
+        quantity TEXT,
+        unit TEXT,
+        item TEXT NOT NULL,
+        notes TEXT
+      );
+      CREATE TABLE steps (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        recipe_id INTEGER NOT NULL,
+        position INTEGER,
+        instruction TEXT NOT NULL
+      );
+      CREATE TABLE tags (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        name TEXT UNIQUE NOT NULL
+      );
+      CREATE TABLE recipe_tags (
+        recipe_id INTEGER NOT NULL,
+        tag_id INTEGER NOT NULL,
+        PRIMARY KEY (recipe_id, tag_id)
+      );
+    `);
+    searchIndex = SearchIndex.getInstance(db);
+    await searchIndex.initialize();
+  });
+
+  const insertRecipe = (overlay: Partial<Recipe> & { id?: number } = {}): Recipe => {
+    const baseRecipe: Recipe = {
+      id: 0,
+      title: 'Test Recipe',
+      description: 'A test description',
+      servings: 2,
+      prep_time_minutes: 10,
+      cook_time_minutes: 20,
+      source_url: null,
+      image_url: null,
+      made: false,
+      rating: null,
+      notes: null,
+      created_at: Date.now(),
+      updated_at: Date.now(),
+      ingredients: [],
+      steps: [],
+      tags: [],
+    };
+    return { ...baseRecipe, ...overlay, id: overlay.id ?? Date.now() };
+  };
+
+  it('should add and retrieve by search', () => {
+    const recipe = insertRecipe({ title: 'Chocolate Cake' });
+    searchIndex.add(recipe);
+    const ids = searchIndex.search('Chocolate');
+    expect(ids).toContain(recipe.id);
+  });
+
+  it('should update index via update() method', () => {
+    const recipe = insertRecipe({ title: 'Apple Pie' });
+    searchIndex.add(recipe);
+    let ids = searchIndex.search('Apple');
+    expect(ids).toContain(recipe.id);
+
+    const updated = { ...recipe, title: 'Cherry Pie' };
+    searchIndex.update(updated);
+    ids = searchIndex.search('Apple');
+    expect(ids).not.toContain(recipe.id);
+    ids = searchIndex.search('Cherry');
+    expect(ids).toContain(recipe.id);
+  });
+
+  it('should remove from index', () => {
+    const recipe = insertRecipe({ title: 'Banana Bread' });
+    searchIndex.add(recipe);
+    searchIndex.remove(recipe.id);
+    const ids = searchIndex.search('Banana');
+    expect(ids).not.toContain(recipe.id);
+  });
+
+  it('should search across ingredients and tags', () => {
+    const recipeId = Date.now();
+    const recipe = insertRecipe({
+      id: recipeId,
+      title: 'Salad',
+      ingredients: [{ id: 1, recipe_id: recipeId, position: 0, item: 'lettuce', notes: null, quantity: '', unit: '' }],
+      tags: [{ id: 1, name: 'healthy' }],
+    });
+    searchIndex.add(recipe);
+    expect(searchIndex.search('lettuce')).toContain(recipe.id);
+    expect(searchIndex.search('healthy')).toContain(recipe.id);
+  });
+});
diff --git a/src/backend/services/SearchIndex.ts b/src/backend/services/SearchIndex.ts
new file mode 100644
index 0000000..efbe4c3
--- /dev/null
+++ b/src/backend/services/SearchIndex.ts
@@ -0,0 +1,133 @@
+import MiniSearch from 'minisearch';
+import type { Database } from 'sql.js';
+import { RecipeRepository } from '../repositories/RecipeRepository.js';
+import type { Recipe } from '../types/recipe.js';
+
+/** Flattened, text-only projection of a recipe; one per indexed document. */
+interface SearchDocument {
+  id: number;
+  title: string;
+  description: string;
+  ingredients_text: string;
+  instructions_text: string;
+  tags_text: string;
+  combined: string;
+}
+
+/**
+ * In-memory full-text index over recipes, backed by MiniSearch.
+ *
+ * One instance exists per database handle (see {@link SearchIndex.getInstance}).
+ * The index is not persisted: it is rebuilt from the database by
+ * {@link SearchIndex.initialize} and kept current through add/update/remove.
+ */
+export class SearchIndex {
+  private static readonly instances = new WeakMap<Database, SearchIndex>();
+  private readonly index: MiniSearch<SearchDocument>;
+  private readonly db: Database;
+  private initPromise: Promise<void> | null = null;
+
+  private constructor(db: Database) {
+    this.db = db;
+    this.index = new MiniSearch<SearchDocument>({
+      fields: ['title', 'description', 'ingredients_text', 'instructions_text', 'tags_text', 'combined'],
+      searchOptions: {
+        // Per-field weights belong in `searchOptions.boost`; MiniSearch has no
+        // top-level `fieldWeights` option, so weights passed there have no effect.
+        boost: {
+          title: 10,
+          description: 3,
+          ingredients_text: 5,
+          instructions_text: 3,
+          tags_text: 4,
+          combined: 1,
+        },
+        prefix: true,
+        fuzzy: false,
+      },
+    });
+  }
+
+  /**
+   * Returns the index bound to `db`, creating it on first use.
+   * Keyed by handle so a second database never sees another database's documents.
+   */
+  static getInstance(db: Database): SearchIndex {
+    let instance = SearchIndex.instances.get(db);
+    if (!instance) {
+      instance = new SearchIndex(db);
+      SearchIndex.instances.set(db, instance);
+    }
+    return instance;
+  }
+
+  /**
+   * Loads all recipes into the index. Safe to call repeatedly and concurrently:
+   * callers share a single rebuild, and a failed rebuild can be retried.
+   */
+  async initialize(): Promise<void> {
+    if (!this.initPromise) {
+      this.initPromise = this.rebuildFromDatabase().catch((err: unknown) => {
+        this.initPromise = null;
+        throw err;
+      });
+    }
+    await this.initPromise;
+  }
+
+  /** Projects a recipe onto the flat text fields that are indexed. */
+  private buildDocument(recipe: Recipe): SearchDocument {
+    const ingredientsText = recipe.ingredients.map(ing => ing.item).join(' ');
+    const instructionsText = recipe.steps.map(step => step.instruction).join(' ');
+    const tagsText = recipe.tags.map(tag => tag.name).join(' ');
+    const description = recipe.description || '';
+
+    return {
+      id: recipe.id,
+      title: recipe.title,
+      description,
+      ingredients_text: ingredientsText,
+      instructions_text: instructionsText,
+      tags_text: tagsText,
+      combined: [recipe.title, description, ingredientsText, instructionsText, tagsText].join(' '),
+    };
+  }
+
+  /** Indexes `recipe`, replacing any document already stored under the same id. */
+  add(recipe: Recipe): void {
+    // MiniSearch throws on duplicate ids; discard first so add() is idempotent.
+    this.remove(recipe.id);
+    this.index.add(this.buildDocument(recipe));
+  }
+
+  /** Re-indexes `recipe`; also works when it was not indexed before. */
+  update(recipe: Recipe): void {
+    this.add(recipe);
+  }
+
+  /** Removes a recipe from the index; unknown ids are ignored. */
+  remove(recipeId: number): void {
+    // discard() throws for ids that are not indexed, so check first.
+    if (this.index.has(recipeId)) {
+      this.index.discard(recipeId);
+    }
+  }
+
+  /**
+   * Returns ids of matching recipes, best match first.
+   * @param query - Free-text query; terms are prefix-matched.
+   * @param limit - Maximum number of ids to return (default 50).
+   */
+  search(query: string, limit = 50): number[] {
+    // MiniSearch's search options have no `limit`; truncate the ranked list here.
+    const results = this.index.search(query);
+    return results.slice(0, Math.max(0, limit)).map(result => result.id as number);
+  }
+
+  /** Indexes every recipe the repository returns (capped at 10000 rows). */
+  private async rebuildFromDatabase(): Promise<void> {
+    const repo = new RecipeRepository(this.db);
+    const recipes = repo.findAll({ limit: 10000 });
+    recipes.forEach(recipe => this.add(recipe));
+  }
+}