From 3987083e46f952dc99130514782259b72e34c5fe Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 24 Mar 2026 21:47:27 -0400 Subject: [PATCH] feat(import): normalize parsed recipe into internal draft format --- TODO.md | 4 +- .../services/SchemaOrgRecipeParserService.ts | 131 +++++++++++------- src/backend/tests/import.test.ts | 35 ++++- 3 files changed, 117 insertions(+), 53 deletions(-) diff --git a/TODO.md b/TODO.md index 66c6ac9..ec8171d 100644 --- a/TODO.md +++ b/TODO.md @@ -26,8 +26,8 @@ MVP is functionally complete (core app + docs + tests). ### Phase 1: URL Import Foundation - [x] Add backend import endpoint: `POST /api/import/url` -- [ ] Implement Schema.org Recipe JSON-LD parser service -- [ ] Normalize parsed recipe into internal Recipe draft format +- [x] Implement Schema.org Recipe JSON-LD parser service +- [x] Normalize parsed recipe into internal Recipe draft format - [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD) ### Phase 2: Import UI diff --git a/src/backend/services/SchemaOrgRecipeParserService.ts b/src/backend/services/SchemaOrgRecipeParserService.ts index a033d7c..1ac8396 100644 --- a/src/backend/services/SchemaOrgRecipeParserService.ts +++ b/src/backend/services/SchemaOrgRecipeParserService.ts @@ -1,6 +1,22 @@ import { z } from 'zod'; import type { CreateRecipeInput } from '../types/recipe.js'; +interface SchemaOrgHowToStep { + text?: string; +} + +interface SchemaOrgRecipeCandidate { + '@type'?: string | string[]; + name?: string; + description?: string | null; + recipeIngredient?: string[]; + recipeInstructions?: string | string[] | SchemaOrgHowToStep[]; + url?: string; + recipeYield?: string | number; + prepTime?: string; + cookTime?: string; +} + /** * Parses and normalizes Schema.org Recipe JSON-LD blocks. */ @@ -10,81 +26,98 @@ export class SchemaOrgRecipeParserService { * Returns null if no valid Recipe is found. */ parseJsonLdBlock(json: string): CreateRecipeInput | null { - let obj: unknown; + let parsedJson: unknown; try { - obj = JSON.parse(json); - } catch (err) { - return null; // Malformed JSON block + parsedJson = JSON.parse(json); + } catch { + return null; } - // If block is an array, look for Recipe type within - if (Array.isArray(obj)) { - for (const entry of obj) { - const parsed = this.tryParseRecipe(entry); - if (parsed) return parsed; + if (Array.isArray(parsedJson)) { + for (const entry of parsedJson) { + const parsedRecipe = this.tryParseRecipe(entry); + if (parsedRecipe) return parsedRecipe; } return null; } - // Single object - return this.tryParseRecipe(obj); + return this.tryParseRecipe(parsedJson); } /** * Internal: attempts to extract Recipe data from an object if @type matches. */ - private tryParseRecipe(obj: any): CreateRecipeInput | null { - // Match Schema.org Recipe by @type - if (!obj || (obj['@type'] !== 'Recipe' && !(Array.isArray(obj['@type']) && obj['@type'].includes('Recipe')))) { - return null; - } - - // Zod validation schema for supported fields - const RecipeSchema = z.object({ + private tryParseRecipe(input: unknown): CreateRecipeInput | null { + const recipeSchema = z.object({ + '@type': z.union([z.string(), z.array(z.string())]).optional(), name: z.string().min(1), description: z.string().optional().nullable(), recipeIngredient: z.array(z.string()).optional(), - recipeInstructions: z.union([ - z.array(z.string()), - z.string(), - z.array(z.object({ text: z.string() })), - ]).optional(), + recipeInstructions: z + .union([ + z.array(z.string()), + z.string(), + z.array(z.object({ text: z.string().optional() })), + ]) + .optional(), url: z.string().optional(), - // Optionals for MVP only recipeYield: z.union([z.string(), z.number()]).optional(), prepTime: z.string().optional(), cookTime: z.string().optional(), }); - const parseResult = RecipeSchema.safeParse(obj); + const parseResult = recipeSchema.safeParse(input); if (!parseResult.success) return null; - const schemaRecipe = parseResult.data; - // Normalize instructions into string[] - let instructions: string[] = []; - if (schemaRecipe.recipeInstructions) { - if (typeof schemaRecipe.recipeInstructions === 'string') { - instructions = [schemaRecipe.recipeInstructions]; - } else if (Array.isArray(schemaRecipe.recipeInstructions)) { - if (typeof schemaRecipe.recipeInstructions[0] === 'string') { - instructions = schemaRecipe.recipeInstructions.slice(); - } else { - // Array of Step objects - instructions = (schemaRecipe.recipeInstructions as any[]).map((step) => step.text).filter(Boolean); - } - } + const recipe = parseResult.data as SchemaOrgRecipeCandidate; + if (!this.isRecipeType(recipe['@type'])) { + return null; } - // Normalize output to CreateRecipeInput shape - const result: CreateRecipeInput = { - title: schemaRecipe.name.trim(), - description: schemaRecipe.description?.trim() || undefined, - ingredients: schemaRecipe.recipeIngredient ?? [], - instructions, - source_url: schemaRecipe.url, - // These could be parsed from recipeYield/prepTime/cookTime if desired + return { + title: recipe.name!.trim(), + description: this.normalizeOptionalText(recipe.description), + ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []), + instructions: this.normalizeInstructions(recipe.recipeInstructions), + source_url: this.normalizeOptionalText(recipe.url), }; + } - return result; + private isRecipeType(type: string | string[] | undefined): boolean { + if (!type) return false; + if (typeof type === 'string') return type === 'Recipe'; + return type.includes('Recipe'); + } + + private normalizeOptionalText(value: string | null | undefined): string | undefined { + if (!value) return undefined; + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : undefined; + } + + private normalizeTextList(values: string[]): string[] { + return values + .map((value) => value.trim()) + .filter((value) => value.length > 0); + } + + private normalizeInstructions( + instructions: string | string[] | SchemaOrgHowToStep[] | undefined, + ): string[] { + if (!instructions) return []; + + if (typeof instructions === 'string') { + return this.normalizeTextList([instructions]); + } + + if (instructions.length === 0) { + return []; + } + + if (typeof instructions[0] === 'string') { + return this.normalizeTextList(instructions as string[]); + } + + return this.normalizeTextList((instructions as SchemaOrgHowToStep[]).map((step) => step.text ?? '')); } } diff --git a/src/backend/tests/import.test.ts b/src/backend/tests/import.test.ts index 78d85cb..e578491 100644 --- a/src/backend/tests/import.test.ts +++ b/src/backend/tests/import.test.ts @@ -55,8 +55,39 @@ describe('Import API', () => { ]); expect(response.body.data.draft_recipe).toMatchObject({ title: 'Pancakes', - ingredients: ["Flour", "Eggs"], - instructions: ["Mix","Cook"] + ingredients: ['Flour', 'Eggs'], + instructions: ['Mix', 'Cook'] + }); + }); + + it('should normalize whitespace and HowToStep instructions into draft format', async () => { + const html = ` + + + + + + `; + + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + status: 200, + headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }), + text: async () => html, + } as Response); + + const response = await request(app) + .post('/api/import/url') + .send({ url: 'https://example.com/soup-page' }) + .expect(200); + + expect(response.body.success).toBe(true); + expect(response.body.data.draft_recipe).toEqual({ + title: 'Tomato Soup', + description: 'Cozy weeknight soup.', + ingredients: ['Tomato', 'Salt'], + instructions: ['Simmer tomatoes.', 'Blend and serve.'], + source_url: 'https://example.com/soup' }); });