From 64a1229ff0ceadd8edd1ab42e1abc0403ed24482 Mon Sep 17 00:00:00 2001 From: Paul Huliganga Date: Tue, 24 Mar 2026 20:37:46 -0400 Subject: [PATCH] feat(import): implement Schema.org Recipe JSON-LD parser and draft normalization for import endpoint --- src/backend/routes/import.ts | 13 ++- .../services/SchemaOrgRecipeParserService.ts | 90 +++++++++++++++++++ src/backend/tests/import.test.ts | 36 +++++++- 3 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 src/backend/services/SchemaOrgRecipeParserService.ts diff --git a/src/backend/routes/import.ts b/src/backend/routes/import.ts index d14032c..0e668ab 100644 --- a/src/backend/routes/import.ts +++ b/src/backend/routes/import.ts @@ -1,6 +1,7 @@ import { Router } from 'express'; import { z } from 'zod'; import { UrlImportService } from '../services/UrlImportService.js'; +import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js'; const importUrlSchema = z.object({ url: z.string().url('A valid URL is required'), @@ -9,19 +10,27 @@ const importUrlSchema = z.object({ export function createImportRoutes(): Router { const router = Router(); const urlImportService = new UrlImportService(); + const schemaOrgParser = new SchemaOrgRecipeParserService(); /** * POST /api/import/url - * Fetch an external recipe page and return URL import foundation data. + * Fetch an external recipe page and return imported, normalized Recipe (if found) */ router.post('/url', async (req, res) => { try { const { url } = importUrlSchema.parse(req.body); const result = await urlImportService.fetchFromUrl(url); + // Try to parse and normalize Recipe from JSON-LD blocks + let draft: any = null; + for (const block of result.json_ld_blocks) { + draft = schemaOrgParser.parseJsonLdBlock(block); + if (draft) break; + } + res.status(200).json({ success: true, - data: result, + data: { ...result, draft_recipe: draft }, error: null, }); } catch (error) { diff --git a/src/backend/services/SchemaOrgRecipeParserService.ts b/src/backend/services/SchemaOrgRecipeParserService.ts new file mode 100644 index 0000000..a033d7c --- /dev/null +++ b/src/backend/services/SchemaOrgRecipeParserService.ts @@ -0,0 +1,90 @@ +import { z } from 'zod'; +import type { CreateRecipeInput } from '../types/recipe.js'; + +/** + * Parses and normalizes Schema.org Recipe JSON-LD blocks. + */ +export class SchemaOrgRecipeParserService { + /** + * Extracts and normalizes a Recipe, if present, from a JSON-LD string. + * Returns null if no valid Recipe is found. + */ + parseJsonLdBlock(json: string): CreateRecipeInput | null { + let obj: unknown; + try { + obj = JSON.parse(json); + } catch (err) { + return null; // Malformed JSON block + } + + // If block is an array, look for Recipe type within + if (Array.isArray(obj)) { + for (const entry of obj) { + const parsed = this.tryParseRecipe(entry); + if (parsed) return parsed; + } + return null; + } + + // Single object + return this.tryParseRecipe(obj); + } + + /** + * Internal: attempts to extract Recipe data from an object if @type matches. + */ + private tryParseRecipe(obj: any): CreateRecipeInput | null { + // Match Schema.org Recipe by @type + if (!obj || (obj['@type'] !== 'Recipe' && !(Array.isArray(obj['@type']) && obj['@type'].includes('Recipe')))) { + return null; + } + + // Zod validation schema for supported fields + const RecipeSchema = z.object({ + name: z.string().min(1), + description: z.string().optional().nullable(), + recipeIngredient: z.array(z.string()).optional(), + recipeInstructions: z.union([ + z.array(z.string()), + z.string(), + z.array(z.object({ text: z.string() })), + ]).optional(), + url: z.string().optional(), + // Optionals for MVP only + recipeYield: z.union([z.string(), z.number()]).optional(), + prepTime: z.string().optional(), + cookTime: z.string().optional(), + }); + + const parseResult = RecipeSchema.safeParse(obj); + if (!parseResult.success) return null; + const schemaRecipe = parseResult.data; + + // Normalize instructions into string[] + let instructions: string[] = []; + if (schemaRecipe.recipeInstructions) { + if (typeof schemaRecipe.recipeInstructions === 'string') { + instructions = [schemaRecipe.recipeInstructions]; + } else if (Array.isArray(schemaRecipe.recipeInstructions)) { + if (typeof schemaRecipe.recipeInstructions[0] === 'string') { + instructions = schemaRecipe.recipeInstructions.slice(); + } else { + // Array of Step objects + instructions = (schemaRecipe.recipeInstructions as any[]).map((step) => step.text).filter(Boolean); + } + } + } + + // Normalize output to CreateRecipeInput shape + const result: CreateRecipeInput = { + title: schemaRecipe.name.trim(), + description: schemaRecipe.description?.trim() || undefined, + ingredients: schemaRecipe.recipeIngredient ?? [], + instructions, + source_url: schemaRecipe.url, + // These could be parsed from recipeYield/prepTime/cookTime if desired + }; + + return result; + } +} diff --git a/src/backend/tests/import.test.ts b/src/backend/tests/import.test.ts index b9e3ccc..78d85cb 100644 --- a/src/backend/tests/import.test.ts +++ b/src/backend/tests/import.test.ts @@ -26,11 +26,11 @@ describe('Import API', () => { expect(response.body.error).toBeDefined(); }); - it('should return imported foundation data for valid HTML', async () => { + it('should return imported foundation data and normalized draft for valid Schema.org recipe', async () => { const html = ` - + Hello @@ -51,8 +51,38 @@ describe('Import API', () => { expect(response.body.success).toBe(true); expect(response.body.data.source_url).toBe('https://example.com/recipe'); expect(response.body.data.json_ld_blocks).toEqual([ - '{"@type":"Recipe","name":"Pancakes"}', + '{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}' ]); + expect(response.body.data.draft_recipe).toMatchObject({ + title: 'Pancakes', + ingredients: ["Flour", "Eggs"], + instructions: ["Mix","Cook"] + }); + }); + + it('should return draft_recipe as null for non-recipe JSON-LD', async () => { + const html = ` + + + + + + `; + + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + status: 200, + headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }), + text: async () => html, + } as Response); + + const response = await request(app) + .post('/api/import/url') + .send({ url: 'https://example.com/event' }) + .expect(200); + + expect(response.body.success).toBe(true); + expect(response.body.data.draft_recipe).toBeNull(); }); it('should return an error for non-HTML responses', async () => {