feat(import): implement Schema.org Recipe JSON-LD parser and draft normalization for import endpoint

This commit is contained in:
Paul Huliganga 2026-03-24 20:37:46 -04:00
parent 146dc3325a
commit 64a1229ff0
3 changed files with 134 additions and 5 deletions

View File

@ -1,6 +1,7 @@
import { Router } from 'express';
import { z } from 'zod';
import { UrlImportService } from '../services/UrlImportService.js';
import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js';
const importUrlSchema = z.object({
url: z.string().url('A valid URL is required'),
@ -9,19 +10,27 @@ const importUrlSchema = z.object({
export function createImportRoutes(): Router {
const router = Router();
const urlImportService = new UrlImportService();
const schemaOrgParser = new SchemaOrgRecipeParserService();
/**
* POST /api/import/url
* Fetch an external recipe page and return URL import foundation data.
* Fetch an external recipe page and return imported, normalized Recipe (if found)
*/
router.post('/url', async (req, res) => {
try {
const { url } = importUrlSchema.parse(req.body);
const result = await urlImportService.fetchFromUrl(url);
// Try to parse and normalize Recipe from JSON-LD blocks
let draft: any = null;
for (const block of result.json_ld_blocks) {
draft = schemaOrgParser.parseJsonLdBlock(block);
if (draft) break;
}
res.status(200).json({
success: true,
data: result,
data: { ...result, draft_recipe: draft },
error: null,
});
} catch (error) {

View File

@ -0,0 +1,90 @@
import { z } from 'zod';
import type { CreateRecipeInput } from '../types/recipe.js';
/**
 * Parses Schema.org Recipe JSON-LD blocks and normalizes them into the
 * application's recipe draft shape.
 *
 * JSON-LD found on real pages is inconsistent, so the parser is strict only
 * about what a draft cannot exist without (a Recipe `@type` and a non-blank
 * `name`). Every other field degrades gracefully: an unexpected shape yields
 * an empty or omitted value instead of discarding the whole recipe.
 */
export class SchemaOrgRecipeParserService {
  /**
   * Maximum container nesting followed while searching for a Recipe or while
   * collecting text. The input comes from external pages, so recursion is
   * bounded rather than trusting the document depth.
   */
  private static readonly MAX_NESTING_DEPTH = 8;

  /**
   * Validation schema for the fields that are actually read. Built once
   * instead of on every call. Fields whose shape varies between publishers
   * are accepted as `unknown` and normalized by `collectText`.
   */
  private static readonly RECIPE_SCHEMA = z.object({
    name: z.string(),
    description: z.string().optional().nullable(),
    url: z.string().optional().nullable(),
    recipeIngredient: z.unknown(),
    recipeInstructions: z.unknown(),
  });

  /**
   * Extracts and normalizes the first Recipe found in a JSON-LD string.
   *
   * The block may be a single node, an array of nodes, or a node carrying an
   * `@graph` array; all three are searched in document order.
   *
   * @param json - Raw text content of one `application/ld+json` script block.
   * @returns The normalized recipe draft, or `null` when the text is not
   *   valid JSON or contains no usable Recipe node.
   */
  parseJsonLdBlock(json: string): CreateRecipeInput | null {
    let parsed: unknown;
    try {
      parsed = JSON.parse(json);
    } catch {
      return null; // Malformed JSON block
    }
    return this.findRecipe(parsed, 0);
  }

  /**
   * Internal: depth-first search for the first node that parses as a Recipe,
   * descending into arrays and `@graph` containers.
   */
  private findRecipe(node: unknown, depth: number): CreateRecipeInput | null {
    if (depth > SchemaOrgRecipeParserService.MAX_NESTING_DEPTH) return null;

    if (Array.isArray(node)) {
      for (const entry of node) {
        const found = this.findRecipe(entry, depth + 1);
        if (found) return found;
      }
      return null;
    }

    const direct = this.tryParseRecipe(node);
    if (direct) return direct;

    // A node that is not itself a Recipe may still wrap one in `@graph`.
    if (typeof node === 'object' && node !== null && '@graph' in node) {
      return this.findRecipe((node as Record<string, unknown>)['@graph'], depth + 1);
    }
    return null;
  }

  /**
   * Internal: attempts to extract Recipe data from a single node.
   *
   * @returns `null` when the node is not an object, its `@type` is neither
   *   `'Recipe'` nor an array containing `'Recipe'`, validation fails, or the
   *   name is blank after trimming.
   */
  private tryParseRecipe(obj: unknown): CreateRecipeInput | null {
    if (typeof obj !== 'object' || obj === null || Array.isArray(obj)) return null;

    const type = (obj as Record<string, unknown>)['@type'];
    const isRecipe = type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'));
    if (!isRecipe) return null;

    const validation = SchemaOrgRecipeParserService.RECIPE_SCHEMA.safeParse(obj);
    if (!validation.success) return null;
    const schemaRecipe = validation.data;

    // A whitespace-only name would otherwise become an empty title.
    const title = schemaRecipe.name.trim();
    if (!title) return null;

    // Normalize output to CreateRecipeInput shape.
    // recipeYield/prepTime/cookTime are intentionally not validated until they
    // are mapped, so their shape cannot cause an otherwise valid recipe to be
    // rejected.
    const result: CreateRecipeInput = {
      title,
      description: schemaRecipe.description?.trim() || undefined,
      ingredients: this.collectText(schemaRecipe.recipeIngredient, 0),
      instructions: this.collectText(schemaRecipe.recipeInstructions, 0),
      source_url: schemaRecipe.url ?? undefined,
    };
    return result;
  }

  /**
   * Internal: flattens a text-bearing JSON-LD value into trimmed, non-empty
   * strings, preserving document order.
   *
   * Handles a plain string (kept as a single entry), an array of any mix of
   * supported values, an object with a string `text` property, and an object
   * with an `itemListElement` property whose value is collected recursively.
   * Anything else contributes nothing.
   */
  private collectText(value: unknown, depth: number): string[] {
    if (depth > SchemaOrgRecipeParserService.MAX_NESTING_DEPTH) return [];

    if (typeof value === 'string') {
      const text = value.trim();
      return text ? [text] : [];
    }
    if (Array.isArray(value)) {
      return value.flatMap((entry) => this.collectText(entry, depth + 1));
    }
    if (typeof value === 'object' && value !== null) {
      const node = value as Record<string, unknown>;
      // Prefer the node's own text; fall back to its nested list of steps.
      if (typeof node.text === 'string') {
        return this.collectText(node.text, depth + 1);
      }
      if ('itemListElement' in node) {
        return this.collectText(node.itemListElement, depth + 1);
      }
    }
    return [];
  }
}

View File

@ -26,11 +26,11 @@ describe('Import API', () => {
expect(response.body.error).toBeDefined();
});
it('should return imported foundation data for valid HTML', async () => {
it('should return imported foundation data and normalized draft for valid Schema.org recipe', async () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"@type":"Recipe","name":"Pancakes"}</script>
<script type="application/ld+json">{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}</script>
</head>
<body>Hello</body>
</html>
@ -51,8 +51,38 @@ describe('Import API', () => {
expect(response.body.success).toBe(true);
expect(response.body.data.source_url).toBe('https://example.com/recipe');
expect(response.body.data.json_ld_blocks).toEqual([
'{"@type":"Recipe","name":"Pancakes"}',
'{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}'
]);
expect(response.body.data.draft_recipe).toMatchObject({
title: 'Pancakes',
ingredients: ["Flour", "Eggs"],
instructions: ["Mix","Cook"]
});
});
it('should return draft_recipe as null for non-recipe JSON-LD', async () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"@type":"Event","name":"Not a Recipe"}</script>
</head>
</html>
`;
vi.spyOn(globalThis, 'fetch').mockResolvedValue({
ok: true,
status: 200,
headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
text: async () => html,
} as Response);
const response = await request(app)
.post('/api/import/url')
.send({ url: 'https://example.com/event' })
.expect(200);
expect(response.body.success).toBe(true);
expect(response.body.data.draft_recipe).toBeNull();
});
it('should return an error for non-HTML responses', async () => {