feat(import): implement Schema.org Recipe JSON-LD parser and draft normalization for import endpoint
This commit is contained in:
parent
146dc3325a
commit
64a1229ff0
|
|
@ -1,6 +1,7 @@
|
||||||
import { Router } from 'express';
|
import { Router } from 'express';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import { UrlImportService } from '../services/UrlImportService.js';
|
import { UrlImportService } from '../services/UrlImportService.js';
|
||||||
|
import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js';
|
||||||
|
|
||||||
const importUrlSchema = z.object({
|
const importUrlSchema = z.object({
|
||||||
url: z.string().url('A valid URL is required'),
|
url: z.string().url('A valid URL is required'),
|
||||||
|
|
@ -9,19 +10,27 @@ const importUrlSchema = z.object({
|
||||||
export function createImportRoutes(): Router {
|
export function createImportRoutes(): Router {
|
||||||
const router = Router();
|
const router = Router();
|
||||||
const urlImportService = new UrlImportService();
|
const urlImportService = new UrlImportService();
|
||||||
|
const schemaOrgParser = new SchemaOrgRecipeParserService();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* POST /api/import/url
|
* POST /api/import/url
|
||||||
* Fetch an external recipe page and return URL import foundation data.
|
* Fetch an external recipe page and return its import foundation data plus a normalized draft recipe (null if none is found).
|
||||||
*/
|
*/
|
||||||
router.post('/url', async (req, res) => {
|
router.post('/url', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const { url } = importUrlSchema.parse(req.body);
|
const { url } = importUrlSchema.parse(req.body);
|
||||||
const result = await urlImportService.fetchFromUrl(url);
|
const result = await urlImportService.fetchFromUrl(url);
|
||||||
|
|
||||||
|
// Try to parse and normalize Recipe from JSON-LD blocks
|
||||||
|
let draft: any = null;
|
||||||
|
for (const block of result.json_ld_blocks) {
|
||||||
|
draft = schemaOrgParser.parseJsonLdBlock(block);
|
||||||
|
if (draft) break;
|
||||||
|
}
|
||||||
|
|
||||||
res.status(200).json({
|
res.status(200).json({
|
||||||
success: true,
|
success: true,
|
||||||
data: result,
|
data: { ...result, draft_recipe: draft },
|
||||||
error: null,
|
error: null,
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
import { z } from 'zod';
|
||||||
|
import type { CreateRecipeInput } from '../types/recipe.js';
|
||||||
|
|
||||||
|
// Leaf of a recipeInstructions list: a plain string, or a HowToStep-like
// object that carries its text in `text`.
const instructionLeafSchema = z.union([z.string(), z.object({ text: z.string() })]);

// One entry of a recipeInstructions array: a leaf, or a HowToSection-like
// object that groups leaves under `itemListElement`. The leaf alternative is
// listed first so objects that carry `text` keep being read as steps.
const instructionEntrySchema = z.union([
  instructionLeafSchema,
  z.object({ itemListElement: z.array(instructionLeafSchema) }),
]);

// Validation schema for the Schema.org Recipe fields that are actually mapped
// into the draft. Built once at module load instead of on every parse call.
// Fields that are not mapped (recipeYield, prepTime, cookTime, ...) are
// deliberately NOT validated: zod strips unknown keys, so an unexpected shape
// in an unused field can no longer cause the whole recipe to be rejected.
const schemaOrgRecipeSchema = z.object({
  name: z.string().min(1),
  description: z.string().optional().nullable(),
  recipeIngredient: z.array(z.string()).optional(),
  recipeInstructions: z.union([z.string(), z.array(instructionEntrySchema)]).optional(),
  url: z.string().optional(),
});

/**
 * Parses and normalizes Schema.org Recipe JSON-LD blocks.
 */
export class SchemaOrgRecipeParserService {
  /**
   * Extracts and normalizes a Recipe, if present, from a JSON-LD string.
   *
   * The parsed document may be a single node, an array of nodes, or a node
   * wrapping further nodes in an `@graph` array; the first node that is a
   * valid Recipe wins.
   *
   * @param json Raw text content of one JSON-LD block.
   * @returns The normalized draft, or null if the text is not valid JSON or
   *          contains no valid Recipe.
   */
  parseJsonLdBlock(json: string): CreateRecipeInput | null {
    let parsed: unknown;
    try {
      parsed = JSON.parse(json);
    } catch {
      return null; // Malformed JSON block
    }
    return this.findRecipe(parsed);
  }

  /**
   * Internal: depth-first search for the first valid Recipe in a parsed
   * JSON-LD value. Descends into arrays and into `@graph` containers only.
   */
  private findRecipe(node: unknown): CreateRecipeInput | null {
    if (Array.isArray(node)) {
      for (const entry of node) {
        const found = this.findRecipe(entry);
        if (found) return found;
      }
      return null;
    }

    if (typeof node !== 'object' || node === null) {
      return null;
    }

    const direct = this.tryParseRecipe(node);
    if (direct) return direct;

    // Many publishers wrap all page entities in a single { "@graph": [...] } node.
    const graph = (node as Record<string, unknown>)['@graph'];
    return Array.isArray(graph) ? this.findRecipe(graph) : null;
  }

  /**
   * Internal: attempts to extract Recipe data from an object if @type matches.
   *
   * @param obj A parsed JSON-LD node of unknown shape.
   * @returns The normalized draft, or null when the node is not typed as
   *          Recipe, fails validation, or has a blank name.
   */
  private tryParseRecipe(obj: unknown): CreateRecipeInput | null {
    if (typeof obj !== 'object' || obj === null) {
      return null;
    }

    // Match Schema.org Recipe by @type (a string, or an array of type names)
    const type = (obj as Record<string, unknown>)['@type'];
    const isRecipe = type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'));
    if (!isRecipe) {
      return null;
    }

    const parseResult = schemaOrgRecipeSchema.safeParse(obj);
    if (!parseResult.success) return null;
    const schemaRecipe = parseResult.data;

    // min(1) runs before trimming, so reject whitespace-only names here.
    const title = schemaRecipe.name.trim();
    if (!title) return null;

    // Normalize instructions into string[]
    const instructions: string[] = [];
    const pushLeaf = (leaf: string | { text: string }): void => {
      if (typeof leaf === 'string') {
        instructions.push(leaf);
      } else if (leaf.text) {
        // Step objects with empty text carry no instruction; drop them.
        instructions.push(leaf.text);
      }
    };

    const rawInstructions = schemaRecipe.recipeInstructions;
    if (typeof rawInstructions === 'string') {
      if (rawInstructions) instructions.push(rawInstructions);
    } else if (rawInstructions) {
      for (const entry of rawInstructions) {
        if (typeof entry === 'string' || 'text' in entry) {
          pushLeaf(entry);
        } else {
          // HowToSection-like grouping: flatten its steps in order.
          entry.itemListElement.forEach(pushLeaf);
        }
      }
    }

    // Normalize output to CreateRecipeInput shape
    const result: CreateRecipeInput = {
      title,
      description: schemaRecipe.description?.trim() || undefined,
      ingredients: schemaRecipe.recipeIngredient ?? [],
      instructions,
      source_url: schemaRecipe.url,
    };

    return result;
  }
}
|
||||||
|
|
@ -26,11 +26,11 @@ describe('Import API', () => {
|
||||||
expect(response.body.error).toBeDefined();
|
expect(response.body.error).toBeDefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return imported foundation data for valid HTML', async () => {
|
it('should return imported foundation data and normalized draft for valid Schema.org recipe', async () => {
|
||||||
const html = `
|
const html = `
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<script type="application/ld+json">{"@type":"Recipe","name":"Pancakes"}</script>
|
<script type="application/ld+json">{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}</script>
|
||||||
</head>
|
</head>
|
||||||
<body>Hello</body>
|
<body>Hello</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
@ -51,8 +51,38 @@ describe('Import API', () => {
|
||||||
expect(response.body.success).toBe(true);
|
expect(response.body.success).toBe(true);
|
||||||
expect(response.body.data.source_url).toBe('https://example.com/recipe');
|
expect(response.body.data.source_url).toBe('https://example.com/recipe');
|
||||||
expect(response.body.data.json_ld_blocks).toEqual([
|
expect(response.body.data.json_ld_blocks).toEqual([
|
||||||
'{"@type":"Recipe","name":"Pancakes"}',
|
'{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}'
|
||||||
]);
|
]);
|
||||||
|
expect(response.body.data.draft_recipe).toMatchObject({
|
||||||
|
title: 'Pancakes',
|
||||||
|
ingredients: ["Flour", "Eggs"],
|
||||||
|
instructions: ["Mix","Cook"]
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return draft_recipe as null for non-recipe JSON-LD', async () => {
|
||||||
|
const html = `
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<script type="application/ld+json">{"@type":"Event","name":"Not a Recipe"}</script>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
vi.spyOn(globalThis, 'fetch').mockResolvedValue({
|
||||||
|
ok: true,
|
||||||
|
status: 200,
|
||||||
|
headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
|
||||||
|
text: async () => html,
|
||||||
|
} as Response);
|
||||||
|
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/api/import/url')
|
||||||
|
.send({ url: 'https://example.com/event' })
|
||||||
|
.expect(200);
|
||||||
|
|
||||||
|
expect(response.body.success).toBe(true);
|
||||||
|
expect(response.body.data.draft_recipe).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return an error for non-HTML responses', async () => {
|
it('should return an error for non-HTML responses', async () => {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue