feat(import): normalize parsed recipe into internal draft format

This commit is contained in:
Paul Huliganga 2026-03-24 21:47:27 -04:00
parent 64a1229ff0
commit 3987083e46
3 changed files with 117 additions and 53 deletions

View File

@@ -26,8 +26,8 @@ MVP is functionally complete (core app + docs + tests).
### Phase 1: URL Import Foundation ### Phase 1: URL Import Foundation
- [x] Add backend import endpoint: `POST /api/import/url` - [x] Add backend import endpoint: `POST /api/import/url`
- [ ] Implement Schema.org Recipe JSON-LD parser service - [x] Implement Schema.org Recipe JSON-LD parser service
- [ ] Normalize parsed recipe into internal Recipe draft format - [x] Normalize parsed recipe into internal Recipe draft format
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD) - [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
### Phase 2: Import UI ### Phase 2: Import UI

View File

@@ -1,6 +1,22 @@
import { z } from 'zod'; import { z } from 'zod';
import type { CreateRecipeInput } from '../types/recipe.js'; import type { CreateRecipeInput } from '../types/recipe.js';
/**
 * Object form of a single instruction entry inside `recipeInstructions`.
 * Only the step text is modelled, and it may be absent.
 */
interface SchemaOrgHowToStep {
  /** Text of the step, when the source object supplies one. */
  text?: string;
}
/**
 * Loose shape of a JSON-LD object that may describe a recipe.
 * Every field is optional, so the type alone does not guarantee that the
 * object is a usable recipe.
 */
interface SchemaOrgRecipeCandidate {
  /** Declared type: either one type name or a list of type names. */
  '@type'?: string | Array<string>;

  // Core descriptive fields.
  name?: string;
  description?: string | null;
  url?: string;

  // Recipe content.
  recipeIngredient?: Array<string>;
  /** A single text blob, a list of step strings, or a list of step objects. */
  recipeInstructions?: string | Array<string> | Array<SchemaOrgHowToStep>;

  // Yield and timing metadata.
  recipeYield?: string | number;
  prepTime?: string;
  cookTime?: string;
}
/** /**
* Parses and normalizes Schema.org Recipe JSON-LD blocks. * Parses and normalizes Schema.org Recipe JSON-LD blocks.
*/ */
@@ -10,81 +26,98 @@ export class SchemaOrgRecipeParserService {
* Returns null if no valid Recipe is found. * Returns null if no valid Recipe is found.
*/ */
parseJsonLdBlock(json: string): CreateRecipeInput | null { parseJsonLdBlock(json: string): CreateRecipeInput | null {
let obj: unknown; let parsedJson: unknown;
try { try {
obj = JSON.parse(json); parsedJson = JSON.parse(json);
} catch (err) { } catch {
return null; // Malformed JSON block return null;
} }
// If block is an array, look for Recipe type within if (Array.isArray(parsedJson)) {
if (Array.isArray(obj)) { for (const entry of parsedJson) {
for (const entry of obj) { const parsedRecipe = this.tryParseRecipe(entry);
const parsed = this.tryParseRecipe(entry); if (parsedRecipe) return parsedRecipe;
if (parsed) return parsed;
} }
return null; return null;
} }
// Single object return this.tryParseRecipe(parsedJson);
return this.tryParseRecipe(obj);
} }
/** /**
* Internal: attempts to extract Recipe data from an object if @type matches. * Internal: attempts to extract Recipe data from an object if @type matches.
*/ */
private tryParseRecipe(obj: any): CreateRecipeInput | null { private tryParseRecipe(input: unknown): CreateRecipeInput | null {
// Match Schema.org Recipe by @type const recipeSchema = z.object({
if (!obj || (obj['@type'] !== 'Recipe' && !(Array.isArray(obj['@type']) && obj['@type'].includes('Recipe')))) { '@type': z.union([z.string(), z.array(z.string())]).optional(),
return null;
}
// Zod validation schema for supported fields
const RecipeSchema = z.object({
name: z.string().min(1), name: z.string().min(1),
description: z.string().optional().nullable(), description: z.string().optional().nullable(),
recipeIngredient: z.array(z.string()).optional(), recipeIngredient: z.array(z.string()).optional(),
recipeInstructions: z.union([ recipeInstructions: z
.union([
z.array(z.string()), z.array(z.string()),
z.string(), z.string(),
z.array(z.object({ text: z.string() })), z.array(z.object({ text: z.string().optional() })),
]).optional(), ])
.optional(),
url: z.string().optional(), url: z.string().optional(),
// Optionals for MVP only
recipeYield: z.union([z.string(), z.number()]).optional(), recipeYield: z.union([z.string(), z.number()]).optional(),
prepTime: z.string().optional(), prepTime: z.string().optional(),
cookTime: z.string().optional(), cookTime: z.string().optional(),
}); });
const parseResult = RecipeSchema.safeParse(obj); const parseResult = recipeSchema.safeParse(input);
if (!parseResult.success) return null; if (!parseResult.success) return null;
const schemaRecipe = parseResult.data;
// Normalize instructions into string[] const recipe = parseResult.data as SchemaOrgRecipeCandidate;
let instructions: string[] = []; if (!this.isRecipeType(recipe['@type'])) {
if (schemaRecipe.recipeInstructions) { return null;
if (typeof schemaRecipe.recipeInstructions === 'string') {
instructions = [schemaRecipe.recipeInstructions];
} else if (Array.isArray(schemaRecipe.recipeInstructions)) {
if (typeof schemaRecipe.recipeInstructions[0] === 'string') {
instructions = schemaRecipe.recipeInstructions.slice();
} else {
// Array of Step objects
instructions = (schemaRecipe.recipeInstructions as any[]).map((step) => step.text).filter(Boolean);
}
}
} }
// Normalize output to CreateRecipeInput shape return {
const result: CreateRecipeInput = { title: recipe.name!.trim(),
title: schemaRecipe.name.trim(), description: this.normalizeOptionalText(recipe.description),
description: schemaRecipe.description?.trim() || undefined, ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []),
ingredients: schemaRecipe.recipeIngredient ?? [], instructions: this.normalizeInstructions(recipe.recipeInstructions),
instructions, source_url: this.normalizeOptionalText(recipe.url),
source_url: schemaRecipe.url,
// These could be parsed from recipeYield/prepTime/cookTime if desired
}; };
}
return result; private isRecipeType(type: string | string[] | undefined): boolean {
if (!type) return false;
if (typeof type === 'string') return type === 'Recipe';
return type.includes('Recipe');
}
/**
 * Normalizes an optional text value by trimming surrounding whitespace.
 *
 * @param value - Text that may be missing (`null` or `undefined`) or blank.
 * @returns The trimmed text, or `undefined` when the value is missing, empty,
 *   or consists only of whitespace.
 */
private normalizeOptionalText(value: string | null | undefined): string | undefined {
  // Missing and empty inputs collapse to '' so a single check covers every "no text" case.
  const cleaned = value ? value.trim() : '';
  return cleaned === '' ? undefined : cleaned;
}
/**
 * Cleans a list of text entries: each entry is trimmed, and entries that are
 * empty after trimming are dropped. The order of surviving entries is kept.
 *
 * @param values - Raw text entries.
 * @returns A new array containing only trimmed, non-empty strings.
 */
private normalizeTextList(values: string[]): string[] {
  return values.reduce<string[]>((kept, entry) => {
    const text = entry.trim();
    if (text.length > 0) {
      kept.push(text);
    }
    return kept;
  }, []);
}
private normalizeInstructions(
instructions: string | string[] | SchemaOrgHowToStep[] | undefined,
): string[] {
if (!instructions) return [];
if (typeof instructions === 'string') {
return this.normalizeTextList([instructions]);
}
if (instructions.length === 0) {
return [];
}
if (typeof instructions[0] === 'string') {
return this.normalizeTextList(instructions as string[]);
}
return this.normalizeTextList((instructions as SchemaOrgHowToStep[]).map((step) => step.text ?? ''));
} }
} }

View File

@@ -55,8 +55,39 @@ describe('Import API', () => {
]); ]);
expect(response.body.data.draft_recipe).toMatchObject({ expect(response.body.data.draft_recipe).toMatchObject({
title: 'Pancakes', title: 'Pancakes',
ingredients: ["Flour", "Eggs"], ingredients: ['Flour', 'Eggs'],
instructions: ["Mix","Cook"] instructions: ['Mix', 'Cook']
});
});
it('should normalize whitespace and HowToStep instructions into draft format', async () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"@type":["Thing","Recipe"],"name":" Tomato Soup ","description":" Cozy weeknight soup. ","recipeIngredient":[" Tomato ",""," Salt "],"recipeInstructions":[{"text":" Simmer tomatoes. "},{"text":" Blend and serve. "}],"url":" https://example.com/soup "}</script>
</head>
</html>
`;
vi.spyOn(globalThis, 'fetch').mockResolvedValue({
ok: true,
status: 200,
headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
text: async () => html,
} as Response);
const response = await request(app)
.post('/api/import/url')
.send({ url: 'https://example.com/soup-page' })
.expect(200);
expect(response.body.success).toBe(true);
expect(response.body.data.draft_recipe).toEqual({
title: 'Tomato Soup',
description: 'Cozy weeknight soup.',
ingredients: ['Tomato', 'Salt'],
instructions: ['Simmer tomatoes.', 'Blend and serve.'],
source_url: 'https://example.com/soup'
}); });
}); });