feat(import): normalize parsed recipe into internal draft format

This commit is contained in:
Paul Huliganga 2026-03-24 21:47:27 -04:00
parent 64a1229ff0
commit 3987083e46
3 changed files with 117 additions and 53 deletions

View File

@ -26,8 +26,8 @@ MVP is functionally complete (core app + docs + tests).
### Phase 1: URL Import Foundation
- [x] Add backend import endpoint: `POST /api/import/url`
- [ ] Implement Schema.org Recipe JSON-LD parser service
- [ ] Normalize parsed recipe into internal Recipe draft format
- [x] Implement Schema.org Recipe JSON-LD parser service
- [x] Normalize parsed recipe into internal Recipe draft format
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
### Phase 2: Import UI

View File

@ -1,6 +1,22 @@
import { z } from 'zod';
import type { CreateRecipeInput } from '../types/recipe.js';
/** Step object that may appear inside a `recipeInstructions` array; only `text` is read. */
interface SchemaOrgHowToStep {
/** Instruction text. A step without it contributes an empty string, which is then filtered out. */
text?: string;
}
/**
 * Subset of a Schema.org Recipe JSON-LD node that the parser reads after schema validation.
 * Every field is optional here; the validation schema in `tryParseRecipe` decides what is required.
 */
interface SchemaOrgRecipeCandidate {
/** JSON-LD type tag(s); the node is accepted only when this is, or contains, 'Recipe'. */
'@type'?: string | string[];
/** Recipe name; becomes the draft `title` after trimming. */
name?: string;
/** Free-text description; becomes the draft `description` (blank or null is dropped). */
description?: string | null;
/** Ingredient lines; become the draft `ingredients` after trimming and removing blanks. */
recipeIngredient?: string[];
/** Instructions as one string, a list of strings, or a list of step objects. */
recipeInstructions?: string | string[] | SchemaOrgHowToStep[];
/** Recipe URL declared in the JSON-LD; becomes the draft `source_url`. */
url?: string;
/** Validated by the schema but not copied into the draft by the visible code. */
recipeYield?: string | number;
/** Validated by the schema but not copied into the draft by the visible code. */
prepTime?: string;
/** Validated by the schema but not copied into the draft by the visible code. */
cookTime?: string;
}
/**
* Parses and normalizes Schema.org Recipe JSON-LD blocks.
*/
@ -10,81 +26,98 @@ export class SchemaOrgRecipeParserService {
* Returns null if no valid Recipe is found.
*/
parseJsonLdBlock(json: string): CreateRecipeInput | null {
  let parsedJson: unknown;
  try {
    parsedJson = JSON.parse(json);
  } catch {
    // Malformed JSON is treated the same as "no recipe in this block".
    return null;
  }

  // A JSON-LD block is either a single node or a top-level array of nodes;
  // wrap the single-node case so both go through the same loop.
  const nodes: unknown[] = Array.isArray(parsedJson) ? parsedJson : [parsedJson];

  for (const node of nodes) {
    const recipe = this.tryParseRecipe(node);
    if (recipe) return recipe;

    // Nodes can also sit one level down under the JSON-LD `@graph` keyword
    // (e.g. `{"@context": "...", "@graph": [ ... ]}`), so search there too.
    if (typeof node === 'object' && node !== null) {
      const graph = (node as Record<string, unknown>)['@graph'];
      if (Array.isArray(graph)) {
        for (const graphNode of graph) {
          const graphRecipe = this.tryParseRecipe(graphNode);
          if (graphRecipe) return graphRecipe;
        }
      }
    }
  }

  // No node in the block validated as a Recipe.
  return null;
}
/**
 * Validates one parsed JSON-LD node and converts it into a recipe draft.
 *
 * @param input - A single JSON-LD node of unknown shape.
 * @returns The normalized draft, or `null` when the node fails schema
 *   validation, is not typed as `Recipe`, or has a blank name.
 */
private tryParseRecipe(input: unknown): CreateRecipeInput | null {
  const recipeSchema = z.object({
    '@type': z.union([z.string(), z.array(z.string())]).optional(),
    name: z.string().min(1),
    description: z.string().optional().nullable(),
    recipeIngredient: z.array(z.string()).optional(),
    recipeInstructions: z
      .union([
        z.array(z.string()),
        z.string(),
        z.array(z.object({ text: z.string().optional() })),
      ])
      .optional(),
    url: z.string().optional(),
    // Accepted so that typical Recipe nodes validate; not mapped into the draft below.
    recipeYield: z.union([z.string(), z.number()]).optional(),
    prepTime: z.string().optional(),
    cookTime: z.string().optional(),
  });

  const parseResult = recipeSchema.safeParse(input);
  if (!parseResult.success) return null;

  // Typed declaration instead of an `as` assertion: the compiler verifies that
  // the validated data really fits the candidate shape.
  const recipe: SchemaOrgRecipeCandidate = parseResult.data;
  if (!this.isRecipeType(recipe['@type'])) return null;

  // `min(1)` still lets a whitespace-only name through; reject it here rather
  // than emitting a draft with an empty title.
  const title = this.normalizeOptionalText(recipe.name);
  if (title === undefined) return null;

  return {
    title,
    description: this.normalizeOptionalText(recipe.description),
    ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []),
    instructions: this.normalizeInstructions(recipe.recipeInstructions),
    source_url: this.normalizeOptionalText(recipe.url),
  };
}
/**
 * Tells whether a JSON-LD `@type` value names the `Recipe` type.
 *
 * @param type - A single type name, a list of type names, or `undefined`.
 * @returns `true` only when the value is exactly 'Recipe' or a list containing it.
 */
private isRecipeType(type: string | string[] | undefined): boolean {
  // Treat the single-name form as a one-element list so one lookup covers both shapes.
  const declaredTypes = typeof type === 'string' ? [type] : type ?? [];
  return declaredTypes.includes('Recipe');
}
/**
 * Trims an optional text field, collapsing "nothing useful" to `undefined`.
 *
 * @param value - Raw text, or `null`/`undefined` when the field is absent.
 * @returns The trimmed text, or `undefined` when the input is missing, empty,
 *   or whitespace-only.
 */
private normalizeOptionalText(value: string | null | undefined): string | undefined {
  const trimmed = value?.trim();
  // Missing input and blank input end up in the same place.
  return trimmed === undefined || trimmed === '' ? undefined : trimmed;
}
/**
 * Trims every entry of a string list and drops the ones that end up empty.
 *
 * @param values - Raw strings; the array itself is not modified.
 * @returns A new array of trimmed, non-empty strings in their original order.
 */
private normalizeTextList(values: string[]): string[] {
  return values.reduce<string[]>((kept, raw) => {
    const text = raw.trim();
    if (text !== '') kept.push(text);
    return kept;
  }, []);
}
/**
 * Flattens the accepted `recipeInstructions` shapes into a clean list of steps.
 *
 * @param instructions - One string, a list of strings, a list of step objects,
 *   or `undefined` when the field is absent.
 * @returns Trimmed, non-empty instruction strings; empty when nothing usable
 *   is present.
 */
private normalizeInstructions(
  instructions: string | string[] | SchemaOrgHowToStep[] | undefined,
): string[] {
  // Covers both an absent field and the empty string.
  if (!instructions) return [];

  let rawSteps: string[];
  if (typeof instructions === 'string') {
    rawSteps = [instructions];
  } else if (typeof instructions[0] === 'string') {
    // The first element decides how the whole array is interpreted.
    rawSteps = instructions as string[];
  } else {
    // Step objects (and the empty array): a step without text becomes '' and
    // is removed by the list normalization below.
    rawSteps = (instructions as SchemaOrgHowToStep[]).map((step) => step.text ?? '');
  }

  return this.normalizeTextList(rawSteps);
}
}

View File

@ -55,8 +55,39 @@ describe('Import API', () => {
]);
expect(response.body.data.draft_recipe).toMatchObject({
title: 'Pancakes',
ingredients: ["Flour", "Eggs"],
instructions: ["Mix","Cook"]
ingredients: ['Flour', 'Eggs'],
instructions: ['Mix', 'Cook']
});
});
it('should normalize whitespace and HowToStep instructions into draft format', async () => {
  // Fixture: a Recipe node whose strings are padded with spaces, with one blank
  // ingredient, a multi-valued @type, and instructions given as step objects.
  const html = `
<html>
<head>
<script type="application/ld+json">{"@type":["Thing","Recipe"],"name":" Tomato Soup ","description":" Cozy weeknight soup. ","recipeIngredient":[" Tomato ",""," Salt "],"recipeInstructions":[{"text":" Simmer tomatoes. "},{"text":" Blend and serve. "}],"url":" https://example.com/soup "}</script>
</head>
</html>
`;

  // Stand-in for the remote page: only the members set here are provided.
  const fetchedPage = {
    ok: true,
    status: 200,
    headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
    text: () => Promise.resolve(html),
  } as Response;
  vi.spyOn(globalThis, 'fetch').mockResolvedValue(fetchedPage);

  const { body } = await request(app)
    .post('/api/import/url')
    .send({ url: 'https://example.com/soup-page' })
    .expect(200);

  // Strict equality: every value is trimmed, the blank ingredient is gone, and
  // no extra keys appear on the draft.
  const expectedDraft = {
    title: 'Tomato Soup',
    description: 'Cozy weeknight soup.',
    ingredients: ['Tomato', 'Salt'],
    instructions: ['Simmer tomatoes.', 'Blend and serve.'],
    source_url: 'https://example.com/soup',
  };
  expect(body.success).toBe(true);
  expect(body.data.draft_recipe).toEqual(expectedDraft);
});