feat(import): normalize parsed recipe into internal draft format
This commit is contained in:
parent
64a1229ff0
commit
3987083e46
4
TODO.md
4
TODO.md
|
|
@ -26,8 +26,8 @@ MVP is functionally complete (core app + docs + tests).
|
|||
|
||||
### Phase 1: URL Import Foundation
|
||||
- [x] Add backend import endpoint: `POST /api/import/url`
|
||||
- [ ] Implement Schema.org Recipe JSON-LD parser service
|
||||
- [ ] Normalize parsed recipe into internal Recipe draft format
|
||||
- [x] Implement Schema.org Recipe JSON-LD parser service
|
||||
- [x] Normalize parsed recipe into internal Recipe draft format
|
||||
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
|
||||
|
||||
### Phase 2: Import UI
|
||||
|
|
|
|||
|
|
@ -1,6 +1,22 @@
|
|||
import { z } from 'zod';
|
||||
import type { CreateRecipeInput } from '../types/recipe.js';
|
||||
|
||||
/**
 * One element of an object-style `recipeInstructions` array.
 *
 * Only the step text is modelled; a step without `text` is allowed.
 */
interface SchemaOrgHowToStep {
  /** Instruction text for this step, when the source provides one. */
  text?: string;
}
|
||||
|
||||
/**
 * Loosely typed view of a JSON-LD object that may describe a recipe.
 *
 * Every member is optional: the object comes from third-party markup and
 * is only trusted after validation and a check of its `@type`.
 */
interface SchemaOrgRecipeCandidate {
  /** Declared type(s) of the object: a single name or a list of names. */
  '@type'?: string | string[];

  // Descriptive fields
  name?: string;
  description?: string | null;
  url?: string;

  // Recipe content
  recipeIngredient?: string[];
  /** A single string, a list of strings, or a list of step objects. */
  recipeInstructions?: string | string[] | SchemaOrgHowToStep[];

  // Supplementary metadata
  recipeYield?: string | number;
  prepTime?: string;
  cookTime?: string;
}
|
||||
|
||||
/**
|
||||
* Parses and normalizes Schema.org Recipe JSON-LD blocks.
|
||||
*/
|
||||
|
|
@ -10,81 +26,98 @@ export class SchemaOrgRecipeParserService {
|
|||
* Returns null if no valid Recipe is found.
|
||||
*/
|
||||
parseJsonLdBlock(json: string): CreateRecipeInput | null {
|
||||
let obj: unknown;
|
||||
let parsedJson: unknown;
|
||||
try {
|
||||
obj = JSON.parse(json);
|
||||
} catch (err) {
|
||||
return null; // Malformed JSON block
|
||||
parsedJson = JSON.parse(json);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
||||
// If block is an array, look for Recipe type within
|
||||
if (Array.isArray(obj)) {
|
||||
for (const entry of obj) {
|
||||
const parsed = this.tryParseRecipe(entry);
|
||||
if (parsed) return parsed;
|
||||
if (Array.isArray(parsedJson)) {
|
||||
for (const entry of parsedJson) {
|
||||
const parsedRecipe = this.tryParseRecipe(entry);
|
||||
if (parsedRecipe) return parsedRecipe;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Single object
|
||||
return this.tryParseRecipe(obj);
|
||||
return this.tryParseRecipe(parsedJson);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal: attempts to extract Recipe data from an object if @type matches.
|
||||
*/
|
||||
private tryParseRecipe(obj: any): CreateRecipeInput | null {
|
||||
// Match Schema.org Recipe by @type
|
||||
if (!obj || (obj['@type'] !== 'Recipe' && !(Array.isArray(obj['@type']) && obj['@type'].includes('Recipe')))) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Zod validation schema for supported fields
|
||||
const RecipeSchema = z.object({
|
||||
private tryParseRecipe(input: unknown): CreateRecipeInput | null {
|
||||
const recipeSchema = z.object({
|
||||
'@type': z.union([z.string(), z.array(z.string())]).optional(),
|
||||
name: z.string().min(1),
|
||||
description: z.string().optional().nullable(),
|
||||
recipeIngredient: z.array(z.string()).optional(),
|
||||
recipeInstructions: z.union([
|
||||
z.array(z.string()),
|
||||
z.string(),
|
||||
z.array(z.object({ text: z.string() })),
|
||||
]).optional(),
|
||||
recipeInstructions: z
|
||||
.union([
|
||||
z.array(z.string()),
|
||||
z.string(),
|
||||
z.array(z.object({ text: z.string().optional() })),
|
||||
])
|
||||
.optional(),
|
||||
url: z.string().optional(),
|
||||
// Optionals for MVP only
|
||||
recipeYield: z.union([z.string(), z.number()]).optional(),
|
||||
prepTime: z.string().optional(),
|
||||
cookTime: z.string().optional(),
|
||||
});
|
||||
|
||||
const parseResult = RecipeSchema.safeParse(obj);
|
||||
const parseResult = recipeSchema.safeParse(input);
|
||||
if (!parseResult.success) return null;
|
||||
const schemaRecipe = parseResult.data;
|
||||
|
||||
// Normalize instructions into string[]
|
||||
let instructions: string[] = [];
|
||||
if (schemaRecipe.recipeInstructions) {
|
||||
if (typeof schemaRecipe.recipeInstructions === 'string') {
|
||||
instructions = [schemaRecipe.recipeInstructions];
|
||||
} else if (Array.isArray(schemaRecipe.recipeInstructions)) {
|
||||
if (typeof schemaRecipe.recipeInstructions[0] === 'string') {
|
||||
instructions = schemaRecipe.recipeInstructions.slice();
|
||||
} else {
|
||||
// Array of Step objects
|
||||
instructions = (schemaRecipe.recipeInstructions as any[]).map((step) => step.text).filter(Boolean);
|
||||
}
|
||||
}
|
||||
const recipe = parseResult.data as SchemaOrgRecipeCandidate;
|
||||
if (!this.isRecipeType(recipe['@type'])) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Normalize output to CreateRecipeInput shape
|
||||
const result: CreateRecipeInput = {
|
||||
title: schemaRecipe.name.trim(),
|
||||
description: schemaRecipe.description?.trim() || undefined,
|
||||
ingredients: schemaRecipe.recipeIngredient ?? [],
|
||||
instructions,
|
||||
source_url: schemaRecipe.url,
|
||||
// These could be parsed from recipeYield/prepTime/cookTime if desired
|
||||
return {
|
||||
title: recipe.name!.trim(),
|
||||
description: this.normalizeOptionalText(recipe.description),
|
||||
ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []),
|
||||
instructions: this.normalizeInstructions(recipe.recipeInstructions),
|
||||
source_url: this.normalizeOptionalText(recipe.url),
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
/**
 * Reports whether a JSON-LD `@type` value names `Recipe`.
 *
 * @param type - A single type name, a list of type names, or undefined.
 * @returns true only when one of the declared names is exactly `'Recipe'`.
 */
private isRecipeType(type: string | string[] | undefined): boolean {
  // Treat the scalar and missing forms as a one-element list so a single
  // comparison covers every shape; undefined and '' never equal 'Recipe'.
  const declaredTypes = Array.isArray(type) ? type : [type];
  return declaredTypes.some((declared) => declared === 'Recipe');
}
|
||||
|
||||
/**
 * Trims an optional text field and collapses "no content" to undefined.
 *
 * @param value - Raw text, which may be null or undefined.
 * @returns The trimmed text, or undefined when the input is missing,
 *   empty, or whitespace only.
 */
private normalizeOptionalText(value: string | null | undefined): string | undefined {
  if (value == null) {
    return undefined;
  }

  const text = value.trim();
  // An empty result (from '' or all-whitespace input) counts as absent.
  return text === '' ? undefined : text;
}
|
||||
|
||||
/**
 * Trims every entry of a string list and drops entries that end up empty.
 *
 * @param values - Raw strings; the input array is not modified.
 * @returns A new array of trimmed, non-empty strings in the original order.
 */
private normalizeTextList(values: string[]): string[] {
  const cleaned: string[] = [];

  for (const raw of values) {
    const text = raw.trim();
    if (text !== '') {
      cleaned.push(text);
    }
  }

  return cleaned;
}
|
||||
|
||||
/**
 * Flattens the supported `recipeInstructions` shapes into a list of
 * trimmed, non-empty step strings.
 *
 * @param instructions - A single string, a list of strings, a list of
 *   step objects, or undefined.
 * @returns The cleaned steps in source order; an empty array when nothing
 *   usable was provided.
 */
private normalizeInstructions(
  instructions: string | string[] | SchemaOrgHowToStep[] | undefined,
): string[] {
  if (!instructions) {
    return [];
  }

  let rawSteps: string[];
  if (typeof instructions === 'string') {
    rawSteps = [instructions];
  } else if (typeof instructions[0] === 'string') {
    // The first element decides how the whole array is interpreted.
    rawSteps = instructions as string[];
  } else {
    // Step objects (an empty array also lands here and maps to []).
    // A step without text becomes '' and is removed by the cleanup below.
    rawSteps = (instructions as SchemaOrgHowToStep[]).map(({ text }) => text ?? '');
  }

  return this.normalizeTextList(rawSteps);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,8 +55,39 @@ describe('Import API', () => {
|
|||
]);
|
||||
expect(response.body.data.draft_recipe).toMatchObject({
|
||||
title: 'Pancakes',
|
||||
ingredients: ["Flour", "Eggs"],
|
||||
instructions: ["Mix","Cook"]
|
||||
ingredients: ['Flour', 'Eggs'],
|
||||
instructions: ['Mix', 'Cook']
|
||||
});
|
||||
});
|
||||
|
||||
it('should normalize whitespace and HowToStep instructions into draft format', async () => {
|
||||
const html = `
|
||||
<html>
|
||||
<head>
|
||||
<script type="application/ld+json">{"@type":["Thing","Recipe"],"name":" Tomato Soup ","description":" Cozy weeknight soup. ","recipeIngredient":[" Tomato ",""," Salt "],"recipeInstructions":[{"text":" Simmer tomatoes. "},{"text":" Blend and serve. "}],"url":" https://example.com/soup "}</script>
|
||||
</head>
|
||||
</html>
|
||||
`;
|
||||
|
||||
vi.spyOn(globalThis, 'fetch').mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
|
||||
text: async () => html,
|
||||
} as Response);
|
||||
|
||||
const response = await request(app)
|
||||
.post('/api/import/url')
|
||||
.send({ url: 'https://example.com/soup-page' })
|
||||
.expect(200);
|
||||
|
||||
expect(response.body.success).toBe(true);
|
||||
expect(response.body.data.draft_recipe).toEqual({
|
||||
title: 'Tomato Soup',
|
||||
description: 'Cozy weeknight soup.',
|
||||
ingredients: ['Tomato', 'Salt'],
|
||||
instructions: ['Simmer tomatoes.', 'Blend and serve.'],
|
||||
source_url: 'https://example.com/soup'
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue