feat(import): normalize parsed recipe into internal draft format
This commit is contained in:
parent
64a1229ff0
commit
3987083e46
4
TODO.md
4
TODO.md
|
|
@ -26,8 +26,8 @@ MVP is functionally complete (core app + docs + tests).
|
||||||
|
|
||||||
### Phase 1: URL Import Foundation
|
### Phase 1: URL Import Foundation
|
||||||
- [x] Add backend import endpoint: `POST /api/import/url`
|
- [x] Add backend import endpoint: `POST /api/import/url`
|
||||||
- [ ] Implement Schema.org Recipe JSON-LD parser service
|
- [x] Implement Schema.org Recipe JSON-LD parser service
|
||||||
- [ ] Normalize parsed recipe into internal Recipe draft format
|
- [x] Normalize parsed recipe into internal Recipe draft format
|
||||||
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
|
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
|
||||||
|
|
||||||
### Phase 2: Import UI
|
### Phase 2: Import UI
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,22 @@
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import type { CreateRecipeInput } from '../types/recipe.js';
|
import type { CreateRecipeInput } from '../types/recipe.js';
|
||||||
|
|
||||||
|
interface SchemaOrgHowToStep {
|
||||||
|
text?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface SchemaOrgRecipeCandidate {
|
||||||
|
'@type'?: string | string[];
|
||||||
|
name?: string;
|
||||||
|
description?: string | null;
|
||||||
|
recipeIngredient?: string[];
|
||||||
|
recipeInstructions?: string | string[] | SchemaOrgHowToStep[];
|
||||||
|
url?: string;
|
||||||
|
recipeYield?: string | number;
|
||||||
|
prepTime?: string;
|
||||||
|
cookTime?: string;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses and normalizes Schema.org Recipe JSON-LD blocks.
|
* Parses and normalizes Schema.org Recipe JSON-LD blocks.
|
||||||
*/
|
*/
|
||||||
|
|
@ -10,81 +26,98 @@ export class SchemaOrgRecipeParserService {
|
||||||
* Returns null if no valid Recipe is found.
|
* Returns null if no valid Recipe is found.
|
||||||
*/
|
*/
|
||||||
parseJsonLdBlock(json: string): CreateRecipeInput | null {
|
parseJsonLdBlock(json: string): CreateRecipeInput | null {
|
||||||
let obj: unknown;
|
let parsedJson: unknown;
|
||||||
try {
|
try {
|
||||||
obj = JSON.parse(json);
|
parsedJson = JSON.parse(json);
|
||||||
} catch (err) {
|
} catch {
|
||||||
return null; // Malformed JSON block
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If block is an array, look for Recipe type within
|
if (Array.isArray(parsedJson)) {
|
||||||
if (Array.isArray(obj)) {
|
for (const entry of parsedJson) {
|
||||||
for (const entry of obj) {
|
const parsedRecipe = this.tryParseRecipe(entry);
|
||||||
const parsed = this.tryParseRecipe(entry);
|
if (parsedRecipe) return parsedRecipe;
|
||||||
if (parsed) return parsed;
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Single object
|
return this.tryParseRecipe(parsedJson);
|
||||||
return this.tryParseRecipe(obj);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal: attempts to extract Recipe data from an object if @type matches.
|
* Internal: attempts to extract Recipe data from an object if @type matches.
|
||||||
*/
|
*/
|
||||||
private tryParseRecipe(obj: any): CreateRecipeInput | null {
|
private tryParseRecipe(input: unknown): CreateRecipeInput | null {
|
||||||
// Match Schema.org Recipe by @type
|
const recipeSchema = z.object({
|
||||||
if (!obj || (obj['@type'] !== 'Recipe' && !(Array.isArray(obj['@type']) && obj['@type'].includes('Recipe')))) {
|
'@type': z.union([z.string(), z.array(z.string())]).optional(),
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Zod validation schema for supported fields
|
|
||||||
const RecipeSchema = z.object({
|
|
||||||
name: z.string().min(1),
|
name: z.string().min(1),
|
||||||
description: z.string().optional().nullable(),
|
description: z.string().optional().nullable(),
|
||||||
recipeIngredient: z.array(z.string()).optional(),
|
recipeIngredient: z.array(z.string()).optional(),
|
||||||
recipeInstructions: z.union([
|
recipeInstructions: z
|
||||||
z.array(z.string()),
|
.union([
|
||||||
z.string(),
|
z.array(z.string()),
|
||||||
z.array(z.object({ text: z.string() })),
|
z.string(),
|
||||||
]).optional(),
|
z.array(z.object({ text: z.string().optional() })),
|
||||||
|
])
|
||||||
|
.optional(),
|
||||||
url: z.string().optional(),
|
url: z.string().optional(),
|
||||||
// Optionals for MVP only
|
|
||||||
recipeYield: z.union([z.string(), z.number()]).optional(),
|
recipeYield: z.union([z.string(), z.number()]).optional(),
|
||||||
prepTime: z.string().optional(),
|
prepTime: z.string().optional(),
|
||||||
cookTime: z.string().optional(),
|
cookTime: z.string().optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
const parseResult = RecipeSchema.safeParse(obj);
|
const parseResult = recipeSchema.safeParse(input);
|
||||||
if (!parseResult.success) return null;
|
if (!parseResult.success) return null;
|
||||||
const schemaRecipe = parseResult.data;
|
|
||||||
|
|
||||||
// Normalize instructions into string[]
|
const recipe = parseResult.data as SchemaOrgRecipeCandidate;
|
||||||
let instructions: string[] = [];
|
if (!this.isRecipeType(recipe['@type'])) {
|
||||||
if (schemaRecipe.recipeInstructions) {
|
return null;
|
||||||
if (typeof schemaRecipe.recipeInstructions === 'string') {
|
|
||||||
instructions = [schemaRecipe.recipeInstructions];
|
|
||||||
} else if (Array.isArray(schemaRecipe.recipeInstructions)) {
|
|
||||||
if (typeof schemaRecipe.recipeInstructions[0] === 'string') {
|
|
||||||
instructions = schemaRecipe.recipeInstructions.slice();
|
|
||||||
} else {
|
|
||||||
// Array of Step objects
|
|
||||||
instructions = (schemaRecipe.recipeInstructions as any[]).map((step) => step.text).filter(Boolean);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize output to CreateRecipeInput shape
|
return {
|
||||||
const result: CreateRecipeInput = {
|
title: recipe.name!.trim(),
|
||||||
title: schemaRecipe.name.trim(),
|
description: this.normalizeOptionalText(recipe.description),
|
||||||
description: schemaRecipe.description?.trim() || undefined,
|
ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []),
|
||||||
ingredients: schemaRecipe.recipeIngredient ?? [],
|
instructions: this.normalizeInstructions(recipe.recipeInstructions),
|
||||||
instructions,
|
source_url: this.normalizeOptionalText(recipe.url),
|
||||||
source_url: schemaRecipe.url,
|
|
||||||
// These could be parsed from recipeYield/prepTime/cookTime if desired
|
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
private isRecipeType(type: string | string[] | undefined): boolean {
|
||||||
|
if (!type) return false;
|
||||||
|
if (typeof type === 'string') return type === 'Recipe';
|
||||||
|
return type.includes('Recipe');
|
||||||
|
}
|
||||||
|
|
||||||
|
private normalizeOptionalText(value: string | null | undefined): string | undefined {
|
||||||
|
if (!value) return undefined;
|
||||||
|
const trimmed = value.trim();
|
||||||
|
return trimmed.length > 0 ? trimmed : undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
private normalizeTextList(values: string[]): string[] {
|
||||||
|
return values
|
||||||
|
.map((value) => value.trim())
|
||||||
|
.filter((value) => value.length > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private normalizeInstructions(
|
||||||
|
instructions: string | string[] | SchemaOrgHowToStep[] | undefined,
|
||||||
|
): string[] {
|
||||||
|
if (!instructions) return [];
|
||||||
|
|
||||||
|
if (typeof instructions === 'string') {
|
||||||
|
return this.normalizeTextList([instructions]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instructions.length === 0) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeof instructions[0] === 'string') {
|
||||||
|
return this.normalizeTextList(instructions as string[]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.normalizeTextList((instructions as SchemaOrgHowToStep[]).map((step) => step.text ?? ''));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,8 +55,39 @@ describe('Import API', () => {
|
||||||
]);
|
]);
|
||||||
expect(response.body.data.draft_recipe).toMatchObject({
|
expect(response.body.data.draft_recipe).toMatchObject({
|
||||||
title: 'Pancakes',
|
title: 'Pancakes',
|
||||||
ingredients: ["Flour", "Eggs"],
|
ingredients: ['Flour', 'Eggs'],
|
||||||
instructions: ["Mix","Cook"]
|
instructions: ['Mix', 'Cook']
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should normalize whitespace and HowToStep instructions into draft format', async () => {
|
||||||
|
const html = `
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<script type="application/ld+json">{"@type":["Thing","Recipe"],"name":" Tomato Soup ","description":" Cozy weeknight soup. ","recipeIngredient":[" Tomato ",""," Salt "],"recipeInstructions":[{"text":" Simmer tomatoes. "},{"text":" Blend and serve. "}],"url":" https://example.com/soup "}</script>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
vi.spyOn(globalThis, 'fetch').mockResolvedValue({
|
||||||
|
ok: true,
|
||||||
|
status: 200,
|
||||||
|
headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
|
||||||
|
text: async () => html,
|
||||||
|
} as Response);
|
||||||
|
|
||||||
|
const response = await request(app)
|
||||||
|
.post('/api/import/url')
|
||||||
|
.send({ url: 'https://example.com/soup-page' })
|
||||||
|
.expect(200);
|
||||||
|
|
||||||
|
expect(response.body.success).toBe(true);
|
||||||
|
expect(response.body.data.draft_recipe).toEqual({
|
||||||
|
title: 'Tomato Soup',
|
||||||
|
description: 'Cozy weeknight soup.',
|
||||||
|
ingredients: ['Tomato', 'Salt'],
|
||||||
|
instructions: ['Simmer tomatoes.', 'Blend and serve.'],
|
||||||
|
source_url: 'https://example.com/soup'
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue