import { Router } from 'express';
import { z } from 'zod';
import { parseSchemaOrgRecipe } from '../services/SchemaOrgRecipeParserService.js';
import { parseHeuristicRecipe } from '../services/HeuristicRecipeParserService.js';
import { UrlImportError, UrlImportService } from '../services/UrlImportService.js';
import type { CreateRecipeInput } from '../types/recipe.js';
import { asyncHandler } from '../middleware.js';

/** Request body accepted by `POST /url`. */
const importUrlSchema = z.object({
  url: z.string().url('Please provide a valid URL (including https://).'),
});

/** A parsed JSON object whose shape has not been validated yet. */
type JsonObject = Record<string, unknown>;

/** Recipe draft returned to the client for review before it is saved. */
interface ImportRouteDraftRecipe {
  title: string;
  description?: string;
  servings?: number;
  prep_time_minutes?: number;
  cook_time_minutes?: number;
  source_url?: string;
  image_url?: string;
  ingredients: { item: string; quantity?: string | null; unit?: string | null; notes?: string | null }[];
  instructions: string[];
  tagIds?: number[];
}

/** Payload placed in `data` on a successful import. */
interface ImportRouteResult {
  title: string;
  source_url: string;
  json_ld_blocks: unknown[];
  draft_recipe: ImportRouteDraftRecipe;
  ingredients: string[];
  instructions: string[];
  parse: {
    schema_org_used: boolean;
    heuristic_used: boolean;
    warnings: string[];
  };
}

/**
 * Builds the router exposing `POST /url`, which fetches a page and turns it
 * into a recipe draft.
 *
 * A schema.org Recipe found in the page's JSON-LD is preferred; the HTML
 * heuristics are only attempted when that yields no usable draft. Responses use
 * the `{ success, data, error }` envelope.
 *
 * @param urlImportService Service used to fetch the page; injectable for tests.
 * @returns The configured Express router.
 */
export function createImportRoutes(urlImportService = new UrlImportService()) {
  const router = Router();

  router.post(
    '/url',
    asyncHandler(async (req, res, next) => {
      const { url } = importUrlSchema.parse(req.body);

      let fetched: Awaited<ReturnType<UrlImportService['fetchFromUrl']>>;
      try {
        fetched = await urlImportService.fetchFromUrl(url);
      } catch (err: unknown) {
        if (isUrlImportError(err)) {
          const mapped = mapUrlImportError(err);
          res.status(mapped.status).json({ success: false, data: null, error: mapped.message });
          return;
        }
        // Anything that is not an import error goes to the error middleware.
        next(err);
        return;
      }

      const parseWarnings: string[] = [];
      const parsedJsonLdBlocks = parseJsonLdBlocks(fetched.json_ld_blocks, parseWarnings);
      const schemaCandidate = findSchemaOrgRecipeCandidate(parsedJsonLdBlocks);

      const schemaDraft = schemaCandidate
        ? toImportDraftSafe(parseSchemaOrgRecipe(schemaCandidate), fetched.source_url)
        : null;
      // Heuristics run only when structured data produced nothing usable.
      const heuristicDraft = schemaDraft ? null : toHeuristicImportDraft(fetched.html, fetched.source_url);
      const draft = schemaDraft ?? heuristicDraft;

      if (!draft) {
        res.status(422).json({
          success: false,
          data: null,
          error: 'Parse failed: Could not extract a usable recipe from this page.',
        });
        return;
      }

      const response: ImportRouteResult = {
        title: draft.title,
        source_url: fetched.source_url,
        json_ld_blocks: parsedJsonLdBlocks,
        draft_recipe: draft,
        ingredients: draft.ingredients.map((item) => item.item),
        instructions: draft.instructions,
        parse: {
          schema_org_used: Boolean(schemaDraft),
          heuristic_used: Boolean(!schemaDraft && heuristicDraft),
          warnings: parseWarnings,
        },
      };

      res.json({ success: true, data: response, error: null });
    }),
  );

  return router;
}

/**
 * Recognises import errors by their `IMPORT_`-prefixed string `code`.
 *
 * The check is structural and tolerates thrown values that are not objects or
 * whose `code` is not a string.
 */
function isUrlImportError(err: unknown): err is UrlImportError {
  if (typeof err !== 'object' || err === null) {
    return false;
  }
  const code = (err as { code?: unknown }).code;
  return typeof code === 'string' && code.startsWith('IMPORT_');
}

/**
 * Maps an import error to the HTTP status and message sent to the client.
 *
 * Timeouts become 504, network failures 502 and unsupported content 415. Any
 * other code forwards the error's own status when it is a 4xx/5xx value and
 * falls back to 502 otherwise.
 */
function mapUrlImportError(error: UrlImportError): { status: number; message: string } {
  switch (error.code) {
    case 'IMPORT_TIMEOUT':
      return { status: 504, message: error.message };
    case 'IMPORT_NETWORK':
      return { status: 502, message: error.message };
    case 'IMPORT_UNSUPPORTED_CONTENT':
      return { status: 415, message: error.message };
    case 'IMPORT_FETCH_FAILED':
    default: {
      const upstream = error.status;
      const forwardable = typeof upstream === 'number' && upstream >= 400 && upstream <= 599;
      return { status: forwardable ? upstream : 502, message: error.message };
    }
  }
}

/**
 * Parses each raw JSON-LD string, skipping malformed ones.
 *
 * @param blocks Raw JSON-LD strings.
 * @param warnings Receives one message per block that failed to parse.
 * @returns The successfully parsed values, in input order.
 */
function parseJsonLdBlocks(blocks: string[], warnings: string[]): unknown[] {
  const parsed: unknown[] = [];
  for (const raw of blocks) {
    try {
      const value: unknown = JSON.parse(raw);
      parsed.push(value);
    } catch {
      warnings.push('Skipped malformed JSON-LD block.');
    }
  }
  return parsed;
}

/**
 * Picks the recipe object to parse from the JSON-LD blocks.
 *
 * Prefers the first candidate that has a string `name`, otherwise the first
 * candidate found.
 *
 * @returns The chosen candidate, or `null` when no block contains a recipe.
 */
function findSchemaOrgRecipeCandidate(blocks: unknown[]): JsonObject | null {
  const candidates: JsonObject[] = [];
  for (const block of blocks) {
    collectRecipeCandidates(block, candidates);
  }
  return candidates.find((candidate) => typeof candidate.name === 'string') ?? candidates[0] ?? null;
}

/**
 * Recursively appends every object whose `@type` names a recipe to `sink`.
 *
 * `@graph` is visited before the other properties so its recipes take priority,
 * and is then skipped in the generic walk so each object is collected once.
 */
function collectRecipeCandidates(value: unknown, sink: JsonObject[]): void {
  if (!value) return;

  if (Array.isArray(value)) {
    for (const item of value) {
      collectRecipeCandidates(item, sink);
    }
    return;
  }

  if (typeof value !== 'object') {
    return;
  }

  const obj = value as JsonObject;
  if (isRecipeType(obj['@type'])) {
    sink.push(obj);
  }

  if ('@graph' in obj) {
    collectRecipeCandidates(obj['@graph'], sink);
  }

  for (const [key, nested] of Object.entries(obj)) {
    if (key === '@graph') continue;
    if (nested && typeof nested === 'object') {
      collectRecipeCandidates(nested, sink);
    }
  }
}

/**
 * Reports whether a JSON-LD `@type` value names a recipe.
 *
 * Accepts a string or an array of strings; the match is a case-insensitive
 * substring test for "recipe".
 */
function isRecipeType(typeValue: unknown): boolean {
  if (typeof typeValue === 'string') {
    return typeValue.toLowerCase().includes('recipe');
  }
  if (Array.isArray(typeValue)) {
    return typeValue.some((value) => typeof value === 'string' && value.toLowerCase().includes('recipe'));
  }
  return false;
}

/**
 * Converts parser output into a client draft.
 *
 * Ingredient names and step text are trimmed and empty entries dropped.
 *
 * @param parsed Recipe produced by one of the parsers.
 * @param sourceUrl Used as `source_url` when the parsed recipe has none.
 * @returns The draft, or `null` when the title, ingredients or instructions
 *   are missing.
 */
function toImportDraftSafe(parsed: CreateRecipeInput, sourceUrl: string): ImportRouteDraftRecipe | null {
  const title = parsed.title?.trim();

  const ingredients = Array.isArray(parsed.ingredients)
    ? parsed.ingredients
        .map((ingredient) => ({
          item: typeof ingredient.item === 'string' ? ingredient.item.trim() : '',
          quantity: typeof ingredient.quantity === 'string' ? ingredient.quantity : null,
          unit: typeof ingredient.unit === 'string' ? ingredient.unit : null,
          notes: typeof ingredient.notes === 'string' ? ingredient.notes : null,
        }))
        .filter((ingredient) => ingredient.item.length > 0)
    : [];

  const instructions = Array.isArray(parsed.steps)
    ? parsed.steps
        .map((step) => (typeof step.instruction === 'string' ? step.instruction.trim() : ''))
        .filter((step) => step.length > 0)
    : [];

  if (!title || ingredients.length === 0 || instructions.length === 0) {
    return null;
  }

  return {
    title,
    description: parsed.description,
    servings: parsed.servings,
    prep_time_minutes: parsed.prep_time_minutes,
    cook_time_minutes: parsed.cook_time_minutes,
    source_url: parsed.source_url || sourceUrl,
    image_url: parsed.image_url,
    ingredients,
    instructions,
    tagIds: parsed.tagIds,
  };
}

/**
 * Builds a draft from raw HTML when no structured data is usable.
 *
 * The title comes from the `<title>` element (default 'Imported Recipe'), and
 * ingredients and instructions from lists that follow matching headings.
 *
 * @returns The draft, or `null` when the result is not a usable recipe.
 */
function toHeuristicImportDraft(html: string, sourceUrl: string): ImportRouteDraftRecipe | null {
  const title = extractTitle(html) || 'Imported Recipe';
  const ingredients = extractListItems(html, ['ingredient']);
  const instructions = extractListItems(html, ['instruction', 'direction', 'method', 'step']);

  const createInput = parseHeuristicRecipe({
    title,
    ingredients,
    steps: instructions,
    source_url: sourceUrl,
  });

  return toImportDraftSafe(createInput, sourceUrl);
}

/**
 * Returns the normalized text of the document's first `<title>` element, or
 * `null` when there is none or it is empty.
 */
function extractTitle(html: string): string | null {
  const titleMatch = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
  if (!titleMatch || !titleMatch[1]) {
    return null;
  }
  return normalizeText(titleMatch[1]);
}

/**
 * Collects the text of `<li>` items from each `<ul>` that follows an
 * `<h2>`/`<h3>`/`<h4>` heading containing one of the keywords.
 *
 * @param html Page markup to scan.
 * @param headingKeywords Lower-case keywords matched against heading text.
 * @returns Non-empty item texts in document order, with duplicates removed.
 */
function extractListItems(html: string, headingKeywords: string[]): string[] {
  // Group 1 is the heading's inner HTML, group 2 the inner HTML of the next <ul>.
  const sectionPattern =
    /<(?:h2|h3|h4)[^>]*>([\s\S]*?)<\/(?:h2|h3|h4)>[\s\S]*?<ul\b[^>]*>([\s\S]*?)<\/ul>/gi;
  const items: string[] = [];

  for (const section of html.matchAll(sectionPattern)) {
    const headingText = normalizeText(section[1] ?? '').toLowerCase();
    if (!headingKeywords.some((keyword) => headingText.includes(keyword))) {
      continue;
    }

    const listHtml = section[2] ?? '';
    // \b keeps tags such as <link> from being taken for <li>.
    for (const liMatch of listHtml.matchAll(/<li\b[^>]*>([\s\S]*?)<\/li>/gi)) {
      const text = normalizeText(liMatch[1] ?? '');
      if (text) {
        items.push(text);
      }
    }
  }

  return dedupe(items);
}

/**
 * Reduces an HTML fragment to plain text: tags become spaces, `&nbsp;` and
 * `&amp;` are decoded, and whitespace runs collapse to one space.
 */
function normalizeText(text: string): string {
  return text
    .replace(/<[^>]+>/g, ' ')
    .replace(/&nbsp;/g, ' ')
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();
}

/** Removes duplicate strings, keeping the first occurrence of each. */
function dedupe(values: string[]): string[] {
  return [...new Set(values)];
}