285 lines
8.2 KiB
TypeScript
285 lines
8.2 KiB
TypeScript
import { Router } from 'express';
|
|
import { z } from 'zod';
|
|
import { parseSchemaOrgRecipe } from '../services/SchemaOrgRecipeParserService.js';
|
|
import { parseHeuristicRecipe } from '../services/HeuristicRecipeParserService.js';
|
|
import { UrlImportError, UrlImportService } from '../services/UrlImportService.js';
|
|
import type { CreateRecipeInput } from '../types/recipe.js';
|
|
import { asyncHandler } from '../middleware.js';
|
|
|
|
/**
 * Validation schema for the body of `POST /url`: an object with a single
 * `url` string that must pass zod's URL check. The message below is what zod
 * reports when that check fails.
 */
const importUrlSchema = z.object({
  url: z
    .string()
    .url('Please provide a valid URL (including https://).'),
});
|
|
|
|
/**
 * Recipe draft carried in the import route's response.
 *
 * Only `title`, `ingredients` and `instructions` are required; every other
 * field is optional metadata.
 */
interface ImportRouteDraftRecipe {
  title: string;
  description?: string;
  servings?: number;
  prep_time_minutes?: number;
  cook_time_minutes?: number;
  source_url?: string;
  image_url?: string;
  /** Structured ingredient lines; only `item` is mandatory on each entry. */
  ingredients: Array<{
    item: string;
    quantity?: string | null;
    unit?: string | null;
    notes?: string | null;
  }>;
  /** Instruction texts, one entry per step. */
  instructions: string[];
  tagIds?: number[];
}
|
|
|
|
/**
 * Payload placed under `data` in a successful `POST /url` response.
 */
interface ImportRouteResult {
  /** Title of the draft recipe. */
  title: string;
  /** Source URL reported for the fetched page. */
  source_url: string;
  /** JSON-LD values that were found on the page and parsed as JSON. */
  json_ld_blocks: unknown[];
  /** The full structured draft. */
  draft_recipe: ImportRouteDraftRecipe;
  /** Flat list of ingredient item strings. */
  ingredients: string[];
  /** Flat list of instruction strings. */
  instructions: string[];
  /** Diagnostics describing how the draft was produced. */
  parse: {
    schema_org_used: boolean;
    heuristic_used: boolean;
    warnings: string[];
  };
}
|
|
|
|
/**
 * Builds the Express router that exposes the recipe URL-import endpoint.
 *
 * `POST /url` validates the request body with `importUrlSchema`, fetches the
 * page through `urlImportService`, and then tries to build a draft recipe:
 * first from schema.org JSON-LD found on the page, and only when that yields
 * no usable draft, from the HTML heuristics.
 *
 * Responses produced here:
 * - `{ success: true, data: ImportRouteResult, error: null }` on success.
 * - `422` with `data: null` when neither parser produced a usable draft.
 * - For fetch failures whose `code` is a string starting with `IMPORT_`, the
 *   status and message chosen by `mapUrlImportError`.
 * - Any other fetch failure is forwarded to `next`.
 *
 * A body that fails schema validation makes `importUrlSchema.parse` throw; how
 * that is reported depends on `asyncHandler` and the app's error middleware.
 *
 * @param urlImportService Service used to fetch the page; defaults to a new `UrlImportService`.
 * @returns The configured router.
 */
export function createImportRoutes(urlImportService = new UrlImportService()) {
  const router = Router();

  router.post('/url', asyncHandler(async (req, res, next) => {
    const { url } = importUrlSchema.parse(req.body);

    let fetched;
    try {
      fetched = await urlImportService.fetchFromUrl(url);
    } catch (err: unknown) {
      // Narrow before touching `code`: a thrown null/undefined, or an error
      // whose `code` is not a string, must fall through to `next` instead of
      // raising a TypeError here and hiding the original failure.
      const code =
        typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined;

      if (typeof code === 'string' && code.startsWith('IMPORT_')) {
        const mapped = mapUrlImportError(err as UrlImportError);
        return res.status(mapped.status).json({ success: false, error: mapped.message });
      }

      return next(err);
    }

    // Malformed JSON-LD blocks are skipped and reported as warnings.
    const parseWarnings: string[] = [];
    const parsedJsonLdBlocks = parseJsonLdBlocks(fetched.json_ld_blocks, parseWarnings);

    const schemaCandidate = findSchemaOrgRecipeCandidate(parsedJsonLdBlocks);
    const schemaDraft = schemaCandidate
      ? toImportDraftSafe(parseSchemaOrgRecipe(schemaCandidate), fetched.source_url)
      : null;

    // The heuristic parser only runs when structured data gave no usable draft.
    const heuristicDraft = schemaDraft
      ? null
      : toHeuristicImportDraft(fetched.html, fetched.source_url);

    const draft = schemaDraft ?? heuristicDraft;

    if (!draft) {
      res.status(422).json({
        success: false,
        data: null,
        error: 'Parse failed: Could not extract a usable recipe from this page.',
      });
      return;
    }

    const response: ImportRouteResult = {
      title: draft.title,
      source_url: fetched.source_url,
      json_ld_blocks: parsedJsonLdBlocks,
      draft_recipe: draft,
      ingredients: draft.ingredients.map((item) => item.item),
      instructions: draft.instructions,
      parse: {
        schema_org_used: Boolean(schemaDraft),
        heuristic_used: Boolean(!schemaDraft && heuristicDraft),
        warnings: parseWarnings,
      },
    };

    res.json({ success: true, data: response, error: null });
  }));

  return router;
}
|
|
|
|
/**
 * Translates a URL-import error into the HTTP status and message to send back.
 *
 * - `IMPORT_TIMEOUT` -> 504
 * - `IMPORT_NETWORK` -> 502
 * - `IMPORT_UNSUPPORTED_CONTENT` -> 415
 * - `IMPORT_FETCH_FAILED` and any other code -> the error's own `status` when
 *   it is at least 400, otherwise 502
 *
 * @param error The import error to translate.
 * @returns The response status paired with the error's message.
 */
function mapUrlImportError(error: UrlImportError): { status: number; message: string } {
  const { code, message } = error;

  if (code === 'IMPORT_TIMEOUT') {
    return { status: 504, message };
  }
  if (code === 'IMPORT_NETWORK') {
    return { status: 502, message };
  }
  if (code === 'IMPORT_UNSUPPORTED_CONTENT') {
    return { status: 415, message };
  }

  // 'IMPORT_FETCH_FAILED' and unrecognised codes share this fallback.
  const reportedStatus = error.status;
  const status = reportedStatus && reportedStatus >= 400 ? reportedStatus : 502;
  return { status, message };
}
|
|
|
|
/**
 * Parses each raw JSON-LD string as JSON.
 *
 * Blocks that fail to parse are dropped from the result, and one warning per
 * dropped block is appended to `warnings`.
 *
 * @param blocks Raw JSON-LD texts.
 * @param warnings Mutable list that receives a message for every skipped block.
 * @returns The successfully parsed values, in input order.
 */
function parseJsonLdBlocks(blocks: string[], warnings: string[]): unknown[] {
  return blocks.flatMap((rawBlock): unknown[] => {
    try {
      // Wrapped in a one-element array so a parsed top-level array stays a
      // single entry after flatMap's one-level flattening.
      return [JSON.parse(rawBlock) as unknown];
    } catch {
      warnings.push('Skipped malformed JSON-LD block.');
      return [];
    }
  });
}
|
|
|
|
/**
 * Picks the JSON-LD object to feed to the schema.org recipe parser.
 *
 * All recipe-typed objects are gathered from `blocks`; the first one whose
 * `name` is a string is preferred, otherwise the first one gathered.
 *
 * @param blocks Parsed JSON-LD values.
 * @returns The chosen candidate, or `null` when none was found.
 */
function findSchemaOrgRecipeCandidate(blocks: unknown[]): Record<string, unknown> | null {
  const found: Record<string, unknown>[] = [];
  blocks.forEach((block) => collectRecipeCandidates(block, found));

  const named = found.find((entry) => typeof entry.name === 'string');
  return named ?? found[0] ?? null;
}
|
|
|
|
/**
 * Recursively walks a parsed JSON-LD value and appends every object whose
 * `@type` is recipe-like (per `isRecipeType`) to `sink`.
 *
 * Arrays are walked element by element. For objects, the object itself is
 * tested first, then its `@graph` is visited, then every other property value
 * in key order. `@graph` is deliberately skipped in that last pass so each
 * node is visited, and each candidate collected, exactly once.
 *
 * @param value Any parsed JSON value; non-objects and falsy values are ignored.
 * @param sink Mutable list that receives the candidates in discovery order.
 */
function collectRecipeCandidates(value: unknown, sink: Record<string, unknown>[]): void {
  if (!value || typeof value !== 'object') {
    return;
  }

  if (Array.isArray(value)) {
    for (const item of value) {
      collectRecipeCandidates(item, sink);
    }
    return;
  }

  const obj = value as Record<string, unknown>;

  if (isRecipeType(obj['@type'])) {
    sink.push(obj);
  }

  // Visit @graph ahead of the other properties so its nodes keep priority in
  // discovery order; a missing @graph is ignored by the guard at the top.
  collectRecipeCandidates(obj['@graph'], sink);

  for (const [key, nested] of Object.entries(obj)) {
    if (key !== '@graph') {
      collectRecipeCandidates(nested, sink);
    }
  }
}
|
|
|
|
/**
 * Tells whether a JSON-LD `@type` value refers to a recipe.
 *
 * A string matches when it contains "recipe" (case-insensitive); an array
 * matches when at least one of its string entries does. Anything else is not
 * a recipe type.
 *
 * @param typeValue The raw `@type` value.
 * @returns `true` when the value names a recipe type.
 */
function isRecipeType(typeValue: unknown): boolean {
  const mentionsRecipe = (entry: unknown): boolean =>
    typeof entry === 'string' && entry.toLowerCase().includes('recipe');

  return Array.isArray(typeValue) ? typeValue.some(mentionsRecipe) : mentionsRecipe(typeValue);
}
|
|
|
|
/**
 * Converts parser output into the draft shape returned by the import route,
 * rejecting results that are not usable.
 *
 * Ingredient items and step instructions are trimmed, and entries that end up
 * empty are dropped. Ingredient `quantity`, `unit` and `notes` are kept only
 * when they are strings and become `null` otherwise.
 *
 * @param parsed Recipe input produced by one of the parsers.
 * @param sourceUrl Fallback used when `parsed.source_url` is empty or missing.
 * @returns The draft, or `null` when the title, the ingredient list, or the
 *          instruction list is empty after cleaning.
 */
function toImportDraftSafe(parsed: CreateRecipeInput, sourceUrl: string): ImportRouteDraftRecipe | null {
  const stringOrNull = (value: unknown): string | null => (typeof value === 'string' ? value : null);
  const trimmedOrEmpty = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');

  const cleanTitle = parsed.title?.trim();

  const cleanIngredients = Array.isArray(parsed.ingredients)
    ? parsed.ingredients.flatMap((entry): ImportRouteDraftRecipe['ingredients'] => {
        const item = trimmedOrEmpty(entry.item);
        if (item.length === 0) {
          return [];
        }
        return [
          {
            item,
            quantity: stringOrNull(entry.quantity),
            unit: stringOrNull(entry.unit),
            notes: stringOrNull(entry.notes),
          },
        ];
      })
    : [];

  const cleanInstructions = Array.isArray(parsed.steps)
    ? parsed.steps.flatMap((step): string[] => {
        const text = trimmedOrEmpty(step.instruction);
        return text.length > 0 ? [text] : [];
      })
    : [];

  const usable = Boolean(cleanTitle) && cleanIngredients.length > 0 && cleanInstructions.length > 0;
  if (!cleanTitle || !usable) {
    return null;
  }

  return {
    title: cleanTitle,
    description: parsed.description,
    servings: parsed.servings,
    prep_time_minutes: parsed.prep_time_minutes,
    cook_time_minutes: parsed.cook_time_minutes,
    source_url: parsed.source_url || sourceUrl,
    image_url: parsed.image_url,
    ingredients: cleanIngredients,
    instructions: cleanInstructions,
    tagIds: parsed.tagIds,
  };
}
|
|
|
|
/**
 * Builds a draft from raw HTML when no structured recipe data was usable.
 *
 * The page `<title>` (or "Imported Recipe" when it is missing or empty) is
 * combined with list items found under ingredient-like and instruction-like
 * headings, run through `parseHeuristicRecipe`, and then validated by
 * `toImportDraftSafe`.
 *
 * @param html Full HTML of the fetched page.
 * @param sourceUrl URL the page was fetched from.
 * @returns The draft, or `null` when `toImportDraftSafe` rejects the result.
 */
function toHeuristicImportDraft(html: string, sourceUrl: string): ImportRouteDraftRecipe | null {
  const pageTitle = extractTitle(html);
  const ingredientLines = extractListItems(html, ['ingredient']);
  const stepLines = extractListItems(html, ['instruction', 'direction', 'method', 'step']);

  return toImportDraftSafe(
    parseHeuristicRecipe({
      title: pageTitle || 'Imported Recipe',
      ingredients: ingredientLines,
      steps: stepLines,
      source_url: sourceUrl,
    }),
    sourceUrl,
  );
}
|
|
|
|
/**
 * Reads the text of the first `<title>` element in the HTML.
 *
 * @param html HTML document text.
 * @returns The title passed through `normalizeText`, or `null` when there is
 *          no `<title>` element or it is empty. A title made up only of
 *          whitespace or markup normalises to an empty string.
 */
function extractTitle(html: string): string | null {
  const rawTitle = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)?.[1];
  return rawTitle ? normalizeText(rawTitle) : null;
}
|
|
|
|
/**
 * Collects list-item texts from the sections of an HTML page whose heading
 * mentions one of the given keywords.
 *
 * A section is the HTML between an `<h2>`/`<h3>`/`<h4>` heading and the next
 * such heading (or the end of the document). For every matching heading, the
 * first `<ul>` or `<ol>` inside its section supplies the items. Scoping the
 * search to the section keeps a heading without a list from picking up the
 * list that belongs to a later heading.
 *
 * @param html HTML document text.
 * @param headingKeywords Lower-case substrings to look for in the heading text.
 * @returns De-duplicated, normalised item texts in document order.
 */
function extractListItems(html: string, headingKeywords: string[]): string[] {
  const headingPattern = /<(?:h2|h3|h4)[^>]*>([\s\S]*?)<\/(?:h2|h3|h4)>/gi;
  // The backreference makes the closing tag agree with the opening one.
  const listPattern = /<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/i;
  const itemPattern = /<li[^>]*>([\s\S]*?)<\/li>/gi;

  const headings = [...html.matchAll(headingPattern)];
  const items: string[] = [];

  headings.forEach((heading, position) => {
    const headingText = normalizeText(heading[1] ?? '').toLowerCase();
    if (!headingKeywords.some((keyword) => headingText.includes(keyword))) {
      return;
    }

    // Limit the search to this heading's own section.
    const sectionStart = (heading.index ?? 0) + heading[0].length;
    const sectionEnd = headings[position + 1]?.index ?? html.length;
    const listMatch = listPattern.exec(html.slice(sectionStart, sectionEnd));
    if (!listMatch) {
      return;
    }

    for (const itemMatch of (listMatch[2] ?? '').matchAll(itemPattern)) {
      const text = normalizeText(itemMatch[1] ?? '');
      if (text) {
        items.push(text);
      }
    }
  });

  return dedupe(items);
}
|
|
|
|
/**
 * Turns an HTML fragment into plain, single-spaced text.
 *
 * Tags are replaced by spaces, the `&nbsp;` and `&amp;` entities are decoded,
 * runs of whitespace collapse to one space, and the result is trimmed.
 *
 * @param text HTML fragment or plain text.
 * @returns The cleaned text; may be an empty string.
 */
function normalizeText(text: string): string {
  return text
    .replace(/<[^>]+>/g, ' ')
    .replace(/&nbsp;/g, ' ')
    // Decode &amp; last so an escaped entity such as "&amp;nbsp;" stays the
    // literal text "&nbsp;" instead of being decoded twice.
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
|
|
/**
 * Removes repeated strings while keeping the first occurrence of each in its
 * original position.
 *
 * @param values Strings that may contain duplicates.
 * @returns A new array holding each distinct string once.
 */
function dedupe(values: string[]): string[] {
  const distinct = new Set(values);
  return Array.from(distinct);
}
|