// recipe-manager/src/backend/routes/import.ts

import { Router } from 'express';
import { z } from 'zod';
import { parseSchemaOrgRecipe } from '../services/SchemaOrgRecipeParserService.js';
import { parseHeuristicRecipe } from '../services/HeuristicRecipeParserService.js';
import { UrlImportError, UrlImportService } from '../services/UrlImportService.js';
import type { CreateRecipeInput } from '../types/recipe.js';
import { asyncHandler } from '../middleware.js';

/**
 * Request-body schema for `POST /url`.
 *
 * The URL is handed to a server-side fetcher, so besides being syntactically
 * valid it must use the http or https scheme. The plain `.url()` check alone
 * also accepts schemes such as `file:`, `ftp:` or `javascript:`.
 */
const importUrlSchema = z.object({
  url: z
    .string()
    .url('Please provide a valid URL (including https://).')
    .refine((value) => /^https?:\/\//i.test(value), {
      message: 'Only http:// and https:// URLs can be imported.',
    }),
});

/**
 * Recipe draft returned to the client by the import route.
 *
 * Instances are built by `toImportDraftSafe`, which only returns a draft when
 * the title, at least one ingredient and at least one instruction are present.
 */
interface ImportRouteDraftRecipe {
  /** Trimmed recipe title. */
  title: string;
  description?: string;
  servings?: number;
  prep_time_minutes?: number;
  cook_time_minutes?: number;
  /** URL the recipe came from. */
  source_url?: string;
  image_url?: string;
  /** Ingredient rows; `item` is the trimmed, non-empty ingredient text. */
  ingredients: Array<{
    item: string;
    quantity?: string | null;
    unit?: string | null;
    notes?: string | null;
  }>;
  /** Instruction steps as plain strings, in order. */
  instructions: Array<string>;
  tagIds?: Array<number>;
}

/**
 * Payload placed in the `data` field of a successful `POST /url` response.
 */
interface ImportRouteResult {
  /** Title of the extracted draft. */
  title: string;
  /** Source URL as reported by the fetch step. */
  source_url: string;
  /** JSON-LD blocks from the page that parsed as valid JSON. */
  json_ld_blocks: Array<unknown>;
  /** The full draft the client can edit and save. */
  draft_recipe: ImportRouteDraftRecipe;
  /** Ingredient item names only (no quantity, unit or notes). */
  ingredients: Array<string>;
  /** Instruction steps of the draft. */
  instructions: Array<string>;
  /** Which extraction strategy produced the draft, plus non-fatal warnings. */
  parse: {
    schema_org_used: boolean;
    heuristic_used: boolean;
    warnings: Array<string>;
  };
}

/**
 * Builds the router exposing `POST /url`, which fetches a page and tries to
 * turn it into a recipe draft.
 *
 * Extraction order: a schema.org recipe found in the page's JSON-LD is tried
 * first; if that yields no usable draft, headings and lists in the HTML are
 * scanned heuristically. Every response uses the `{ success, data, error }`
 * envelope.
 *
 * @param urlImportService Service used to fetch the page; injectable for tests.
 * @returns An Express router with the import route registered.
 */
export function createImportRoutes(urlImportService = new UrlImportService()) {
  const router = Router();

  router.post('/url', asyncHandler(async (req, res, next) => {
    // Validation failures throw and are left to the surrounding async handler.
    const { url } = importUrlSchema.parse(req.body);

    let fetched;
    try {
      fetched = await urlImportService.fetchFromUrl(url);
    } catch (err: unknown) {
      // Anything can be thrown, so read `code` defensively: a null throwable or
      // a non-string code must not raise a second error that hides the first.
      const code =
        typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined;

      if (typeof code === 'string' && code.startsWith('IMPORT_')) {
        const mapped = mapUrlImportError(err as UrlImportError);
        res.status(mapped.status).json({ success: false, data: null, error: mapped.message });
        return;
      }

      next(err);
      return;
    }

    const parseWarnings: string[] = [];
    const parsedJsonLdBlocks = parseJsonLdBlocks(fetched.json_ld_blocks, parseWarnings);

    const schemaCandidate = findSchemaOrgRecipeCandidate(parsedJsonLdBlocks);
    const schemaDraft = schemaCandidate
      ? toImportDraftSafe(parseSchemaOrgRecipe(schemaCandidate), fetched.source_url)
      : null;

    // The heuristic pass only runs when structured data gave no usable draft.
    const heuristicDraft = schemaDraft
      ? null
      : toHeuristicImportDraft(fetched.html, fetched.source_url);

    const draft = schemaDraft ?? heuristicDraft;

    if (!draft) {
      res.status(422).json({
        success: false,
        data: null,
        error: 'Parse failed: Could not extract a usable recipe from this page.',
      });
      return;
    }

    const response: ImportRouteResult = {
      title: draft.title,
      source_url: fetched.source_url,
      json_ld_blocks: parsedJsonLdBlocks,
      draft_recipe: draft,
      ingredients: draft.ingredients.map((item) => item.item),
      instructions: draft.instructions,
      parse: {
        schema_org_used: Boolean(schemaDraft),
        heuristic_used: Boolean(!schemaDraft && heuristicDraft),
        warnings: parseWarnings,
      },
    };

    res.json({ success: true, data: response, error: null });
  }));

  return router;
}

/**
 * Translates a URL-import failure into the HTTP status and message the route
 * sends back.
 *
 * Timeouts map to 504, network failures to 502 and unsupported content to
 * 415. `IMPORT_FETCH_FAILED` and any other code reuse the error's own
 * `status` when it is 400 or above, otherwise 502.
 *
 * @param error The import error to translate.
 * @returns The response status and the error's message.
 */
function mapUrlImportError(error: UrlImportError): { status: number; message: string } {
  if (error.code === 'IMPORT_TIMEOUT') {
    return { status: 504, message: error.message };
  }

  if (error.code === 'IMPORT_NETWORK') {
    return { status: 502, message: error.message };
  }

  if (error.code === 'IMPORT_UNSUPPORTED_CONTENT') {
    return { status: 415, message: error.message };
  }

  // IMPORT_FETCH_FAILED and every unrecognised code end up here.
  const upstreamStatus = error.status;
  const status = upstreamStatus && upstreamStatus >= 400 ? upstreamStatus : 502;
  return { status, message: error.message };
}

/**
 * Decodes raw JSON-LD script contents into values.
 *
 * A block that is not valid JSON is skipped and a warning is appended to
 * `warnings`; the remaining blocks keep their relative order.
 *
 * @param blocks Raw JSON-LD strings.
 * @param warnings Array that receives one message per skipped block (mutated).
 * @returns The successfully decoded values.
 */
function parseJsonLdBlocks(blocks: string[], warnings: string[]): unknown[] {
  const decoded: unknown[] = [];

  for (const text of blocks) {
    let value: unknown;

    try {
      value = JSON.parse(text);
    } catch {
      warnings.push('Skipped malformed JSON-LD block.');
      continue;
    }

    decoded.push(value);
  }

  return decoded;
}

/**
 * Picks the best recipe-typed object out of the decoded JSON-LD blocks.
 *
 * All recipe-typed objects are gathered first. The first one with a string
 * `name` is preferred; if none has one, the first gathered object is used.
 *
 * @param blocks Decoded JSON-LD values.
 * @returns The chosen candidate, or `null` when no recipe-typed object exists.
 */
function findSchemaOrgRecipeCandidate(blocks: unknown[]): Record<string, unknown> | null {
  const found: Record<string, unknown>[] = [];
  blocks.forEach((block) => collectRecipeCandidates(block, found));

  const [fallback] = found;
  if (fallback === undefined) {
    return null;
  }

  const named = found.find((entry) => typeof entry.name === 'string');
  return named ?? fallback;
}

/**
 * Recursively walks a decoded JSON-LD value and appends every object whose
 * `@type` names a recipe to `sink`.
 *
 * Order of discovery for an object: the object itself, then the contents of
 * its `@graph`, then its remaining properties in key order. Each object is
 * visited once, so a recipe inside `@graph` is not reported twice.
 *
 * @param value Any decoded JSON value (object, array or primitive).
 * @param sink Array that receives the matching objects (mutated).
 */
function collectRecipeCandidates(value: unknown, sink: Record<string, unknown>[]): void {
  if (!value || typeof value !== 'object') {
    return;
  }

  if (Array.isArray(value)) {
    for (const item of value) {
      collectRecipeCandidates(item, sink);
    }
    return;
  }

  const obj = value as Record<string, unknown>;

  if (isRecipeType(obj['@type'])) {
    sink.push(obj);
  }

  // `@graph` is walked first so its recipes keep priority over other nested
  // values, and is then skipped in the generic loop to avoid a second pass.
  if ('@graph' in obj) {
    collectRecipeCandidates(obj['@graph'], sink);
  }

  for (const [key, nested] of Object.entries(obj)) {
    if (key === '@graph') {
      continue;
    }
    if (nested && typeof nested === 'object') {
      collectRecipeCandidates(nested, sink);
    }
  }
}

/**
 * Tells whether a JSON-LD `@type` value refers to a recipe.
 *
 * A string matches when it contains "recipe" (case-insensitive); an array
 * matches when any of its string elements does. Everything else is rejected.
 *
 * @param typeValue The raw `@type` value.
 * @returns `true` when the value names a recipe type.
 */
function isRecipeType(typeValue: unknown): boolean {
  const mentionsRecipe = (candidate: unknown): boolean =>
    typeof candidate === 'string' && candidate.toLowerCase().includes('recipe');

  return Array.isArray(typeValue)
    ? typeValue.some((entry) => mentionsRecipe(entry))
    : mentionsRecipe(typeValue);
}

/**
 * Converts parser output into the draft shape returned by the import route,
 * or `null` when the result is not usable.
 *
 * Ingredient items and step instructions are trimmed and empty ones dropped;
 * non-string `quantity`, `unit` and `notes` become `null`. A draft is only
 * returned when the title, the ingredient list and the instruction list are
 * all non-empty.
 *
 * @param parsed Recipe input produced by one of the parsers.
 * @param sourceUrl Fallback source URL used when `parsed.source_url` is empty.
 * @returns The cleaned draft, or `null`.
 */
function toImportDraftSafe(parsed: CreateRecipeInput, sourceUrl: string): ImportRouteDraftRecipe | null {
  const textOrNull = (value: unknown): string | null => (typeof value === 'string' ? value : null);

  const cleanTitle = parsed.title?.trim();

  const cleanIngredients: ImportRouteDraftRecipe['ingredients'] = [];
  if (Array.isArray(parsed.ingredients)) {
    parsed.ingredients.forEach((entry) => {
      const item = typeof entry.item === 'string' ? entry.item.trim() : '';
      if (item.length === 0) {
        return;
      }
      cleanIngredients.push({
        item,
        quantity: textOrNull(entry.quantity),
        unit: textOrNull(entry.unit),
        notes: textOrNull(entry.notes),
      });
    });
  }

  const cleanInstructions: string[] = [];
  if (Array.isArray(parsed.steps)) {
    parsed.steps.forEach((step) => {
      const instruction = typeof step.instruction === 'string' ? step.instruction.trim() : '';
      if (instruction.length > 0) {
        cleanInstructions.push(instruction);
      }
    });
  }

  const usable = Boolean(cleanTitle) && cleanIngredients.length > 0 && cleanInstructions.length > 0;
  if (!cleanTitle || !usable) {
    return null;
  }

  return {
    title: cleanTitle,
    description: parsed.description,
    servings: parsed.servings,
    prep_time_minutes: parsed.prep_time_minutes,
    cook_time_minutes: parsed.cook_time_minutes,
    source_url: parsed.source_url || sourceUrl,
    image_url: parsed.image_url,
    ingredients: cleanIngredients,
    instructions: cleanInstructions,
    tagIds: parsed.tagIds,
  };
}

/**
 * Builds a draft from raw HTML when no structured recipe data is usable.
 *
 * The page title (or "Imported Recipe" when none is found) and the list
 * items under ingredient-like and instruction-like headings are passed
 * through the heuristic parser and then cleaned like any other parser output.
 *
 * @param html Raw page markup.
 * @param sourceUrl URL the page was fetched from.
 * @returns A usable draft, or `null` when too little could be extracted.
 */
function toHeuristicImportDraft(html: string, sourceUrl: string): ImportRouteDraftRecipe | null {
  const pageTitle = extractTitle(html);

  const heuristicInput = {
    title: pageTitle || 'Imported Recipe',
    ingredients: extractListItems(html, ['ingredient']),
    steps: extractListItems(html, ['instruction', 'direction', 'method', 'step']),
    source_url: sourceUrl,
  };

  return toImportDraftSafe(parseHeuristicRecipe(heuristicInput), sourceUrl);
}

/**
 * Reads the text of the first `<title>` element.
 *
 * @param html Raw page markup.
 * @returns The normalised title text, or `null` when there is no `<title>`
 *   element or it is empty.
 */
function extractTitle(html: string): string | null {
  const rawTitle = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)?.[1];
  return rawTitle ? normalizeText(rawTitle) : null;
}

/**
 * Collects the text of list items that follow headings containing one of the
 * given keywords.
 *
 * Every `<h2>`–`<h4>` heading is examined on its own. When its text contains
 * a keyword, the first `<ul>` or `<ol>` after it supplies the items. Because
 * headings are paired with lists independently, an unrelated heading earlier
 * in the page cannot consume the list belonging to a later matching heading.
 *
 * Limitation: a list is cut at its first closing tag, so items of nested
 * lists are only partially captured.
 *
 * @param html Raw page markup.
 * @param headingKeywords Lower-case substrings to look for in heading text.
 * @returns Normalised, de-duplicated item texts in document order.
 */
function extractListItems(html: string, headingKeywords: string[]): string[] {
  const headingPattern = /<h[2-4]\b[^>]*>([\s\S]*?)<\/h[2-4]>/gi;
  // The back-reference makes the closing tag agree with the opening one.
  const listPattern = /<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi;
  const itemPattern = /<li\b[^>]*>([\s\S]*?)<\/li>/gi;

  const items: string[] = [];

  let heading = headingPattern.exec(html);
  while (heading) {
    const headingText = normalizeText(heading[1] ?? '').toLowerCase();

    if (headingKeywords.some((keyword) => headingText.includes(keyword))) {
      // Start the list search right after this heading's closing tag.
      listPattern.lastIndex = headingPattern.lastIndex;
      const list = listPattern.exec(html);
      const listHtml = list?.[2] ?? '';

      itemPattern.lastIndex = 0;
      let item = itemPattern.exec(listHtml);
      while (item) {
        const text = normalizeText(item[1] ?? '');
        if (text) {
          items.push(text);
        }
        item = itemPattern.exec(listHtml);
      }
    }

    heading = headingPattern.exec(html);
  }

  return dedupe(items);
}

/** Named character references that commonly occur in recipe text. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['deg', '\u00B0'],
  ['frac12', '\u00BD'],
  ['frac14', '\u00BC'],
  ['frac34', '\u00BE'],
  ['lsquo', '\u2018'],
  ['rsquo', '\u2019'],
  ['ldquo', '\u201C'],
  ['rdquo', '\u201D'],
  ['ndash', '\u2013'],
  ['mdash', '\u2014'],
  ['hellip', '\u2026'],
]);

/**
 * Decodes one character reference; unknown names and invalid code points are
 * returned unchanged.
 */
function decodeHtmlEntity(entity: string, body: string): string {
  if (body.startsWith('#')) {
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
    return isValid ? String.fromCodePoint(codePoint) : entity;
  }

  return NAMED_HTML_ENTITIES.get(body) ?? entity;
}

/**
 * Reduces an HTML fragment to plain, single-spaced text.
 *
 * Tags are replaced by spaces, numeric and common named character references
 * are decoded in a single pass (so `&amp;lt;` becomes the literal text
 * `&lt;`, not `<`), whitespace runs collapse to one space and the result is
 * trimmed.
 *
 * The result is plain text and may contain `<` or `>`; callers that render it
 * as HTML must escape it.
 *
 * @param text HTML fragment.
 * @returns The normalised plain text.
 */
function normalizeText(text: string): string {
  return text
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g, decodeHtmlEntity)
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Removes repeated strings while keeping the first occurrence of each in its
 * original position.
 *
 * @param values Strings that may contain duplicates.
 * @returns A new array with each distinct string once.
 */
function dedupe(values: string[]): string[] {
  const seen = new Set<string>();
  const unique: string[] = [];

  for (const value of values) {
    if (!seen.has(value)) {
      seen.add(value);
      unique.push(value);
    }
  }

  return unique;
}