Add URL import retry/timeout handling and friendly failure messaging

This commit is contained in:
Paul Huliganga 2026-03-24 23:54:23 -04:00
parent d3eeeb2833
commit 1ca21889ca
5 changed files with 158 additions and 14 deletions

View File

@ -38,7 +38,7 @@ MVP is functionally complete (core app + docs + tests).
### Phase 3: Fallback Parsing + Hardening ### Phase 3: Fallback Parsing + Hardening
- [x] Add heuristic fallback parser when Schema.org missing - [x] Add heuristic fallback parser when Schema.org missing
- [ ] Add timeout/retry + user-friendly import failure messages - [x] Add timeout/retry + user-friendly import failure messages
- [ ] Add logging/telemetry for import success/failure reasons - [ ] Add logging/telemetry for import success/failure reasons
### Phase 4: Browser Extension (after URL import stable) ### Phase 4: Browser Extension (after URL import stable)

View File

@ -34,6 +34,20 @@ function getImportErrorDetails(message: string): { type: ImportErrorType; messag
}; };
} }
if (normalized.includes('network error') || normalized.includes('could not fetch the page')) {
return {
type: 'generic',
message: 'We could not reach that recipe page right now. Please try again in a moment.',
};
}
if (normalized.includes('did not return an html page')) {
return {
type: 'generic',
message: 'That link did not point to an HTML recipe page. Try the direct recipe URL.',
};
}
return { return {
type: 'generic', type: 'generic',
message, message,

View File

@ -1,6 +1,6 @@
import { Router } from 'express'; import { Router } from 'express';
import { z } from 'zod'; import { z } from 'zod';
import { UrlImportService } from '../services/UrlImportService.js'; import { UrlImportError, UrlImportService } from '../services/UrlImportService.js';
import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js'; import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js';
import { HeuristicRecipeParserService } from '../services/HeuristicRecipeParserService.js'; import { HeuristicRecipeParserService } from '../services/HeuristicRecipeParserService.js';
@ -8,6 +8,16 @@ const importUrlSchema = z.object({
url: z.string().url('A valid URL is required'), url: z.string().url('A valid URL is required'),
}); });
function mapImportErrorToStatus(error: UrlImportError): number {
if (error.code === 'IMPORT_TIMEOUT') return 504;
if (error.code === 'IMPORT_NETWORK') return 502;
if (error.code === 'IMPORT_FETCH_FAILED') {
if (error.status !== undefined && error.status >= 500) return 502;
return 400;
}
return 400;
}
export function createImportRoutes(): Router { export function createImportRoutes(): Router {
const router = Router(); const router = Router();
const urlImportService = new UrlImportService(); const urlImportService = new UrlImportService();
@ -50,9 +60,17 @@ export function createImportRoutes(): Router {
return; return;
} }
if (error instanceof UrlImportError) {
res.status(mapImportErrorToStatus(error)).json({
success: false,
data: null,
error: error.message,
});
return;
}
if (error instanceof Error) { if (error instanceof Error) {
const status = error.message.includes('timed out') ? 504 : 400; res.status(500).json({
res.status(status).json({
success: false, success: false,
data: null, data: null,
error: error.message, error: error.message,

View File

@ -4,14 +4,32 @@ export interface UrlImportFetchResult {
json_ld_blocks: string[]; json_ld_blocks: string[];
} }
export type UrlImportErrorCode =
| 'IMPORT_TIMEOUT'
| 'IMPORT_NETWORK'
| 'IMPORT_FETCH_FAILED'
| 'IMPORT_UNSUPPORTED_CONTENT';
export class UrlImportError extends Error {
constructor(
public readonly code: UrlImportErrorCode,
message: string,
public readonly status?: number,
) {
super(message);
this.name = 'UrlImportError';
}
}
/** /**
* Foundation service for importing recipe content from public URLs. * Foundation service for importing recipe content from public URLs.
*/ */
export class UrlImportService { export class UrlImportService {
private static readonly DEFAULT_TIMEOUT_MS = 10000; private static readonly DEFAULT_TIMEOUT_MS = 10000;
private static readonly MAX_RETRIES = 2;
async fetchFromUrl(url: string): Promise<UrlImportFetchResult> { async fetchFromUrl(url: string): Promise<UrlImportFetchResult> {
const html = await this.fetchHtml(url); const html = await this.fetchHtmlWithRetry(url);
const jsonLdBlocks = this.extractJsonLdBlocks(html); const jsonLdBlocks = this.extractJsonLdBlocks(html);
return { return {
@ -21,7 +39,35 @@ export class UrlImportService {
}; };
} }
private async fetchHtml(url: string): Promise<string> { private async fetchHtmlWithRetry(url: string): Promise<string> {
let lastError: UrlImportError | null = null;
for (let attempt = 0; attempt <= UrlImportService.MAX_RETRIES; attempt += 1) {
try {
return await this.fetchHtmlOnce(url);
} catch (error) {
if (error instanceof UrlImportError) {
lastError = error;
// Retry only on transient failures
if (
(error.code === 'IMPORT_TIMEOUT' || error.code === 'IMPORT_NETWORK' || (error.status !== undefined && error.status >= 500)) &&
attempt < UrlImportService.MAX_RETRIES
) {
continue;
}
throw error;
}
throw error;
}
}
throw lastError ?? new UrlImportError('IMPORT_FETCH_FAILED', 'Unable to import this URL right now. Please try again.');
}
private async fetchHtmlOnce(url: string): Promise<string> {
const controller = new AbortController(); const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), UrlImportService.DEFAULT_TIMEOUT_MS); const timeout = setTimeout(() => controller.abort(), UrlImportService.DEFAULT_TIMEOUT_MS);
@ -36,25 +82,42 @@ export class UrlImportService {
}); });
if (!response.ok) { if (!response.ok) {
throw new Error(`Failed to fetch URL: HTTP ${response.status}`); throw new UrlImportError(
'IMPORT_FETCH_FAILED',
`Could not fetch the page (HTTP ${response.status}).`,
response.status,
);
} }
const contentType = response.headers.get('content-type') ?? ''; const contentType = response.headers.get('content-type') ?? '';
if (!contentType.includes('text/html')) { if (!contentType.includes('text/html')) {
throw new Error('URL did not return an HTML document'); throw new UrlImportError(
'IMPORT_UNSUPPORTED_CONTENT',
'The URL did not return an HTML page. Please use a direct recipe page URL.',
);
} }
return await response.text(); return await response.text();
} catch (error) { } catch (error) {
if (error instanceof Error && error.name === 'AbortError') { if (error instanceof UrlImportError) {
throw new Error('Import request timed out while fetching URL');
}
if (error instanceof Error) {
throw error; throw error;
} }
throw new Error('Unknown error while fetching URL'); if (error instanceof Error && error.name === 'AbortError') {
throw new UrlImportError(
'IMPORT_TIMEOUT',
'Import timed out while contacting the recipe page. Please try again.',
);
}
if (error instanceof Error) {
throw new UrlImportError(
'IMPORT_NETWORK',
'Network error while fetching recipe URL. Please try again.',
);
}
throw new UrlImportError('IMPORT_FETCH_FAILED', 'Unknown error while fetching URL');
} finally { } finally {
clearTimeout(timeout); clearTimeout(timeout);
} }

View File

@ -189,6 +189,55 @@ describe('Import API', () => {
expect(response.body.data.draft_recipe).toBeNull(); expect(response.body.data.draft_recipe).toBeNull();
}); });
it('should retry transient fetch failures and eventually succeed', async () => {
const html = '<html><body><h1>Retry Recipe</h1><h2>Ingredients</h2><ul><li>1 egg</li></ul><h2>Instructions</h2><ol><li>Cook it.</li></ol></body></html>';
let callCount = 0;
vi.spyOn(globalThis, 'fetch').mockImplementation(async () => {
callCount += 1;
if (callCount < 3) {
throw new Error('temporary network issue');
}
return {
ok: true,
status: 200,
headers: new Headers({ 'content-type': 'text/html; charset=utf-8' }),
text: async () => html,
} as Response;
});
const response = await request(app)
.post('/api/import/url')
.send({ url: 'https://example.com/retry-recipe' })
.expect(200);
expect(callCount).toBe(3);
expect(response.body.success).toBe(true);
expect(response.body.data.draft_recipe).toMatchObject({
title: 'Retry Recipe',
ingredients: ['1 egg'],
instructions: ['Cook it.'],
});
});
it('should return timeout-friendly message after retries are exhausted', async () => {
const timeoutError = new Error('aborted');
timeoutError.name = 'AbortError';
vi.spyOn(globalThis, 'fetch').mockRejectedValue(timeoutError);
const response = await request(app)
.post('/api/import/url')
.send({ url: 'https://example.com/slow-recipe' })
.expect(504);
expect(response.body.success).toBe(false);
expect(response.body.error).toContain('timed out');
});
it('should return an error for non-HTML responses', async () => { it('should return an error for non-HTML responses', async () => {
vi.spyOn(globalThis, 'fetch').mockResolvedValue({ vi.spyOn(globalThis, 'fetch').mockResolvedValue({
ok: true, ok: true,