feat(backend): add URL import endpoint foundation
This commit is contained in:
parent
0653107f62
commit
146dc3325a
2
TODO.md
2
TODO.md
|
|
@ -25,7 +25,7 @@ MVP is functionally complete (core app + docs + tests).
|
||||||
## 🎯 Active Tasks — v1.0 Recipe Import
|
## 🎯 Active Tasks — v1.0 Recipe Import
|
||||||
|
|
||||||
### Phase 1: URL Import Foundation
|
### Phase 1: URL Import Foundation
|
||||||
- [ ] Add backend import endpoint: `POST /api/import/url`
|
- [x] Add backend import endpoint: `POST /api/import/url`
|
||||||
- [ ] Implement Schema.org Recipe JSON-LD parser service
|
- [ ] Implement Schema.org Recipe JSON-LD parser service
|
||||||
- [ ] Normalize parsed recipe into internal Recipe draft format
|
- [ ] Normalize parsed recipe into internal Recipe draft format
|
||||||
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
|
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import express from 'express';
|
||||||
import { getDatabase, saveDatabase } from './db/database.js';
|
import { getDatabase, saveDatabase } from './db/database.js';
|
||||||
import { createRecipeRoutes } from './routes/recipes.js';
|
import { createRecipeRoutes } from './routes/recipes.js';
|
||||||
import { createTagRoutes } from './routes/tags.js';
|
import { createTagRoutes } from './routes/tags.js';
|
||||||
|
import { createImportRoutes } from './routes/import.js';
|
||||||
|
|
||||||
const app = express();
|
const app = express();
|
||||||
const port = 3000;
|
const port = 3000;
|
||||||
|
|
@ -41,6 +42,7 @@ async function startServer() {
|
||||||
// Mount API routes
|
// Mount API routes
|
||||||
app.use('/api/recipes', createRecipeRoutes(db));
|
app.use('/api/recipes', createRecipeRoutes(db));
|
||||||
app.use('/api/tags', createTagRoutes(db));
|
app.use('/api/tags', createTagRoutes(db));
|
||||||
|
app.use('/api/import', createImportRoutes());
|
||||||
|
|
||||||
// Save database periodically (every 5 seconds)
|
// Save database periodically (every 5 seconds)
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
|
|
@ -82,6 +84,8 @@ async function startServer() {
|
||||||
console.log(` GET /api/tags/recipes/:id/tags - Get recipe tags`);
|
console.log(` GET /api/tags/recipes/:id/tags - Get recipe tags`);
|
||||||
console.log(` POST /api/tags/recipes/:id/tags - Assign tag`);
|
console.log(` POST /api/tags/recipes/:id/tags - Assign tag`);
|
||||||
console.log(` DELETE /api/tags/recipes/:id/tags/:id - Remove tag`);
|
console.log(` DELETE /api/tags/recipes/:id/tags/:id - Remove tag`);
|
||||||
|
console.log(` Import:`);
|
||||||
|
console.log(` POST /api/import/url - Import recipe foundation data from URL`);
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to start server:', error);
|
console.error('Failed to start server:', error);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
import { Router } from 'express';
|
||||||
|
import { z } from 'zod';
|
||||||
|
import { UrlImportService } from '../services/UrlImportService.js';
|
||||||
|
|
||||||
|
/** Shape of the JSON body accepted by `POST /api/import/url`. */
const importUrlSchema = z.object({
  url: z.string().url('A valid URL is required'),
});

/**
 * Builds the router that serves the recipe import endpoints.
 *
 * Every response uses the same envelope: `{ success, data, error }`.
 *
 * @returns A router exposing `POST /url`, intended to be mounted under `/api/import`.
 */
export function createImportRoutes(): Router {
  const router = Router();
  const urlImportService = new UrlImportService();

  /**
   * POST /api/import/url
   * Fetch an external recipe page and return URL import foundation data.
   *
   * Responses:
   * - 200: fetch succeeded; `data` holds the service result.
   * - 400: the body failed schema validation (`error` is the list of Zod issues),
   *        or the service threw an `Error` (`error` is its message).
   * - 504: the service threw an `Error` whose message contains "timed out".
   * - 500: something that is not an `Error` instance was thrown.
   */
  router.post('/url', async (req, res) => {
    // safeParse keeps validation failures out of the exception path, and
    // `issues` is the canonical ZodError property (`errors` is a legacy alias).
    const parsed = importUrlSchema.safeParse(req.body);
    if (!parsed.success) {
      res.status(400).json({
        success: false,
        data: null,
        error: parsed.error.issues,
      });
      return;
    }

    try {
      const result = await urlImportService.fetchFromUrl(parsed.data.url);

      res.status(200).json({
        success: true,
        data: result,
        error: null,
      });
    } catch (error) {
      if (error instanceof Error) {
        // The service reports timeouts through the error message only, so the
        // gateway-timeout status is derived from that wording.
        const status = error.message.includes('timed out') ? 504 : 400;
        res.status(status).json({
          success: false,
          data: null,
          error: error.message,
        });
        return;
      }

      res.status(500).json({
        success: false,
        data: null,
        error: 'Internal server error',
      });
    }
  });

  return router;
}
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
/** Raw material gathered from a fetched page, before any recipe parsing. */
export interface UrlImportFetchResult {
  /** The URL that was requested, exactly as supplied by the caller. */
  source_url: string;
  /** Full response body of the fetched document. */
  html: string;
  /** Trimmed, non-empty contents of each `application/ld+json` script tag, in document order. */
  json_ld_blocks: string[];
}

/**
 * Foundation service for importing recipe content from public URLs.
 */
export class UrlImportService {
  private static readonly DEFAULT_TIMEOUT_MS = 10000;

  /** URL schemes the importer is willing to fetch. */
  private static readonly ALLOWED_PROTOCOLS: readonly string[] = ['http:', 'https:'];

  /** Media types treated as HTML; kept in step with the `Accept` header sent below. */
  private static readonly HTML_CONTENT_TYPES: readonly string[] = [
    'text/html',
    'application/xhtml+xml',
  ];

  /**
   * Fetches a page and extracts its JSON-LD script blocks.
   *
   * @param url Absolute http(s) URL of the page to import.
   * @returns The requested URL, the response body, and the extracted JSON-LD blocks.
   * @throws Error when the URL is unparseable or not http(s), the response is
   *         not 2xx, the response is not HTML/XHTML, or the request times out
   *         (message contains "timed out").
   */
  async fetchFromUrl(url: string): Promise<UrlImportFetchResult> {
    this.assertHttpUrl(url);

    const html = await this.fetchHtml(url);

    return {
      source_url: url,
      html,
      json_ld_blocks: this.extractJsonLdBlocks(html),
    };
  }

  /**
   * Rejects URLs that cannot be parsed or that use a scheme other than http/https.
   *
   * NOTE(review): this only restricts the scheme. It does not block requests to
   * private or loopback hosts; add that separately if the server can reach
   * internal services.
   */
  private assertHttpUrl(url: string): void {
    let protocol: string;
    try {
      protocol = new URL(url).protocol;
    } catch {
      throw new Error('A valid URL is required');
    }

    if (!UrlImportService.ALLOWED_PROTOCOLS.includes(protocol)) {
      throw new Error('Only http and https URLs can be imported');
    }
  }

  /**
   * Performs the GET request and returns the response body as text.
   * The abort timer stays armed until the body has been read, so the timeout
   * covers both the request and the download.
   */
  private async fetchHtml(url: string): Promise<string> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), UrlImportService.DEFAULT_TIMEOUT_MS);

    try {
      const response = await fetch(url, {
        method: 'GET',
        signal: controller.signal,
        headers: {
          'User-Agent': 'RecipeManagerBot/1.0 (+https://recipes.paje.ca)',
          Accept: 'text/html,application/xhtml+xml',
        },
      });

      if (!response.ok) {
        throw new Error(`Failed to fetch URL: HTTP ${response.status}`);
      }

      // Media types are case-insensitive, and XHTML is explicitly requested in
      // the Accept header, so both must pass this check.
      const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
      const isHtml = UrlImportService.HTML_CONTENT_TYPES.some((type) =>
        contentType.includes(type),
      );
      if (!isHtml) {
        throw new Error('URL did not return an HTML document');
      }

      return await response.text();
    } catch (error) {
      // An aborted request surfaces as an error named "AbortError"; translate
      // it into the timeout message callers look for.
      if (error instanceof Error && error.name === 'AbortError') {
        throw new Error('Import request timed out while fetching URL');
      }

      if (error instanceof Error) {
        throw error;
      }

      throw new Error('Unknown error while fetching URL');
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Collects the trimmed contents of every `<script type="application/ld+json">`
   * element, skipping empty ones. The contents are returned as raw strings and
   * are not parsed as JSON here.
   */
  private extractJsonLdBlocks(html: string): string[] {
    const scriptTagPattern = /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
    const blocks: string[] = [];

    for (const match of html.matchAll(scriptTagPattern)) {
      const content = match[1]?.trim();
      if (content) {
        blocks.push(content);
      }
    }

    return blocks;
  }
}
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||||
|
import express from 'express';
|
||||||
|
import request from 'supertest';
|
||||||
|
import { createImportRoutes } from '../routes/import.js';
|
||||||
|
|
||||||
|
describe('Import API', () => {
  const ENDPOINT = '/api/import/url';

  let app: express.Application;

  /** Replaces the global fetch with a stub that resolves to a 200 response. */
  const stubFetch = (contentType: string, body: string): void => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValue({
      ok: true,
      status: 200,
      headers: new Headers({ 'content-type': contentType }),
      text: async () => body,
    } as Response);
  };

  /** Posts `{ url }` to the import endpoint of the app under test. */
  const postUrl = (url: string) => request(app).post(ENDPOINT).send({ url });

  beforeEach(() => {
    // Fresh app per test, with only the JSON parser and the import router mounted.
    app = express();
    app.use(express.json());
    app.use('/api/import', createImportRoutes());
  });

  afterEach(() => {
    // Undo any fetch stub so tests cannot leak into each other.
    vi.restoreAllMocks();
  });

  it('should validate URL request payload', async () => {
    const { body } = await postUrl('not-a-url').expect(400);

    expect(body.success).toBe(false);
    expect(body.error).toBeDefined();
  });

  it('should return imported foundation data for valid HTML', async () => {
    const html = `
      <html>
        <head>
          <script type="application/ld+json">{"@type":"Recipe","name":"Pancakes"}</script>
        </head>
        <body>Hello</body>
      </html>
    `;
    stubFetch('text/html; charset=utf-8', html);

    const { body } = await postUrl('https://example.com/recipe').expect(200);

    expect(body.success).toBe(true);
    expect(body.data.source_url).toBe('https://example.com/recipe');
    expect(body.data.json_ld_blocks).toEqual([
      '{"@type":"Recipe","name":"Pancakes"}',
    ]);
  });

  it('should return an error for non-HTML responses', async () => {
    stubFetch('application/json', '{"ok":true}');

    const { body } = await postUrl('https://example.com/data.json').expect(400);

    expect(body.success).toBe(false);
    expect(body.error).toContain('HTML');
  });
});
|
||||||
Loading…
Reference in New Issue