Compare commits
4 Commits
0653107f62
...
37b17f7284
| Author | SHA1 | Date |
|---|---|---|
|
|
37b17f7284 | |
|
|
3987083e46 | |
|
|
64a1229ff0 | |
|
|
146dc3325a |
|
|
@ -9,6 +9,15 @@
|
|||
|
||||
### 1. Orientation First
|
||||
Every time you wake up (new session):
|
||||
|
||||
**PRE-FLIGHT GUARD (mandatory):**
|
||||
- Verify these files exist at absolute path `/home/paulh/.openclaw/workspace/projects/recipe-manager/`:
|
||||
- `AGENT_INSTRUCTIONS.md`
|
||||
- `TODO.md`
|
||||
- If either is missing, stop immediately and report:
|
||||
- `STUCK: bad working dir or missing harness files at /home/paulh/.openclaw/workspace/projects/recipe-manager`
|
||||
|
||||
Then continue orientation:
|
||||
1. Read `PROJECT.md` — What are we building?
|
||||
2. Read `ARCHITECTURE.md` — How are we building it?
|
||||
3. Read `ROADMAP.md` — What's next?
|
||||
|
|
|
|||
6
TODO.md
6
TODO.md
|
|
@ -25,9 +25,9 @@ MVP is functionally complete (core app + docs + tests).
|
|||
## 🎯 Active Tasks — v1.0 Recipe Import
|
||||
|
||||
### Phase 1: URL Import Foundation
|
||||
- [ ] Add backend import endpoint: `POST /api/import/url`
|
||||
- [ ] Implement Schema.org Recipe JSON-LD parser service
|
||||
- [ ] Normalize parsed recipe into internal Recipe draft format
|
||||
- [x] Add backend import endpoint: `POST /api/import/url`
|
||||
- [x] Implement Schema.org Recipe JSON-LD parser service
|
||||
- [x] Normalize parsed recipe into internal Recipe draft format
|
||||
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
|
||||
|
||||
### Phase 2: Import UI
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import express from 'express';
|
|||
import { getDatabase, saveDatabase } from './db/database.js';
|
||||
import { createRecipeRoutes } from './routes/recipes.js';
|
||||
import { createTagRoutes } from './routes/tags.js';
|
||||
import { createImportRoutes } from './routes/import.js';
|
||||
|
||||
const app = express();
|
||||
const port = 3000;
|
||||
|
|
@ -41,6 +42,7 @@ async function startServer() {
|
|||
// Mount API routes
|
||||
app.use('/api/recipes', createRecipeRoutes(db));
|
||||
app.use('/api/tags', createTagRoutes(db));
|
||||
app.use('/api/import', createImportRoutes());
|
||||
|
||||
// Save database periodically (every 5 seconds)
|
||||
setInterval(() => {
|
||||
|
|
@ -82,6 +84,8 @@ async function startServer() {
|
|||
console.log(` GET /api/tags/recipes/:id/tags - Get recipe tags`);
|
||||
console.log(` POST /api/tags/recipes/:id/tags - Assign tag`);
|
||||
console.log(` DELETE /api/tags/recipes/:id/tags/:id - Remove tag`);
|
||||
console.log(` Import:`);
|
||||
console.log(` POST /api/import/url - Import recipe foundation data from URL`);
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Failed to start server:', error);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
import { Router } from 'express';
|
||||
import { z } from 'zod';
|
||||
import { UrlImportService } from '../services/UrlImportService.js';
|
||||
import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js';
|
||||
|
||||
/**
 * Request-body contract for `POST /api/import/url`: an object whose `url`
 * property is a string that zod accepts as a URL.
 */
const importUrlSchema = z.object({ url: z.string().url('A valid URL is required') });
|
||||
|
||||
/**
 * Builds the router that is mounted under `/api/import`.
 *
 * Every response uses the same envelope: `{ success, data, error }`.
 *
 * @returns An Express router exposing `POST /url`.
 */
export function createImportRoutes(): Router {
  const router = Router();
  const urlImportService = new UrlImportService();
  const schemaOrgParser = new SchemaOrgRecipeParserService();

  /**
   * POST /api/import/url
   * Fetch an external recipe page and return the fetched data together with
   * a normalized draft recipe (`draft_recipe`), or `null` when no JSON-LD
   * block yields a recipe.
   */
  router.post('/url', async (req, res) => {
    // Single place that writes the failure envelope so all error paths agree.
    const fail = (status: number, error: unknown): void => {
      res.status(status).json({ success: false, data: null, error });
    };

    try {
      const { url } = importUrlSchema.parse(req.body);
      const result = await urlImportService.fetchFromUrl(url);

      // Typed from the parser itself instead of `any`, so the response shape
      // is checked by the compiler. The first block that parses wins.
      let draft: ReturnType<typeof schemaOrgParser.parseJsonLdBlock> = null;
      for (const block of result.json_ld_blocks) {
        draft = schemaOrgParser.parseJsonLdBlock(block);
        if (draft) break;
      }

      res.status(200).json({
        success: true,
        data: { ...result, draft_recipe: draft },
        error: null,
      });
    } catch (error) {
      // Invalid request body: report the zod issues.
      if (error instanceof z.ZodError) {
        fail(400, error.errors);
        return;
      }

      // Errors raised while fetching: a message containing "timed out" maps
      // to 504, every other Error maps to 400.
      if (error instanceof Error) {
        fail(error.message.includes('timed out') ? 504 : 400, error.message);
        return;
      }

      // Something that is not an Error instance was thrown.
      fail(500, 'Internal server error');
    }
  });

  return router;
}
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
import { z } from 'zod';
|
||||
import type { CreateRecipeInput } from '../types/recipe.js';
|
||||
|
||||
/** One instruction entry given as an object; only its `text` is read. */
interface SchemaOrgHowToStep {
  text?: string;
}

/** The subset of Schema.org Recipe properties validated by the parser. */
interface SchemaOrgRecipeCandidate {
  '@type'?: string | string[];
  name: string;
  description?: string | null;
  recipeIngredient?: string[];
  // Either one free-text string or a list whose entries are strings and/or step objects.
  recipeInstructions?: string | Array<string | SchemaOrgHowToStep>;
  url?: string;
  recipeYield?: string | number;
  prepTime?: string;
  cookTime?: string;
}

/**
 * Parses and normalizes Schema.org Recipe JSON-LD blocks.
 */
export class SchemaOrgRecipeParserService {
  /** `@type` values treated as a recipe: the compact term and its full schema.org IRIs. */
  private static readonly RECIPE_TYPES: ReadonlySet<string> = new Set([
    'Recipe',
    'http://schema.org/Recipe',
    'https://schema.org/Recipe',
  ]);

  /** Validation schema, built once instead of on every parse attempt. */
  private static readonly recipeSchema = z.object({
    '@type': z.union([z.string(), z.array(z.string())]).optional(),
    name: z.string().min(1),
    description: z.string().optional().nullable(),
    recipeIngredient: z.array(z.string()).optional(),
    recipeInstructions: z
      .union([
        z.string(),
        // Element-wise union so a list may mix plain strings and step objects.
        z.array(z.union([z.string(), z.object({ text: z.string().optional() })])),
      ])
      .optional(),
    url: z.string().optional(),
    recipeYield: z.union([z.string(), z.number()]).optional(),
    prepTime: z.string().optional(),
    cookTime: z.string().optional(),
  });

  /**
   * Extracts and normalizes a Recipe, if present, from a JSON-LD string.
   *
   * The document may be a single node, an array of nodes, or a node (or array
   * entry) that carries its nodes in an `@graph` array. Candidates are tried
   * in document order and the first valid recipe is returned.
   *
   * @param json Raw text content of one JSON-LD block.
   * @returns The normalized recipe, or `null` when the text is not valid JSON
   *          or contains no valid Recipe node.
   */
  parseJsonLdBlock(json: string): CreateRecipeInput | null {
    let parsedJson: unknown;
    try {
      parsedJson = JSON.parse(json);
    } catch {
      return null;
    }

    const roots: unknown[] = Array.isArray(parsedJson) ? parsedJson : [parsedJson];
    for (const root of roots) {
      // Try the node itself first, then anything it nests under `@graph`.
      for (const node of [root, ...this.graphNodesOf(root)]) {
        const parsedRecipe = this.tryParseRecipe(node);
        if (parsedRecipe) return parsedRecipe;
      }
    }

    return null;
  }

  /**
   * Internal: returns the entries of a node's `@graph` array, or an empty
   * list when the value is not an object or has no array-valued `@graph`.
   * Only one level is unwrapped; nested graphs are not followed.
   */
  private graphNodesOf(node: unknown): unknown[] {
    if (typeof node !== 'object' || node === null) return [];
    const graph = (node as Record<string, unknown>)['@graph'];
    return Array.isArray(graph) ? graph : [];
  }

  /**
   * Internal: attempts to extract Recipe data from an object if @type matches.
   * Returns null when validation fails, the type is not a recipe, or the name
   * is blank after trimming.
   */
  private tryParseRecipe(input: unknown): CreateRecipeInput | null {
    const parseResult = SchemaOrgRecipeParserService.recipeSchema.safeParse(input);
    if (!parseResult.success) return null;

    const recipe: SchemaOrgRecipeCandidate = parseResult.data;
    if (!this.isRecipeType(recipe['@type'])) return null;

    // `min(1)` lets whitespace-only names through; reject them here so a
    // draft never carries an empty title.
    const title = recipe.name.trim();
    if (title.length === 0) return null;

    return {
      title,
      description: this.normalizeOptionalText(recipe.description),
      ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []),
      instructions: this.normalizeInstructions(recipe.recipeInstructions),
      source_url: this.normalizeOptionalText(recipe.url),
    };
  }

  /** True when the `@type` value, or any entry of an `@type` list, names a Recipe. */
  private isRecipeType(type: string | string[] | undefined): boolean {
    if (type === undefined) return false;
    const types = typeof type === 'string' ? [type] : type;
    return types.some((entry) => SchemaOrgRecipeParserService.RECIPE_TYPES.has(entry));
  }

  /** Trims the value; missing, null, empty and blank values become `undefined`. */
  private normalizeOptionalText(value: string | null | undefined): string | undefined {
    if (!value) return undefined;
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  }

  /** Trims every entry and drops the ones that end up empty. */
  private normalizeTextList(values: string[]): string[] {
    return values
      .map((value) => value.trim())
      .filter((value) => value.length > 0);
  }

  /**
   * Flattens the supported instruction shapes into a list of trimmed,
   * non-empty strings. Step objects contribute their `text`; a step without
   * `text` contributes nothing.
   */
  private normalizeInstructions(
    instructions: SchemaOrgRecipeCandidate['recipeInstructions'],
  ): string[] {
    if (instructions === undefined) return [];

    if (typeof instructions === 'string') {
      return this.normalizeTextList([instructions]);
    }

    return this.normalizeTextList(
      instructions.map((step) => (typeof step === 'string' ? step : step.text ?? '')),
    );
  }
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
/** Raw material gathered from a fetched page. */
export interface UrlImportFetchResult {
  /** The URL that was requested, exactly as passed in. */
  source_url: string;
  /** The full response body. */
  html: string;
  /** Trimmed, non-empty contents of each `application/ld+json` script tag, in document order. */
  json_ld_blocks: string[];
}

/**
 * Foundation service for importing recipe content from public URLs.
 */
export class UrlImportService {
  private static readonly DEFAULT_TIMEOUT_MS = 10000;

  /** URL schemes that may be fetched. */
  private static readonly ALLOWED_PROTOCOLS: ReadonlySet<string> = new Set(['http:', 'https:']);

  /** Media types accepted as an HTML document; mirrors the `Accept` header sent below. */
  private static readonly HTML_MEDIA_TYPES: ReadonlySet<string> = new Set([
    'text/html',
    'application/xhtml+xml',
  ]);

  /**
   * Downloads the page at `url` and extracts its JSON-LD script blocks.
   *
   * @param url Absolute http(s) URL of the page to import.
   * @returns The requested URL, the page body and the extracted JSON-LD blocks.
   * @throws Error when the URL is not an absolute http(s) URL, the request
   *         fails or times out, the status is not 2xx, or the response is not
   *         an HTML document.
   */
  async fetchFromUrl(url: string): Promise<UrlImportFetchResult> {
    const html = await this.fetchHtml(url);

    return {
      source_url: url,
      html,
      json_ld_blocks: this.extractJsonLdBlocks(html),
    };
  }

  /**
   * Rejects anything that is not an absolute http(s) URL before a request is made.
   *
   * NOTE(review): this only restricts the scheme. Hosts that resolve to
   * loopback/private addresses, and redirects to them, are still followed, so
   * callers exposing this to untrusted users need an additional SSRF guard.
   */
  private static assertHttpUrl(url: string): void {
    let protocol: string;
    try {
      protocol = new URL(url).protocol;
    } catch {
      throw new Error('A valid absolute URL is required');
    }

    if (!UrlImportService.ALLOWED_PROTOCOLS.has(protocol)) {
      throw new Error('Only http and https URLs can be imported');
    }
  }

  /** True when the Content-Type header names an accepted HTML media type (case-insensitive, parameters ignored). */
  private static isHtmlContentType(contentType: string | null): boolean {
    const mediaType = (contentType ?? '').split(';', 1)[0] ?? '';
    return UrlImportService.HTML_MEDIA_TYPES.has(mediaType.trim().toLowerCase());
  }

  /** Performs the GET request, aborting it after `DEFAULT_TIMEOUT_MS` milliseconds. */
  private async fetchHtml(url: string): Promise<string> {
    UrlImportService.assertHttpUrl(url);

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), UrlImportService.DEFAULT_TIMEOUT_MS);

    try {
      const response = await fetch(url, {
        method: 'GET',
        signal: controller.signal,
        headers: {
          'User-Agent': 'RecipeManagerBot/1.0 (+https://recipes.paje.ca)',
          Accept: 'text/html,application/xhtml+xml',
        },
      });

      if (!response.ok) {
        throw new Error(`Failed to fetch URL: HTTP ${response.status}`);
      }

      if (!UrlImportService.isHtmlContentType(response.headers.get('content-type'))) {
        throw new Error('URL did not return an HTML document');
      }

      // Awaited inside the try so the timer also covers reading the body.
      return await response.text();
    } catch (error) {
      if (error instanceof Error) {
        // An abort is reported with a stable message that callers match on.
        if (error.name === 'AbortError') {
          throw new Error('Import request timed out while fetching URL');
        }
        throw error;
      }

      throw new Error('Unknown error while fetching URL');
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Collects the trimmed contents of every `<script type="application/ld+json">`
   * element. Quoted and unquoted `type` values are matched, as are values with
   * media-type parameters. Empty scripts are skipped.
   */
  private extractJsonLdBlocks(html: string): string[] {
    // Created per call: a global regex keeps `lastIndex` state between exec() calls.
    const scriptTagPattern =
      /<script\b[^>]*\btype\s*=\s*["']?application\/ld\+json\b[^>]*>([\s\S]*?)<\/script\s*>/gi;
    const blocks: string[] = [];

    for (let match = scriptTagPattern.exec(html); match !== null; match = scriptTagPattern.exec(html)) {
      const content = match[1]?.trim();
      if (content) {
        blocks.push(content);
      }
    }

    return blocks;
  }
}
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import express from 'express';
|
||||
import request from 'supertest';
|
||||
import { createImportRoutes } from '../routes/import.js';
|
||||
|
||||
/**
 * Endpoint tests for `POST /api/import/url`.
 *
 * The global `fetch` is stubbed in every test, so no network access happens.
 * Covers request validation, a valid recipe page, normalization, a non-recipe
 * page, malformed JSON-LD, non-HTML responses, upstream HTTP errors and
 * timeouts.
 */
describe('Import API', () => {
  const HTML_CONTENT_TYPE = 'text/html; charset=utf-8';
  let app: express.Application;

  /** Wraps a raw JSON-LD payload in a minimal HTML page. */
  const pageWithJsonLd = (jsonLd: string): string => `
    <html>
      <head>
        <script type="application/ld+json">${jsonLd}</script>
      </head>
      <body>Hello</body>
    </html>
  `;

  /** Stubs global fetch with a response carrying the given body, status and content type. */
  const stubFetch = (
    body: string,
    options: { status?: number; contentType?: string } = {},
  ): void => {
    const status = options.status ?? 200;
    vi.spyOn(globalThis, 'fetch').mockResolvedValue({
      ok: status >= 200 && status < 300,
      status,
      headers: new Headers({ 'content-type': options.contentType ?? HTML_CONTENT_TYPE }),
      text: async () => body,
    } as Response);
  };

  /** Issues the import request for `url` against the test app. */
  const postImport = (url: string) => request(app).post('/api/import/url').send({ url });

  beforeEach(() => {
    app = express();
    app.use(express.json());
    app.use('/api/import', createImportRoutes());
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('should validate URL request payload', async () => {
    const response = await postImport('not-a-url').expect(400);

    expect(response.body.success).toBe(false);
    expect(response.body.error).toBeDefined();
  });

  it('should return imported foundation data and normalized draft for valid Schema.org recipe', async () => {
    const jsonLd =
      '{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}';
    stubFetch(pageWithJsonLd(jsonLd));

    const response = await postImport('https://example.com/recipe').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.source_url).toBe('https://example.com/recipe');
    expect(response.body.data.json_ld_blocks).toEqual([jsonLd]);
    expect(response.body.data.draft_recipe).toMatchObject({
      title: 'Pancakes',
      ingredients: ['Flour', 'Eggs'],
      instructions: ['Mix', 'Cook'],
    });
  });

  it('should normalize whitespace and HowToStep instructions into draft format', async () => {
    stubFetch(
      pageWithJsonLd(
        '{"@type":["Thing","Recipe"],"name":" Tomato Soup ","description":" Cozy weeknight soup. ","recipeIngredient":[" Tomato ",""," Salt "],"recipeInstructions":[{"text":" Simmer tomatoes. "},{"text":" Blend and serve. "}],"url":" https://example.com/soup "}',
      ),
    );

    const response = await postImport('https://example.com/soup-page').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.draft_recipe).toEqual({
      title: 'Tomato Soup',
      description: 'Cozy weeknight soup.',
      ingredients: ['Tomato', 'Salt'],
      instructions: ['Simmer tomatoes.', 'Blend and serve.'],
      source_url: 'https://example.com/soup',
    });
  });

  it('should return draft_recipe as null for non-recipe JSON-LD', async () => {
    stubFetch(pageWithJsonLd('{"@type":"Event","name":"Not a Recipe"}'));

    const response = await postImport('https://example.com/event').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.draft_recipe).toBeNull();
  });

  it('should return draft_recipe as null for malformed JSON-LD', async () => {
    // Truncated JSON: the block is still extracted but cannot be parsed.
    const malformed = '{"@type":"Recipe","name":';
    stubFetch(pageWithJsonLd(malformed));

    const response = await postImport('https://example.com/broken').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.json_ld_blocks).toEqual([malformed]);
    expect(response.body.data.draft_recipe).toBeNull();
  });

  it('should return an error for non-HTML responses', async () => {
    stubFetch('{"ok":true}', { contentType: 'application/json' });

    const response = await postImport('https://example.com/data.json').expect(400);

    expect(response.body.success).toBe(false);
    expect(response.body.error).toContain('HTML');
  });

  it('should return an error when the remote server responds with a failure status', async () => {
    stubFetch('Not found', { status: 404 });

    const response = await postImport('https://example.com/missing').expect(400);

    expect(response.body.success).toBe(false);
    expect(response.body.data).toBeNull();
    expect(response.body.error).toContain('HTTP 404');
  });

  it('should return 504 when fetching the URL is aborted by the timeout', async () => {
    // The service recognizes an aborted request by the error name.
    const abortError = Object.assign(new Error('The operation was aborted'), { name: 'AbortError' });
    vi.spyOn(globalThis, 'fetch').mockRejectedValue(abortError);

    const response = await postImport('https://example.com/slow').expect(504);

    expect(response.body.success).toBe(false);
    expect(response.body.error).toContain('timed out');
  });
});
|
||||
Loading…
Reference in New Issue