Compare commits

...

4 Commits

Author SHA1 Message Date
Paul Huliganga 37b17f7284 chore(harness): add mandatory pre-flight guard for working directory
- Require absolute-path existence check for AGENT_INSTRUCTIONS.md and TODO.md
- Fail fast with STUCK if harness files missing
- Prevent repeated bad-working-dir iterations
2026-03-24 21:50:33 -04:00
Paul Huliganga 3987083e46 feat(import): normalize parsed recipe into internal draft format 2026-03-24 21:47:27 -04:00
Paul Huliganga 64a1229ff0 feat(import): implement Schema.org Recipe JSON-LD parser and draft normalization for import endpoint 2026-03-24 20:37:46 -04:00
Paul Huliganga 146dc3325a feat(backend): add URL import endpoint foundation 2026-03-24 20:25:35 -04:00
7 changed files with 417 additions and 3 deletions

View File

@ -9,6 +9,15 @@
### 1. Orientation First ### 1. Orientation First
Every time you wake up (new session): Every time you wake up (new session):
**PRE-FLIGHT GUARD (mandatory):**
- Verify these files exist at absolute path `/home/paulh/.openclaw/workspace/projects/recipe-manager/`:
- `AGENT_INSTRUCTIONS.md`
- `TODO.md`
- If either is missing, stop immediately and report:
- `STUCK: bad working dir or missing harness files at /home/paulh/.openclaw/workspace/projects/recipe-manager`
Then continue orientation:
1. Read `PROJECT.md` — What are we building? 1. Read `PROJECT.md` — What are we building?
2. Read `ARCHITECTURE.md` — How are we building it? 2. Read `ARCHITECTURE.md` — How are we building it?
3. Read `ROADMAP.md` — What's next? 3. Read `ROADMAP.md` — What's next?

View File

@ -25,9 +25,9 @@ MVP is functionally complete (core app + docs + tests).
## 🎯 Active Tasks — v1.0 Recipe Import ## 🎯 Active Tasks — v1.0 Recipe Import
### Phase 1: URL Import Foundation ### Phase 1: URL Import Foundation
- [ ] Add backend import endpoint: `POST /api/import/url` - [x] Add backend import endpoint: `POST /api/import/url`
- [ ] Implement Schema.org Recipe JSON-LD parser service - [x] Implement Schema.org Recipe JSON-LD parser service
- [ ] Normalize parsed recipe into internal Recipe draft format - [x] Normalize parsed recipe into internal Recipe draft format
- [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD) - [ ] Add import endpoint tests (valid recipe page, non-recipe page, malformed JSON-LD)
### Phase 2: Import UI ### Phase 2: Import UI

View File

@ -2,6 +2,7 @@ import express from 'express';
import { getDatabase, saveDatabase } from './db/database.js'; import { getDatabase, saveDatabase } from './db/database.js';
import { createRecipeRoutes } from './routes/recipes.js'; import { createRecipeRoutes } from './routes/recipes.js';
import { createTagRoutes } from './routes/tags.js'; import { createTagRoutes } from './routes/tags.js';
import { createImportRoutes } from './routes/import.js';
const app = express(); const app = express();
const port = 3000; const port = 3000;
@ -41,6 +42,7 @@ async function startServer() {
// Mount API routes // Mount API routes
app.use('/api/recipes', createRecipeRoutes(db)); app.use('/api/recipes', createRecipeRoutes(db));
app.use('/api/tags', createTagRoutes(db)); app.use('/api/tags', createTagRoutes(db));
app.use('/api/import', createImportRoutes());
// Save database periodically (every 5 seconds) // Save database periodically (every 5 seconds)
setInterval(() => { setInterval(() => {
@ -82,6 +84,8 @@ async function startServer() {
console.log(` GET /api/tags/recipes/:id/tags - Get recipe tags`); console.log(` GET /api/tags/recipes/:id/tags - Get recipe tags`);
console.log(` POST /api/tags/recipes/:id/tags - Assign tag`); console.log(` POST /api/tags/recipes/:id/tags - Assign tag`);
console.log(` DELETE /api/tags/recipes/:id/tags/:id - Remove tag`); console.log(` DELETE /api/tags/recipes/:id/tags/:id - Remove tag`);
console.log(` Import:`);
console.log(` POST /api/import/url - Import recipe foundation data from URL`);
}); });
} catch (error) { } catch (error) {
console.error('Failed to start server:', error); console.error('Failed to start server:', error);

View File

@ -0,0 +1,65 @@
import { Router } from 'express';
import { z } from 'zod';
import { UrlImportService } from '../services/UrlImportService.js';
import { SchemaOrgRecipeParserService } from '../services/SchemaOrgRecipeParserService.js';
/**
 * Validation schema for the import request body: an object whose `url`
 * property must be a string that passes zod's URL check.
 */
const importUrlSchema = z.object({
  url: z.string().url({ message: 'A valid URL is required' }),
});
/**
 * Builds the router that serves recipe imports.
 *
 * The router exposes a single `POST /url` handler. It validates the request
 * body, downloads the page through `UrlImportService`, and then feeds each
 * extracted JSON-LD block to `SchemaOrgRecipeParserService` until one of them
 * yields a recipe draft.
 *
 * @returns A router with the import endpoint registered.
 */
export function createImportRoutes(): Router {
  const router = Router();
  const urlImportService = new UrlImportService();
  const schemaOrgParser = new SchemaOrgRecipeParserService();

  /**
   * POST /api/import/url
   * Fetch an external recipe page and return imported, normalized Recipe (if found)
   *
   * Responses:
   * - 200: fetch succeeded; `data.draft_recipe` is the first draft found, or
   *   `null` when no JSON-LD block parsed as a recipe.
   * - 400: invalid request body, or any other `Error` raised while importing.
   * - 504: the raised error message contains "timed out".
   * - 500: something that is not an `Error` was thrown.
   */
  router.post('/url', async (req, res) => {
    try {
      const { url } = importUrlSchema.parse(req.body);
      const result = await urlImportService.fetchFromUrl(url);

      // Typed from the parser's own signature so the draft is never `any`
      // and stays in sync if the parser's return type changes.
      let draft: ReturnType<SchemaOrgRecipeParserService['parseJsonLdBlock']> = null;
      for (const block of result.json_ld_blocks) {
        draft = schemaOrgParser.parseJsonLdBlock(block);
        if (draft) break; // first block that yields a recipe wins
      }

      res.status(200).json({
        success: true,
        data: { ...result, draft_recipe: draft },
        error: null,
      });
    } catch (error) {
      if (error instanceof z.ZodError) {
        res.status(400).json({
          success: false,
          data: null,
          error: error.errors,
        });
        return;
      }

      if (error instanceof Error) {
        // The status is derived from the error message text: only messages
        // containing "timed out" are reported as a gateway timeout.
        const status = error.message.includes('timed out') ? 504 : 400;
        res.status(status).json({
          success: false,
          data: null,
          error: error.message,
        });
        return;
      }

      res.status(500).json({
        success: false,
        data: null,
        error: 'Internal server error',
      });
    }
  });

  return router;
}

View File

@ -0,0 +1,123 @@
import { z } from 'zod';
import type { CreateRecipeInput } from '../types/recipe.js';
/**
 * Object form of a single `recipeInstructions` entry.
 * Only the `text` property is modelled; it may be absent.
 */
interface SchemaOrgHowToStep {
text?: string;
}
/**
 * Subset of a JSON-LD node that the recipe parser looks at.
 * Every property is optional here; the parser decides which ones are required.
 */
interface SchemaOrgRecipeCandidate {
// Either a single type name or a list of type names.
'@type'?: string | string[];
name?: string;
description?: string | null;
recipeIngredient?: string[];
// A single string, a list of strings, or a list of step objects.
recipeInstructions?: string | string[] | SchemaOrgHowToStep[];
url?: string;
recipeYield?: string | number;
prepTime?: string;
cookTime?: string;
}
/**
* Parses and normalizes Schema.org Recipe JSON-LD blocks.
*/
/**
 * Parses and normalizes Schema.org Recipe JSON-LD blocks.
 */
export class SchemaOrgRecipeParserService {
  /**
   * Shape a JSON-LD node must satisfy to be considered as a recipe.
   * Built once instead of on every parse call.
   *
   * `recipeInstructions` accepts a string, a list of strings, or a list of
   * objects with an optional `text`; an array mixing strings and objects
   * matches none of the alternatives and fails validation.
   * `recipeYield`, `prepTime` and `cookTime` are validated but are not copied
   * into the draft.
   */
  private static readonly RECIPE_SCHEMA = z.object({
    '@type': z.union([z.string(), z.array(z.string())]).optional(),
    name: z.string().min(1),
    description: z.string().optional().nullable(),
    recipeIngredient: z.array(z.string()).optional(),
    recipeInstructions: z
      .union([
        z.array(z.string()),
        z.string(),
        z.array(z.object({ text: z.string().optional() })),
      ])
      .optional(),
    url: z.string().optional(),
    recipeYield: z.union([z.string(), z.number()]).optional(),
    prepTime: z.string().optional(),
    cookTime: z.string().optional(),
  });

  /**
   * Extracts and normalizes a Recipe, if present, from a JSON-LD string.
   *
   * The payload may be a single node, an array of nodes, or a node carrying a
   * `@graph` array of nodes; candidates are tried in document order and the
   * first one that parses as a Recipe is returned.
   *
   * @param json Raw text of one JSON-LD block.
   * @returns The normalized draft, or `null` when the text is not valid JSON
   *   or contains no valid Recipe.
   */
  parseJsonLdBlock(json: string): CreateRecipeInput | null {
    let parsedJson: unknown;
    try {
      parsedJson = JSON.parse(json);
    } catch {
      // Malformed JSON-LD is treated as "no recipe here", not as a failure.
      return null;
    }

    const roots: unknown[] = Array.isArray(parsedJson) ? parsedJson : [parsedJson];
    for (const root of roots) {
      for (const candidate of this.expandGraph(root)) {
        const parsedRecipe = this.tryParseRecipe(candidate);
        if (parsedRecipe) return parsedRecipe;
      }
    }

    return null;
  }

  /**
   * Internal: lists the nodes to inspect for one root value: the value itself,
   * followed by the entries of its `@graph` array when it has one.
   */
  private expandGraph(node: unknown): unknown[] {
    if (typeof node !== 'object' || node === null) return [node];

    const graph = (node as { '@graph'?: unknown })['@graph'];
    return Array.isArray(graph) ? [node, ...graph] : [node];
  }

  /**
   * Internal: attempts to extract Recipe data from an object if @type matches.
   * Returns null when the value fails schema validation, is not typed as a
   * Recipe, or has a name that is empty after trimming.
   */
  private tryParseRecipe(input: unknown): CreateRecipeInput | null {
    const parseResult = SchemaOrgRecipeParserService.RECIPE_SCHEMA.safeParse(input);
    if (!parseResult.success) return null;

    const recipe = parseResult.data;
    if (!this.isRecipeType(recipe['@type'])) {
      return null;
    }

    // `min(1)` lets whitespace-only names through; reject them after trimming
    // so a draft never carries an empty title.
    const title = recipe.name.trim();
    if (title.length === 0) {
      return null;
    }

    return {
      title,
      description: this.normalizeOptionalText(recipe.description),
      ingredients: this.normalizeTextList(recipe.recipeIngredient ?? []),
      instructions: this.normalizeInstructions(recipe.recipeInstructions),
      source_url: this.normalizeOptionalText(recipe.url),
    };
  }

  /** True when `@type` is exactly `'Recipe'` or is a list containing it. */
  private isRecipeType(type: SchemaOrgRecipeCandidate['@type']): boolean {
    if (!type) return false;
    if (typeof type === 'string') return type === 'Recipe';
    return type.includes('Recipe');
  }

  /** Trims the value; missing, null, empty and whitespace-only all become `undefined`. */
  private normalizeOptionalText(value: string | null | undefined): string | undefined {
    if (!value) return undefined;
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  }

  /** Trims every entry and drops the ones that end up empty. */
  private normalizeTextList(values: readonly string[]): string[] {
    return values
      .map((value) => value.trim())
      .filter((value) => value.length > 0);
  }

  /**
   * Flattens the accepted instruction shapes into a list of trimmed,
   * non-empty strings. Step objects contribute their `text`; steps without
   * text are dropped.
   */
  private normalizeInstructions(
    instructions: SchemaOrgRecipeCandidate['recipeInstructions'],
  ): string[] {
    if (!instructions) return [];

    if (typeof instructions === 'string') {
      return this.normalizeTextList([instructions]);
    }

    const steps: ReadonlyArray<string | SchemaOrgHowToStep> = instructions;
    return this.normalizeTextList(
      steps.map((step) => (typeof step === 'string' ? step : (step.text ?? ''))),
    );
  }
}

View File

@ -0,0 +1,78 @@
/**
 * Result of downloading a page for import.
 */
export interface UrlImportFetchResult {
// The URL that was passed in to be fetched.
source_url: string;
// Full response body text.
html: string;
// Trimmed contents of each non-empty `application/ld+json` script tag found in `html`.
json_ld_blocks: string[];
}
/**
* Foundation service for importing recipe content from public URLs.
*/
/**
 * Foundation service for importing recipe content from public URLs.
 */
export class UrlImportService {
  private static readonly DEFAULT_TIMEOUT_MS = 10000;

  /** URL schemes this service is willing to fetch. */
  private static readonly ALLOWED_PROTOCOLS: readonly string[] = ['http:', 'https:'];

  /** Media types accepted as an HTML document; mirrors the `Accept` header sent. */
  private static readonly HTML_MEDIA_TYPES: readonly string[] = [
    'text/html',
    'application/xhtml+xml',
  ];

  /** Matches `<script type="application/ld+json">…</script>` and captures the content. */
  private static readonly JSON_LD_SCRIPT_PATTERN =
    /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;

  /**
   * Downloads the page at `url` and collects its JSON-LD script contents.
   *
   * @param url Absolute http(s) URL of the page to import.
   * @returns The requested URL, the page body, and the extracted JSON-LD blocks.
   * @throws Error when the URL is not http(s), the request times out, the
   *   response status is not OK, or the response is not an HTML document.
   */
  async fetchFromUrl(url: string): Promise<UrlImportFetchResult> {
    const html = await this.fetchHtml(url);
    const jsonLdBlocks = this.extractJsonLdBlocks(html);

    return {
      source_url: url,
      html,
      json_ld_blocks: jsonLdBlocks,
    };
  }

  /**
   * Rejects anything that is not an absolute http/https URL so that other
   * schemes (for example `data:`) never reach `fetch`.
   */
  private assertHttpUrl(url: string): void {
    let protocol: string;
    try {
      protocol = new URL(url).protocol;
    } catch {
      throw new Error('A valid absolute URL is required');
    }

    if (!UrlImportService.ALLOWED_PROTOCOLS.includes(protocol)) {
      throw new Error('Only http and https URLs can be imported');
    }
  }

  /**
   * Performs the GET request and returns the body text. The request is
   * aborted once the default timeout elapses; the timer stays armed while the
   * body is being read and is cleared in every exit path.
   */
  private async fetchHtml(url: string): Promise<string> {
    this.assertHttpUrl(url);

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), UrlImportService.DEFAULT_TIMEOUT_MS);

    try {
      const response = await fetch(url, {
        method: 'GET',
        signal: controller.signal,
        headers: {
          'User-Agent': 'RecipeManagerBot/1.0 (+https://recipes.paje.ca)',
          Accept: 'text/html,application/xhtml+xml',
        },
      });

      if (!response.ok) {
        throw new Error(`Failed to fetch URL: HTTP ${response.status}`);
      }

      // Compare case-insensitively and accept both media types we asked for.
      const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
      const isHtml = UrlImportService.HTML_MEDIA_TYPES.some((mediaType) =>
        contentType.includes(mediaType),
      );
      if (!isHtml) {
        throw new Error('URL did not return an HTML document');
      }

      return await response.text();
    } catch (error) {
      // An abort is reported with a dedicated message so callers can tell a
      // timeout apart from other failures.
      if (error instanceof Error && error.name === 'AbortError') {
        throw new Error('Import request timed out while fetching URL');
      }

      if (error instanceof Error) {
        throw error;
      }

      throw new Error('Unknown error while fetching URL');
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Returns the trimmed content of every JSON-LD script tag in `html`, in
   * document order, skipping tags whose content is empty after trimming.
   */
  private extractJsonLdBlocks(html: string): string[] {
    const blocks: string[] = [];

    for (const match of html.matchAll(UrlImportService.JSON_LD_SCRIPT_PATTERN)) {
      const content = match[1]?.trim();
      if (content) {
        blocks.push(content);
      }
    }

    return blocks;
  }
}

View File

@ -0,0 +1,135 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import express from 'express';
import request from 'supertest';
import { createImportRoutes } from '../routes/import.js';
/**
 * Endpoint tests for `POST /api/import/url`.
 * `fetch` is stubbed in every test that reaches the download step, so no
 * network access happens.
 */
describe('Import API', () => {
  let app: express.Application;

  const HTML_CONTENT_TYPE = 'text/html; charset=utf-8';

  /** Wraps a JSON-LD payload in a minimal HTML page. */
  const pageWithJsonLd = (jsonLd: string): string => `
    <html>
      <head>
        <script type="application/ld+json">${jsonLd}</script>
      </head>
      <body>Hello</body>
    </html>
  `;

  /** Makes the global `fetch` resolve with a 200 response carrying `body`. */
  const stubFetchResponse = (body: string, contentType: string = HTML_CONTENT_TYPE): void => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValue({
      ok: true,
      status: 200,
      headers: new Headers({ 'content-type': contentType }),
      text: async () => body,
    } as Response);
  };

  /** Sends an import request for `url` to the app under test. */
  const postImport = (url: string) => request(app).post('/api/import/url').send({ url });

  beforeEach(() => {
    app = express();
    app.use(express.json());
    app.use('/api/import', createImportRoutes());
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('should validate URL request payload', async () => {
    const response = await postImport('not-a-url').expect(400);

    expect(response.body.success).toBe(false);
    expect(response.body.error).toBeDefined();
  });

  it('should return imported foundation data and normalized draft for valid Schema.org recipe', async () => {
    const jsonLd =
      '{"@type":"Recipe","name":"Pancakes","recipeIngredient":["Flour","Eggs"],"recipeInstructions":["Mix","Cook"]}';
    stubFetchResponse(pageWithJsonLd(jsonLd));

    const response = await postImport('https://example.com/recipe').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.source_url).toBe('https://example.com/recipe');
    expect(response.body.data.json_ld_blocks).toEqual([jsonLd]);
    expect(response.body.data.draft_recipe).toMatchObject({
      title: 'Pancakes',
      ingredients: ['Flour', 'Eggs'],
      instructions: ['Mix', 'Cook'],
    });
  });

  it('should normalize whitespace and HowToStep instructions into draft format', async () => {
    const jsonLd =
      '{"@type":["Thing","Recipe"],"name":" Tomato Soup ","description":" Cozy weeknight soup. ","recipeIngredient":[" Tomato ",""," Salt "],"recipeInstructions":[{"text":" Simmer tomatoes. "},{"text":" Blend and serve. "}],"url":" https://example.com/soup "}';
    stubFetchResponse(pageWithJsonLd(jsonLd));

    const response = await postImport('https://example.com/soup-page').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.draft_recipe).toEqual({
      title: 'Tomato Soup',
      description: 'Cozy weeknight soup.',
      ingredients: ['Tomato', 'Salt'],
      instructions: ['Simmer tomatoes.', 'Blend and serve.'],
      source_url: 'https://example.com/soup',
    });
  });

  it('should return draft_recipe as null for non-recipe JSON-LD', async () => {
    stubFetchResponse(pageWithJsonLd('{"@type":"Event","name":"Not a Recipe"}'));

    const response = await postImport('https://example.com/event').expect(200);

    expect(response.body.success).toBe(true);
    expect(response.body.data.draft_recipe).toBeNull();
  });

  it('should return draft_recipe as null for malformed JSON-LD', async () => {
    const malformed = '{"@type":"Recipe","name":"Broken",';
    stubFetchResponse(pageWithJsonLd(malformed));

    const response = await postImport('https://example.com/broken').expect(200);

    expect(response.body.success).toBe(true);
    // The raw block is still reported even though it cannot be parsed.
    expect(response.body.data.json_ld_blocks).toEqual([malformed]);
    expect(response.body.data.draft_recipe).toBeNull();
  });

  it('should return an error for non-HTML responses', async () => {
    stubFetchResponse('{"ok":true}', 'application/json');

    const response = await postImport('https://example.com/data.json').expect(400);

    expect(response.body.success).toBe(false);
    expect(response.body.error).toContain('HTML');
  });

  it('should respond with 504 when the upstream request is aborted', async () => {
    const abortError = new Error('The operation was aborted');
    abortError.name = 'AbortError';
    vi.spyOn(globalThis, 'fetch').mockRejectedValue(abortError);

    const response = await postImport('https://example.com/slow').expect(504);

    expect(response.body.success).toBe(false);
    expect(response.body.error).toContain('timed out');
  });
});