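// Source listing metadata: 48 lines, 1.7 KiB, JavaScript.
// Direct parser test that runs without the server: it applies the extraction
// regexes straight to an exported recipes HTML file and logs what they find.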
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Export file analysed by default; a different file can be supplied as the
// first command-line argument (e.g. `node <script> other/recipes.html`).
const DEFAULT_HTML_PATH = 'data/exports/Copy_Me_That_HTML_20260328_58775_z1p5lpjsgz/recipes.html';
const htmlPath = process.argv[2] || DEFAULT_HTML_PATH;

// A recipe block runs from its opening <div class="recipe"> up to (but not
// including) the next such opening tag, the closing </body>, or end of input.
const RECIPE_BLOCK_RE = /<div\s+class\s*=\s*["']recipe["'][^>]*>([\s\S]*?)(?=<div\s+class\s*=\s*["']recipe["']|<\/body>|$)/gi;
// Title lives in <div id="name">…</div>; ingredients and instructions are
// <li> items tagged with their own class.
const TITLE_RE = /<div\s+id\s*=\s*["']name["'][^>]*>([\s\S]*?)<\/div>/i;
const INGREDIENT_RE = /<li\s+class\s*=\s*["']recipeIngredient["'][^>]*>([\s\S]*?)<\/li>/gi;
const INSTRUCTION_RE = /<li\s+class\s*=\s*["']instruction["'][^>]*>([\s\S]*?)<\/li>/gi;

/**
 * Collect one capture group from every match of `pattern` in `text`.
 *
 * The scan runs on a private copy of the regex, so the caller's `lastIndex`
 * is never read or modified and the `g` flag is added when missing.
 *
 * @param {string} text - Text to scan.
 * @param {RegExp} pattern - Pattern to apply repeatedly.
 * @param {number} [group=1] - Capture group to collect (0 = whole match).
 * @returns {string[]} Captured values in document order.
 */
function collectMatches(text, pattern, group = 1) {
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  const scanner = new RegExp(pattern.source, flags);
  const found = [];
  let hit;
  while ((hit = scanner.exec(text)) !== null) {
    found.push(hit[group]);
    // A zero-length match does not advance lastIndex; step past it manually
    // so the loop always terminates.
    if (hit[0] === '') {
      scanner.lastIndex += 1;
    }
  }
  return found;
}

/**
 * Split an exported HTML document into raw recipe blocks.
 *
 * @param {string} html - Full document text.
 * @returns {string[]} Each block's complete matched text, opening tag included.
 */
function splitRecipeBlocks(html) {
  return collectMatches(html, RECIPE_BLOCK_RE, 0);
}

/**
 * Extract the trimmed title from a recipe block.
 *
 * @param {string} block - Raw recipe block HTML.
 * @returns {string|null} The title, or null when no title element is present.
 */
function extractTitle(block) {
  const titleMatch = TITLE_RE.exec(block);
  return titleMatch ? titleMatch[1].trim() : null;
}

/**
 * Extract the trimmed inner HTML of every list item matched by `itemPattern`.
 *
 * @param {string} block - Raw recipe block HTML.
 * @param {RegExp} itemPattern - Pattern whose first group is the item body.
 * @returns {string[]} Item bodies in document order.
 */
function extractListItems(block, itemPattern) {
  return collectMatches(block, itemPattern).map((item) => item.trim());
}

// Read raw bytes so the reported size really is a byte count; the length of
// the decoded string counts UTF-16 code units instead.
let htmlBuffer = null;
try {
  htmlBuffer = fs.readFileSync(htmlPath);
} catch (err) {
  // Only a missing file gets the friendly message; anything else is unexpected.
  if (!err || err.code !== 'ENOENT') {
    throw err;
  }
  console.error(`HTML export not found: ${htmlPath}`);
  process.exitCode = 1;
}

if (htmlBuffer !== null) {
  const html = htmlBuffer.toString('utf-8');
  console.log('HTML file size:', htmlBuffer.length, 'bytes');

  // Test basic extraction
  const matches = splitRecipeBlocks(html);
  console.log(`Found ${matches.length} recipe blocks\n`);

  if (matches.length > 0) {
    const firstRecipe = matches[0];
    console.log('First recipe block length:', firstRecipe.length);

    // Test title extraction
    const title = extractTitle(firstRecipe);
    console.log('Title match:', title !== null ? title : 'NO MATCH');

    // Test ingredients
    const ingredients = extractListItems(firstRecipe, INGREDIENT_RE);
    console.log('Ingredients found:', ingredients.length);
    if (ingredients.length > 0) console.log('First ingredient:', ingredients[0]);

    // Test instructions
    const instructions = extractListItems(firstRecipe, INSTRUCTION_RE);
    console.log('Instructions found:', instructions.length);
    if (instructions.length > 0) console.log('First instruction:', instructions[0].substring(0, 100));
  }
}
|