thingamablog-v2/backend/hsqldbParser.js

80 lines
2.5 KiB
JavaScript

const fs = require('fs');
const path = require('path');
/**
 * Splits the parenthesised body of a CREATE TABLE statement into its
 * top-level, comma-separated definitions.
 *
 * Nested parentheses (e.g. `VARCHAR(255)`, `DECIMAL(10,2)`,
 * `IDENTITY(START WITH 0)`) and quoted text (`'...'` or `"..."`) are kept
 * intact, so commas and parentheses inside them never end a definition.
 *
 * @param {string} text - Full script text.
 * @param {number} start - Index of the first character after the opening `(`.
 * @returns {string[] | null} The raw definitions, or `null` when the matching
 *   closing parenthesis is never found.
 */
function splitTableDefinitions(text, start) {
  const definitions = [];
  let current = '';
  let depth = 0;
  let quote = null;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (quote !== null) {
      // Inside quoted text: only the matching quote character ends it. A
      // doubled quote ('' or "") simply closes and immediately reopens.
      if (ch === quote) quote = null;
      current += ch;
    } else if (ch === "'" || ch === '"') {
      quote = ch;
      current += ch;
    } else if (ch === '(') {
      depth += 1;
      current += ch;
    } else if (ch === ')') {
      if (depth === 0) {
        // This is the parenthesis that closes the table definition itself.
        definitions.push(current);
        return definitions;
      }
      depth -= 1;
      current += ch;
    } else if (ch === ',' && depth === 0) {
      definitions.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  return null;
}

/**
 * Reads the HSQLDB files in `dbDir` and returns the blog entries found in the
 * first `ENTRY_TABLE_<n>` cached table declared in `database.script`.
 *
 * Column names come from the CREATE CACHED TABLE statement; row values come
 * from `extractEntriesFromBinary`, which scans `database.data` heuristically.
 * Each returned object maps column name -> the value at the same position in
 * the extracted row. Rows with fewer values than columns are dropped.
 *
 * @param {string} dbDir - Directory containing `database.script` and
 *   `database.data`.
 * @returns {Object[]} One plain object per extracted row, keyed by column name.
 * @throws {Error} If either file is missing, if no ENTRY_TABLE definition is
 *   found, or if the table has no `SET TABLE ... INDEX'...'` line.
 */
function parseHSQLDB(dbDir) {
  const scriptPath = path.join(dbDir, 'database.script');
  const dataPath = path.join(dbDir, 'database.data');
  if (!fs.existsSync(scriptPath) || !fs.existsSync(dataPath)) {
    throw new Error('Missing HSQLDB files (database.script or database.data)');
  }
  const dataBuffer = fs.readFileSync(dataPath);
  const script = fs.readFileSync(scriptPath, 'utf-8');

  // Locate the table header only; the column list is then scanned with
  // balanced parentheses. A lazy `(.*?)\)` regex would stop at the first `)`
  // and truncate the list at the first column type such as VARCHAR(255).
  const tableMatch = script.match(/CREATE CACHED TABLE (ENTRY_TABLE_\d+)\(/);
  const definitions = tableMatch
    ? splitTableDefinitions(script, tableMatch.index + tableMatch[0].length)
    : null;
  if (!definitions) throw new Error('Could not find ENTRY_TABLE in script');
  const tableName = tableMatch[1];

  // The column name is the first token of each definition. Table-level
  // constraint clauses (CONSTRAINT ..., PRIMARY KEY(...), UNIQUE(...),
  // FOREIGN KEY(...), CHECK(...)) start with a reserved word and are skipped.
  const tableConstraintKeyword = /^(?:CONSTRAINT|PRIMARY|UNIQUE|FOREIGN|CHECK)$/i;
  const columns = definitions
    .map((definition) => definition.trim().split(/[\s(]+/)[0])
    .filter((name) => name !== '' && !tableConstraintKeyword.test(name));

  // Parse INDEX line (note: no space between INDEX and ').
  // tableName only contains [A-Z_0-9], so it is safe to embed in a RegExp.
  const indexMatch = script.match(
    new RegExp(`SET TABLE ${tableName} INDEX'(.*?)'`)
  );
  if (!indexMatch) throw new Error('Could not find INDEX for ENTRY_TABLE');
  // NOTE(review): the first two numbers are treated as a page index and a row
  // count — confirm against the HSQLDB script format.
  const [pageIndex, rowCount] = indexMatch[1].split(' ').map(Number);

  // HSQLDB binary format parsing is delegated to a simplified, heuristic
  // extractor; see extractEntriesFromBinary.
  const entries = [];
  for (const raw of extractEntriesFromBinary(dataBuffer, pageIndex, rowCount)) {
    if (raw && raw.length >= columns.length) {
      const entry = {};
      columns.forEach((col, idx) => {
        entry[col] = raw[idx];
      });
      entries.push(entry);
    }
  }
  return entries;
}
/**
 * Heuristically pulls rows out of a raw HSQLDB data file.
 *
 * This does not decode the real HSQLDB binary layout. The whole buffer is
 * decoded as UTF-8 text, split on NUL characters, and every non-blank
 * (trimmed) piece is treated as one field. Fields are then chunked, in order,
 * into rows of eight; a trailing incomplete chunk is discarded.
 *
 * @param {Uint8Array} buffer - Raw contents of the data file.
 * @param {number} pageIndex - Accepted for interface compatibility; not used.
 * @param {number} rowCount - Accepted for interface compatibility; not used.
 * @returns {string[][]} Rows, each an array of exactly eight trimmed strings.
 */
function extractEntriesFromBinary(buffer, pageIndex, rowCount) {
  // Rough heuristic: entries are assumed to have 8 fields
  // (ID, timestamp, title, categories, entry, draft, modified, author).
  const fieldsPerEntry = 8;

  const decoded = new TextDecoder('utf-8', { ignoreBOM: true }).decode(buffer);

  // NUL-separated pieces, trimmed, with blank pieces dropped.
  const fields = decoded
    .split('\0')
    .map((piece) => piece.trim())
    .filter((piece) => piece !== '');

  // Only complete groups become rows; leftovers at the end are ignored.
  const completeRows = Math.floor(fields.length / fieldsPerEntry);
  const rows = [];
  for (let row = 0; row < completeRows; row += 1) {
    const offset = row * fieldsPerEntry;
    rows.push(fields.slice(offset, offset + fieldsPerEntry));
  }
  return rows;
}
// Public API: only parseHSQLDB is exported; extractEntriesFromBinary is not.
module.exports = { parseHSQLDB };