const fs = require('fs');
const path = require('path');
const yauzl = require('yauzl');

/** Directory holding the current contract templates; new versions live in its `new` subfolder. */
const ABO_TEMPLATE_DIR = path.join(__dirname, '..', 'templates', 'abo');

/** Pairs of (previous, next) DOCX files to compare, keyed by a short label used in the report. */
const COMPARISONS = [
  {
    label: 'DE',
    previous: path.join(ABO_TEMPLATE_DIR, 'abo-contract-DE.docx'),
    next: path.join(ABO_TEMPLATE_DIR, 'new', 'abo-contract-DE-NEW.docx'),
  },
  {
    label: 'SL',
    previous: path.join(ABO_TEMPLATE_DIR, 'abo-contract-SL.docx'),
    next: path.join(ABO_TEMPLATE_DIR, 'new', 'abo-contract-SL-NEW.docx'),
  },
];

/** Archive members whose text is extracted: main document, headers, footers, foot-/endnotes. */
const WORD_PART_PATTERN = /^word\/(document|header\d+|footer\d+|footnotes|endnotes)\.xml$/i;

/** Replacement text for the five entities predefined by XML. */
const XML_PREDEFINED_ENTITIES = Object.freeze({
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
});

/** Matches one hexadecimal/decimal character reference or one predefined entity reference. */
const XML_REFERENCE_PATTERN = /&(?:#x([0-9a-fA-F]+)|#([0-9]+)|(amp|lt|gt|quot|apos));/g;

/**
 * Decodes XML entity and character references in a string.
 *
 * All references are resolved in a single pass, so the output of one
 * replacement is never decoded again (`&amp;lt;` yields `&lt;`, not `<`).
 *
 * @param {*} value - Text to decode; falsy values are treated as an empty string.
 * @returns {string} The decoded text. Unknown or out-of-range references are left untouched.
 */
function decodeXml(value) {
  return String(value || '').replace(XML_REFERENCE_PATTERN, (reference, hex, decimal, name) => {
    if (name !== undefined) {
      return XML_PREDEFINED_ENTITIES[name];
    }
    const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(decimal, 10);
    // String.fromCodePoint throws above U+10FFFF; keep such references verbatim instead.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
  });
}

/**
 * Opens a ZIP archive with yauzl in lazy-entries mode.
 *
 * @param {string} zipPath - Path of the archive to open.
 * @returns {Promise<object>} Resolves with the yauzl zip file handle; rejects with yauzl's error.
 */
function openZip(zipPath) {
  return new Promise((resolve, reject) => {
    yauzl.open(zipPath, { lazyEntries: true }, (error, zipFile) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(zipFile);
    });
  });
}

/**
 * Reads one archive entry completely and returns its content as UTF-8 text.
 *
 * @param {object} zipFile - Handle returned by {@link openZip}.
 * @param {object} entry - Entry object received from the zip file's `entry` event.
 * @returns {Promise<string>} Resolves with the entry content; rejects on open or stream errors.
 */
function readEntry(zipFile, entry) {
  return new Promise((resolve, reject) => {
    zipFile.openReadStream(entry, (error, stream) => {
      if (error) {
        reject(error);
        return;
      }
      const chunks = [];
      stream.on('data', (chunk) => chunks.push(chunk));
      stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
      stream.on('error', reject);
    });
  });
}

/**
 * Collects the XML of every Word part matched by {@link WORD_PART_PATTERN}.
 *
 * Entries are requested one at a time: the next `readEntry()` is only issued
 * after the current entry has been skipped or fully read.
 *
 * @param {string} zipPath - Path of the DOCX (ZIP) file.
 * @returns {Promise<Array<{fileName: string, xml: string}>>} Matching parts in archive order.
 */
async function readWordXmlEntries(zipPath) {
  const zipFile = await openZip(zipPath);
  const documents = [];

  return new Promise((resolve, reject) => {
    // Once an entry fails no further readEntry() is issued, so 'end' never
    // fires; close explicitly here so the archive is not left open.
    const fail = (error) => {
      zipFile.close();
      reject(error);
    };

    zipFile.on('entry', (entry) => {
      if (!WORD_PART_PATTERN.test(entry.fileName)) {
        zipFile.readEntry();
        return;
      }
      readEntry(zipFile, entry)
        .then((xml) => {
          documents.push({ fileName: entry.fileName, xml });
          zipFile.readEntry();
        })
        .catch(fail);
    });
    zipFile.on('end', () => {
      zipFile.close();
      resolve(documents);
    });
    zipFile.on('error', fail);

    zipFile.readEntry();
  });
}

/**
 * Converts WordprocessingML into a list of non-empty, whitespace-normalized text lines.
 *
 * Paragraph ends, table-row ends and `<w:br/>` become line breaks; `<w:tab/>`
 * and table-cell ends become tabs. Every other tag is replaced by a space, so
 * adjacent runs end up separated by a single space after normalization.
 *
 * @param {*} xml - XML source of one Word part; falsy values are treated as empty.
 * @returns {string[]} Trimmed lines with runs of spaces/tabs collapsed to one space.
 */
function xmlToLines(xml) {
  const withStructuralBreaks = String(xml || '')
    .replace(/<w:tab\b[^>]*\/>/gi, '\t')
    .replace(/<w:br\b[^>]*\/>/gi, '\n')
    .replace(/<\/w:p>/gi, '\n')
    .replace(/<\/w:tr>/gi, '\n')
    .replace(/<\/w:tc>/gi, '\t')
    // Unwrap text elements without decoding them yet: their content is still
    // XML-escaped, so it cannot be mistaken for markup by the tag strip below.
    .replace(/<w:t\b[^>]*>([\s\S]*?)<\/w:t>/gi, (_, value) => value)
    .replace(/<w:delText\b[^>]*>([\s\S]*?)<\/w:delText>/gi, (_, value) => value);

  // Strip the remaining markup first, then decode exactly once. Decoding
  // before stripping would delete literal "<...>" text that was escaped in
  // the document, and decoding twice would corrupt text such as "&amp;lt;".
  const textOnly = decodeXml(withStructuralBreaks.replace(/<[^>]+>/g, ' '));

  return textOnly
    .split(/\r?\n/)
    .map((line) => line.replace(/[ \t]+/g, ' ').trim())
    .filter((line) => line.length > 0);
}

/**
 * Extracts the text lines of a DOCX file, part by part, ordered by part file name.
 *
 * @param {string} docxPath - Path of the DOCX file.
 * @returns {Promise<string[]>} Concatenated lines of all extracted parts.
 */
async function extractDocxLines(docxPath) {
  const entries = await readWordXmlEntries(docxPath);
  const ordered = [...entries].sort((left, right) => left.fileName.localeCompare(right.fileName));
  return ordered.flatMap((entry) => xmlToLines(entry.xml));
}

/**
 * Builds the longest-common-subsequence table for two line arrays.
 *
 * `matrix[i][j]` holds the LCS length of `left.slice(i)` and `right.slice(j)`;
 * the extra last row and column stay 0 and act as the base case.
 *
 * @param {string[]} left - First sequence.
 * @param {string[]} right - Second sequence.
 * @returns {number[][]} Table of size `(left.length + 1) x (right.length + 1)`.
 */
function buildLcsMatrix(left, right) {
  const matrix = Array.from({ length: left.length + 1 }, () => Array(right.length + 1).fill(0));

  // Filled from the bottom-right corner because each cell depends on the
  // cells below, to the right and diagonally below-right.
  for (let leftIndex = left.length - 1; leftIndex >= 0; leftIndex -= 1) {
    for (let rightIndex = right.length - 1; rightIndex >= 0; rightIndex -= 1) {
      if (left[leftIndex] === right[rightIndex]) {
        matrix[leftIndex][rightIndex] = matrix[leftIndex + 1][rightIndex + 1] + 1;
      } else {
        matrix[leftIndex][rightIndex] = Math.max(
          matrix[leftIndex + 1][rightIndex],
          matrix[leftIndex][rightIndex + 1],
        );
      }
    }
  }

  return matrix;
}

/**
 * Computes a line-level diff between two line arrays using their LCS table.
 *
 * @param {string[]} previousLines - Lines of the old version.
 * @param {string[]} nextLines - Lines of the new version.
 * @returns {Array<{type: 'removed'|'added', line: number, text: string}>} Changes in document
 *   order. `line` is 1-based and refers to the old version for `removed` entries and to the
 *   new version for `added` entries.
 */
function diffLines(previousLines, nextLines) {
  const matrix = buildLcsMatrix(previousLines, nextLines);
  const changes = [];
  let previousIndex = 0;
  let nextIndex = 0;

  while (previousIndex < previousLines.length && nextIndex < nextLines.length) {
    if (previousLines[previousIndex] === nextLines[nextIndex]) {
      previousIndex += 1;
      nextIndex += 1;
      continue;
    }

    // Step in the direction that keeps the longer common subsequence; on a
    // tie the removal is reported first.
    if (matrix[previousIndex + 1][nextIndex] >= matrix[previousIndex][nextIndex + 1]) {
      changes.push({ type: 'removed', line: previousIndex + 1, text: previousLines[previousIndex] });
      previousIndex += 1;
    } else {
      changes.push({ type: 'added', line: nextIndex + 1, text: nextLines[nextIndex] });
      nextIndex += 1;
    }
  }

  // Whatever is left on either side after the other is exhausted is a pure
  // removal or a pure addition.
  while (previousIndex < previousLines.length) {
    changes.push({ type: 'removed', line: previousIndex + 1, text: previousLines[previousIndex] });
    previousIndex += 1;
  }

  while (nextIndex < nextLines.length) {
    changes.push({ type: 'added', line: nextIndex + 1, text: nextLines[nextIndex] });
    nextIndex += 1;
  }

  return changes;
}

/**
 * Extracts and diffs the text of one (previous, next) DOCX pair.
 *
 * @param {{label: string, previous: string, next: string}} comparison - Pair to compare.
 * @returns {Promise<object>} The label, both paths, both line counts and the list of changes.
 * @throws {Error} When either input file does not exist.
 */
async function comparePair({ label, previous, next }) {
  const previousExists = fs.existsSync(previous);
  const nextExists = fs.existsSync(next);

  if (!previousExists || !nextExists) {
    throw new Error(`${label}: missing input file(s)`);
  }

  const [previousLines, nextLines] = await Promise.all([
    extractDocxLines(previous),
    extractDocxLines(next),
  ]);
  const changes = diffLines(previousLines, nextLines);

  return {
    label,
    previous,
    next,
    previousLineCount: previousLines.length,
    nextLineCount: nextLines.length,
    changes,
  };
}

/**
 * Prints the report for one comparison result to stdout.
 *
 * @param {object} result - Value returned by {@link comparePair}.
 * @returns {void}
 */
function printComparison(result) {
  console.log(`\n=== ${result.label} ===`);
  console.log(`Old: ${path.basename(result.previous)} (${result.previousLineCount} lines)`);
  console.log(`New: ${path.basename(result.next)} (${result.nextLineCount} lines)`);

  if (!result.changes.length) {
    console.log('No textual differences detected.');
    return;
  }

  console.log(`Detected ${result.changes.length} text-level changes:`);
  result.changes.forEach((change) => {
    const marker = change.type === 'added' ? '+' : '-';
    console.log(`${marker} [${change.line}] ${change.text}`);
  });
}

/**
 * Runs every configured comparison in order and prints the reports once all have succeeded.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const results = [];
  for (const comparison of COMPARISONS) {
    results.push(await comparePair(comparison));
  }
  results.forEach(printComparison);
}

main().catch((error) => {
  console.error('[compareAboContractDocxVersions] failed:', error?.message || error);
  process.exitCode = 1;
});