From bb9e26ab077849ede004b40caf9832c51827880c Mon Sep 17 00:00:00 2001 From: Andras Serfozo Date: Thu, 16 Apr 2026 03:14:32 +0100 Subject: [PATCH 1/5] basic pdf generation --- scripts/cv-pdf.ts | 217 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 scripts/cv-pdf.ts diff --git a/scripts/cv-pdf.ts b/scripts/cv-pdf.ts new file mode 100644 index 0000000..83c5a95 --- /dev/null +++ b/scripts/cv-pdf.ts @@ -0,0 +1,217 @@ +#!/usr/bin/env bun +import path from "node:path" +import pdfmake from "pdfmake" +import type { Content, TDocumentDefinitions, TFontDictionary } from "pdfmake/interfaces" + +const BODY_FONT = "Helvetica" +const HEADER_FONT = "Times" + +type PdfmakeServer = typeof pdfmake & { + setUrlAccessPolicy(callback: (url: string) => boolean): void + setFonts(fonts: TFontDictionary): void +} + +function assertFontRegistered(family: string, fonts: TFontDictionary) { + if (fonts[family] == null) { + throw new Error(`Font family "${family}" is not registered. 
Available: ${Object.keys(fonts).join(", ")}`) + } +} + +const pdfFonts: TFontDictionary = { + Courier: { + normal: "Courier", + bold: "Courier-Bold", + italics: "Courier-Oblique", + bolditalics: "Courier-BoldOblique" + }, + Helvetica: { + normal: "Helvetica", + bold: "Helvetica-Bold", + italics: "Helvetica-Oblique", + bolditalics: "Helvetica-BoldOblique" + }, + Times: { + normal: "Times-Roman", + bold: "Times-Bold", + italics: "Times-Italic", + bolditalics: "Times-BoldItalic" + }, + Symbol: { + normal: "Symbol" + }, + ZapfDingbats: { + normal: "ZapfDingbats" + } +} + +const pdfm = pdfmake as PdfmakeServer +pdfm.setFonts(pdfFonts) +assertFontRegistered(BODY_FONT, pdfFonts) +assertFontRegistered(HEADER_FONT, pdfFonts) +void pdfm.setUrlAccessPolicy(() => false) + +function parseCvMarkdown(markdown: string): Content[] { + const lines = markdown.split(/\n/) + const content: Content[] = [] + const pendingPara: string[] = [] + const pendingBullets: string[] = [] + let nextParagraphIsSubtitle = false + + function flushParagraph() { + if (pendingPara.length === 0) return + const text = pendingPara.join(" ").trim() + pendingPara.length = 0 + if (!text) return + + if (nextParagraphIsSubtitle) { + nextParagraphIsSubtitle = false + content.push({ + text, + font: HEADER_FONT, + fontSize: 11, + bold: true, + margin: [0, 0, 0, 6] + }) + return + } + + content.push({ + text, + fontSize: 10, + margin: [0, 0, 0, 6] + }) + } + + function flushBullets() { + if (pendingBullets.length === 0) return + const items = pendingBullets.splice(0, pendingBullets.length) + content.push({ + ul: items, + fontSize: 10, + margin: [0, 0, 0, 6] + }) + } + + function flushBulletsIfLineIsNotBullet(line: string) { + if (pendingBullets.length === 0) return + if (line.startsWith("- ")) return + flushBullets() + } + + for (const rawLine of lines) { + const line = rawLine.trimEnd() + const trimmed = line.trim() + + if (trimmed === "") { + flushParagraph() + continue + } + + if (trimmed.startsWith("# ")) { + 
flushBullets() + flushParagraph() + content.push({ + text: trimmed.slice(2), + font: HEADER_FONT, + fontSize: 22, + bold: true, + margin: [0, 0, 0, 4] + }) + nextParagraphIsSubtitle = true + continue + } + + if (trimmed.startsWith("## ")) { + flushBullets() + flushParagraph() + content.push({ + text: trimmed.slice(3), + font: HEADER_FONT, + fontSize: 13, + bold: true, + margin: [0, 12, 0, 6] + }) + continue + } + + if (trimmed.startsWith("### ")) { + flushBullets() + flushParagraph() + content.push({ + text: trimmed.slice(4), + font: HEADER_FONT, + fontSize: 11, + bold: true, + margin: [0, 8, 0, 2] + }) + continue + } + + if (trimmed.startsWith("- ")) { + flushParagraph() + pendingBullets.push(trimmed.slice(2).trim()) + continue + } + + if (trimmed.startsWith("Skills used:")) { + flushBullets() + flushParagraph() + const rest = trimmed.slice("Skills used:".length).trim() + content.push({ + margin: [0, 0, 0, 8], + text: [ + { text: "Skills used: ", bold: true, fontSize: 9 }, + { text: rest, italics: true, fontSize: 9 } + ] + }) + continue + } + + flushBulletsIfLineIsNotBullet(trimmed) + pendingPara.push(trimmed) + } + + flushBullets() + flushParagraph() + + return content +} + +async function main() { + const args = process.argv.slice(2).filter((a) => a !== "--") + if (args.length < 1) { + console.error("Usage: bun scripts/cv-pdf.ts [output.pdf]") + process.exit(1) + } + + const inputPath = path.resolve(args[0]) + const outputPath = + args[1] !== undefined + ? 
path.resolve(args[1]) + : path.join(path.dirname(inputPath), `${path.basename(inputPath, path.extname(inputPath))}.pdf`) + + const inputFile = Bun.file(inputPath) + if (!(await inputFile.exists())) { + console.error(`Input not found: ${inputPath}`) + process.exit(1) + } + + const markdown = await inputFile.text() + const pdfContent = parseCvMarkdown(markdown) + + + const docDefinition: TDocumentDefinitions = { + pageMargins: [48, 48, 48, 48], + content: pdfContent, + defaultStyle: { + font: BODY_FONT, + lineHeight: 1.35 + } + } + + const pdf = pdfm.createPdf(docDefinition) + await pdf.write(outputPath) + console.error(`Wrote ${outputPath}`) +} + +await main() From a5d23cd0b4160ecff4a6f41285b4ae7e3428b807 Mon Sep 17 00:00:00 2001 From: Andras Serfozo Date: Thu, 16 Apr 2026 04:02:18 +0100 Subject: [PATCH 2/5] parse with markdown-it --- bun.lock | 13 +++ package.json | 1 + scripts/cv-pdf.ts | 259 ++++++++++++++++++++++++++++------------------ 3 files changed, 173 insertions(+), 100 deletions(-) diff --git a/bun.lock b/bun.lock index 8d77c4a..4dc0535 100644 --- a/bun.lock +++ b/bun.lock @@ -5,6 +5,7 @@ "": { "name": "apply-agent", "dependencies": { + "markdown-it": "^14.1.1", "ollama": "^0.6.3", "p-limit": "^7.3.0", "pdfmake": "^0.3.7", @@ -260,6 +261,8 @@ "enquirer": ["enquirer@2.4.1", "", { "dependencies": { "ansi-colors": "^4.1.1", "strip-ansi": "^6.0.1" } }, "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ=="], + "entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="], + "env-ci": ["env-ci@11.2.0", "", { "dependencies": { "execa": "^8.0.0", "java-properties": "^1.0.2" } }, "sha512-D5kWfzkmaOQDioPmiviWAVtKmpPT4/iJmMVQxWxMPJTFyTkdc5JQUfc5iXEeWxcOdsYTKSAiA/Age4NUOqKsRA=="], "env-paths": ["env-paths@2.2.1", "", {}, "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A=="], @@ -414,6 +417,8 @@ 
"lines-and-columns": ["lines-and-columns@1.2.4", "", {}, "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="], + "linkify-it": ["linkify-it@5.0.0", "", { "dependencies": { "uc.micro": "^2.0.0" } }, "sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ=="], + "load-json-file": ["load-json-file@4.0.0", "", { "dependencies": { "graceful-fs": "^4.1.2", "parse-json": "^4.0.0", "pify": "^3.0.0", "strip-bom": "^3.0.0" } }, "sha512-Kx8hMakjX03tiGTLAIdJ+lL0htKnXjEZN6hk/tozf/WOuYGdZBJrZ+rCJRbVCugsjB3jMLn9746NsQIf5VjBMw=="], "locate-path": ["locate-path@2.0.0", "", { "dependencies": { "p-locate": "^2.0.0", "path-exists": "^3.0.0" } }, "sha512-NCI2kiDkyR7VeEKm27Kda/iQHyKJe1Bu0FlTbYp3CqJu+9IFe9bLyAjMxf5ZDDbEg+iMPzB5zYyUTSm8wVTKmA=="], @@ -438,10 +443,14 @@ "make-asynchronous": ["make-asynchronous@1.0.1", "", { "dependencies": { "p-event": "^6.0.0", "type-fest": "^4.6.0", "web-worker": "1.2.0" } }, "sha512-T9BPOmEOhp6SmV25SwLVcHK4E6JyG/coH3C6F1NjNXSziv/fd4GmsqMk8YR6qpPOswfaOCApSNkZv6fxoaYFcQ=="], + "markdown-it": ["markdown-it@14.1.1", "", { "dependencies": { "argparse": "^2.0.1", "entities": "^4.4.0", "linkify-it": "^5.0.0", "mdurl": "^2.0.0", "punycode.js": "^2.3.1", "uc.micro": "^2.1.0" }, "bin": { "markdown-it": "bin/markdown-it.mjs" } }, "sha512-BuU2qnTti9YKgK5N+IeMubp14ZUKUUw7yeJbkjtosvHiP0AZ5c8IAgEMk79D0eC8F23r4Ac/q8cAIFdm2FtyoA=="], + "marked": ["marked@15.0.12", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA=="], "marked-terminal": ["marked-terminal@7.3.0", "", { "dependencies": { "ansi-escapes": "^7.0.0", "ansi-regex": "^6.1.0", "chalk": "^5.4.1", "cli-highlight": "^2.1.11", "cli-table3": "^0.6.5", "node-emoji": "^2.2.0", "supports-hyperlinks": "^3.1.0" }, "peerDependencies": { "marked": ">=1 <16" } }, 
"sha512-t4rBvPsHc57uE/2nJOLmMbZCQ4tgAccAED3ngXQqW6g+TxA488JzJ+FK3lQkzBQOI1mRV/r/Kq+1ZlJ4D0owQw=="], + "mdurl": ["mdurl@2.0.0", "", {}, "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w=="], + "meow": ["meow@13.2.0", "", {}, "sha512-pxQJQzB6djGPXh08dacEloMFopsOqGVRKFPYvPOt9XDZ1HasbgDZA74CJGreSU4G3Ak7EFJGoiH2auq+yXISgA=="], "merge-stream": ["merge-stream@2.0.0", "", {}, "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="], @@ -564,6 +573,8 @@ "pump": ["pump@3.0.3", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA=="], + "punycode.js": ["punycode.js@2.3.1", "", {}, "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA=="], + "quansync": ["quansync@0.2.11", "", {}, "sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA=="], "queue-microtask": ["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="], @@ -696,6 +707,8 @@ "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + "uc.micro": ["uc.micro@2.1.0", "", {}, "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A=="], + "uglify-js": ["uglify-js@3.19.3", "", { "bin": { "uglifyjs": "bin/uglifyjs" } }, "sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ=="], "undici": ["undici@7.22.0", "", {}, "sha512-RqslV2Us5BrllB+JeiZnK4peryVTndy9Dnqq62S3yYRRTj0tFQCwEniUy2167skdGOy3vqRzEvl1Dm4sV2ReDg=="], diff --git a/package.json b/package.json index 7fae7a3..5027f1a 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ }, 
"packageManager": "bun@1.3.12", "dependencies": { + "markdown-it": "^14.1.1", "ollama": "^0.6.3", "p-limit": "^7.3.0", "pdfmake": "^0.3.7", diff --git a/scripts/cv-pdf.ts b/scripts/cv-pdf.ts index 83c5a95..3526b3d 100644 --- a/scripts/cv-pdf.ts +++ b/scripts/cv-pdf.ts @@ -1,5 +1,6 @@ #!/usr/bin/env bun import path from "node:path" +import MarkdownIt from "markdown-it" import pdfmake from "pdfmake" import type { Content, TDocumentDefinitions, TFontDictionary } from "pdfmake/interfaces" @@ -50,129 +51,188 @@ assertFontRegistered(BODY_FONT, pdfFonts) assertFontRegistered(HEADER_FONT, pdfFonts) void pdfm.setUrlAccessPolicy(() => false) -function parseCvMarkdown(markdown: string): Content[] { - const lines = markdown.split(/\n/) - const content: Content[] = [] - const pendingPara: string[] = [] - const pendingBullets: string[] = [] - let nextParagraphIsSubtitle = false - - function flushParagraph() { - if (pendingPara.length === 0) return - const text = pendingPara.join(" ").trim() - pendingPara.length = 0 - if (!text) return - - if (nextParagraphIsSubtitle) { - nextParagraphIsSubtitle = false - content.push({ - text, - font: HEADER_FONT, - fontSize: 11, - bold: true, - margin: [0, 0, 0, 6] - }) - return - } +const markdownParser = new MarkdownIt({ + html: false, + linkify: true, + typographer: false +}) - content.push({ - text, - fontSize: 10, - margin: [0, 0, 0, 6] - }) +const styles = { + heading1: { + font: HEADER_FONT, + fontSize: 22, + bold: true, + margin: [0, 0, 0, 6] as [number, number, number, number] + }, + heading2: { + font: HEADER_FONT, + fontSize: 13, + bold: true, + margin: [0, 12, 0, 6] as [number, number, number, number] + }, + heading3: { + font: HEADER_FONT, + fontSize: 11, + bold: true, + margin: [0, 8, 0, 4] as [number, number, number, number] + }, + paragraph: { + fontSize: 10, + margin: [0, 0, 0, 6] as [number, number, number, number] + }, + list: { + fontSize: 10, + margin: [0, 0, 0, 6] as [number, number, number, number] } +} - function 
flushBullets() { - if (pendingBullets.length === 0) return - const items = pendingBullets.splice(0, pendingBullets.length) - content.push({ - ul: items, - fontSize: 10, - margin: [0, 0, 0, 6] - }) - } +function getHeadingStyle(level: number) { + if (level === 1) return styles.heading1 + if (level === 2) return styles.heading2 + return styles.heading3 +} - function flushBulletsIfLineIsNotBullet(line: string) { - if (pendingBullets.length === 0) return - if (line.startsWith("- ")) return - flushBullets() - } +function inlineTokenToPdfText(inlineToken: NonNullable[number]>) { + const children = inlineToken.children ?? [] + const textParts: Array<{ + text: string + bold?: true + italics?: true + link?: string + decoration?: "underline" + }> = [] + const styleStack: Array<{ + bold?: true + italics?: true + link?: string + decoration?: "underline" + }> = [{}] - for (const rawLine of lines) { - const line = rawLine.trimEnd() - const trimmed = line.trim() + function currentStyle() { + return styleStack[styleStack.length - 1] ?? {} + } - if (trimmed === "") { - flushParagraph() + for (const token of children) { + if (token.type === "strong_open") { + styleStack.push({ ...currentStyle(), bold: true }) continue } - - if (trimmed.startsWith("# ")) { - flushBullets() - flushParagraph() - content.push({ - text: trimmed.slice(2), - font: HEADER_FONT, - fontSize: 22, - bold: true, - margin: [0, 0, 0, 4] - }) - nextParagraphIsSubtitle = true + if (token.type === "em_open") { + styleStack.push({ ...currentStyle(), italics: true }) continue } - - if (trimmed.startsWith("## ")) { - flushBullets() - flushParagraph() - content.push({ - text: trimmed.slice(3), - font: HEADER_FONT, - fontSize: 13, - bold: true, - margin: [0, 12, 0, 6] + if (token.type === "link_open") { + const href = token.attrs?.find(([name]) => name === "href")?.[1] + styleStack.push({ + ...currentStyle(), + ...(href == null ? 
{} : { link: href, decoration: "underline" as const }) }) continue } + if (token.type === "strong_close" || token.type === "em_close" || token.type === "link_close") { + if (styleStack.length > 1) styleStack.pop() + continue + } - if (trimmed.startsWith("### ")) { - flushBullets() - flushParagraph() - content.push({ - text: trimmed.slice(4), - font: HEADER_FONT, - fontSize: 11, - bold: true, - margin: [0, 8, 0, 2] - }) + if (token.type === "softbreak" || token.type === "hardbreak") { + textParts.push({ ...currentStyle(), text: "\n" }) + continue + } + if (token.type === "code_inline") { + textParts.push({ ...currentStyle(), text: token.content }) + continue + } + if (token.type === "text") { + textParts.push({ ...currentStyle(), text: token.content }) continue } + } + + if (textParts.length === 0) return "" + + const plainTextOnly = textParts.every( + (part) => + part.bold == null && part.italics == null && part.link == null && part.decoration == null + ) + + if (plainTextOnly) { + return textParts.map((part) => part.text).join("") + } + + return textParts +} - if (trimmed.startsWith("- ")) { - flushParagraph() - pendingBullets.push(trimmed.slice(2).trim()) +function parseCvMarkdown(markdown: string): Content[] { + const tokens = markdownParser.parse(markdown, {}) + const content: Content[] = [] + for (let i = 0; i < tokens.length; i += 1) { + const token = tokens[i] + if (token == null) continue + + if (token.type === "heading_open") { + const level = Number(token.tag.slice(1)) + const inlineToken = tokens[i + 1] + if (inlineToken?.type === "inline") { + content.push({ + ...getHeadingStyle(level), + text: inlineTokenToPdfText(inlineToken) + }) + } continue } - if (trimmed.startsWith("Skills used:")) { - flushBullets() - flushParagraph() - const rest = trimmed.slice("Skills used:".length).trim() - content.push({ - margin: [0, 0, 0, 8], - text: [ - { text: "Skills used: ", bold: true, fontSize: 9 }, - { text: rest, italics: true, fontSize: 9 } - ] - }) + if 
(token.type === "paragraph_open") { + const inlineToken = tokens[i + 1] + if (inlineToken?.type === "inline") { + content.push({ + ...styles.paragraph, + text: inlineTokenToPdfText(inlineToken) + }) + } continue } - flushBulletsIfLineIsNotBullet(trimmed) - pendingPara.push(trimmed) - } + if (token.type === "bullet_list_open") { + const items: Array< + | string + | Array<{ text: string; bold?: true; italics?: true; link?: string; decoration?: "underline" }> + > = [] + let listCursor = i + 1 + while (tokens[listCursor]?.type !== "bullet_list_close" && listCursor < tokens.length) { + const listToken = tokens[listCursor] + if (listToken?.type !== "list_item_open") { + listCursor += 1 + continue + } - flushBullets() - flushParagraph() + let itemText: + | string + | Array<{ text: string; bold?: true; italics?: true; link?: string; decoration?: "underline" }> + | null = null + listCursor += 1 + while (tokens[listCursor]?.type !== "list_item_close" && listCursor < tokens.length) { + const innerToken = tokens[listCursor] + if (innerToken?.type === "inline") { + itemText = inlineTokenToPdfText(innerToken) + } + listCursor += 1 + } + + if (itemText != null && itemText !== "") { + items.push(itemText) + } + listCursor += 1 + } + + if (items.length > 0) { + content.push({ + ...styles.list, + ul: items + }) + } + i = listCursor + } + } return content } @@ -199,7 +259,6 @@ async function main() { const markdown = await inputFile.text() const pdfContent = parseCvMarkdown(markdown) - const docDefinition: TDocumentDefinitions = { pageMargins: [48, 48, 48, 48], content: pdfContent, From ff8fc10a10bc7cb84bb669d2fa2b3b142f28ab8f Mon Sep 17 00:00:00 2001 From: Andras Serfozo Date: Thu, 16 Apr 2026 04:09:16 +0100 Subject: [PATCH 3/5] markdown-it style --- scripts/cv-pdf.ts | 245 +++++++++++++++++++--------------------------- 1 file changed, 103 insertions(+), 142 deletions(-) diff --git a/scripts/cv-pdf.ts b/scripts/cv-pdf.ts index 3526b3d..fec9262 100644 --- a/scripts/cv-pdf.ts +++ 
b/scripts/cv-pdf.ts @@ -51,187 +51,146 @@ assertFontRegistered(BODY_FONT, pdfFonts) assertFontRegistered(HEADER_FONT, pdfFonts) void pdfm.setUrlAccessPolicy(() => false) -const markdownParser = new MarkdownIt({ +const md = new MarkdownIt({ html: false, - linkify: true, - typographer: false + linkify: false, + typographer: true }) -const styles = { - heading1: { - font: HEADER_FONT, - fontSize: 22, - bold: true, - margin: [0, 0, 0, 6] as [number, number, number, number] - }, - heading2: { - font: HEADER_FONT, - fontSize: 13, - bold: true, - margin: [0, 12, 0, 6] as [number, number, number, number] - }, - heading3: { - font: HEADER_FONT, - fontSize: 11, - bold: true, - margin: [0, 8, 0, 4] as [number, number, number, number] - }, - paragraph: { - fontSize: 10, - margin: [0, 0, 0, 6] as [number, number, number, number] - }, - list: { - fontSize: 10, - margin: [0, 0, 0, 6] as [number, number, number, number] - } +const markdownStyles = { + h1: { font: HEADER_FONT, fontSize: 22, bold: true, margin: [0, 0, 0, 4] as [number, number, number, number] }, + h2: { font: HEADER_FONT, fontSize: 13, bold: true, margin: [0, 12, 0, 6] as [number, number, number, number] }, + h3: { font: HEADER_FONT, fontSize: 11, bold: true, margin: [0, 8, 0, 2] as [number, number, number, number] }, + p: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] }, + ul: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] } } -function getHeadingStyle(level: number) { - if (level === 1) return styles.heading1 - if (level === 2) return styles.heading2 - return styles.heading3 +function simplifyRuns( + runs: Array<{ text: string; bold?: boolean; italics?: boolean; link?: string }> +) { + if (runs.length === 0) return "" + if (runs.length === 1 && runs[0].bold !== true && runs[0].italics !== true && runs[0].link === undefined) { + return runs[0].text + } + return runs } -function inlineTokenToPdfText(inlineToken: NonNullable[number]>) { - const children = 
inlineToken.children ?? [] - const textParts: Array<{ - text: string - bold?: true - italics?: true - link?: string - decoration?: "underline" - }> = [] - const styleStack: Array<{ - bold?: true - italics?: true - link?: string - decoration?: "underline" - }> = [{}] - - function currentStyle() { - return styleStack[styleStack.length - 1] ?? {} - } +function parseInlineRuns(token: { children?: Array<{ type: string; content: string; attrGet?: (name: string) => string | null }> }) { + const runs: Array<{ text: string; bold?: boolean; italics?: boolean; link?: string }> = [] + if (token.children == null) return runs + + let strongDepth = 0 + let emDepth = 0 + let activeLink: string | undefined - for (const token of children) { - if (token.type === "strong_open") { - styleStack.push({ ...currentStyle(), bold: true }) + for (const child of token.children) { + if (child.type === "strong_open") { + strongDepth += 1 continue } - if (token.type === "em_open") { - styleStack.push({ ...currentStyle(), italics: true }) + if (child.type === "strong_close") { + strongDepth = Math.max(0, strongDepth - 1) continue } - if (token.type === "link_open") { - const href = token.attrs?.find(([name]) => name === "href")?.[1] - styleStack.push({ - ...currentStyle(), - ...(href == null ? {} : { link: href, decoration: "underline" as const }) - }) + if (child.type === "em_open") { + emDepth += 1 continue } - if (token.type === "strong_close" || token.type === "em_close" || token.type === "link_close") { - if (styleStack.length > 1) styleStack.pop() + if (child.type === "em_close") { + emDepth = Math.max(0, emDepth - 1) continue } - - if (token.type === "softbreak" || token.type === "hardbreak") { - textParts.push({ ...currentStyle(), text: "\n" }) + if (child.type === "link_open") { + activeLink = child.attrGet?.("href") ?? 
undefined continue } - if (token.type === "code_inline") { - textParts.push({ ...currentStyle(), text: token.content }) + if (child.type === "link_close") { + activeLink = undefined continue } - if (token.type === "text") { - textParts.push({ ...currentStyle(), text: token.content }) - continue - } - } - - if (textParts.length === 0) return "" - const plainTextOnly = textParts.every( - (part) => - part.bold == null && part.italics == null && part.link == null && part.decoration == null - ) + const text = child.type === "softbreak" || child.type === "hardbreak" ? "\n" : child.content + if (!text) continue - if (plainTextOnly) { - return textParts.map((part) => part.text).join("") + runs.push({ + text, + bold: strongDepth > 0 ? true : undefined, + italics: emDepth > 0 ? true : undefined, + link: activeLink + }) } - return textParts + return runs } function parseCvMarkdown(markdown: string): Content[] { - const tokens = markdownParser.parse(markdown, {}) + const tokens = md.parse(markdown, {}) const content: Content[] = [] - for (let i = 0; i < tokens.length; i += 1) { - const token = tokens[i] - if (token == null) continue + let pendingHeadingLevel: number | null = null + const listStack: Array<{ items: Array>; currentItem: Array<{ text: string; bold?: boolean; italics?: boolean; link?: string }> }> = [] + + for (const token of tokens) { if (token.type === "heading_open") { - const level = Number(token.tag.slice(1)) - const inlineToken = tokens[i + 1] - if (inlineToken?.type === "inline") { - content.push({ - ...getHeadingStyle(level), - text: inlineTokenToPdfText(inlineToken) - }) - } + pendingHeadingLevel = Number.parseInt(token.tag.slice(1), 10) continue } - if (token.type === "paragraph_open") { - const inlineToken = tokens[i + 1] - if (inlineToken?.type === "inline") { - content.push({ - ...styles.paragraph, - text: inlineTokenToPdfText(inlineToken) - }) - } + if (token.type === "heading_close") { + pendingHeadingLevel = null continue } if (token.type === 
"bullet_list_open") { - const items: Array< - | string - | Array<{ text: string; bold?: true; italics?: true; link?: string; decoration?: "underline" }> - > = [] - let listCursor = i + 1 - while (tokens[listCursor]?.type !== "bullet_list_close" && listCursor < tokens.length) { - const listToken = tokens[listCursor] - if (listToken?.type !== "list_item_open") { - listCursor += 1 - continue - } - - let itemText: - | string - | Array<{ text: string; bold?: true; italics?: true; link?: string; decoration?: "underline" }> - | null = null - listCursor += 1 - while (tokens[listCursor]?.type !== "list_item_close" && listCursor < tokens.length) { - const innerToken = tokens[listCursor] - if (innerToken?.type === "inline") { - itemText = inlineTokenToPdfText(innerToken) - } - listCursor += 1 - } - - if (itemText != null && itemText !== "") { - items.push(itemText) - } - listCursor += 1 - } + listStack.push({ items: [], currentItem: [] }) + continue + } - if (items.length > 0) { - content.push({ - ...styles.list, - ul: items - }) + if (token.type === "bullet_list_close") { + const list = listStack.pop() + if (list == null) continue + if (listStack.length > 0) continue + const listNode: Content = { ul: list.items, style: "ul" } + content.push(listNode) + continue + } + + if (token.type === "list_item_open") { + if (listStack.length > 0) { + listStack[listStack.length - 1].currentItem = [] } - i = listCursor + continue + } + + if (token.type === "list_item_close") { + if (listStack.length === 0) continue + const currentList = listStack[listStack.length - 1] + currentList.items.push(simplifyRuns(currentList.currentItem)) + currentList.currentItem = [] + continue + } + + if (token.type !== "inline") continue + + const inlineRuns = parseInlineRuns(token) + if (inlineRuns.length === 0) continue + + if (pendingHeadingLevel !== null) { + const headingStyle = pendingHeadingLevel === 1 ? "h1" : pendingHeadingLevel === 2 ? 
"h2" : "h3" + content.push({ text: simplifyRuns(inlineRuns), style: headingStyle }) + continue } + + if (listStack.length > 0) { + const currentList = listStack[listStack.length - 1] + currentList.currentItem.push(...inlineRuns) + continue + } + + content.push({ + text: simplifyRuns(inlineRuns), + style: "p" + }) } return content @@ -259,13 +218,15 @@ async function main() { const markdown = await inputFile.text() const pdfContent = parseCvMarkdown(markdown) + const docDefinition: TDocumentDefinitions = { pageMargins: [48, 48, 48, 48], content: pdfContent, defaultStyle: { font: BODY_FONT, lineHeight: 1.35 - } + }, + styles: markdownStyles } const pdf = pdfm.createPdf(docDefinition) From 380dafca723812a2b05be8f1f4483daf1b53e8f7 Mon Sep 17 00:00:00 2001 From: Andras Serfozo Date: Thu, 16 Apr 2026 04:28:08 +0100 Subject: [PATCH 4/5] md parse with remark --- bun.lock | 91 +++++++++++++++++++++++---- package.json | 3 +- scripts/cv-pdf.ts | 153 ++++++++++++++++------------------------------ 3 files changed, 132 insertions(+), 115 deletions(-) diff --git a/bun.lock b/bun.lock index 4dc0535..0cca40f 100644 --- a/bun.lock +++ b/bun.lock @@ -5,12 +5,13 @@ "": { "name": "apply-agent", "dependencies": { - "markdown-it": "^14.1.1", "ollama": "^0.6.3", "p-limit": "^7.3.0", "pdfmake": "^0.3.7", "pino": "^10.3.1", "pino-loki": "^3.0.0", + "remark-parse": "^11.0.0", + "unified": "^11.0.5", "zod": "^4.3.6", }, "devDependencies": { @@ -145,6 +146,12 @@ "@types/bun": ["@types/bun@1.3.12", "", { "dependencies": { "bun-types": "1.3.12" } }, "sha512-DBv81elK+/VSwXHDlnH3Qduw+KxkTIWi7TXkAeh24zpi5l0B2kUg9Ga3tb4nJaPcOFswflgi/yAvMVBPrxMB+A=="], + "@types/debug": ["@types/debug@4.1.13", "", { "dependencies": { "@types/ms": "*" } }, "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw=="], + + "@types/mdast": ["@types/mdast@4.0.4", "", { "dependencies": { "@types/unist": "*" } }, 
"sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA=="], + + "@types/ms": ["@types/ms@2.1.0", "", {}, "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA=="], + "@types/node": ["@types/node@25.2.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ=="], "@types/normalize-package-data": ["@types/normalize-package-data@2.4.4", "", {}, "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA=="], @@ -153,6 +160,8 @@ "@types/pdfmake": ["@types/pdfmake@0.3.2", "", { "dependencies": { "@types/node": "*", "@types/pdfkit": "*" } }, "sha512-2TZSL8puKJs/rHvMV1b8BhHD+qYyV9da8mVY83/x7ZR/NaEPXbm3+t5SwkwaH6QAIhY1zQVAaFDhHWL0haMstA=="], + "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="], + "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], "aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], @@ -177,6 +186,8 @@ "atomic-sleep": ["atomic-sleep@1.0.0", "", {}, "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ=="], + "bail": ["bail@2.0.2", "", {}, "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw=="], + "base64-js": ["base64-js@0.0.8", "", {}, "sha512-3XSA2cR/h/73EzlXXdU6YNycmYI7+kicTxks4eJg2g39biHR84slg2+des+p7iHYhbRg/udIS4TD53WabcOUkw=="], "before-after-hook": ["before-after-hook@4.0.0", "", {}, "sha512-q6tR3RPqIB1pMiTRMFcZwuG5T8vwp+vUvEG0vuI6B+Rikh5BfPp2fQ82c925FOs+b0lcFQ8CFrL+KbilfZFhOQ=="], @@ -197,6 +208,8 @@ 
"char-regex": ["char-regex@1.0.2", "", {}, "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw=="], + "character-entities": ["character-entities@2.0.2", "", {}, "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ=="], + "chardet": ["chardet@2.1.1", "", {}, "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ=="], "clean-stack": ["clean-stack@2.2.0", "", {}, "sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A=="], @@ -241,10 +254,16 @@ "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], + "decode-named-character-reference": ["decode-named-character-reference@1.3.0", "", { "dependencies": { "character-entities": "^2.0.0" } }, "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q=="], + "deep-extend": ["deep-extend@0.6.0", "", {}, "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA=="], + "dequal": ["dequal@2.0.3", "", {}, "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA=="], + "detect-indent": ["detect-indent@6.1.0", "", {}, "sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA=="], + "devlop": ["devlop@1.1.0", "", { "dependencies": { "dequal": "^2.0.0" } }, "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA=="], + "dfa": ["dfa@1.2.0", "", {}, "sha512-ED3jP8saaweFTjeGX8HQPjeC1YYyZs98jGNZx6IiBvxW7JG5v492kamAQB3m2wop07CvU/RQmzcKr6bgcC5D/Q=="], "dir-glob": ["dir-glob@3.0.1", "", { "dependencies": { "path-type": "^4.0.0" } }, "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA=="], @@ -261,8 +280,6 @@ "enquirer": ["enquirer@2.4.1", "", 
{ "dependencies": { "ansi-colors": "^4.1.1", "strip-ansi": "^6.0.1" } }, "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ=="], - "entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="], - "env-ci": ["env-ci@11.2.0", "", { "dependencies": { "execa": "^8.0.0", "java-properties": "^1.0.2" } }, "sha512-D5kWfzkmaOQDioPmiviWAVtKmpPT4/iJmMVQxWxMPJTFyTkdc5JQUfc5iXEeWxcOdsYTKSAiA/Age4NUOqKsRA=="], "env-paths": ["env-paths@2.2.1", "", {}, "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A=="], @@ -279,6 +296,8 @@ "execa": ["execa@9.6.1", "", { "dependencies": { "@sindresorhus/merge-streams": "^4.0.0", "cross-spawn": "^7.0.6", "figures": "^6.1.0", "get-stream": "^9.0.0", "human-signals": "^8.0.1", "is-plain-obj": "^4.1.0", "is-stream": "^4.0.1", "npm-run-path": "^6.0.0", "pretty-ms": "^9.2.0", "signal-exit": "^4.1.0", "strip-final-newline": "^4.0.0", "yoctocolors": "^2.1.1" } }, "sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA=="], + "extend": ["extend@3.0.2", "", {}, "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="], + "extendable-error": ["extendable-error@0.1.7", "", {}, "sha512-UOiS2in6/Q0FK0R0q6UY9vYpQ21mr/Qn1KOnte7vsACuNJf514WvCCUHSRCPcgjPT2bAhNIJdlE6bVap1GKmeg=="], "fast-content-type-parse": ["fast-content-type-parse@3.0.0", "", {}, "sha512-ZvLdcY8P+N8mGQJahJV5G4U88CSvT1rP8ApL6uETe88MBXrBHAkZlSEySdUlyztF7ccb+Znos3TFqaepHxdhBg=="], @@ -417,8 +436,6 @@ "lines-and-columns": ["lines-and-columns@1.2.4", "", {}, "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="], - "linkify-it": ["linkify-it@5.0.0", "", { "dependencies": { "uc.micro": "^2.0.0" } }, "sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ=="], - "load-json-file": 
["load-json-file@4.0.0", "", { "dependencies": { "graceful-fs": "^4.1.2", "parse-json": "^4.0.0", "pify": "^3.0.0", "strip-bom": "^3.0.0" } }, "sha512-Kx8hMakjX03tiGTLAIdJ+lL0htKnXjEZN6hk/tozf/WOuYGdZBJrZ+rCJRbVCugsjB3jMLn9746NsQIf5VjBMw=="], "locate-path": ["locate-path@2.0.0", "", { "dependencies": { "p-locate": "^2.0.0", "path-exists": "^3.0.0" } }, "sha512-NCI2kiDkyR7VeEKm27Kda/iQHyKJe1Bu0FlTbYp3CqJu+9IFe9bLyAjMxf5ZDDbEg+iMPzB5zYyUTSm8wVTKmA=="], @@ -443,13 +460,13 @@ "make-asynchronous": ["make-asynchronous@1.0.1", "", { "dependencies": { "p-event": "^6.0.0", "type-fest": "^4.6.0", "web-worker": "1.2.0" } }, "sha512-T9BPOmEOhp6SmV25SwLVcHK4E6JyG/coH3C6F1NjNXSziv/fd4GmsqMk8YR6qpPOswfaOCApSNkZv6fxoaYFcQ=="], - "markdown-it": ["markdown-it@14.1.1", "", { "dependencies": { "argparse": "^2.0.1", "entities": "^4.4.0", "linkify-it": "^5.0.0", "mdurl": "^2.0.0", "punycode.js": "^2.3.1", "uc.micro": "^2.1.0" }, "bin": { "markdown-it": "bin/markdown-it.mjs" } }, "sha512-BuU2qnTti9YKgK5N+IeMubp14ZUKUUw7yeJbkjtosvHiP0AZ5c8IAgEMk79D0eC8F23r4Ac/q8cAIFdm2FtyoA=="], - "marked": ["marked@15.0.12", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA=="], "marked-terminal": ["marked-terminal@7.3.0", "", { "dependencies": { "ansi-escapes": "^7.0.0", "ansi-regex": "^6.1.0", "chalk": "^5.4.1", "cli-highlight": "^2.1.11", "cli-table3": "^0.6.5", "node-emoji": "^2.2.0", "supports-hyperlinks": "^3.1.0" }, "peerDependencies": { "marked": ">=1 <16" } }, "sha512-t4rBvPsHc57uE/2nJOLmMbZCQ4tgAccAED3ngXQqW6g+TxA488JzJ+FK3lQkzBQOI1mRV/r/Kq+1ZlJ4D0owQw=="], - "mdurl": ["mdurl@2.0.0", "", {}, "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w=="], + "mdast-util-from-markdown": ["mdast-util-from-markdown@2.0.3", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", 
"mdast-util-to-string": "^4.0.0", "micromark": "^4.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-W4mAWTvSlKvf8L6J+VN9yLSqQ9AOAAvHuoDAmPkz4dHf553m5gVj2ejadHJhoJmcmxEnOv6Pa8XJhpxE93kb8Q=="], + + "mdast-util-to-string": ["mdast-util-to-string@4.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0" } }, "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg=="], "meow": ["meow@13.2.0", "", {}, "sha512-pxQJQzB6djGPXh08dacEloMFopsOqGVRKFPYvPOt9XDZ1HasbgDZA74CJGreSU4G3Ak7EFJGoiH2auq+yXISgA=="], @@ -457,6 +474,48 @@ "merge2": ["merge2@1.4.1", "", {}, "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg=="], + "micromark": ["micromark@4.0.2", "", { "dependencies": { "@types/debug": "^4.0.0", "debug": "^4.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-encode": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA=="], + + "micromark-core-commonmark": ["micromark-core-commonmark@2.0.3", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-factory-destination": "^2.0.0", "micromark-factory-label": "^2.0.0", 
"micromark-factory-space": "^2.0.0", "micromark-factory-title": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-html-tag-name": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg=="], + + "micromark-factory-destination": ["micromark-factory-destination@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA=="], + + "micromark-factory-label": ["micromark-factory-label@2.0.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg=="], + + "micromark-factory-space": ["micromark-factory-space@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg=="], + + "micromark-factory-title": ["micromark-factory-title@2.0.1", "", { "dependencies": { "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw=="], + + "micromark-factory-whitespace": ["micromark-factory-whitespace@2.0.1", "", { "dependencies": { "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", 
"micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ=="], + + "micromark-util-character": ["micromark-util-character@2.1.1", "", { "dependencies": { "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q=="], + + "micromark-util-chunked": ["micromark-util-chunked@2.0.1", "", { "dependencies": { "micromark-util-symbol": "^2.0.0" } }, "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA=="], + + "micromark-util-classify-character": ["micromark-util-classify-character@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q=="], + + "micromark-util-combine-extensions": ["micromark-util-combine-extensions@2.0.1", "", { "dependencies": { "micromark-util-chunked": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg=="], + + "micromark-util-decode-numeric-character-reference": ["micromark-util-decode-numeric-character-reference@2.0.2", "", { "dependencies": { "micromark-util-symbol": "^2.0.0" } }, "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw=="], + + "micromark-util-decode-string": ["micromark-util-decode-string@2.0.1", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-symbol": "^2.0.0" } }, "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ=="], + + "micromark-util-encode": ["micromark-util-encode@2.0.1", "", {}, 
"sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw=="], + + "micromark-util-html-tag-name": ["micromark-util-html-tag-name@2.0.1", "", {}, "sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA=="], + + "micromark-util-normalize-identifier": ["micromark-util-normalize-identifier@2.0.1", "", { "dependencies": { "micromark-util-symbol": "^2.0.0" } }, "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q=="], + + "micromark-util-resolve-all": ["micromark-util-resolve-all@2.0.1", "", { "dependencies": { "micromark-util-types": "^2.0.0" } }, "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg=="], + + "micromark-util-sanitize-uri": ["micromark-util-sanitize-uri@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-encode": "^2.0.0", "micromark-util-symbol": "^2.0.0" } }, "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ=="], + + "micromark-util-subtokenize": ["micromark-util-subtokenize@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA=="], + + "micromark-util-symbol": ["micromark-util-symbol@2.0.1", "", {}, "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q=="], + + "micromark-util-types": ["micromark-util-types@2.0.2", "", {}, "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA=="], + "micromatch": ["micromatch@4.0.8", "", { "dependencies": { "braces": "^3.0.3", "picomatch": "^2.3.1" } }, "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA=="], "mime": ["mime@4.1.0", "", { "bin": { "mime": 
"bin/cli.js" } }, "sha512-X5ju04+cAzsojXKes0B/S4tcYtFAJ6tTMuSPBEn9CPGlrWr8Fiw7qYeLT0XyH80HSoAoqWCaz+MWKh22P7G1cw=="], @@ -573,8 +632,6 @@ "pump": ["pump@3.0.3", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA=="], - "punycode.js": ["punycode.js@2.3.1", "", {}, "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA=="], - "quansync": ["quansync@0.2.11", "", {}, "sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA=="], "queue-microtask": ["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="], @@ -595,6 +652,8 @@ "registry-auth-token": ["registry-auth-token@5.1.1", "", { "dependencies": { "@pnpm/npm-conf": "^3.0.2" } }, "sha512-P7B4+jq8DeD2nMsAcdfaqHbssgHtZ7Z5+++a5ask90fvmJ8p5je4mOa+wzu+DB4vQ5tdJV/xywY+UnVFeQLV5Q=="], + "remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], + "require-directory": ["require-directory@2.1.1", "", {}, "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q=="], "resolve-from": ["resolve-from@5.0.0", "", {}, "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw=="], @@ -699,6 +758,8 @@ "traverse": ["traverse@0.6.8", "", {}, "sha512-aXJDbk6SnumuaZSANd21XAo15ucCDE38H4fkqiGsc3MhCK+wOlZvLP9cB/TvpHT0mOyWgC4Z8EwRlzqYSUzdsA=="], + "trough": ["trough@2.2.0", "", {}, "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw=="], + "tslib": ["tslib@2.8.1", "", {}, 
"sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], "tunnel": ["tunnel@0.0.6", "", {}, "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg=="], @@ -707,8 +768,6 @@ "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], - "uc.micro": ["uc.micro@2.1.0", "", {}, "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A=="], - "uglify-js": ["uglify-js@3.19.3", "", { "bin": { "uglifyjs": "bin/uglifyjs" } }, "sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ=="], "undici": ["undici@7.22.0", "", {}, "sha512-RqslV2Us5BrllB+JeiZnK4peryVTndy9Dnqq62S3yYRRTj0tFQCwEniUy2167skdGOy3vqRzEvl1Dm4sV2ReDg=="], @@ -723,8 +782,12 @@ "unicorn-magic": ["unicorn-magic@0.4.0", "", {}, "sha512-wH590V9VNgYH9g3lH9wWjTrUoKsjLF6sGLjhR4sH1LWpLmCOH0Zf7PukhDA8BiS7KHe4oPNkcTHqYkj7SOGUOw=="], + "unified": ["unified@11.0.5", "", { "dependencies": { "@types/unist": "^3.0.0", "bail": "^2.0.0", "devlop": "^1.0.0", "extend": "^3.0.0", "is-plain-obj": "^4.0.0", "trough": "^2.0.0", "vfile": "^6.0.0" } }, "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA=="], + "unique-string": ["unique-string@3.0.0", "", { "dependencies": { "crypto-random-string": "^4.0.0" } }, "sha512-VGXBUVwxKMBUznyffQweQABPRRW1vHZAbadFZud4pLFAqRGvv/96vafgjWFqzourzr8YonlQiPgH0YCJfawoGQ=="], + "unist-util-stringify-position": ["unist-util-stringify-position@4.0.0", "", { "dependencies": { "@types/unist": "^3.0.0" } }, "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ=="], + "universal-user-agent": ["universal-user-agent@7.0.3", "", {}, "sha512-TmnEAEAsBJVZM/AADELsK76llnwcf9vMKuPz8JflO1frO8Lchitr0fNaN9d+Ap0BjKtqWqd/J17qeDnXh8CL2A=="], 
"universalify": ["universalify@2.0.1", "", {}, "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw=="], @@ -735,6 +798,10 @@ "validate-npm-package-license": ["validate-npm-package-license@3.0.4", "", { "dependencies": { "spdx-correct": "^3.0.0", "spdx-expression-parse": "^3.0.0" } }, "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew=="], + "vfile": ["vfile@6.0.3", "", { "dependencies": { "@types/unist": "^3.0.0", "vfile-message": "^4.0.0" } }, "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q=="], + + "vfile-message": ["vfile-message@4.0.3", "", { "dependencies": { "@types/unist": "^3.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw=="], + "web-worker": ["web-worker@1.2.0", "", {}, "sha512-PgF341avzqyx60neE9DD+XS26MMNMoUQRz9NOZwW32nPQrF6p77f1htcnjBSEV8BGMKZ16choqUG4hyI0Hx7mA=="], "whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="], diff --git a/package.json b/package.json index 5027f1a..22d2790 100644 --- a/package.json +++ b/package.json @@ -14,12 +14,13 @@ }, "packageManager": "bun@1.3.12", "dependencies": { - "markdown-it": "^14.1.1", "ollama": "^0.6.3", "p-limit": "^7.3.0", "pdfmake": "^0.3.7", "pino": "^10.3.1", "pino-loki": "^3.0.0", + "remark-parse": "^11.0.0", + "unified": "^11.0.5", "zod": "^4.3.6" }, "devDependencies": { diff --git a/scripts/cv-pdf.ts b/scripts/cv-pdf.ts index fec9262..4b76fb1 100644 --- a/scripts/cv-pdf.ts +++ b/scripts/cv-pdf.ts @@ -1,19 +1,22 @@ #!/usr/bin/env bun import path from "node:path" -import MarkdownIt from "markdown-it" +import type { PhrasingContent, Root, RootContent } from "mdast" import pdfmake from "pdfmake" import type { Content, TDocumentDefinitions, TFontDictionary } from "pdfmake/interfaces" 
+import remarkParse from "remark-parse" +import { unified } from "unified" const BODY_FONT = "Helvetica" const HEADER_FONT = "Times" +// pdfmake exposes these methods at runtime but the published types don't include them type PdfmakeServer = typeof pdfmake & { setUrlAccessPolicy(callback: (url: string) => boolean): void setFonts(fonts: TFontDictionary): void } function assertFontRegistered(family: string, fonts: TFontDictionary) { - if (fonts[family] == null) { + if (!(family in fonts)) { throw new Error(`Font family "${family}" is not registered. Available: ${Object.keys(fonts).join(", ")}`) } } @@ -49,13 +52,7 @@ const pdfm = pdfmake as PdfmakeServer pdfm.setFonts(pdfFonts) assertFontRegistered(BODY_FONT, pdfFonts) assertFontRegistered(HEADER_FONT, pdfFonts) -void pdfm.setUrlAccessPolicy(() => false) - -const md = new MarkdownIt({ - html: false, - linkify: false, - typographer: true -}) +pdfm.setUrlAccessPolicy(() => false) const markdownStyles = { h1: { font: HEADER_FONT, fontSize: 22, bold: true, margin: [0, 0, 0, 4] as [number, number, number, number] }, @@ -65,8 +62,10 @@ const markdownStyles = { ul: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] } } +type TextRun = { text: string; bold?: boolean; italics?: boolean; link?: string } + function simplifyRuns( - runs: Array<{ text: string; bold?: boolean; italics?: boolean; link?: string }> + runs: TextRun[] ) { if (runs.length === 0) return "" if (runs.length === 1 && runs[0].bold !== true && runs[0].italics !== true && runs[0].link === undefined) { @@ -75,122 +74,73 @@ function simplifyRuns( return runs } -function parseInlineRuns(token: { children?: Array<{ type: string; content: string; attrGet?: (name: string) => string | null }> }) { - const runs: Array<{ text: string; bold?: boolean; italics?: boolean; link?: string }> = [] - if (token.children == null) return runs - - let strongDepth = 0 - let emDepth = 0 - let activeLink: string | undefined +function parseInlineRuns(nodes: 
PhrasingContent[], activeStyles: Omit = {}) { + const runs: TextRun[] = [] - for (const child of token.children) { - if (child.type === "strong_open") { - strongDepth += 1 - continue - } - if (child.type === "strong_close") { - strongDepth = Math.max(0, strongDepth - 1) + for (const node of nodes) { + if (node.type === "text" || node.type === "inlineCode") { + if (node.value) { + runs.push({ text: node.value, ...activeStyles }) + } continue } - if (child.type === "em_open") { - emDepth += 1 + + if (node.type === "break") { + runs.push({ text: "\n", ...activeStyles }) continue } - if (child.type === "em_close") { - emDepth = Math.max(0, emDepth - 1) + + if (node.type === "strong") { + runs.push(...parseInlineRuns(node.children, { ...activeStyles, bold: true })) continue } - if (child.type === "link_open") { - activeLink = child.attrGet?.("href") ?? undefined + + if (node.type === "emphasis") { + runs.push(...parseInlineRuns(node.children, { ...activeStyles, italics: true })) continue } - if (child.type === "link_close") { - activeLink = undefined + + if (node.type === "link") { + runs.push(...parseInlineRuns(node.children, { ...activeStyles, link: node.url })) continue } - - const text = child.type === "softbreak" || child.type === "hardbreak" ? "\n" : child.content - if (!text) continue - - runs.push({ - text, - bold: strongDepth > 0 ? true : undefined, - italics: emDepth > 0 ? 
true : undefined, - link: activeLink - }) } return runs } function parseCvMarkdown(markdown: string): Content[] { - const tokens = md.parse(markdown, {}) + const tree = unified().use(remarkParse).parse(markdown) const content: Content[] = [] - let pendingHeadingLevel: number | null = null - const listStack: Array<{ items: Array>; currentItem: Array<{ text: string; bold?: boolean; italics?: boolean; link?: string }> }> = [] - - for (const token of tokens) { - if (token.type === "heading_open") { - pendingHeadingLevel = Number.parseInt(token.tag.slice(1), 10) - continue - } - - if (token.type === "heading_close") { - pendingHeadingLevel = null - continue - } - - if (token.type === "bullet_list_open") { - listStack.push({ items: [], currentItem: [] }) + for (const block of tree.children) { + if (block.type === "heading") { + const runs = parseInlineRuns(block.children) + if (runs.length === 0) continue + const headingStyle = block.depth === 1 ? "h1" : block.depth === 2 ? "h2" : "h3" + content.push({ text: simplifyRuns(runs), style: headingStyle }) continue } - if (token.type === "bullet_list_close") { - const list = listStack.pop() - if (list == null) continue - if (listStack.length > 0) continue - const listNode: Content = { ul: list.items, style: "ul" } - content.push(listNode) + if (block.type === "paragraph") { + const runs = parseInlineRuns(block.children) + if (runs.length === 0) continue + content.push({ text: simplifyRuns(runs), style: "p" }) continue } - if (token.type === "list_item_open") { - if (listStack.length > 0) { - listStack[listStack.length - 1].currentItem = [] - } - continue + if (block.type === "list") { + const items = block.children.map((item) => { + const itemRuns: TextRun[] = [] + for (const child of item.children) { + if (child.type !== "paragraph") continue + if (itemRuns.length > 0) itemRuns.push({ text: "\n" }) + itemRuns.push(...parseInlineRuns(child.children)) + } + return simplifyRuns(itemRuns) + }) + content.push({ ul: items, style: 
"ul" }) } - - if (token.type === "list_item_close") { - if (listStack.length === 0) continue - const currentList = listStack[listStack.length - 1] - currentList.items.push(simplifyRuns(currentList.currentItem)) - currentList.currentItem = [] - continue - } - - if (token.type !== "inline") continue - - const inlineRuns = parseInlineRuns(token) - if (inlineRuns.length === 0) continue - - if (pendingHeadingLevel !== null) { - const headingStyle = pendingHeadingLevel === 1 ? "h1" : pendingHeadingLevel === 2 ? "h2" : "h3" - content.push({ text: simplifyRuns(inlineRuns), style: headingStyle }) - continue - } - - if (listStack.length > 0) { - const currentList = listStack[listStack.length - 1] - currentList.currentItem.push(...inlineRuns) - continue - } - - content.push({ - text: simplifyRuns(inlineRuns), - style: "p" - }) } return content @@ -218,13 +168,12 @@ async function main() { const markdown = await inputFile.text() const pdfContent = parseCvMarkdown(markdown) - const docDefinition: TDocumentDefinitions = { pageMargins: [48, 48, 48, 48], content: pdfContent, defaultStyle: { font: BODY_FONT, - lineHeight: 1.35 + lineHeight: 1.45 }, styles: markdownStyles } From 54b1303ef8f7df409da46140a3442ee671b5c069 Mon Sep 17 00:00:00 2001 From: Andras Serfozo Date: Thu, 16 Apr 2026 04:53:16 +0100 Subject: [PATCH 5/5] pdf lib --- scripts/cv-pdf.ts | 195 ++++------------------------------------------ src/lib/pdf.ts | 160 +++++++++++++++++++++++++++++++++++++ tsconfig.json | 2 +- 3 files changed, 178 insertions(+), 179 deletions(-) create mode 100644 src/lib/pdf.ts diff --git a/scripts/cv-pdf.ts b/scripts/cv-pdf.ts index 4b76fb1..4eda68b 100644 --- a/scripts/cv-pdf.ts +++ b/scripts/cv-pdf.ts @@ -1,186 +1,25 @@ #!/usr/bin/env bun import path from "node:path" -import type { PhrasingContent, Root, RootContent } from "mdast" -import pdfmake from "pdfmake" -import type { Content, TDocumentDefinitions, TFontDictionary } from "pdfmake/interfaces" -import remarkParse from "remark-parse" 
-import { unified } from "unified" +import { renderCvPdf } from "#/lib/pdf" -const BODY_FONT = "Helvetica" -const HEADER_FONT = "Times" - -// pdfmake exposes these methods at runtime but the published types don't include them -type PdfmakeServer = typeof pdfmake & { - setUrlAccessPolicy(callback: (url: string) => boolean): void - setFonts(fonts: TFontDictionary): void -} - -function assertFontRegistered(family: string, fonts: TFontDictionary) { - if (!(family in fonts)) { - throw new Error(`Font family "${family}" is not registered. Available: ${Object.keys(fonts).join(", ")}`) - } -} - -const pdfFonts: TFontDictionary = { - Courier: { - normal: "Courier", - bold: "Courier-Bold", - italics: "Courier-Oblique", - bolditalics: "Courier-BoldOblique" - }, - Helvetica: { - normal: "Helvetica", - bold: "Helvetica-Bold", - italics: "Helvetica-Oblique", - bolditalics: "Helvetica-BoldOblique" - }, - Times: { - normal: "Times-Roman", - bold: "Times-Bold", - italics: "Times-Italic", - bolditalics: "Times-BoldItalic" - }, - Symbol: { - normal: "Symbol" - }, - ZapfDingbats: { - normal: "ZapfDingbats" - } -} - -const pdfm = pdfmake as PdfmakeServer -pdfm.setFonts(pdfFonts) -assertFontRegistered(BODY_FONT, pdfFonts) -assertFontRegistered(HEADER_FONT, pdfFonts) -pdfm.setUrlAccessPolicy(() => false) - -const markdownStyles = { - h1: { font: HEADER_FONT, fontSize: 22, bold: true, margin: [0, 0, 0, 4] as [number, number, number, number] }, - h2: { font: HEADER_FONT, fontSize: 13, bold: true, margin: [0, 12, 0, 6] as [number, number, number, number] }, - h3: { font: HEADER_FONT, fontSize: 11, bold: true, margin: [0, 8, 0, 2] as [number, number, number, number] }, - p: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] }, - ul: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] } +const args = process.argv.slice(2).filter((a) => a !== "--") +if (args.length < 1) { + console.error("Usage: bun scripts/cv-pdf.ts [output.pdf]") + process.exit(1) } 
-type TextRun = { text: string; bold?: boolean; italics?: boolean; link?: string } - -function simplifyRuns( - runs: TextRun[] -) { - if (runs.length === 0) return "" - if (runs.length === 1 && runs[0].bold !== true && runs[0].italics !== true && runs[0].link === undefined) { - return runs[0].text - } - return runs -} - -function parseInlineRuns(nodes: PhrasingContent[], activeStyles: Omit = {}) { - const runs: TextRun[] = [] - - for (const node of nodes) { - if (node.type === "text" || node.type === "inlineCode") { - if (node.value) { - runs.push({ text: node.value, ...activeStyles }) - } - continue - } - - if (node.type === "break") { - runs.push({ text: "\n", ...activeStyles }) - continue - } - - if (node.type === "strong") { - runs.push(...parseInlineRuns(node.children, { ...activeStyles, bold: true })) - continue - } - - if (node.type === "emphasis") { - runs.push(...parseInlineRuns(node.children, { ...activeStyles, italics: true })) - continue - } - - if (node.type === "link") { - runs.push(...parseInlineRuns(node.children, { ...activeStyles, link: node.url })) - continue - } - } - - return runs -} - -function parseCvMarkdown(markdown: string): Content[] { - const tree = unified().use(remarkParse).parse(markdown) - const content: Content[] = [] - - for (const block of tree.children) { - if (block.type === "heading") { - const runs = parseInlineRuns(block.children) - if (runs.length === 0) continue - const headingStyle = block.depth === 1 ? "h1" : block.depth === 2 ? 
"h2" : "h3" - content.push({ text: simplifyRuns(runs), style: headingStyle }) - continue - } - - if (block.type === "paragraph") { - const runs = parseInlineRuns(block.children) - if (runs.length === 0) continue - content.push({ text: simplifyRuns(runs), style: "p" }) - continue - } - - if (block.type === "list") { - const items = block.children.map((item) => { - const itemRuns: TextRun[] = [] - for (const child of item.children) { - if (child.type !== "paragraph") continue - if (itemRuns.length > 0) itemRuns.push({ text: "\n" }) - itemRuns.push(...parseInlineRuns(child.children)) - } - return simplifyRuns(itemRuns) - }) - content.push({ ul: items, style: "ul" }) - } - } - - return content -} - -async function main() { - const args = process.argv.slice(2).filter((a) => a !== "--") - if (args.length < 1) { - console.error("Usage: bun scripts/cv-pdf.ts [output.pdf]") - process.exit(1) - } - - const inputPath = path.resolve(args[0]) - const outputPath = - args[1] !== undefined - ? path.resolve(args[1]) - : path.join(path.dirname(inputPath), `${path.basename(inputPath, path.extname(inputPath))}.pdf`) - - const inputFile = Bun.file(inputPath) - if (!(await inputFile.exists())) { - console.error(`Input not found: ${inputPath}`) - process.exit(1) - } - - const markdown = await inputFile.text() - const pdfContent = parseCvMarkdown(markdown) - - const docDefinition: TDocumentDefinitions = { - pageMargins: [48, 48, 48, 48], - content: pdfContent, - defaultStyle: { - font: BODY_FONT, - lineHeight: 1.45 - }, - styles: markdownStyles - } +const inputPath = path.resolve(args[0]) +const outputPath = + args[1] !== undefined + ? 
path.resolve(args[1]) + : path.join(path.dirname(inputPath), `${path.basename(inputPath, path.extname(inputPath))}.pdf`) - const pdf = pdfm.createPdf(docDefinition) - await pdf.write(outputPath) - console.error(`Wrote ${outputPath}`) +const inputFile = Bun.file(inputPath) +if (!(await inputFile.exists())) { + console.error(`Input not found: ${inputPath}`) + process.exit(1) } -await main() +const markdown = await inputFile.text() +await renderCvPdf(markdown, outputPath) +console.error(`Wrote ${outputPath}`) diff --git a/src/lib/pdf.ts b/src/lib/pdf.ts new file mode 100644 index 0000000..9c7faeb --- /dev/null +++ b/src/lib/pdf.ts @@ -0,0 +1,160 @@ +import type { PhrasingContent } from "mdast" +import pdfmake from "pdfmake" +import type { Content, TDocumentDefinitions, TFontDictionary } from "pdfmake/interfaces" +import remarkParse from "remark-parse" +import { unified } from "unified" + +const BODY_FONT = "Helvetica" +const HEADER_FONT = "Times" +const SUBHEADER_FONT = "Courier" + +type PdfmakeServer = typeof pdfmake & { + setUrlAccessPolicy(callback: (url: string) => boolean): void + setFonts(fonts: TFontDictionary): void +} + +function assertFontRegistered(family: string, fonts: TFontDictionary) { + if (!(family in fonts)) { + throw new Error(`Font family "${family}" is not registered. 
Available: ${Object.keys(fonts).join(", ")}`) + } +} + +const pdfFonts: TFontDictionary = { + Courier: { + normal: "Courier", + bold: "Courier-Bold", + italics: "Courier-Oblique", + bolditalics: "Courier-BoldOblique" + }, + Helvetica: { + normal: "Helvetica", + bold: "Helvetica-Bold", + italics: "Helvetica-Oblique", + bolditalics: "Helvetica-BoldOblique" + }, + Times: { + normal: "Times-Roman", + bold: "Times-Bold", + italics: "Times-Italic", + bolditalics: "Times-BoldItalic" + } +} + +const pdfm = pdfmake as PdfmakeServer +pdfm.setFonts(pdfFonts) +assertFontRegistered(BODY_FONT, pdfFonts) +assertFontRegistered(HEADER_FONT, pdfFonts) +assertFontRegistered(SUBHEADER_FONT, pdfFonts) +pdfm.setUrlAccessPolicy(() => false) + +const markdownStyles = { + h1: { font: HEADER_FONT, fontSize: 22, bold: true, margin: [0, 0, 0, 4] as [number, number, number, number] }, + h2: { + font: SUBHEADER_FONT, + color: "#2079c7", + fontSize: 10, + bold: true, + margin: [0, 12, 0, 6] as [number, number, number, number] + }, + h3: { font: HEADER_FONT, fontSize: 11, bold: true, margin: [0, 8, 0, 2] as [number, number, number, number] }, + p: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] }, + ul: { fontSize: 10, margin: [0, 0, 0, 6] as [number, number, number, number] } +} + +type TextRun = { text: string; bold?: boolean; italics?: boolean; link?: string } + +function simplifyRuns(runs: TextRun[]) { + if (runs.length === 0) return "" + if (runs.length === 1 && runs[0].bold !== true && runs[0].italics !== true && runs[0].link === undefined) { + return runs[0].text + } + return runs +} + +function parseInlineRuns(nodes: PhrasingContent[], activeStyles: Omit<TextRun, "text"> = {}) { + const runs: TextRun[] = [] + + for (const node of nodes) { + if (node.type === "text" || node.type === "inlineCode") { + if (node.value) { + runs.push({ text: node.value, ...activeStyles }) + } + continue + } + + if (node.type === "break") { + runs.push({ text: "\n", ...activeStyles }) + continue + } + +
if (node.type === "strong") { + runs.push(...parseInlineRuns(node.children, { ...activeStyles, bold: true })) + continue + } + + if (node.type === "emphasis") { + runs.push(...parseInlineRuns(node.children, { ...activeStyles, italics: true })) + continue + } + + if (node.type === "link") { + runs.push(...parseInlineRuns(node.children, { ...activeStyles, link: node.url })) + } + } + + return runs +} + +function parseCvMarkdown(markdown: string): Content[] { + const tree = unified().use(remarkParse).parse(markdown) + const content: Content[] = [] + + for (const block of tree.children) { + if (block.type === "heading") { + const runs = parseInlineRuns(block.children) + if (runs.length === 0) continue + const headingStyle = block.depth === 1 ? "h1" : block.depth === 2 ? "h2" : "h3" + content.push({ text: simplifyRuns(runs), style: headingStyle }) + continue + } + + if (block.type === "paragraph") { + const runs = parseInlineRuns(block.children) + if (runs.length === 0) continue + content.push({ text: simplifyRuns(runs), style: "p" }) + continue + } + + if (block.type === "list") { + const items = block.children.map(item => { + const itemRuns: TextRun[] = [] + for (const child of item.children) { + if (child.type !== "paragraph") continue + if (itemRuns.length > 0) itemRuns.push({ text: "\n" }) + itemRuns.push(...parseInlineRuns(child.children)) + } + return simplifyRuns(itemRuns) + }) + content.push({ ul: items, style: "ul" }) + } + } + + return content +} + +export async function renderCvPdf(markdown: string, outputPath: string) { + const pdfContent = parseCvMarkdown(markdown) + + const docDefinition: TDocumentDefinitions = { + pageMargins: [48, 48, 48, 48], + content: pdfContent, + defaultStyle: { + font: BODY_FONT, + lineHeight: 1.45 + }, + styles: markdownStyles + } + + const pdf = pdfm.createPdf(docDefinition) + await pdf.write(outputPath) +} diff --git a/tsconfig.json b/tsconfig.json index 979b569..e62d851 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,6 
+16,6 @@ "#/*": ["./src/*"] } }, - "include": ["src/**/*.ts", "tests/**/*.ts"], + "include": ["src/**/*.ts", "scripts/**/*.ts", "tests/**/*.ts"], "exclude": ["node_modules"] }