-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
feat(build): add lightweight libreoffice build extension #3164
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| "@trigger.dev/build": patch | ||
| --- | ||
|
|
||
| feat(build): add libreoffice build extension for headless docx/pptx to PDF conversion |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| import { BuildManifest } from "@trigger.dev/core/v3"; | ||
| import { BuildContext, BuildExtension } from "@trigger.dev/core/v3/build"; | ||
|
|
||
/**
 * Options accepted by the `libreoffice()` build extension.
 */
export type LibreOfficeOptions = {
  /**
   * LibreOffice components to install. Each entry selects the apt package
   * `libreoffice-<entry>`. When omitted, `["writer", "impress"]` is used so
   * that docx and pptx files are supported.
   *
   * | entry       | package             | typical formats |
   * | ----------- | ------------------- | --------------- |
   * | `"writer"`  | libreoffice-writer  | .doc / .docx    |
   * | `"impress"` | libreoffice-impress | .ppt / .pptx    |
   * | `"calc"`    | libreoffice-calc    | .xls / .xlsx    |
   * | `"draw"`    | libreoffice-draw    | .odg            |
   * | `"math"`    | libreoffice-math    | formula editor  |
   */
  components?: ("writer" | "impress" | "calc" | "draw" | "math")[];

  /**
   * Extra font packages to install in addition to the two that are always
   * installed (`fonts-liberation` and `fonts-dejavu-core`).
   *
   * @example ["fonts-noto", "fonts-freefont-ttf"]
   */
  extraFonts?: string[];
};
|
|
||
/**
 * Create the LibreOffice build extension.
 *
 * @param options - Component and font selection; an empty object is used when omitted.
 * @returns A build extension instance to list under `build.extensions`.
 */
export function libreoffice(options: LibreOfficeOptions = {}): BuildExtension {
  const extension: BuildExtension = new LibreOfficeExtension(options);
  return extension;
}
|
|
||
/**
 * Build extension that registers an image layer installing the selected
 * LibreOffice components and fonts via apt, and exposes the binary location
 * through the `LIBREOFFICE_PATH` deploy environment variable.
 */
class LibreOfficeExtension implements BuildExtension {
  public readonly name = "LibreOfficeExtension";

  constructor(private readonly options: LibreOfficeOptions = {}) {}

  /**
   * Adds the "libreoffice" layer to the build. Does nothing when the build
   * target is "dev".
   */
  async onBuildComplete(context: BuildContext, manifest: BuildManifest) {
    if (context.target !== "dev") {
      const components = this.options.components ?? ["writer", "impress"];
      const extraFonts = this.options.extraFonts ?? [];

      // Component packages first, then fonts.
      const packages: string[] = [];
      for (const component of components) {
        packages.push(`libreoffice-${component}`);
      }
      // fonts-liberation: free equivalents of Times New Roman, Arial and
      // Courier New, needed for faithful rendering of most Office documents.
      // fonts-dejavu-core: broad Unicode coverage for international content.
      packages.push("fonts-liberation", "fonts-dejavu-core", ...extraFonts);

      // One package per line, joined with a shell line continuation.
      const packageList = packages.join(" \\\n ");

      context.logger.debug(`Adding ${this.name} to the build`, { components });

      // --no-install-recommends keeps X11 desktop packages out of the image;
      // LibreOffice's --headless flag converts to PDF without a display.
      const installInstruction = `RUN apt-get update && apt-get install -y --no-install-recommends \\\n ${packageList} \\\n && rm -rf /var/lib/apt/lists/*`;

      context.addLayer({
        id: "libreoffice",
        image: {
          instructions: [installInstruction],
        },
        deploy: {
          env: {
            LIBREOFFICE_PATH: "/usr/bin/libreoffice",
          },
          override: true,
        },
      });
    }
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| { | ||
| "name": "references-libreoffice", | ||
| "private": true, | ||
| "type": "module", | ||
| "devDependencies": { | ||
| "trigger.dev": "workspace:*" | ||
| }, | ||
| "dependencies": { | ||
| "@trigger.dev/build": "workspace:*", | ||
| "@trigger.dev/sdk": "workspace:*" | ||
| }, | ||
| "scripts": { | ||
| "dev": "trigger dev", | ||
| "deploy": "trigger deploy" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
import { task } from "@trigger.dev/sdk";
import { execFile } from "node:child_process";
import {
  mkdirSync,
  mkdtempSync,
  readFileSync,
  rmSync,
  unlinkSync,
  writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { promisify } from "node:util";
|
|
||
const execFileAsync = promisify(execFile);

/** Extension given to the downloaded file when the URL does not reveal one. */
const DEFAULT_INPUT_EXTENSION = "docx";

/**
 * Derive the file extension (without the dot) from a document URL.
 *
 * Only the last segment of the URL's pathname is inspected, so query strings
 * and dots in directory names are ignored. When that segment does not end in
 * `.<alphanumerics>`, `DEFAULT_INPUT_EXTENSION` is returned.
 *
 * @param documentUrl - Absolute URL of the source document.
 * @returns The extension, e.g. `"pptx"`, or `"docx"` when none can be found.
 * @throws TypeError if `documentUrl` is not a valid absolute URL.
 */
function deriveInputExtension(documentUrl: string): string {
  const { pathname } = new URL(documentUrl);
  const lastSegment = pathname.slice(pathname.lastIndexOf("/") + 1);
  // Restricting the match to alphanumerics guarantees the result can never
  // contain a path separator, so it is safe to embed in a file name.
  const match = /\.([A-Za-z0-9]+)$/.exec(lastSegment);
  return match?.[1] ?? DEFAULT_INPUT_EXTENSION;
}

/**
 * Convert a .docx or .pptx file (supplied as a URL) to PDF using LibreOffice
 * running in headless mode — no X11 display required.
 *
 * Requires the `libreoffice()` build extension in trigger.config.ts so that
 * LibreOffice is available inside the deployed container.
 *
 * The task downloads the document into a private temporary directory, runs
 * the conversion there, returns the PDF as base64, and always removes the
 * temporary directory afterwards.
 */
export const libreofficeConvert = task({
  id: "libreoffice-convert",
  run: async (payload: {
    /** Public URL of the .docx or .pptx file to convert. */
    documentUrl: string;
    /** Optional output filename (without extension). Defaults to "output". */
    outputName?: string;
  }) => {
    const { documentUrl, outputName = "output" } = payload;

    // Parse the URL before touching the filesystem so that an invalid URL
    // fails without leaving a temp directory behind.
    const ext = deriveInputExtension(documentUrl);

    // mkdtempSync appends random characters to the prefix, so runs that start
    // in the same millisecond still get distinct directories.
    const workDir = mkdtempSync(join(tmpdir(), "lo-"));
    const inputPath = join(workDir, `input.${ext}`);
    // LibreOffice names the output after the input file stem.
    const outputPath = join(workDir, "input.pdf");

    try {
      // 1. Download the source document.
      const response = await fetch(documentUrl);
      if (!response.ok) {
        throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
      }
      const arrayBuffer = await response.arrayBuffer();
      writeFileSync(inputPath, Buffer.from(arrayBuffer));

      // 2. Convert to PDF using LibreOffice headless.
      //    --norestore prevents LibreOffice from showing a recovery dialog.
      //    --outdir directs the output file to our working directory.
      const libreofficeBin = process.env.LIBREOFFICE_PATH ?? "libreoffice";
      await execFileAsync(libreofficeBin, [
        "--headless",
        "--norestore",
        "--convert-to",
        "pdf",
        "--outdir",
        workDir,
        inputPath,
      ]);

      // 3. Read the resulting PDF.
      const pdfBuffer = readFileSync(outputPath);

      return {
        outputName: `${outputName}.pdf`,
        sizeBytes: pdfBuffer.byteLength,
        // Return base64 so the result is JSON-serialisable.
        // In production you would upload pdfBuffer to S3 / R2 instead.
        base64: pdfBuffer.toString("base64"),
      };
    } finally {
      // Remove the whole working directory (input, output and anything else
      // LibreOffice wrote there). Cleanup is best-effort: a failure here must
      // not mask the conversion result or the original error.
      try {
        rmSync(workDir, { recursive: true, force: true });
      } catch {
        // Intentionally ignored.
      }
    }
  },
});
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| import { defineConfig } from "@trigger.dev/sdk/v3"; | ||
|
Contributor
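There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🚩 Reference config uses the deprecated `@trigger.dev/sdk/v3` import path (at line 1 of this file), whereas the task file in this reference project imports from `@trigger.dev/sdk`. Was this helpful? React with 👍 or 👎 to provide feedback. |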
||
| import { libreoffice } from "@trigger.dev/build/extensions/libreoffice"; | ||
|
|
||
// Reference project configuration: registers the LibreOffice build extension
// with its default options.
const config = defineConfig({
  project: "proj_libreoffice_example",
  build: {
    extensions: [
      // Installs libreoffice-writer and libreoffice-impress (headless, no X11)
      // along with fonts-liberation and fonts-dejavu-core for accurate rendering.
      libreoffice(),
    ],
  },
});

export default config;
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| { | ||
| "compilerOptions": { | ||
| "target": "ES2023", | ||
| "module": "Node16", | ||
| "moduleResolution": "Node16", | ||
| "esModuleInterop": true, | ||
| "strict": true, | ||
| "skipLibCheck": true, | ||
| "customConditions": ["@triggerdotdev/source"], | ||
| "lib": ["DOM", "DOM.Iterable"], | ||
| "noEmit": true | ||
| }, | ||
| "include": ["./src/**/*.ts", "trigger.config.ts"] | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔴 URL extension extraction produces invalid file path when URL has no file extension
When the `documentUrl` has no dot-separated file extension in its pathname (e.g., `https://api.example.com/documents/report` or a signed URL like `https://storage.example.com/abc123`), the extension extraction at line 33 produces a broken path that causes a runtime crash.

Root Cause

The expression `urlPath.split(".").pop()` always returns a non-empty string (the last element of the split array), so the `?? "docx"` fallback never triggers for URLs without a file extension.

For example, given `documentUrl = "https://api.example.com/documents/report"`:

- `urlPath` = `"/documents/report"`
- `urlPath.split(".")` = `["/documents/report"]`
- `.pop()` = `"/documents/report"` — a truthy string, not `null`/`undefined`
- `ext` = `"/documents/report"`
- `inputPath` = `join(workDir, "input./documents/report")` → e.g. `/tmp/lo-123/input./documents/report`

The resulting `inputPath` contains intermediate directory components (`/documents/`) that don't exist, so `writeFileSync(inputPath, ...)` at line 45 throws `ENOENT`. Even URLs with dots in non-extension positions (e.g. `https://api.v2.example.com/file`) would extract garbage like `"com/file"` containing a `/`.

Impact: The task crashes for any URL whose pathname doesn't end with a recognizable `.ext` suffix. Since many document-serving APIs and signed URLs lack file extensions, this is a common real-world scenario. Users are likely to copy this reference project as a template.

Was this helpful? React with 👍 or 👎 to provide feedback.