mupdf webassembly 试用

发布时间: 2023-11-27 20:16:05 作者: 荣锋亮

mupdf 是基于 C 语言编写的 pdf 读写以及预览工具,同时也提供了 sdk,官方基于 emscripten 开发了一个
webassembly 的扩展

参考使用

 
// Read demo.pdf from disk, open it with mupdf, print the page count, then
// print the structured text of the page at index 0 as JSON.
const fs = require("fs");
const mupdf = require("mupdf");

const pdfBytes = fs.readFileSync("demo.pdf");
const pdfDoc = mupdf.Document.openDocument(pdfBytes, "application/pdf");
console.log(pdfDoc.countPages());

const firstPage = pdfDoc.loadPage(0);
const structuredText = firstPage.toStructuredText();
const pageJson = structuredText.asJSON();
console.log(pageJson);

wasm 生成简单说明

  • 项目结构

mupdf.c 为需要暴露的c 服务,mupdf.js 是基于wasm 实现的方便pdf 操作的工具方法(支持web 以及node 运行)

  • 文档打开
    调用了暴露出来的 webassembly 方法
 
static openDocument(from, magic) {
    checkType(magic, "string")
 
    let pointer = 0
 
    if (from instanceof ArrayBuffer || from instanceof Uint8Array)
        from = new Buffer(from)
    if (from instanceof Buffer)
       //
        pointer = libmupdf._wasm_open_document_with_buffer(STRING(magic), from)
    else if (from instanceof Stream)
        pointer = libmupdf._wasm_open_document_with_stream(STRING(magic), from)
    else
        throw new Error("not a Buffer or Stream")
 
    let pdf_ptr = libmupdf._wasm_pdf_document_from_fz_document(pointer)
    if (pdf_ptr)
        return new PDFDocument(pointer)
    return new Document(pointer)
}

c 实现

// --- Document ---
 
// Exported wrapper that opens a document from an in-memory buffer.
// Hands `magic` and `buffer` to fz_open_document_with_buffer through the
// POINTER macro; the declared return type is fz_document *.
// NOTE(review): EXPORT and POINTER are defined outside this excerpt.
// Presumably POINTER supplies the context argument, the error handling and
// the return statement (the body has no explicit `return`) — confirm against
// the macro definitions.
EXPORT
fz_document * wasm_open_document_with_buffer(char *magic, fz_buffer *buffer)
{
    POINTER(fz_open_document_with_buffer, magic, buffer)
}
  • 构建
    基于 emscripten 构建,同时使用了不少编译参数
 
#!/bin/bash
# Two-stage build: first the MuPDF core static libraries for the wasm target
# (via make), then lib/mupdf.c linked against them with emcc to produce
# lib/mupdf-wasm.js.
 
# MUPDF_DIR: root of the MuPDF source tree, relative to the directory this
# script is run from. EMSDK_DIR: where the Emscripten SDK is installed.
MUPDF_DIR=../..
EMSDK_DIR=/opt/emsdk
 
# Extra compiler flags handed to the core build as XCFLAGS: -Os plus a set of
# -D defines. Each FZ_ENABLE_*=0 define sets the named feature macro to 0.
# NOTE(review): TOFU / TOFU_CJK presumably leave out bundled fonts to shrink
# the binary — confirm against MuPDF's build configuration docs.
MUPDF_OPTS="-Os -DTOFU -DTOFU_CJK -DFZ_ENABLE_XPS=0 -DFZ_ENABLE_SVG=0 -DFZ_ENABLE_CBZ=0 -DFZ_ENABLE_IMG=0 -DFZ_ENABLE_HTML=0 -DFZ_ENABLE_EPUB=0 -DFZ_ENABLE_JS=0 -DFZ_ENABLE_OCR_OUTPUT=0 -DFZ_ENABLE_DOCX_OUTPUT=0 -DFZ_ENABLE_ODT_OUTPUT=0"
 
# Source the emsdk environment script into this shell so its settings apply
# to the commands below.
# NOTE(review): EMSDK_QUIET=1 presumably suppresses that script's banner —
# confirm. The bare `echo` just prints a blank separator line.
export EMSDK_QUIET=1
source $EMSDK_DIR/emsdk_env.sh
echo
 
# Stage 1: build the `libs` target of the MuPDF makefile with 4 parallel jobs,
# as a release build with OS=wasm, passing the flags collected above.
echo BUILDING MUPDF CORE
make -j4 -C $MUPDF_DIR build=release OS=wasm XCFLAGS="$MUPDF_OPTS" libs
echo
 
# Stage 2: compile lib/mupdf.c and link it with the two static libraries from
# stage 1 into lib/mupdf-wasm.js. The -s settings are Emscripten link options;
# by name they cover malloc abort behaviour, memory growth, modularized output
# exported as "libmupdf", Node exit handling, synchronous wasm compilation and
# the runtime helpers made available to JS.
# NOTE(review): exact semantics of each -s setting should be checked against
# the Emscripten settings reference for the installed SDK version.
# No comments are placed inside the command because of the `\` continuations.
echo BUILDING MUPDF WASM
emcc -o lib/mupdf-wasm.js -I $MUPDF_DIR/include lib/mupdf.c \
    --no-entry \
    -sABORTING_MALLOC=0 \
    -sALLOW_MEMORY_GROWTH=1 \
    -sMODULARIZE=1 \
    -sNODEJS_CATCH_EXIT=0 \
    -sWASM_ASYNC_COMPILATION=0 \
    -sEXPORT_NAME='"libmupdf"' \
    -sEXPORTED_RUNTIME_METHODS='["ccall","UTF8ToString","lengthBytesUTF8","stringToUTF8"]' \
     $MUPDF_DIR/build/wasm/release/libmupdf.a \
     $MUPDF_DIR/build/wasm/release/libmupdf-third.a
echo

说明

mupdf 对于webassembly 的支持实现上是很值得学习的,属于一个比较完整的emscripten集成项目

参考资料

https://github.com/ArtifexSoftware/mupdf
https://mupdf.readthedocs.io/en/latest/quick-start-guide.html
https://mupdf.readthedocs.io/en/latest/mupdf-wasm.html
https://github.com/ArtifexSoftware/mupdf/blob/master/platform/wasm/lib/mupdf.js
https://github.com/ArtifexSoftware/mupdf/blob/master/platform/wasm/lib/mupdf.c
https://mupdf.com/wasm/demo/view.html?file=../../docs/mupdf_explored.pdf#page2
https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#interacting-with-code-ccall-cwrap