基于 tesseract-wasm + fastify 开发一个简单的中文 OCR 服务

发布时间 2023-11-03 20:34:36作者: 荣锋亮

以前我简单介绍过tesseract-wasm,基于此wasm 包我们可以直接基于nodejs 调用tesseract 的方法实现ocr 处理,以下是一个简单的demo
基于fastify 开发了一个简单的api,同时包含了一个简单的web 可以测试

项目结构

  • package.json
 
{
  "name": "tesseract",
  "version": "1.0.0",
  "main": "index.js",
  "license": "MIT",
  "dependencies": {
    "@fastify/static": "^6.12.0",
    "fastify": "^4.24.3",
    "fastify-file-upload": "^4.0.0",
    "sharp": "^0.32.6",
    "tesseract-wasm": "^0.10.0"
  },
  "scripts": {
    "dev": "node  demo.mjs"
  }
}
  • demo.mjs
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
 
import path from "node:path"
import { fastify } from "fastify";
import { createOCREngine } from "tesseract-wasm";
import { loadWasmBinary } from "tesseract-wasm/node";
import sharp from "sharp";
import fileUpload from 'fastify-file-upload'
import  {fastifyStatic} from '@fastify/static'
 
const __filename = fileURLToPath(import.meta.url);
 
const __dirname = path.dirname(__filename);
 
/**
 * Decode an image with sharp and return its raw pixels plus dimensions.
 *
 * An alpha channel is forced via `ensureAlpha()` before the raw buffer is
 * extracted; width and height are taken from sharp's metadata for the input.
 *
 * @param {*} path - Image input handed straight to `sharp()`. The /ocr route
 *   in this file passes the uploaded file's `data` (presumably a Buffer —
 *   confirm against fastify-file-upload).
 * @returns {Promise<{data: Buffer, width: number, height: number}>}
 */
async function loadImage(path) {
  const pipeline = await sharp(path).ensureAlpha();
  const meta = await pipeline.metadata();
  const pixels = await pipeline.raw().toBuffer();
  return {
    data: pixels,
    width: meta.width,
    height: meta.height,
  };
}
 
/**
 * Convert a path/URL given relative to this module into an absolute
 * file-system path.
 *
 * @param {string} path - Location relative to the current module file.
 * @returns {string} Absolute path on the local file system.
 */
function resolve(path) {
  const moduleRelativeUrl = new URL(path, import.meta.url);
  return fileURLToPath(moduleRelativeUrl);
}
// Load the tesseract-wasm WebAssembly binary (top-level await: this finishes
// before the server below is created).
const wasmBinary = await loadWasmBinary();
// Create the OCR engine from the wasm binary
const engine = await createOCREngine({ wasmBinary });
// Load the Chinese model (chi_sim.traineddata). The path is relative, so it is
// resolved against the process working directory, not this module's directory.
const model = readFileSync("chi_sim.traineddata");
engine.loadModel(model);
 
// Fastify instance with its built-in request logger enabled
const app = fastify({ logger: true });
// Fastify file-upload plugin (the /ocr route reads the upload from req.body.file)
app.register(fileUpload)

// Static-file plugin: serves the simple test page from ./public
app.register(fastifyStatic, {
  root: path.join(__dirname, 'public'),
  prefix: '/', // optional: default '/'
})
// ocr 服务调用
// OCR endpoint: reads the uploaded image from the `file` field of the request
// body (presumably populated by the fastify-file-upload plugin registered
// above), runs it through the shared tesseract engine and replies with
// `{ code, text }`.
app.post('/ocr', async function (req, reply) {
  const file = req.body?.file;
  // Reject requests without an uploaded file explicitly instead of letting
  // `file.data` throw a TypeError that surfaces as an opaque HTTP 500.
  if (!file?.data) {
    return reply.code(400).send({
      code: 400,
      text: '',
      message: 'missing uploaded image in form field "file"',
    });
  }

  // Date.now() returns a number, so calling toLocaleString() on it prints a
  // digit-grouped integer; use a Date object to log a readable timestamp.
  console.log(`starting index`, new Date().toLocaleString());
  const image = await loadImage(file.data);
  engine.loadImage(image);
  const text = engine.getText((progress) => {
    console.log(`\rRecognizing text (${progress}% done)...`);
  });
  console.log(`ending`, new Date().toLocaleString());

  // Returning the reply tells Fastify the async handler has already responded.
  return reply.send({
    code: 200,
    text: text,
  });
})
 
// Start the HTTP server on port 3000, bound to all interfaces. On a startup
// failure the error is logged and the process exits with status 1.
app.listen({ host: "0.0.0.0", port: 3000 }, (err, address) => {
  if (!err) {
    app.log.info(`server listening on ${address}`);
    return;
  }
  app.log.error(err);
  process.exit(1);
});
  • 静态页面
    index.html
 
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OCR Demo</title>
    <style>
        /* Center the upload control and the result panes on the page. */
        body {
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            height: 100vh;
            margin: 0;
            padding: 0;
            background-color: #f0f0f0;
        }
        #file-upload {
            margin-top: 20px;
        }
        /* Image pane and text pane sit side by side. */
        #display-area {
            display: flex;
            justify-content: space-around;
            width: 100%;
        }
        /* Fill the pane's width; height follows the image's aspect ratio. */
        #image-display img {
            width: 100%;
            height: auto;
        }
        #image-display,
        #text-display {
            width: 500px;
            height: 500px;
            overflow: auto;
        }
    </style>
</head>

<body>
    <input type="file" id="file-upload" accept="image/*">
    <div id="display-area">
        <div id="image-display"></div>
        <div id="text-display"></div>
    </div>
    <!-- my.js handles the API call and fills the two panes above. -->
    <script type="module" src="my.js"></script>
</body>
</html>
  • my.js
    处理接口调用进行显示处理
  • docker 集成
 
# Runtime image for the OCR demo server.
FROM node:18.18.2-bullseye-slim
WORKDIR /app
# Copy only the dependency manifests first so the install layer below is
# reused from the build cache when just the application sources change.
COPY package.json /app/package.json
COPY yarn.lock /app/yarn.lock
RUN yarn
# Application code, static test page and the model file read by demo.mjs.
COPY demo.mjs /app/demo.mjs
COPY public/ /app/public
COPY chi_sim.traineddata /app/chi_sim.traineddata
EXPOSE 3000
ENTRYPOINT [ "node","demo.mjs" ]
  • 启动&&效果
    启动
 
yarn dev 或者docker-compose up -d

效果

 

说明

简单 demo 的镜像我已经 push 到 docker hub 了,可以直接使用 dalongrong/tesseract-wasm:ocr-web,启动方式如下

 
docker run -d -p 3000:3000 dalongrong/tesseract-wasm:ocr-web

以上只是一个简单的示例,可以参考调整,目前来说 ocr 识别并不是很快

参考资料

https://fastify.dev/
https://github.com/huangang/fastify-file-upload
https://github.com/tesseract-ocr/tesseract
https://github.com/robertknight/tesseract-wasm
https://github.com/robertknight/tesseract-wasm/tree/main/examples
https://github.com/libvips/libvips
https://github.com/lovell/sharp
https://github.com/rongfengliang/tesseract-wasm-learning
https://flaviocopes.com/fix-dirname-not-defined-es-module-scope/